Merge branch 'develop' into arm_soft_fp_abi
This commit is contained in:
commit
503dcbfde6
|
@ -14,6 +14,21 @@ lapack-3.4.2.tgz
|
||||||
lapack-netlib/make.inc
|
lapack-netlib/make.inc
|
||||||
lapack-netlib/lapacke/include/lapacke_mangling.h
|
lapack-netlib/lapacke/include/lapacke_mangling.h
|
||||||
lapack-netlib/TESTING/testing_results.txt
|
lapack-netlib/TESTING/testing_results.txt
|
||||||
|
lapack-netlib/INSTALL/test*
|
||||||
|
lapack-netlib/TESTING/xeigtstc
|
||||||
|
lapack-netlib/TESTING/xeigtstd
|
||||||
|
lapack-netlib/TESTING/xeigtsts
|
||||||
|
lapack-netlib/TESTING/xeigtstz
|
||||||
|
lapack-netlib/TESTING/xlintstc
|
||||||
|
lapack-netlib/TESTING/xlintstd
|
||||||
|
lapack-netlib/TESTING/xlintstds
|
||||||
|
lapack-netlib/TESTING/xlintstrfc
|
||||||
|
lapack-netlib/TESTING/xlintstrfd
|
||||||
|
lapack-netlib/TESTING/xlintstrfs
|
||||||
|
lapack-netlib/TESTING/xlintstrfz
|
||||||
|
lapack-netlib/TESTING/xlintsts
|
||||||
|
lapack-netlib/TESTING/xlintstz
|
||||||
|
lapack-netlib/TESTING/xlintstzc
|
||||||
*.so
|
*.so
|
||||||
*.so.*
|
*.so.*
|
||||||
*.a
|
*.a
|
||||||
|
@ -69,3 +84,6 @@ test/zblat3
|
||||||
build
|
build
|
||||||
build.*
|
build.*
|
||||||
*.swp
|
*.swp
|
||||||
|
benchmark/*.goto
|
||||||
|
benchmark/smallscaling
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,12 @@ before_install:
|
||||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||||
|
|
||||||
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
script:
|
||||||
|
- set -e
|
||||||
|
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||||
|
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||||
|
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||||
|
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||||
|
|
||||||
# whitelist
|
# whitelist
|
||||||
branches:
|
branches:
|
||||||
|
|
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4)
|
||||||
project(OpenBLAS)
|
project(OpenBLAS)
|
||||||
set(OpenBLAS_MAJOR_VERSION 0)
|
set(OpenBLAS_MAJOR_VERSION 0)
|
||||||
set(OpenBLAS_MINOR_VERSION 2)
|
set(OpenBLAS_MINOR_VERSION 2)
|
||||||
set(OpenBLAS_PATCH_VERSION 16.dev)
|
set(OpenBLAS_PATCH_VERSION 20.dev)
|
||||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||||
|
|
||||||
enable_language(ASM)
|
enable_language(ASM)
|
||||||
|
@ -30,10 +30,20 @@ set(NO_LAPACK 1)
|
||||||
set(NO_LAPACKE 1)
|
set(NO_LAPACKE 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(BUILD_DEBUG)
|
if(CMAKE_CONFIGURATION_TYPES) # multiconfig generator?
|
||||||
set(CMAKE_BUILD_TYPE Debug)
|
set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
|
||||||
|
set(CMAKE_BUILD_TYPE
|
||||||
|
Debug Debug
|
||||||
|
Release Release
|
||||||
|
)
|
||||||
else()
|
else()
|
||||||
set(CMAKE_BUILD_TYPE Release)
|
if( NOT CMAKE_BUILD_TYPE )
|
||||||
|
if(BUILD_DEBUG)
|
||||||
|
set(CMAKE_BUILD_TYPE Debug)
|
||||||
|
else()
|
||||||
|
set(CMAKE_BUILD_TYPE Release)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(BUILD_WITHOUT_CBLAS)
|
if(BUILD_WITHOUT_CBLAS)
|
||||||
|
@ -45,8 +55,8 @@ endif()
|
||||||
|
|
||||||
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
|
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
|
||||||
|
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
|
||||||
|
|
||||||
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
|
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
|
||||||
|
|
||||||
|
@ -54,10 +64,6 @@ if (NOT DYNAMIC_ARCH)
|
||||||
list(APPEND BLASDIRS kernel)
|
list(APPEND BLASDIRS kernel)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (DEFINED UTEST_CHECK)
|
|
||||||
set(SANITY_CHECK 1)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (DEFINED SANITY_CHECK)
|
if (DEFINED SANITY_CHECK)
|
||||||
list(APPEND BLASDIRS reference)
|
list(APPEND BLASDIRS reference)
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -110,6 +116,10 @@ if (${NO_STATIC} AND ${NO_SHARED})
|
||||||
message(FATAL_ERROR "Neither static nor shared are enabled.")
|
message(FATAL_ERROR "Neither static nor shared are enabled.")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
#Set default output directory
|
||||||
|
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
|
||||||
|
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
|
||||||
|
|
||||||
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
|
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
|
||||||
set(TARGET_OBJS "")
|
set(TARGET_OBJS "")
|
||||||
foreach (SUBDIR ${SUBDIRS})
|
foreach (SUBDIR ${SUBDIRS})
|
||||||
|
@ -123,9 +133,9 @@ endforeach ()
|
||||||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
|
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
|
||||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
||||||
if (NOT NOFORTRAN AND NOT NO_LAPACK)
|
if (NOT NOFORTRAN AND NOT NO_LAPACK)
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
|
||||||
if (NOT NO_LAPACKE)
|
if (NOT NO_LAPACKE)
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
@ -137,22 +147,36 @@ endif()
|
||||||
# add objects to the openblas lib
|
# add objects to the openblas lib
|
||||||
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||||
|
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
|
||||||
|
|
||||||
|
# Set output for libopenblas
|
||||||
|
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||||
|
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
|
||||||
|
|
||||||
|
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
|
||||||
|
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
|
||||||
|
|
||||||
|
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
|
||||||
|
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
|
||||||
|
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
enable_testing()
|
||||||
|
add_subdirectory(utest)
|
||||||
|
|
||||||
if(NOT MSVC)
|
if(NOT MSVC)
|
||||||
#only build shared library for MSVC
|
#only build shared library for MSVC
|
||||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
|
|
||||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
|
|
||||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
|
||||||
|
|
||||||
if(SMP)
|
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
|
||||||
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
|
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
|
||||||
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
|
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
||||||
|
|
||||||
|
if(SMP)
|
||||||
|
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
|
||||||
|
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#build test and ctest
|
#build test and ctest
|
||||||
enable_testing()
|
|
||||||
add_subdirectory(test)
|
add_subdirectory(test)
|
||||||
if(NOT NO_CBLAS)
|
if(NOT NO_CBLAS)
|
||||||
add_subdirectory(ctest)
|
add_subdirectory(ctest)
|
||||||
|
@ -188,3 +212,27 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||||
#endif
|
#endif
|
||||||
# @touch lib.grd
|
# @touch lib.grd
|
||||||
|
|
||||||
|
# Install project
|
||||||
|
|
||||||
|
# Install libraries
|
||||||
|
install(TARGETS ${OpenBLAS_LIBNAME}
|
||||||
|
RUNTIME DESTINATION bin
|
||||||
|
ARCHIVE DESTINATION lib
|
||||||
|
LIBRARY DESTINATION lib )
|
||||||
|
|
||||||
|
# Install include files
|
||||||
|
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.h")
|
||||||
|
install (FILES ${INCLUDE_FILES} DESTINATION include)
|
||||||
|
|
||||||
|
if(NOT MSVC)
|
||||||
|
install (TARGETS ${OpenBLAS_LIBNAME}_static DESTINATION lib)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
include(FindPkgConfig QUIET)
|
||||||
|
if(PKG_CONFIG_FOUND)
|
||||||
|
set(prefix ${CMAKE_INSTALL_PREFIX})
|
||||||
|
set(libdir ${CMAKE_INSTALL_PREFIX}/lib)
|
||||||
|
set(includedir ${CMAKE_INSTALL_PREFIX}/include)
|
||||||
|
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY)
|
||||||
|
install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION lib/pkgconfig/)
|
||||||
|
endif()
|
||||||
|
|
|
@ -121,6 +121,17 @@ In chronological order:
|
||||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||||
ARMv8 support.
|
ARMv8 support.
|
||||||
|
|
||||||
|
* Jerome Robert <jeromerobert@gmx.com>
|
||||||
|
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
|
||||||
|
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
|
||||||
|
* [2015-12-28] Allow forcing the number of parallel make jobs
|
||||||
|
* [2015-12-28] Fix detection of AMD E2-3200
|
||||||
|
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected
|
||||||
|
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
|
||||||
|
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
|
||||||
|
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
|
||||||
|
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)
|
||||||
|
|
||||||
* Dan Kortschak
|
* Dan Kortschak
|
||||||
* [2015-01-07] Added test for drotmg bug #484.
|
* [2015-01-07] Added test for drotmg bug #484.
|
||||||
|
|
||||||
|
@ -130,5 +141,29 @@ In chronological order:
|
||||||
* Martin Koehler <https://github.com/grisuthedragon/>
|
* Martin Koehler <https://github.com/grisuthedragon/>
|
||||||
* [2015-09-07] Improved imatcopy
|
* [2015-09-07] Improved imatcopy
|
||||||
|
|
||||||
* [Your name or handle] <[email or website]>
|
* Ashwin Sekhar T K <https://github.com/ashwinyes/>
|
||||||
* [Date] [Brief summary of your changes]
|
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
|
||||||
|
* [2015-11-20] lapack-test fixes for Cortex-A57
|
||||||
|
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
|
||||||
|
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
|
||||||
|
|
||||||
|
* theoractice <https://github.com/theoractice/>
|
||||||
|
* [2016-03-20] Fix compiler error in VisualStudio with CMake
|
||||||
|
* [2016-03-22] Fix access violation on Windows while static linking
|
||||||
|
|
||||||
|
* Paul Mustière <https://github.com/buffer51/>
|
||||||
|
* [2016-02-04] Fix Android build on ARMV7
|
||||||
|
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8
|
||||||
|
|
||||||
|
* Shivraj Patil <https://github.com/sva-img/>
|
||||||
|
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA
|
||||||
|
|
||||||
|
* Kaustubh Raste <https://github.com/ksraste/>
|
||||||
|
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA
|
||||||
|
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA
|
||||||
|
|
||||||
|
* Abdelrauf <https://github.com/quickwritereader>
|
||||||
|
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
|
||||||
|
* [2017-02-26] ztrmm kernel for IBM z13
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,99 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.2.19
|
||||||
|
1-Sep-2016
|
||||||
|
common:
|
||||||
|
* Improved cross compiling.
|
||||||
|
* Fix the bug on musl libc.
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Optimize BLAS on Power8
|
||||||
|
* Fixed Julia+OpenBLAS bugs on Power8
|
||||||
|
|
||||||
|
MIPS:
|
||||||
|
* Optimize BLAS on MIPS P5600 and I6400 (Thanks, Shivraj Patil, Kaustubh Raste)
|
||||||
|
|
||||||
|
ARM:
|
||||||
|
* Improved on ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
|
||||||
|
|
||||||
|
|
||||||
|
====================================================================
|
||||||
|
Version 0.2.18
|
||||||
|
12-Apr-2016
|
||||||
|
common:
|
||||||
|
* If you set the MAKE_NB_JOBS flag to a value less than or equal to zero,
|
||||||
|
make will run without -j.
|
||||||
|
|
||||||
|
x86/x86_64:
|
||||||
|
* Support building Visual Studio static library. (#813, Thanks, theoractice)
|
||||||
|
* Fix bugs to pass buildbot CI tests (http://build.openblas.net)
|
||||||
|
|
||||||
|
ARM:
|
||||||
|
* Provide DGEMM 8x4 kernel for Cortex-A57 (Thanks, Ashwin Sekhar T K)
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Optimize S and C BLAS3 on Power8
|
||||||
|
* Optimize BLAS2/1 on Power8
|
||||||
|
|
||||||
|
====================================================================
|
||||||
|
Version 0.2.17
|
||||||
|
20-Mar-2016
|
||||||
|
common:
|
||||||
|
* Enable BUILD_LAPACK_DEPRECATED=1 by default.
|
||||||
|
|
||||||
|
====================================================================
|
||||||
|
Version 0.2.16
|
||||||
|
15-Mar-2016
|
||||||
|
common:
|
||||||
|
* Avoid potential getenv segfault. (#716)
|
||||||
|
* Import LAPACK svn bugfix #142-#147,#150-#155
|
||||||
|
|
||||||
|
x86/x86_64:
|
||||||
|
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller
|
||||||
|
* Fix bug with scipy linalg test.
|
||||||
|
|
||||||
|
ARM:
|
||||||
|
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Optimize D and Z BLAS3 functions for Power8.
|
||||||
|
|
||||||
|
====================================================================
|
||||||
|
Version 0.2.16.rc1
|
||||||
|
23-Feb-2016
|
||||||
|
common:
|
||||||
|
* Upgrade LAPACK to 3.6.0 version.
|
||||||
|
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build
|
||||||
|
LAPACK deprecated functions.
|
||||||
|
* Add MAKE_NB_JOBS option in Makefile.
|
||||||
|
Force number of make jobs. This is particularly
|
||||||
|
useful when using distcc. (#735. Thanks, Jerome Robert.)
|
||||||
|
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor).
|
||||||
|
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert)
|
||||||
|
* Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert)
|
||||||
|
* Let openblas_get_num_threads return the number of active threads.
|
||||||
|
(#760. Thanks, Jerome Robert)
|
||||||
|
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen)
|
||||||
|
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey)
|
||||||
|
* Update scipy benchmark script. (#745. Thanks, John Kirkham)
|
||||||
|
|
||||||
|
x86/x86_64:
|
||||||
|
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller.
|
||||||
|
* Detect Intel Avoton.
|
||||||
|
* Detect AMD Trinity, Richland, E2-3200.
|
||||||
|
* Fix gemv performance bug on Mac OSX Intel Haswell.
|
||||||
|
* Fix some bugs with CMake and Visual Studio
|
||||||
|
|
||||||
|
ARM:
|
||||||
|
* Support and optimize Cortex-A57 AArch64.
|
||||||
|
(#686. Thanks, Ashwin Sekhar TK)
|
||||||
|
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere)
|
||||||
|
* Update ARMV6 kernels.
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Fix detection of POWER architecture
|
||||||
|
(#684. Thanks, Sebastien Villemot)
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.2.15
|
Version 0.2.15
|
||||||
27-Oct-2015
|
27-Oct-2015
|
||||||
|
|
47
Makefile
47
Makefile
|
@ -7,10 +7,6 @@ ifneq ($(DYNAMIC_ARCH), 1)
|
||||||
BLASDIRS += kernel
|
BLASDIRS += kernel
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef UTEST_CHECK
|
|
||||||
SANITY_CHECK = 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef SANITY_CHECK
|
ifdef SANITY_CHECK
|
||||||
BLASDIRS += reference
|
BLASDIRS += reference
|
||||||
endif
|
endif
|
||||||
|
@ -85,22 +81,22 @@ endif
|
||||||
|
|
||||||
shared :
|
shared :
|
||||||
ifndef NO_SHARED
|
ifndef NO_SHARED
|
||||||
ifeq ($(OSNAME), Linux)
|
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
|
||||||
@$(MAKE) -C exports so
|
@$(MAKE) -C exports so
|
||||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), FreeBSD)
|
ifeq ($(OSNAME), FreeBSD)
|
||||||
@$(MAKE) -C exports so
|
@$(MAKE) -C exports so
|
||||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), NetBSD)
|
ifeq ($(OSNAME), NetBSD)
|
||||||
@$(MAKE) -C exports so
|
@$(MAKE) -C exports so
|
||||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), Darwin)
|
ifeq ($(OSNAME), Darwin)
|
||||||
@$(MAKE) -C exports dyn
|
@$(MAKE) -C exports dyn
|
||||||
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
@$(MAKE) -C exports dll
|
@$(MAKE) -C exports dll
|
||||||
|
@ -112,21 +108,15 @@ endif
|
||||||
|
|
||||||
tests :
|
tests :
|
||||||
ifndef NOFORTRAN
|
ifndef NOFORTRAN
|
||||||
ifndef TARGET
|
|
||||||
ifndef CROSS
|
|
||||||
touch $(LIBNAME)
|
touch $(LIBNAME)
|
||||||
ifndef NO_FBLAS
|
ifndef NO_FBLAS
|
||||||
$(MAKE) -C test all
|
$(MAKE) -C test all
|
||||||
ifdef UTEST_CHECK
|
|
||||||
$(MAKE) -C utest all
|
$(MAKE) -C utest all
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
ifndef NO_CBLAS
|
ifndef NO_CBLAS
|
||||||
$(MAKE) -C ctest all
|
$(MAKE) -C ctest all
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
libs :
|
libs :
|
||||||
ifeq ($(CORE), UNKOWN)
|
ifeq ($(CORE), UNKOWN)
|
||||||
|
@ -249,16 +239,23 @@ ifndef NOFORTRAN
|
||||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
ifeq ($(FC), gfortran)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
|
ifeq ($(OSNAME), WINNT)
|
||||||
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
else
|
||||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
endif
|
||||||
|
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
|
||||||
|
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
|
@ -281,18 +278,28 @@ lapack-timing : large.tgz timing.tgz
|
||||||
ifndef NOFORTRAN
|
ifndef NOFORTRAN
|
||||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
lapack-test :
|
lapack-test :
|
||||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||||
|
ifneq ($(CROSS), 1)
|
||||||
|
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||||
|
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||||
|
endif
|
||||||
|
|
||||||
|
lapack-runtest:
|
||||||
|
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||||
|
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||||
|
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||||
|
|
||||||
|
|
||||||
blas-test:
|
blas-test:
|
||||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||||
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
|
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# ifeq logical or
|
ifeq logical or
|
||||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
||||||
ifeq ($(OSNAME), Android)
|
ifeq ($(OSNAME), Android)
|
||||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||||
|
@ -11,9 +11,14 @@ endif
|
||||||
|
|
||||||
ifeq ($(CORE), ARMV7)
|
ifeq ($(CORE), ARMV7)
|
||||||
ifeq ($(OSNAME), Android)
|
ifeq ($(OSNAME), Android)
|
||||||
|
ifeq ($(ARM_SOFTFP), 1)
|
||||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||||
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||||
else
|
else
|
||||||
|
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
|
||||||
|
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
|
||||||
|
endif
|
||||||
|
else
|
||||||
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||||
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||||
endif
|
endif
|
||||||
|
@ -29,5 +34,3 @@ ifeq ($(CORE), ARMV5)
|
||||||
CCOMMON_OPT += -marm -march=armv5
|
CCOMMON_OPT += -marm -march=armv5
|
||||||
FCOMMON_OPT += -marm -march=armv5
|
FCOMMON_OPT += -marm -march=armv5
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -9,3 +9,17 @@ CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||||
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), VULCAN)
|
||||||
|
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
|
||||||
|
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), THUNDERX)
|
||||||
|
CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
|
||||||
|
FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), THUNDERX2T99)
|
||||||
|
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
|
||||||
|
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
|
||||||
|
endif
|
||||||
|
|
115
Makefile.install
115
Makefile.install
|
@ -12,6 +12,7 @@ OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
||||||
|
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
|
||||||
|
|
||||||
.PHONY : install
|
.PHONY : install
|
||||||
.NOTPARALLEL : install
|
.NOTPARALLEL : install
|
||||||
|
@ -20,110 +21,122 @@ lib.grd :
|
||||||
$(error OpenBLAS: Please run "make" firstly)
|
$(error OpenBLAS: Please run "make" firstly)
|
||||||
|
|
||||||
install : lib.grd
|
install : lib.grd
|
||||||
@-mkdir -p $(DESTDIR)$(PREFIX)
|
@-mkdir -p "$(DESTDIR)$(PREFIX)"
|
||||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
@-mkdir -p "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)"
|
||||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@-mkdir -p "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
@-mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
@-mkdir -p "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)"
|
||||||
|
@-mkdir -p "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||||
#for inc
|
#for inc
|
||||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@echo \#ifndef OPENBLAS_CONFIG_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@echo \#define OPENBLAS_CONFIG_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||||
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@cat openblas_config_template.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||||
|
|
||||||
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
@echo \#ifndef OPENBLAS_F77BLAS_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||||
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
@echo \#define OPENBLAS_F77BLAS_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||||
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
@echo \#include \"openblas_config.h\" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||||
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
@cat common_interface.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||||
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
@echo \#endif >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||||
|
|
||||||
ifndef NO_CBLAS
|
ifndef NO_CBLAS
|
||||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||||
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h
|
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef NO_LAPACKE
|
ifndef NO_LAPACKE
|
||||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#for install static library
|
#for install static library
|
||||||
ifndef NO_STATIC
|
ifndef NO_STATIC
|
||||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||||
endif
|
endif
|
||||||
#for install shared library
|
#for install shared library
|
||||||
ifndef NO_SHARED
|
ifndef NO_SHARED
|
||||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||||
ifeq ($(OSNAME), Linux)
|
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
|
||||||
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), FreeBSD)
|
ifeq ($(OSNAME), FreeBSD)
|
||||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), NetBSD)
|
ifeq ($(OSNAME), NetBSD)
|
||||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), Darwin)
|
ifeq ($(OSNAME), Darwin)
|
||||||
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
|
@-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
|
||||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||||
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@-cp $(LIBDLLNAME).a "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
#Generating openblas.pc
|
||||||
|
@echo Generating openblas.pc in $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)
|
||||||
|
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||||
|
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||||
|
@echo 'version='$(VERSION) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||||
|
@echo 'extralib='$(EXTRALIB) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||||
|
@cat openblas.pc.in >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||||
|
|
||||||
|
|
||||||
#Generating OpenBLASConfig.cmake
|
#Generating OpenBLASConfig.cmake
|
||||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||||
|
|
||||||
ifndef NO_SHARED
|
ifndef NO_SHARED
|
||||||
#ifeq logical or
|
#ifeq logical or
|
||||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
||||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), Darwin)
|
ifeq ($(OSNAME), Darwin)
|
||||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
#only static
|
#only static
|
||||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||||
endif
|
endif
|
||||||
#Generating OpenBLASConfigVersion.cmake
|
#Generating OpenBLASConfigVersion.cmake
|
||||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||||
@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
@echo "else ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||||
@echo " endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||||
@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||||
@echo Install OK!
|
@echo Install OK!
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
ifdef BINARY64
|
||||||
|
else
|
||||||
|
endif
|
|
@ -1,4 +1,26 @@
|
||||||
# CCOMMON_OPT += -DALLOC_SHM
|
|
||||||
|
ifdef USE_THREAD
|
||||||
|
ifeq ($(USE_THREAD), 0)
|
||||||
|
USE_OPENMP = 0
|
||||||
|
else
|
||||||
|
USE_OPENMP = 1
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
USE_OPENMP = 1
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ifeq ($(CORE), POWER8)
|
||||||
|
ifeq ($(USE_OPENMP), 1)
|
||||||
|
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
|
else
|
||||||
|
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
|
||||||
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
FLAMEPATH = $(HOME)/flame/lib
|
FLAMEPATH = $(HOME)/flame/lib
|
||||||
|
|
||||||
|
@ -16,6 +38,16 @@ else
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
#Either uncomment below line or run make with `USE_MASS=1` to enable support of MASS library
|
||||||
|
#USE_MASS = 1
|
||||||
|
|
||||||
|
ifeq ($(USE_MASS), 1)
|
||||||
|
# Path to MASS libs, change it if the libs are installed at any other location
|
||||||
|
MASSPATH = /opt/ibm/xlmass/8.1.3/lib
|
||||||
|
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS
|
||||||
|
EXTRALIB += -L$(MASSPATH) -lmass -lmassvp8 -lmass_simdp8
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,14 +17,26 @@ ifdef CPUIDEMU
|
||||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), P5600)
|
||||||
|
TARGET_FLAGS = -mips32r5
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), I6400)
|
||||||
|
TARGET_FLAGS = -mips64r6
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), P6600)
|
||||||
|
TARGET_FLAGS = -mips64r6
|
||||||
|
endif
|
||||||
|
|
||||||
all: getarch_2nd
|
all: getarch_2nd
|
||||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||||
|
|
||||||
config.h : c_check f_check getarch
|
config.h : c_check f_check getarch
|
||||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC)
|
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS)
|
||||||
ifneq ($(ONLY_CBLAS), 1)
|
ifneq ($(ONLY_CBLAS), 1)
|
||||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC)
|
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
|
||||||
else
|
else
|
||||||
#When we only build CBLAS, we set NOFORTRAN=2
|
#When we only build CBLAS, we set NOFORTRAN=2
|
||||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.2.16.dev
|
VERSION = 0.2.20.dev
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
@ -52,6 +52,7 @@ VERSION = 0.2.16.dev
|
||||||
# USE_THREAD = 0
|
# USE_THREAD = 0
|
||||||
|
|
||||||
# If you're going to use this library with OpenMP, please comment it in.
|
# If you're going to use this library with OpenMP, please comment it in.
|
||||||
|
# This flag is always set for POWER8. Don't modify the flag
|
||||||
# USE_OPENMP = 1
|
# USE_OPENMP = 1
|
||||||
|
|
||||||
# You can define maximum number of threads. Basically it should be
|
# You can define maximum number of threads. Basically it should be
|
||||||
|
@ -79,6 +80,9 @@ VERSION = 0.2.16.dev
|
||||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
||||||
# NO_LAPACKE = 1
|
# NO_LAPACKE = 1
|
||||||
|
|
||||||
|
# Build LAPACK Deprecated functions since LAPACK 3.6.0
|
||||||
|
BUILD_LAPACK_DEPRECATED = 1
|
||||||
|
|
||||||
# If you want to use legacy threaded Level 3 implementation.
|
# If you want to use legacy threaded Level 3 implementation.
|
||||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||||
|
|
||||||
|
@ -108,6 +112,13 @@ NO_AFFINITY = 1
|
||||||
# Don't use parallel make.
|
# Don't use parallel make.
|
||||||
# NO_PARALLEL_MAKE = 1
|
# NO_PARALLEL_MAKE = 1
|
||||||
|
|
||||||
|
# Force number of make jobs. The default is the number of logical CPU of the host.
|
||||||
|
# This is particularly useful when using distcc.
|
||||||
|
# A negative value will disable adding a -j flag to make, allowing the use of a parent
|
||||||
|
# make -j value. This is useful to call OpenBLAS make from another project
|
||||||
|
# makefile
|
||||||
|
# MAKE_NB_JOBS = 2
|
||||||
|
|
||||||
# If you would like to know minute performance report of GotoBLAS.
|
# If you would like to know minute performance report of GotoBLAS.
|
||||||
# FUNCTION_PROFILE = 1
|
# FUNCTION_PROFILE = 1
|
||||||
|
|
||||||
|
@ -138,19 +149,17 @@ NO_AFFINITY = 1
|
||||||
# slow (Not implemented yet).
|
# slow (Not implemented yet).
|
||||||
# SANITY_CHECK = 1
|
# SANITY_CHECK = 1
|
||||||
|
|
||||||
# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable
|
|
||||||
# SANITY_CHECK to compare the result with reference BLAS.
|
|
||||||
# UTEST_CHECK = 1
|
|
||||||
|
|
||||||
# The installation directory.
|
# The installation directory.
|
||||||
# PREFIX = /opt/OpenBLAS
|
# PREFIX = /opt/OpenBLAS
|
||||||
|
|
||||||
# Common Optimization Flag;
|
# Common Optimization Flag;
|
||||||
# The default -O2 is enough.
|
# The default -O2 is enough.
|
||||||
|
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
|
||||||
# COMMON_OPT = -O2
|
# COMMON_OPT = -O2
|
||||||
|
|
||||||
# gfortran option for LAPACK
|
# gfortran option for LAPACK
|
||||||
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
|
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
|
||||||
|
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
|
||||||
# FCOMMON_OPT = -frecursive
|
# FCOMMON_OPT = -frecursive
|
||||||
|
|
||||||
# Profiling flags
|
# Profiling flags
|
||||||
|
@ -159,10 +168,11 @@ COMMON_PROF = -pg
|
||||||
# Build Debug version
|
# Build Debug version
|
||||||
# DEBUG = 1
|
# DEBUG = 1
|
||||||
|
|
||||||
# Improve GEMV and GER for small matrices by stack allocation.
|
# Set maximum stack allocation.
|
||||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
|
# The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV
|
||||||
|
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||||
#
|
#
|
||||||
MAX_STACK_ALLOC=2048
|
# MAX_STACK_ALLOC = 0
|
||||||
|
|
||||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||||
# Avoid conflicts with other BLAS libraries, especially when using
|
# Avoid conflicts with other BLAS libraries, especially when using
|
||||||
|
|
|
@ -139,6 +139,10 @@ NO_PARALLEL_MAKE=0
|
||||||
endif
|
endif
|
||||||
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
|
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
|
||||||
|
|
||||||
|
ifdef MAKE_NB_JOBS
|
||||||
|
GETARCH_FLAGS += -DMAKE_NB_JOBS=$(MAKE_NB_JOBS)
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(HOSTCC), loongcc)
|
ifeq ($(HOSTCC), loongcc)
|
||||||
GETARCH_FLAGS += -static
|
GETARCH_FLAGS += -static
|
||||||
endif
|
endif
|
||||||
|
@ -155,7 +159,7 @@ ifndef GOTOBLAS_MAKEFILE
|
||||||
export GOTOBLAS_MAKEFILE = 1
|
export GOTOBLAS_MAKEFILE = 1
|
||||||
|
|
||||||
# Generating Makefile.conf and config.h
|
# Generating Makefile.conf and config.h
|
||||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
|
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
|
||||||
|
|
||||||
ifndef TARGET_CORE
|
ifndef TARGET_CORE
|
||||||
include $(TOPDIR)/Makefile.conf
|
include $(TOPDIR)/Makefile.conf
|
||||||
|
@ -213,7 +217,9 @@ endif
|
||||||
#
|
#
|
||||||
|
|
||||||
ifeq ($(OSNAME), Darwin)
|
ifeq ($(OSNAME), Darwin)
|
||||||
|
ifndef MACOSX_DEPLOYMENT_TARGET
|
||||||
export MACOSX_DEPLOYMENT_TARGET=10.6
|
export MACOSX_DEPLOYMENT_TARGET=10.6
|
||||||
|
endif
|
||||||
MD5SUM = md5 -r
|
MD5SUM = md5 -r
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -292,12 +298,14 @@ endif
|
||||||
ifneq ($(OSNAME), WINNT)
|
ifneq ($(OSNAME), WINNT)
|
||||||
ifneq ($(OSNAME), CYGWIN_NT)
|
ifneq ($(OSNAME), CYGWIN_NT)
|
||||||
ifneq ($(OSNAME), Interix)
|
ifneq ($(OSNAME), Interix)
|
||||||
|
ifneq ($(OSNAME), Android)
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
EXTRALIB += -lpthread
|
EXTRALIB += -lpthread
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
# ifeq logical or
|
# ifeq logical or
|
||||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
||||||
|
@ -324,7 +332,8 @@ ifdef SANITY_CHECK
|
||||||
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef MAX_STACK_ALLOC
|
MAX_STACK_ALLOC ?= 2048
|
||||||
|
ifneq ($(MAX_STACK_ALLOC), 0)
|
||||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -374,7 +383,7 @@ FCOMMON_OPT += -m128bit-long-double
|
||||||
endif
|
endif
|
||||||
ifeq ($(C_COMPILER), CLANG)
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
EXPRECISION = 1
|
EXPRECISION = 1
|
||||||
CCOMMON_OPT += -DEXPRECISION
|
CCOMMON_OPT += -DEXPRECISION
|
||||||
FCOMMON_OPT += -m128bit-long-double
|
FCOMMON_OPT += -m128bit-long-double
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
@ -388,7 +397,7 @@ endif
|
||||||
|
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
|
|
||||||
#check
|
#check
|
||||||
ifeq ($(USE_THREAD), 0)
|
ifeq ($(USE_THREAD), 0)
|
||||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
||||||
endif
|
endif
|
||||||
|
@ -455,7 +464,7 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), mips64)
|
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
||||||
NO_BINARY_MODE = 1
|
NO_BINARY_MODE = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -506,13 +515,16 @@ endif
|
||||||
|
|
||||||
ifdef NO_BINARY_MODE
|
ifdef NO_BINARY_MODE
|
||||||
|
|
||||||
ifeq ($(ARCH), mips64)
|
ifeq ($(ARCH), $(filter $(ARCH),mips64))
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
CCOMMON_OPT += -mabi=64
|
CCOMMON_OPT += -mabi=64
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -mabi=n32
|
CCOMMON_OPT += -mabi=n32
|
||||||
endif
|
endif
|
||||||
BINARY_DEFINED = 1
|
BINARY_DEFINED = 1
|
||||||
|
else ifeq ($(ARCH), $(filter $(ARCH),mips))
|
||||||
|
CCOMMON_OPT += -mabi=32
|
||||||
|
BINARY_DEFINED = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), LOONGSON3A)
|
ifeq ($(CORE), LOONGSON3A)
|
||||||
|
@ -525,6 +537,21 @@ CCOMMON_OPT += -march=mips64
|
||||||
FCOMMON_OPT += -march=mips64
|
FCOMMON_OPT += -march=mips64
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), P5600)
|
||||||
|
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||||
|
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), I6400)
|
||||||
|
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||||
|
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), P6600)
|
||||||
|
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
|
||||||
|
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), AIX)
|
ifeq ($(OSNAME), AIX)
|
||||||
BINARY_DEFINED = 1
|
BINARY_DEFINED = 1
|
||||||
endif
|
endif
|
||||||
|
@ -593,12 +620,14 @@ ifneq ($(NO_LAPACK), 1)
|
||||||
EXTRALIB += -lgfortran
|
EXTRALIB += -lgfortran
|
||||||
endif
|
endif
|
||||||
ifdef NO_BINARY_MODE
|
ifdef NO_BINARY_MODE
|
||||||
ifeq ($(ARCH), mips64)
|
ifeq ($(ARCH), $(filter $(ARCH),mips64))
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
FCOMMON_OPT += -mabi=64
|
FCOMMON_OPT += -mabi=64
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -mabi=n32
|
FCOMMON_OPT += -mabi=n32
|
||||||
endif
|
endif
|
||||||
|
else ifeq ($(ARCH), $(filter $(ARCH),mips))
|
||||||
|
FCOMMON_OPT += -mabi=32
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
|
@ -681,21 +710,7 @@ FCOMMON_OPT += -i8
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifneq ($(ARCH), mips64)
|
|
||||||
ifndef BINARY64
|
|
||||||
FCOMMON_OPT += -m32
|
|
||||||
else
|
|
||||||
FCOMMON_OPT += -m64
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
ifdef BINARY64
|
|
||||||
FCOMMON_OPT += -mabi=64
|
|
||||||
else
|
|
||||||
FCOMMON_OPT += -mabi=n32
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
FCOMMON_OPT += -mp
|
FCOMMON_OPT += -mp
|
||||||
endif
|
endif
|
||||||
|
@ -711,7 +726,7 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), mips64)
|
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
||||||
ifndef BINARY64
|
ifndef BINARY64
|
||||||
FCOMMON_OPT += -n32
|
FCOMMON_OPT += -n32
|
||||||
else
|
else
|
||||||
|
@ -741,7 +756,7 @@ endif
|
||||||
|
|
||||||
ifeq ($(C_COMPILER), OPEN64)
|
ifeq ($(C_COMPILER), OPEN64)
|
||||||
|
|
||||||
ifeq ($(ARCH), mips64)
|
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
||||||
ifndef BINARY64
|
ifndef BINARY64
|
||||||
CCOMMON_OPT += -n32
|
CCOMMON_OPT += -n32
|
||||||
else
|
else
|
||||||
|
@ -963,17 +978,18 @@ ifeq ($(OSNAME), SunOS)
|
||||||
TAR = gtar
|
TAR = gtar
|
||||||
PATCH = gpatch
|
PATCH = gpatch
|
||||||
GREP = ggrep
|
GREP = ggrep
|
||||||
|
AWK = nawk
|
||||||
else
|
else
|
||||||
TAR = tar
|
TAR = tar
|
||||||
PATCH = patch
|
PATCH = patch
|
||||||
GREP = grep
|
GREP = grep
|
||||||
|
AWK = awk
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef MD5SUM
|
ifndef MD5SUM
|
||||||
MD5SUM = md5sum
|
MD5SUM = md5sum
|
||||||
endif
|
endif
|
||||||
|
|
||||||
AWK = awk
|
|
||||||
|
|
||||||
REVISION = -r$(VERSION)
|
REVISION = -r$(VERSION)
|
||||||
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
|
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
|
||||||
|
@ -982,16 +998,25 @@ ifeq ($(DEBUG), 1)
|
||||||
COMMON_OPT += -g
|
COMMON_OPT += -g
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(DEBUG), 1)
|
||||||
|
FCOMMON_OPT += -g
|
||||||
|
endif
|
||||||
|
|
||||||
ifndef COMMON_OPT
|
ifndef COMMON_OPT
|
||||||
COMMON_OPT = -O2
|
COMMON_OPT = -O2
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef FCOMMON_OPT
|
||||||
|
FCOMMON_OPT = -O2 -frecursive
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||||
|
|
||||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||||
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
||||||
#MAKEOVERRIDES =
|
#MAKEOVERRIDES =
|
||||||
|
|
||||||
#For LAPACK Fortran codes.
|
#For LAPACK Fortran codes.
|
||||||
|
@ -1120,6 +1145,8 @@ export HAVE_VFP
|
||||||
export HAVE_VFPV3
|
export HAVE_VFPV3
|
||||||
export HAVE_VFPV4
|
export HAVE_VFPV4
|
||||||
export HAVE_NEON
|
export HAVE_NEON
|
||||||
|
export HAVE_MSA
|
||||||
|
export MSA_FLAGS
|
||||||
export KERNELDIR
|
export KERNELDIR
|
||||||
export FUNCTION_PROFILE
|
export FUNCTION_PROFILE
|
||||||
export TARGET_CORE
|
export TARGET_CORE
|
||||||
|
@ -1181,4 +1208,3 @@ SUNPATH = /opt/sunstudio12.1
|
||||||
else
|
else
|
||||||
SUNPATH = /opt/SUNWspro
|
SUNPATH = /opt/SUNWspro
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
|
||||||
|
ifeq ($(CORE), Z13)
|
||||||
|
CCOMMON_OPT += -march=z13 -mzvector
|
||||||
|
FCOMMON_OPT += -march=z13 -mzvector
|
||||||
|
endif
|
||||||
|
|
37
README.md
37
README.md
|
@ -43,6 +43,35 @@ On X86 box, compile this library for loongson3a CPU with loongcc (based on Open6
|
||||||
|
|
||||||
make DEBUG=1
|
make DEBUG=1
|
||||||
|
|
||||||
|
### Compile with MASS Support on Power CPU (Optional dependency)
|
||||||
|
|
||||||
|
[IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library consists of a set of mathematical functions for C, C++, and
|
||||||
|
Fortran-language applications that are tuned for optimum performance on POWER architectures. OpenBLAS with MASS requires 64-bit, little-endian OS on POWER.
|
||||||
|
The library can be installed as below -
|
||||||
|
|
||||||
|
* On Ubuntu:
|
||||||
|
|
||||||
|
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -
|
||||||
|
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install libxlmass-devel.8.1.3
|
||||||
|
|
||||||
|
* On RHEL/CentOS:
|
||||||
|
|
||||||
|
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key
|
||||||
|
sudo rpm --import repomd.xml.key
|
||||||
|
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo
|
||||||
|
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/
|
||||||
|
sudo yum install libxlmass-devel.8.1.3
|
||||||
|
|
||||||
|
After installing MASS library, compile openblas with USE_MASS=1.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
Compiling on Power8 with MASS support -
|
||||||
|
|
||||||
|
make USE_MASS=1 TARGET=POWER8
|
||||||
|
|
||||||
### Install to the directory (optional)
|
### Install to the directory (optional)
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
@ -75,12 +104,18 @@ Please read GotoBLAS_01Readme.txt
|
||||||
|
|
||||||
#### ARM64:
|
#### ARM64:
|
||||||
- **ARMV8**: Experimental
|
- **ARMV8**: Experimental
|
||||||
|
- **ARM Cortex-A57**: Experimental
|
||||||
|
|
||||||
|
#### IBM zEnterprise System:
|
||||||
|
- **Z13**: blas3 for double
|
||||||
|
|
||||||
|
|
||||||
### Support OS:
|
### Support OS:
|
||||||
- **GNU/Linux**
|
- **GNU/Linux**
|
||||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
- **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
|
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
|
||||||
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
||||||
|
- **Android**: Supported by community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
||||||
|
|
||||||
## Usages
|
## Usages
|
||||||
Link with libopenblas.a or -lopenblas for shared library.
|
Link with libopenblas.a or -lopenblas for shared library.
|
||||||
|
|
|
@ -53,26 +53,34 @@ PPC440
|
||||||
PPC440FP2
|
PPC440FP2
|
||||||
CELL
|
CELL
|
||||||
|
|
||||||
3.MIPS64 CPU:
|
3.MIPS CPU:
|
||||||
|
P5600
|
||||||
|
|
||||||
|
4.MIPS64 CPU:
|
||||||
SICORTEX
|
SICORTEX
|
||||||
LOONGSON3A
|
LOONGSON3A
|
||||||
LOONGSON3B
|
LOONGSON3B
|
||||||
|
I6400
|
||||||
|
P6600
|
||||||
|
|
||||||
4.IA64 CPU:
|
5.IA64 CPU:
|
||||||
ITANIUM2
|
ITANIUM2
|
||||||
|
|
||||||
5.SPARC CPU:
|
6.SPARC CPU:
|
||||||
SPARC
|
SPARC
|
||||||
SPARCV7
|
SPARCV7
|
||||||
|
|
||||||
6.ARM CPU:
|
7.ARM CPU:
|
||||||
CORTEXA15
|
CORTEXA15
|
||||||
CORTEXA9
|
CORTEXA9
|
||||||
ARMV7
|
ARMV7
|
||||||
ARMV6
|
ARMV6
|
||||||
ARMV5
|
ARMV5
|
||||||
|
|
||||||
7.ARM 64-bit CPU:
|
8.ARM 64-bit CPU:
|
||||||
ARMV8
|
ARMV8
|
||||||
CORTEXA57
|
CORTEXA57
|
||||||
|
VULCAN
|
||||||
|
THUNDERX
|
||||||
|
THUNDERX2T99
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,199 @@
|
||||||
|
# Notes on OpenBLAS usage
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
#### Program is Terminated. Because you tried to allocate too many memory regions
|
||||||
|
|
||||||
|
In OpenBLAS, we manage a pool of memory buffers and allocate the number of
|
||||||
|
buffers as the following.
|
||||||
|
```
|
||||||
|
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
|
||||||
|
```
|
||||||
|
This error indicates that the program exceeded the number of buffers.
|
||||||
|
|
||||||
|
Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
|
||||||
|
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
|
||||||
|
`MAX_CPU_NUMBER=NUM_THREADS`.
|
||||||
|
|
||||||
|
#### How can I use OpenBLAS in multi-threaded applications?
|
||||||
|
|
||||||
|
If your application is already multi-threaded, it will conflict with OpenBLAS
|
||||||
|
multi-threading. Thus, you must set OpenBLAS to use single thread in any of the
|
||||||
|
following ways:
|
||||||
|
|
||||||
|
* `export OPENBLAS_NUM_THREADS=1` in the environment variables.
|
||||||
|
* Call `openblas_set_num_threads(1)` in the application at runtime.
|
||||||
|
* Build OpenBLAS single thread version, e.g. `make USE_THREAD=0`
|
||||||
|
|
||||||
|
If the application is parallelized by OpenMP, please use OpenBLAS built with
|
||||||
|
`USE_OPENMP=1`
|
||||||
|
|
||||||
|
#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH
|
||||||
|
|
||||||
|
The environment variable that controls the kernel selection is
|
||||||
|
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`) e.g. `export
|
||||||
|
OPENBLAS_CORETYPE=Haswell` and the function `char* openblas_get_corename()`
|
||||||
|
returns the used target.
|
||||||
|
|
||||||
|
#### How can I disable OpenBLAS threading affinity at runtime?
|
||||||
|
|
||||||
|
You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment
|
||||||
|
variable to disable threading affinity at runtime. For example, before the
|
||||||
|
running,
|
||||||
|
```
|
||||||
|
export OPENBLAS_MAIN_FREE=1
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively, you can disable the affinity feature by enabling `NO_AFFINITY=1`
|
||||||
|
in `Makefile.rule`.
|
||||||
|
|
||||||
|
## Linking with the library
|
||||||
|
|
||||||
|
* Link with shared library
|
||||||
|
|
||||||
|
`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas`
|
||||||
|
|
||||||
|
If the library is multithreaded, please add `-lpthread`. If the library
|
||||||
|
contains LAPACK functions, please add `-lgfortran` or other Fortran libs.
|
||||||
|
|
||||||
|
* Link with static library
|
||||||
|
|
||||||
|
`gcc -o test test.c /your/path/libopenblas.a`
|
||||||
|
|
||||||
|
You can download `test.c` from https://gist.github.com/xianyi/5780018
|
||||||
|
|
||||||
|
On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by
|
||||||
|
default), custom programs statically linked against `libopenblas.a` should also
|
||||||
|
link with the pthread library e.g.:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread
|
||||||
|
```
|
||||||
|
|
||||||
|
Failing to add the `-lpthread` flag will cause errors such as:
|
||||||
|
|
||||||
|
```
|
||||||
|
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory':
|
||||||
|
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock'
|
||||||
|
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock'
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Code examples
|
||||||
|
|
||||||
|
#### Call CBLAS interface
|
||||||
|
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656
|
||||||
|
```
|
||||||
|
#include <cblas.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||||
|
double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||||
|
double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};
|
||||||
|
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);
|
||||||
|
|
||||||
|
for(i=0; i<9; i++)
|
||||||
|
printf("%lf ", C[i]);
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
```
|
||||||
|
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran`
|
||||||
|
|
||||||
|
#### Call BLAS Fortran interface
|
||||||
|
|
||||||
|
This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018
|
||||||
|
|
||||||
|
```
|
||||||
|
#include "stdio.h"
|
||||||
|
#include "stdlib.h"
|
||||||
|
#include "sys/time.h"
|
||||||
|
#include "time.h"
|
||||||
|
|
||||||
|
extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);
|
||||||
|
|
||||||
|
int main(int argc, char* argv[])
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
printf("test!\n");
|
||||||
|
if(argc<4){
|
||||||
|
printf("Input Error\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int m = atoi(argv[1]);
|
||||||
|
int n = atoi(argv[2]);
|
||||||
|
int k = atoi(argv[3]);
|
||||||
|
int sizeofa = m * k;
|
||||||
|
int sizeofb = k * n;
|
||||||
|
int sizeofc = m * n;
|
||||||
|
char ta = 'N';
|
||||||
|
char tb = 'N';
|
||||||
|
double alpha = 1.2;
|
||||||
|
double beta = 0.001;
|
||||||
|
|
||||||
|
struct timeval start,finish;
|
||||||
|
double duration;
|
||||||
|
|
||||||
|
double* A = (double*)malloc(sizeof(double) * sizeofa);
|
||||||
|
double* B = (double*)malloc(sizeof(double) * sizeofb);
|
||||||
|
double* C = (double*)malloc(sizeof(double) * sizeofc);
|
||||||
|
|
||||||
|
srand((unsigned)time(NULL));
|
||||||
|
|
||||||
|
for (i=0; i<sizeofa; i++)
|
||||||
|
A[i] = i%3+1;//(rand()%100)/10.0;
|
||||||
|
|
||||||
|
for (i=0; i<sizeofb; i++)
|
||||||
|
B[i] = i%3+1;//(rand()%100)/10.0;
|
||||||
|
|
||||||
|
for (i=0; i<sizeofc; i++)
|
||||||
|
C[i] = i%3+1;//(rand()%100)/10.0;
|
||||||
|
//#if 0
|
||||||
|
printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
|
||||||
|
gettimeofday(&finish, NULL);
|
||||||
|
|
||||||
|
duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;
|
||||||
|
double gflops = 2.0 * m *n*k;
|
||||||
|
gflops = gflops/duration*1.0e-6;
|
||||||
|
|
||||||
|
FILE *fp;
|
||||||
|
fp = fopen("timeDGEMM.txt", "a");
|
||||||
|
fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, gflops);
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
free(A);
|
||||||
|
free(B);
|
||||||
|
free(C);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a`
|
||||||
|
|
||||||
|
` ./time_dgemm <m> <n> <k> `
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
* Please read the [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
|
||||||
|
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||||
|
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||||
|
* The number of CPUs/Cores should be less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
|
||||||
|
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting out the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||||
|
* On Loongson 3A, `make test` may fail because of a `pthread_create` error (the error code is `EAGAIN`). However, the same test case works when you run it from the shell.
|
||||||
|
|
||||||
|
## BLAS reference manual
|
||||||
|
If you want to understand every BLAS function and definition, please read
|
||||||
|
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm)
|
||||||
|
or [netlib.org](http://netlib.org/blas/)
|
||||||
|
|
||||||
|
Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions)
|
||||||
|
|
||||||
|
## How to reference OpenBLAS.
|
||||||
|
|
||||||
|
You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications).
|
||||||
|
|
||||||
|
Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
version: 0.2.15.{build}
|
version: 0.2.19.{build}
|
||||||
|
|
||||||
#environment:
|
#environment:
|
||||||
|
|
||||||
|
@ -39,4 +39,6 @@ before_build:
|
||||||
- cmake -G "Visual Studio 12 Win64" .
|
- cmake -G "Visual Studio 12 Win64" .
|
||||||
|
|
||||||
test_script:
|
test_script:
|
||||||
- echo Build OK!
|
- echo Running Test
|
||||||
|
- cd c:\projects\OpenBLAS\utest
|
||||||
|
- openblas_utest
|
||||||
|
|
|
@ -33,6 +33,22 @@ LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread
|
||||||
# Apple vecLib
|
# Apple vecLib
|
||||||
LIBVECLIB = -framework Accelerate
|
LIBVECLIB = -framework Accelerate
|
||||||
|
|
||||||
|
ESSL=/opt/ibm/lib
|
||||||
|
#LIBESSL = -lesslsmp $(ESSL)/libxlomp_ser.so.1 $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
|
||||||
|
LIBESSL = -lesslsmp $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
|
||||||
|
|
||||||
|
ifneq ($(NO_LAPACK), 1)
|
||||||
|
GOTO_LAPACK_TARGETS=slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||||
|
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
|
||||||
|
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
|
||||||
|
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
|
||||||
|
csymv.goto zsymv.goto \
|
||||||
|
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
||||||
|
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto
|
||||||
|
else
|
||||||
|
GOTO_LAPACK_TARGETS=
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
|
|
||||||
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||||
|
@ -44,6 +60,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
||||||
sger.goto dger.goto cger.goto zger.goto \
|
sger.goto dger.goto cger.goto zger.goto \
|
||||||
sdot.goto ddot.goto \
|
sdot.goto ddot.goto \
|
||||||
|
srot.goto drot.goto \
|
||||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
||||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
||||||
sswap.goto dswap.goto cswap.goto zswap.goto \
|
sswap.goto dswap.goto cswap.goto zswap.goto \
|
||||||
|
@ -142,31 +159,29 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
||||||
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
||||||
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
|
|
||||||
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
|
||||||
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
|
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
|
||||||
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
|
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
|
||||||
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
|
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
|
||||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
||||||
sger.goto dger.goto cger.goto zger.goto \
|
sger.goto dger.goto cger.goto zger.goto \
|
||||||
sdot.goto ddot.goto cdot.goto zdot.goto \
|
sdot.goto ddot.goto cdot.goto zdot.goto \
|
||||||
|
srot.goto drot.goto \
|
||||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
||||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
||||||
sswap.goto dswap.goto cswap.goto zswap.goto \
|
sswap.goto dswap.goto cswap.goto zswap.goto \
|
||||||
sscal.goto dscal.goto cscal.goto zscal.goto \
|
sscal.goto dscal.goto cscal.goto zscal.goto \
|
||||||
sasum.goto dasum.goto casum.goto zasum.goto \
|
sasum.goto dasum.goto casum.goto zasum.goto \
|
||||||
ssymv.goto dsymv.goto csymv.goto zsymv.goto \
|
ssymv.goto dsymv.goto \
|
||||||
chemv.goto zhemv.goto \
|
chemv.goto zhemv.goto \
|
||||||
chemm.goto zhemm.goto \
|
chemm.goto zhemm.goto \
|
||||||
cherk.goto zherk.goto \
|
cherk.goto zherk.goto \
|
||||||
cher2k.goto zher2k.goto \
|
cher2k.goto zher2k.goto \
|
||||||
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
|
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
|
||||||
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
|
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
|
||||||
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
|
smallscaling \
|
||||||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
isamax.goto idamax.goto icamax.goto izamax.goto \
|
||||||
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
|
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS)
|
||||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto
|
|
||||||
|
|
||||||
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||||
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
||||||
|
@ -219,7 +234,9 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
||||||
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
|
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
|
||||||
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
|
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
|
||||||
spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \
|
spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \
|
||||||
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas
|
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas \
|
||||||
|
isamax.atlas idamax.atlas icamax.atlas izamax.atlas \
|
||||||
|
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto
|
||||||
|
|
||||||
mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||||
scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \
|
scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \
|
||||||
|
@ -252,7 +269,11 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
essl :: sgemm.essl strmm.essl dgemm.essl dtrmm.essl \
|
||||||
|
cgemm.essl ctrmm.essl zgemm.essl ztrmm.essl \
|
||||||
|
slinpack.essl clinpack.essl dlinpack.essl zlinpack.essl \
|
||||||
|
scholesky.essl ccholesky.essl dcholesky.essl zcholesky.essl \
|
||||||
|
strsm.essl dtrsm.essl ctrsm.essl ztrsm.essl
|
||||||
|
|
||||||
veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
|
veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
|
||||||
scholesky.veclib dcholesky.veclib ccholesky.veclib zcholesky.veclib \
|
scholesky.veclib dcholesky.veclib ccholesky.veclib zcholesky.veclib \
|
||||||
|
@ -305,6 +326,9 @@ slinpack.mkl : slinpack.$(SUFFIX)
|
||||||
slinpack.veclib : slinpack.$(SUFFIX)
|
slinpack.veclib : slinpack.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
slinpack.essl : slinpack.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Dlinpack ####################################################
|
##################################### Dlinpack ####################################################
|
||||||
dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME)
|
dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -321,6 +345,9 @@ dlinpack.mkl : dlinpack.$(SUFFIX)
|
||||||
dlinpack.veclib : dlinpack.$(SUFFIX)
|
dlinpack.veclib : dlinpack.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dlinpack.essl : dlinpack.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Clinpack ####################################################
|
##################################### Clinpack ####################################################
|
||||||
|
|
||||||
clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME)
|
clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -338,6 +365,9 @@ clinpack.mkl : clinpack.$(SUFFIX)
|
||||||
clinpack.veclib : clinpack.$(SUFFIX)
|
clinpack.veclib : clinpack.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
clinpack.essl : clinpack.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Zlinpack ####################################################
|
##################################### Zlinpack ####################################################
|
||||||
|
|
||||||
zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME)
|
zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -355,6 +385,9 @@ zlinpack.mkl : zlinpack.$(SUFFIX)
|
||||||
zlinpack.veclib : zlinpack.$(SUFFIX)
|
zlinpack.veclib : zlinpack.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
zlinpack.essl : zlinpack.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Scholesky ###################################################
|
##################################### Scholesky ###################################################
|
||||||
|
|
||||||
scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME)
|
scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -372,6 +405,9 @@ scholesky.mkl : scholesky.$(SUFFIX)
|
||||||
scholesky.veclib : scholesky.$(SUFFIX)
|
scholesky.veclib : scholesky.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
scholesky.essl : scholesky.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Dcholesky ###################################################
|
##################################### Dcholesky ###################################################
|
||||||
|
|
||||||
dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME)
|
dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -389,6 +425,9 @@ dcholesky.mkl : dcholesky.$(SUFFIX)
|
||||||
dcholesky.veclib : dcholesky.$(SUFFIX)
|
dcholesky.veclib : dcholesky.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dcholesky.essl : dcholesky.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Ccholesky ###################################################
|
##################################### Ccholesky ###################################################
|
||||||
|
|
||||||
ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME)
|
ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -406,6 +445,9 @@ ccholesky.mkl : ccholesky.$(SUFFIX)
|
||||||
ccholesky.veclib : ccholesky.$(SUFFIX)
|
ccholesky.veclib : ccholesky.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ccholesky.essl : ccholesky.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
|
||||||
##################################### Zcholesky ###################################################
|
##################################### Zcholesky ###################################################
|
||||||
|
|
||||||
|
@ -424,6 +466,9 @@ zcholesky.mkl : zcholesky.$(SUFFIX)
|
||||||
zcholesky.veclib : zcholesky.$(SUFFIX)
|
zcholesky.veclib : zcholesky.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
zcholesky.essl : zcholesky.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Sgemm ####################################################
|
##################################### Sgemm ####################################################
|
||||||
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
|
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -440,6 +485,9 @@ sgemm.mkl : sgemm.$(SUFFIX)
|
||||||
sgemm.veclib : sgemm.$(SUFFIX)
|
sgemm.veclib : sgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
sgemm.essl : sgemm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Dgemm ####################################################
|
##################################### Dgemm ####################################################
|
||||||
dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME)
|
dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -456,6 +504,9 @@ dgemm.mkl : dgemm.$(SUFFIX)
|
||||||
dgemm.veclib : dgemm.$(SUFFIX)
|
dgemm.veclib : dgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dgemm.essl : dgemm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Cgemm ####################################################
|
##################################### Cgemm ####################################################
|
||||||
|
|
||||||
cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME)
|
cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -473,6 +524,9 @@ cgemm.mkl : cgemm.$(SUFFIX)
|
||||||
cgemm.veclib : cgemm.$(SUFFIX)
|
cgemm.veclib : cgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
cgemm.essl : cgemm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Zgemm ####################################################
|
##################################### Zgemm ####################################################
|
||||||
|
|
||||||
zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME)
|
zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -490,6 +544,9 @@ zgemm.mkl : zgemm.$(SUFFIX)
|
||||||
zgemm.veclib : zgemm.$(SUFFIX)
|
zgemm.veclib : zgemm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
zgemm.essl : zgemm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Ssymm ####################################################
|
##################################### Ssymm ####################################################
|
||||||
ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME)
|
ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -572,6 +629,9 @@ strmm.mkl : strmm.$(SUFFIX)
|
||||||
strmm.veclib : strmm.$(SUFFIX)
|
strmm.veclib : strmm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
strmm.essl : strmm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Dtrmm ####################################################
|
##################################### Dtrmm ####################################################
|
||||||
dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME)
|
dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -588,6 +648,9 @@ dtrmm.mkl : dtrmm.$(SUFFIX)
|
||||||
dtrmm.veclib : dtrmm.$(SUFFIX)
|
dtrmm.veclib : dtrmm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dtrmm.essl : dtrmm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Ctrmm ####################################################
|
##################################### Ctrmm ####################################################
|
||||||
|
|
||||||
ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME)
|
ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -605,6 +668,9 @@ ctrmm.mkl : ctrmm.$(SUFFIX)
|
||||||
ctrmm.veclib : ctrmm.$(SUFFIX)
|
ctrmm.veclib : ctrmm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ctrmm.essl : ctrmm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Ztrmm ####################################################
|
##################################### Ztrmm ####################################################
|
||||||
|
|
||||||
ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME)
|
ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -622,6 +688,9 @@ ztrmm.mkl : ztrmm.$(SUFFIX)
|
||||||
ztrmm.veclib : ztrmm.$(SUFFIX)
|
ztrmm.veclib : ztrmm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ztrmm.essl : ztrmm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Strsm ####################################################
|
##################################### Strsm ####################################################
|
||||||
strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME)
|
strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -638,6 +707,9 @@ strsm.mkl : strsm.$(SUFFIX)
|
||||||
strsm.veclib : strsm.$(SUFFIX)
|
strsm.veclib : strsm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
strsm.essl : strsm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Dtrsm ####################################################
|
##################################### Dtrsm ####################################################
|
||||||
dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME)
|
dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -654,6 +726,9 @@ dtrsm.mkl : dtrsm.$(SUFFIX)
|
||||||
dtrsm.veclib : dtrsm.$(SUFFIX)
|
dtrsm.veclib : dtrsm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dtrsm.essl : dtrsm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Ctrsm ####################################################
|
##################################### Ctrsm ####################################################
|
||||||
|
|
||||||
ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME)
|
ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -671,6 +746,9 @@ ctrsm.mkl : ctrsm.$(SUFFIX)
|
||||||
ctrsm.veclib : ctrsm.$(SUFFIX)
|
ctrsm.veclib : ctrsm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ctrsm.essl : ctrsm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Ztrsm ####################################################
|
##################################### Ztrsm ####################################################
|
||||||
|
|
||||||
ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME)
|
ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -688,6 +766,9 @@ ztrsm.mkl : ztrsm.$(SUFFIX)
|
||||||
ztrsm.veclib : ztrsm.$(SUFFIX)
|
ztrsm.veclib : ztrsm.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ztrsm.essl : ztrsm.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Ssyrk ####################################################
|
##################################### Ssyrk ####################################################
|
||||||
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME)
|
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -1412,6 +1493,39 @@ zdot.mkl : zdot-intel.$(SUFFIX)
|
||||||
zdot.veclib : zdot-intel.$(SUFFIX)
|
zdot.veclib : zdot-intel.$(SUFFIX)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Srot ####################################################
|
||||||
|
srot.goto : srot.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
srot.acml : srot.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
srot.atlas : srot.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
srot.mkl : srot.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
srot.veclib : srot.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Drot ####################################################
|
||||||
|
drot.goto : drot.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
drot.acml : drot.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
drot.atlas : drot.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
drot.mkl : drot.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
drot.veclib : drot.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
|
||||||
##################################### Saxpy ####################################################
|
##################################### Saxpy ####################################################
|
||||||
saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME)
|
saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -1833,6 +1947,63 @@ zgemm3m.mkl : zgemm3m.$(SUFFIX)
|
||||||
zgemm3m.veclib : zgemm3m.$(SUFFIX)
|
zgemm3m.veclib : zgemm3m.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
############################################## ISAMAX ##############################################
|
||||||
|
isamax.goto : isamax.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
isamax.atlas : isamax.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
############################################## IDAMAX ##############################################
|
||||||
|
idamax.goto : idamax.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
idamax.atlas : idamax.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
############################################## ICAMAX ##############################################
|
||||||
|
icamax.goto : icamax.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
icamax.atlas : icamax.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
############################################## IZAMAX ##############################################
|
||||||
|
izamax.goto : izamax.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
izamax.atlas : izamax.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
############################################## SNRM2 ##############################################
|
||||||
|
snrm2.goto : snrm2.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
snrm2.atlas : snrm2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
############################################## DNRM2 ##############################################
|
||||||
|
dnrm2.goto : dnrm2.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
dnrm2.atlas : dnrm2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
############################################## SCNRM2 ##############################################
|
||||||
|
scnrm2.goto : scnrm2.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
scnrm2.atlas : scnrm2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
############################################## DZNRM2 ##############################################
|
||||||
|
dznrm2.goto : dznrm2.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
dznrm2.atlas : dznrm2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
|
||||||
###################################################################################################
|
###################################################################################################
|
||||||
|
|
||||||
slinpack.$(SUFFIX) : linpack.c
|
slinpack.$(SUFFIX) : linpack.c
|
||||||
|
@ -2123,6 +2294,13 @@ cgesv.$(SUFFIX) : gesv.c
|
||||||
zgesv.$(SUFFIX) : gesv.c
|
zgesv.$(SUFFIX) : gesv.c
|
||||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
srot.$(SUFFIX) : rot.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
drot.$(SUFFIX) : rot.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -2133,8 +2311,37 @@ zgemm3m.$(SUFFIX) : gemm3m.c
|
||||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
|
||||||
|
isamax.$(SUFFIX) : iamax.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
idamax.$(SUFFIX) : iamax.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
icamax.$(SUFFIX) : iamax.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
izamax.$(SUFFIX) : iamax.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
|
||||||
|
snrm2.$(SUFFIX) : nrm2.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
dnrm2.$(SUFFIX) : nrm2.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
scnrm2.$(SUFFIX) : nrm2.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
dznrm2.$(SUFFIX) : nrm2.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
|
||||||
|
smallscaling: smallscaling.c ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm -lpthread
|
||||||
|
|
||||||
clean ::
|
clean ::
|
||||||
@rm -f *.goto *.mkl *.acml *.atlas *.veclib
|
@rm -f *.goto *.mkl *.acml *.atlas *.veclib *.essl smallscaling
|
||||||
|
|
||||||
include $(TOPDIR)/Makefile.tail
|
include $(TOPDIR)/Makefile.tail
|
||||||
|
|
||||||
|
|
|
@ -183,9 +183,9 @@ int main(int argc, char *argv[]){
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MBytes\n",
|
" %10.2f MBytes %10.6f sec\n",
|
||||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -184,8 +184,8 @@ int main(int argc, char *argv[]){
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -221,7 +221,7 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -258,7 +258,7 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,192 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Point IAMAX at the izamax / icamax / idamax / isamax entry point that
   matches the COMPLEX and DOUBLE build macros. Any earlier definition of
   IAMAX is discarded first. */
#undef IAMAX

#if defined(COMPLEX) && defined(DOUBLE)
#define IAMAX BLASFUNC(izamax)
#elif defined(COMPLEX)
#define IAMAX BLASFUNC(icamax)
#elif defined(DOUBLE)
#define IAMAX BLASFUNC(idamax)
#else
#define IAMAX BLASFUNC(isamax)
#endif
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)
|
||||||
|
|
||||||
|
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int gettimeofday(struct timeval *tv, void *tz){
|
||||||
|
|
||||||
|
FILETIME ft;
|
||||||
|
unsigned __int64 tmpres = 0;
|
||||||
|
static int tzflag;
|
||||||
|
|
||||||
|
if (NULL != tv)
|
||||||
|
{
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
|
||||||
|
tmpres |= ft.dwHighDateTime;
|
||||||
|
tmpres <<= 32;
|
||||||
|
tmpres |= ft.dwLowDateTime;
|
||||||
|
|
||||||
|
/*converting file time to unix epoch*/
|
||||||
|
tmpres /= 10; /*convert into microseconds*/
|
||||||
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||||
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||||
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||||
|
|
||||||
|
/*
 * malloc() stand-in backed by a SysV shared-memory segment requested with
 * SHM_HUGETLB. The enclosing #if ends in "&& 0", so this is currently
 * compiled out.
 *
 * The segment size is `size` padded by HUGE_PAGESIZE and masked with
 * ~(HUGE_PAGESIZE - 1) (presumably HUGE_PAGESIZE is a power of two --
 * confirm against common.h). On shmget() or shmat() failure a message is
 * printed and the process exits with status 1. The segment is marked with
 * IPC_RMID immediately after it has been attached.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *mapped;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
                      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                      SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment_id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment_id, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment_id, IPC_RMID, 0);

  return mapped;
}
|
||||||
|
|
||||||
|
#define malloc huge_malloc
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
FLOAT *x;
|
||||||
|
blasint m, i;
|
||||||
|
blasint inc_x=1;
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timeval start, stop;
|
||||||
|
double time1,timeg;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||||
|
|
||||||
|
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
|
for(m = from; m <= to; m += step)
|
||||||
|
{
|
||||||
|
|
||||||
|
timeg=0;
|
||||||
|
|
||||||
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
|
|
||||||
|
for (l=0; l<loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
|
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||||
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
|
||||||
|
IAMAX (&m, x, &inc_x);
|
||||||
|
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
|
timeg += time1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
|
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -0,0 +1,193 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Point NRM2 at the dznrm2 / scnrm2 / dnrm2 / snrm2 entry point that
   matches the COMPLEX and DOUBLE build macros. Any earlier definition of
   NRM2 is discarded first. */
#undef NRM2

#if defined(COMPLEX) && defined(DOUBLE)
#define NRM2 BLASFUNC(dznrm2)
#elif defined(COMPLEX)
#define NRM2 BLASFUNC(scnrm2)
#elif defined(DOUBLE)
#define NRM2 BLASFUNC(dnrm2)
#else
#define NRM2 BLASFUNC(snrm2)
#endif
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)
|
||||||
|
|
||||||
|
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int gettimeofday(struct timeval *tv, void *tz){
|
||||||
|
|
||||||
|
FILETIME ft;
|
||||||
|
unsigned __int64 tmpres = 0;
|
||||||
|
static int tzflag;
|
||||||
|
|
||||||
|
if (NULL != tv)
|
||||||
|
{
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
|
||||||
|
tmpres |= ft.dwHighDateTime;
|
||||||
|
tmpres <<= 32;
|
||||||
|
tmpres |= ft.dwLowDateTime;
|
||||||
|
|
||||||
|
/*converting file time to unix epoch*/
|
||||||
|
tmpres /= 10; /*convert into microseconds*/
|
||||||
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||||
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||||
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||||
|
|
||||||
|
/*
 * malloc() stand-in backed by a SysV shared-memory segment requested with
 * SHM_HUGETLB. The enclosing #if ends in "&& 0", so this is currently
 * compiled out.
 *
 * The segment size is `size` padded by HUGE_PAGESIZE and masked with
 * ~(HUGE_PAGESIZE - 1) (presumably HUGE_PAGESIZE is a power of two --
 * confirm against common.h). On shmget() or shmat() failure a message is
 * printed and the process exits with status 1. The segment is marked with
 * IPC_RMID immediately after it has been attached.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *mapped;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
                      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                      SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment_id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment_id, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment_id, IPC_RMID, 0);

  return mapped;
}
|
||||||
|
|
||||||
|
#define malloc huge_malloc
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
FLOAT *x;
|
||||||
|
blasint m, i;
|
||||||
|
blasint inc_x=1;
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timeval start, stop;
|
||||||
|
double time1,timeg;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||||
|
|
||||||
|
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
|
for(m = from; m <= to; m += step)
|
||||||
|
{
|
||||||
|
|
||||||
|
timeg=0;
|
||||||
|
|
||||||
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
|
|
||||||
|
for (l=0; l<loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
|
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||||
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
|
||||||
|
NRM2 (&m, x, &inc_x);
|
||||||
|
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
|
timeg += time1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
|
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -0,0 +1,197 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Point ROT at drot or srot depending on the DOUBLE build macro. Any
   earlier definition of ROT is discarded first (the previous "#undef DOT"
   targeted a macro this file never defines or uses). */
#undef ROT

#ifdef DOUBLE
#define ROT BLASFUNC(drot)
#else
#define ROT BLASFUNC(srot)
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)
|
||||||
|
|
||||||
|
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int gettimeofday(struct timeval *tv, void *tz){
|
||||||
|
|
||||||
|
FILETIME ft;
|
||||||
|
unsigned __int64 tmpres = 0;
|
||||||
|
static int tzflag;
|
||||||
|
|
||||||
|
if (NULL != tv)
|
||||||
|
{
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
|
||||||
|
tmpres |= ft.dwHighDateTime;
|
||||||
|
tmpres <<= 32;
|
||||||
|
tmpres |= ft.dwLowDateTime;
|
||||||
|
|
||||||
|
/*converting file time to unix epoch*/
|
||||||
|
tmpres /= 10; /*convert into microseconds*/
|
||||||
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||||
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||||
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||||
|
|
||||||
|
/*
 * malloc() stand-in backed by a SysV shared-memory segment requested with
 * SHM_HUGETLB. The enclosing #if ends in "&& 0", so this is currently
 * compiled out.
 *
 * The segment size is `size` padded by HUGE_PAGESIZE and masked with
 * ~(HUGE_PAGESIZE - 1) (presumably HUGE_PAGESIZE is a power of two --
 * confirm against common.h). On shmget() or shmat() failure a message is
 * printed and the process exits with status 1. The segment is marked with
 * IPC_RMID immediately after it has been attached.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *mapped;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
                      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                      SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment_id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment_id, NULL, SHM_RND);
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment_id, IPC_RMID, 0);

  return mapped;
}
|
||||||
|
|
||||||
|
#define malloc huge_malloc
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
FLOAT *x, *y;
|
||||||
|
// FLOAT result;
|
||||||
|
blasint m, i;
|
||||||
|
blasint inc_x=1,inc_y=1;
|
||||||
|
FLOAT c[1] = { 2.0 };
|
||||||
|
FLOAT s[1] = { 2.0 };
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timeval start, stop;
|
||||||
|
double time1,timeg;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||||
|
|
||||||
|
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
|
for(m = from; m <= to; m += step)
|
||||||
|
{
|
||||||
|
|
||||||
|
timeg=0;
|
||||||
|
|
||||||
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
|
|
||||||
|
for (l=0; l<loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
|
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||||
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
|
||||||
|
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
||||||
|
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
|
timeg += time1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
|
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -189,9 +189,9 @@ int main(int argc, char *argv[]){
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 1. * (double)m / timeg * 1.e-6, timeg);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,61 +2,54 @@
|
||||||
|
|
||||||
argv <- commandArgs(trailingOnly = TRUE)
|
argv <- commandArgs(trailingOnly = TRUE)
|
||||||
|
|
||||||
nfrom = 128
|
nfrom <- 128
|
||||||
nto = 2048
|
nto <- 2048
|
||||||
nstep = 128
|
nstep <- 128
|
||||||
loops = 1
|
loops <- 1
|
||||||
|
|
||||||
if ( length(argv) > 0 ) {
|
if (length(argv) > 0) {
|
||||||
|
for (z in 1:length(argv)) {
|
||||||
for ( z in 1:length(argv) ) {
|
if (z == 1) {
|
||||||
|
nfrom <- as.numeric(argv[z])
|
||||||
if ( z == 1 ) {
|
} else if (z == 2) {
|
||||||
nfrom <- as.numeric(argv[z])
|
nto <- as.numeric(argv[z])
|
||||||
} else if ( z==2 ) {
|
} else if (z == 3) {
|
||||||
nto <- as.numeric(argv[z])
|
nstep <- as.numeric(argv[z])
|
||||||
} else if ( z==3 ) {
|
} else if (z == 4) {
|
||||||
nstep <- as.numeric(argv[z])
|
loops <- as.numeric(argv[z])
|
||||||
} else if ( z==4 ) {
|
}
|
||||||
loops <- as.numeric(argv[z])
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||||
if ( p != "" ) {
|
if (p != "") {
|
||||||
loops <- as.numeric(p)
|
loops <- as.numeric(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
cat(sprintf(
|
||||||
|
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
|
||||||
|
nfrom,
|
||||||
|
nto,
|
||||||
|
nstep,
|
||||||
|
loops
|
||||||
|
))
|
||||||
cat(sprintf(" SIZE Flops Time\n"))
|
cat(sprintf(" SIZE Flops Time\n"))
|
||||||
|
|
||||||
n = nfrom
|
n <- nfrom
|
||||||
while ( n <= nto ) {
|
while (n <= nto) {
|
||||||
|
A <- matrix(rnorm(n * n), ncol = n, nrow = n)
|
||||||
|
ev <- 0
|
||||||
|
z <- system.time(for (l in 1:loops) {
|
||||||
|
ev <- eigen(A)
|
||||||
|
})
|
||||||
|
|
||||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
mflops <- (26.66 * n * n * n) * loops / (z[3] * 1.0e6)
|
||||||
|
|
||||||
l = 1
|
|
||||||
|
|
||||||
start <- proc.time()[3]
|
st <- sprintf("%.0fx%.0f :", n, n)
|
||||||
|
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||||
|
|
||||||
while ( l <= loops ) {
|
n <- n + nstep
|
||||||
|
|
||||||
ev <- eigen(A)
|
|
||||||
l = l + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
end <- proc.time()[3]
|
|
||||||
timeg = end - start
|
|
||||||
mflops = (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
|
||||||
|
|
||||||
st = sprintf("%.0fx%.0f :",n , n)
|
|
||||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
|
||||||
|
|
||||||
n = n + nstep
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,62 +2,63 @@
|
||||||
|
|
||||||
argv <- commandArgs(trailingOnly = TRUE)
|
argv <- commandArgs(trailingOnly = TRUE)
|
||||||
|
|
||||||
nfrom = 128
|
nfrom <- 128
|
||||||
nto = 2048
|
nto <- 2048
|
||||||
nstep = 128
|
nstep <- 128
|
||||||
loops = 1
|
loops <- 1
|
||||||
|
|
||||||
if ( length(argv) > 0 ) {
|
if (length(argv) > 0) {
|
||||||
|
for (z in 1:length(argv)) {
|
||||||
for ( z in 1:length(argv) ) {
|
if (z == 1) {
|
||||||
|
nfrom <- as.numeric(argv[z])
|
||||||
if ( z == 1 ) {
|
} else if (z == 2) {
|
||||||
nfrom <- as.numeric(argv[z])
|
nto <- as.numeric(argv[z])
|
||||||
} else if ( z==2 ) {
|
} else if (z == 3) {
|
||||||
nto <- as.numeric(argv[z])
|
nstep <- as.numeric(argv[z])
|
||||||
} else if ( z==3 ) {
|
} else if (z == 4) {
|
||||||
nstep <- as.numeric(argv[z])
|
loops <- as.numeric(argv[z])
|
||||||
} else if ( z==4 ) {
|
}
|
||||||
loops <- as.numeric(argv[z])
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||||
if ( p != "" ) {
|
if (p != "") {
|
||||||
loops <- as.numeric(p)
|
loops <- as.numeric(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
cat(sprintf(
|
||||||
|
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
|
||||||
|
nfrom,
|
||||||
|
nto,
|
||||||
|
nstep,
|
||||||
|
loops
|
||||||
|
))
|
||||||
cat(sprintf(" SIZE Flops Time\n"))
|
cat(sprintf(" SIZE Flops Time\n"))
|
||||||
|
|
||||||
n = nfrom
|
n <- nfrom
|
||||||
while ( n <= nto ) {
|
while (n <= nto) {
|
||||||
|
A <- matrix(runif(n * n),
|
||||||
|
ncol = n,
|
||||||
|
nrow = n,
|
||||||
|
byrow = TRUE)
|
||||||
|
B <- matrix(runif(n * n),
|
||||||
|
ncol = n,
|
||||||
|
nrow = n,
|
||||||
|
byrow = TRUE)
|
||||||
|
C <- 1
|
||||||
|
|
||||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
z <- system.time(for (l in 1:loops) {
|
||||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
C <- A %*% B
|
||||||
|
l <- l + 1
|
||||||
l = 1
|
})
|
||||||
|
|
||||||
start <- proc.time()[3]
|
mflops <- (2.0 * n * n * n) * loops / (z[3] * 1.0e6)
|
||||||
|
|
||||||
while ( l <= loops ) {
|
st <- sprintf("%.0fx%.0f :", n, n)
|
||||||
|
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||||
|
|
||||||
C <- A %*% B
|
n <- n + nstep
|
||||||
l = l + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
end <- proc.time()[3]
|
|
||||||
timeg = end - start
|
|
||||||
mflops = ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
|
||||||
|
|
||||||
st = sprintf("%.0fx%.0f :",n , n)
|
|
||||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
|
||||||
|
|
||||||
n = n + nstep
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,62 +2,56 @@
|
||||||
|
|
||||||
argv <- commandArgs(trailingOnly = TRUE)
|
argv <- commandArgs(trailingOnly = TRUE)
|
||||||
|
|
||||||
nfrom = 128
|
nfrom <- 128
|
||||||
nto = 2048
|
nto <- 2048
|
||||||
nstep = 128
|
nstep <- 128
|
||||||
loops = 1
|
loops <- 1
|
||||||
|
|
||||||
if ( length(argv) > 0 ) {
|
if (length(argv) > 0) {
|
||||||
|
for (z in 1:length(argv)) {
|
||||||
for ( z in 1:length(argv) ) {
|
if (z == 1) {
|
||||||
|
nfrom <- as.numeric(argv[z])
|
||||||
if ( z == 1 ) {
|
} else if (z == 2) {
|
||||||
nfrom <- as.numeric(argv[z])
|
nto <- as.numeric(argv[z])
|
||||||
} else if ( z==2 ) {
|
} else if (z == 3) {
|
||||||
nto <- as.numeric(argv[z])
|
nstep <- as.numeric(argv[z])
|
||||||
} else if ( z==3 ) {
|
} else if (z == 4) {
|
||||||
nstep <- as.numeric(argv[z])
|
loops <- as.numeric(argv[z])
|
||||||
} else if ( z==4 ) {
|
}
|
||||||
loops <- as.numeric(argv[z])
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||||
if ( p != "" ) {
|
if (p != "") {
|
||||||
loops <- as.numeric(p)
|
loops <- as.numeric(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
cat(sprintf(
|
||||||
|
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
|
||||||
|
nfrom,
|
||||||
|
nto,
|
||||||
|
nstep,
|
||||||
|
loops
|
||||||
|
))
|
||||||
cat(sprintf(" SIZE Flops Time\n"))
|
cat(sprintf(" SIZE Flops Time\n"))
|
||||||
|
|
||||||
n = nfrom
|
n <- nfrom
|
||||||
while ( n <= nto ) {
|
while (n <= nto) {
|
||||||
|
A <- matrix(rnorm(n * n), ncol = n, nrow = n)
|
||||||
|
B <- matrix(rnorm(n * n), ncol = n, nrow = n)
|
||||||
|
|
||||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
z <- system.time(for (l in 1:loops) {
|
||||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
solve(A, B)
|
||||||
|
})
|
||||||
l = 1
|
|
||||||
|
|
||||||
start <- proc.time()[3]
|
mflops <-
|
||||||
|
(2.0 / 3.0 * n * n * n + 2.0 * n * n * n) * loops / (z[3] * 1.0e6)
|
||||||
|
|
||||||
while ( l <= loops ) {
|
st <- sprintf("%.0fx%.0f :", n, n)
|
||||||
|
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||||
|
|
||||||
solve(A,B)
|
n <- n + nstep
|
||||||
l = l + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
end <- proc.time()[3]
|
|
||||||
timeg = end - start
|
|
||||||
mflops = (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
|
||||||
|
|
||||||
st = sprintf("%.0fx%.0f :",n , n)
|
|
||||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
|
||||||
|
|
||||||
n = n + nstep
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import numpy
|
||||||
|
from numpy import zeros
|
||||||
|
from numpy.random import randn
|
||||||
|
from scipy.linalg import blas
|
||||||
|
|
||||||
|
|
||||||
|
def run_dsyrk(N, l):
|
||||||
|
|
||||||
|
A = randn(N, N).astype('float64', order='F')
|
||||||
|
C = zeros((N, N), dtype='float64', order='F')
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
for i in range(0, l):
|
||||||
|
blas.dsyrk(1.0, A, c=C, overwrite_c=True)
|
||||||
|
end = time.time()
|
||||||
|
|
||||||
|
timediff = (end - start)
|
||||||
|
mflops = (N * N * N) * l / timediff
|
||||||
|
mflops *= 1e-6
|
||||||
|
|
||||||
|
size = "%dx%d" % (N, N)
|
||||||
|
print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
N = 128
|
||||||
|
NMAX = 2048
|
||||||
|
NINC = 128
|
||||||
|
LOOPS = 1
|
||||||
|
|
||||||
|
z = 0
|
||||||
|
for arg in sys.argv:
|
||||||
|
if z == 1:
|
||||||
|
N = int(arg)
|
||||||
|
elif z == 2:
|
||||||
|
NMAX = int(arg)
|
||||||
|
elif z == 3:
|
||||||
|
NINC = int(arg)
|
||||||
|
elif z == 4:
|
||||||
|
LOOPS = int(arg)
|
||||||
|
|
||||||
|
z = z + 1
|
||||||
|
|
||||||
|
if 'OPENBLAS_LOOPS' in os.environ:
|
||||||
|
p = os.environ['OPENBLAS_LOOPS']
|
||||||
|
if p:
|
||||||
|
LOOPS = int(p)
|
||||||
|
|
||||||
|
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||||
|
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||||
|
|
||||||
|
for i in range(N, NMAX + NINC, NINC):
|
||||||
|
run_dsyrk(i, LOOPS)
|
|
@ -0,0 +1,58 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import numpy
|
||||||
|
from numpy import zeros
|
||||||
|
from numpy.random import randn
|
||||||
|
from scipy.linalg import blas
|
||||||
|
|
||||||
|
|
||||||
|
def run_ssyrk(N, l):
|
||||||
|
|
||||||
|
A = randn(N, N).astype('float32', order='F')
|
||||||
|
C = zeros((N, N), dtype='float32', order='F')
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
for i in range(0, l):
|
||||||
|
blas.ssyrk(1.0, A, c=C, overwrite_c=True)
|
||||||
|
end = time.time()
|
||||||
|
|
||||||
|
timediff = (end - start)
|
||||||
|
mflops = (N * N * N) * l / timediff
|
||||||
|
mflops *= 1e-6
|
||||||
|
|
||||||
|
size = "%dx%d" % (N, N)
|
||||||
|
print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
N = 128
|
||||||
|
NMAX = 2048
|
||||||
|
NINC = 128
|
||||||
|
LOOPS = 1
|
||||||
|
|
||||||
|
z = 0
|
||||||
|
for arg in sys.argv:
|
||||||
|
if z == 1:
|
||||||
|
N = int(arg)
|
||||||
|
elif z == 2:
|
||||||
|
NMAX = int(arg)
|
||||||
|
elif z == 3:
|
||||||
|
NINC = int(arg)
|
||||||
|
elif z == 4:
|
||||||
|
LOOPS = int(arg)
|
||||||
|
|
||||||
|
z = z + 1
|
||||||
|
|
||||||
|
if 'OPENBLAS_LOOPS' in os.environ:
|
||||||
|
p = os.environ['OPENBLAS_LOOPS']
|
||||||
|
if p:
|
||||||
|
LOOPS = int(p)
|
||||||
|
|
||||||
|
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||||
|
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||||
|
|
||||||
|
for i in range(N, NMAX + NINC, NINC):
|
||||||
|
run_ssyrk(i, LOOPS)
|
|
@ -0,0 +1,197 @@
|
||||||
|
// run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <cblas.h>
|
||||||
|
#include <omp.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#define MIN_SIZE 5
|
||||||
|
#define MAX_SIZE 60
|
||||||
|
#define NB_SIZE 10
|
||||||
|
|
||||||
|
// number of loop for a 1x1 matrix. Lower it if the test is
|
||||||
|
// too slow on you computer.
|
||||||
|
#define NLOOP 2e7
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int matrix_size;
|
||||||
|
int n_loop;
|
||||||
|
void (* bench_func)();
|
||||||
|
void (* blas_func)();
|
||||||
|
void * (* create_matrix)(int size);
|
||||||
|
} BenchParam;
|
||||||
|
|
||||||
|
void * s_create_matrix(int size) {
|
||||||
|
float * r = malloc(size * sizeof(double));
|
||||||
|
int i;
|
||||||
|
for(i = 0; i < size; i++)
|
||||||
|
r[i] = 1e3 * i / size;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
void * c_create_matrix(int size) {
|
||||||
|
float * r = malloc(size * 2 * sizeof(double));
|
||||||
|
int i;
|
||||||
|
for(i = 0; i < 2 * size; i++)
|
||||||
|
r[i] = 1e3 * i / size;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
void * z_create_matrix(int size) {
|
||||||
|
double * r = malloc(size * 2 * sizeof(double));
|
||||||
|
int i;
|
||||||
|
for(i = 0; i < 2 * size; i++)
|
||||||
|
r[i] = 1e3 * i / size;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
void * d_create_matrix(int size) {
|
||||||
|
double * r = malloc(size * sizeof(double));
|
||||||
|
int i;
|
||||||
|
for(i = 0; i < size; i++)
|
||||||
|
r[i] = 1e3 * i / size;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
void trmv_bench(BenchParam * param)
|
||||||
|
{
|
||||||
|
int i, n;
|
||||||
|
int size = param->matrix_size;
|
||||||
|
n = param->n_loop / size;
|
||||||
|
int one = 1;
|
||||||
|
void * A = param->create_matrix(size * size);
|
||||||
|
void * y = param->create_matrix(size);
|
||||||
|
for(i = 0; i < n; i++) {
|
||||||
|
param->blas_func("U", "N", "N", &size, A, &size, y, &one);
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
free(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gemv_bench(BenchParam * param)
|
||||||
|
{
|
||||||
|
int i, n;
|
||||||
|
int size = param->matrix_size;
|
||||||
|
n = param->n_loop / size;
|
||||||
|
double v = 1.01;
|
||||||
|
int one = 1;
|
||||||
|
void * A = param->create_matrix(size * size);
|
||||||
|
void * y = param->create_matrix(size);
|
||||||
|
for(i = 0; i < n; i++) {
|
||||||
|
param->blas_func("N", &size, &size, &v, A, &size, y, &one, &v, y, &one);
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
free(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ger_bench(BenchParam * param) {
|
||||||
|
int i, n;
|
||||||
|
int size = param->matrix_size;
|
||||||
|
n = param->n_loop / size;
|
||||||
|
double v = 1.01;
|
||||||
|
int one = 1;
|
||||||
|
void * A = param->create_matrix(size * size);
|
||||||
|
void * y = param->create_matrix(size);
|
||||||
|
for(i = 0; i < n; i++) {
|
||||||
|
param->blas_func(&size, &size, &v, y, &one, y, &one, A, &size);
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
free(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
void * pthread_func_wrapper(void * param) {
|
||||||
|
((BenchParam *)param)->bench_func(param);
|
||||||
|
pthread_exit(NULL);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define NB_TESTS 5
|
||||||
|
void * TESTS[4 * NB_TESTS] = {
|
||||||
|
trmv_bench, ztrmv_, z_create_matrix, "ztrmv",
|
||||||
|
gemv_bench, dgemv_, d_create_matrix, "dgemv",
|
||||||
|
gemv_bench, zgemv_, z_create_matrix, "zgemv",
|
||||||
|
ger_bench, dger_, d_create_matrix, "dger",
|
||||||
|
ger_bench, zgerc_, z_create_matrix, "zgerc",
|
||||||
|
};
|
||||||
|
|
||||||
|
inline static double delta_time(struct timespec tick) {
|
||||||
|
struct timespec tock;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &tock);
|
||||||
|
return (tock.tv_sec - tick.tv_sec) + (tock.tv_nsec - tick.tv_nsec) / 1e9;
|
||||||
|
}
|
||||||
|
|
||||||
|
double pthread_bench(BenchParam * param, int nb_threads)
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
return 0;
|
||||||
|
#else
|
||||||
|
BenchParam threaded_param = *param;
|
||||||
|
pthread_t threads[nb_threads];
|
||||||
|
int t, rc;
|
||||||
|
struct timespec tick;
|
||||||
|
threaded_param.n_loop /= nb_threads;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &tick);
|
||||||
|
for(t=0; t<nb_threads; t++){
|
||||||
|
rc = pthread_create(&threads[t], NULL, pthread_func_wrapper, &threaded_param);
|
||||||
|
if (rc){
|
||||||
|
printf("ERROR; return code from pthread_create() is %d\n", rc);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(t=0; t<nb_threads; t++){
|
||||||
|
pthread_join(threads[t], NULL);
|
||||||
|
}
|
||||||
|
return delta_time(tick);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
double seq_bench(BenchParam * param) {
|
||||||
|
struct timespec tick;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &tick);
|
||||||
|
param->bench_func(param);
|
||||||
|
return delta_time(tick);
|
||||||
|
}
|
||||||
|
|
||||||
|
double omp_bench(BenchParam * param) {
|
||||||
|
BenchParam threaded_param = *param;
|
||||||
|
struct timespec tick;
|
||||||
|
int t;
|
||||||
|
int nb_threads = omp_get_max_threads();
|
||||||
|
threaded_param.n_loop /= nb_threads;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &tick);
|
||||||
|
#pragma omp parallel for
|
||||||
|
for(t = 0; t < nb_threads; t ++){
|
||||||
|
param->bench_func(&threaded_param);
|
||||||
|
}
|
||||||
|
return delta_time(tick);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char * argv[]) {
|
||||||
|
double inc_factor = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE);
|
||||||
|
BenchParam param;
|
||||||
|
int test_id;
|
||||||
|
printf ("Running on %d threads\n", omp_get_max_threads());
|
||||||
|
for(test_id = 0; test_id < NB_TESTS; test_id ++) {
|
||||||
|
double size = MIN_SIZE;
|
||||||
|
param.bench_func = TESTS[test_id * 4];
|
||||||
|
param.blas_func = TESTS[test_id * 4 + 1];
|
||||||
|
param.create_matrix = TESTS[test_id * 4 + 2];
|
||||||
|
printf("\nBenchmark of %s\n", (char*)TESTS[test_id * 4 + 3]);
|
||||||
|
param.n_loop = NLOOP;
|
||||||
|
while(size <= MAX_SIZE) {
|
||||||
|
param.matrix_size = (int)(size + 0.5);
|
||||||
|
double seq_time = seq_bench(¶m);
|
||||||
|
double omp_time = omp_bench(¶m);
|
||||||
|
double pthread_time = pthread_bench(¶m, omp_get_max_threads());
|
||||||
|
printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, "
|
||||||
|
"pthread %gs, speedup %g\n",
|
||||||
|
param.matrix_size, seq_time,
|
||||||
|
omp_time, seq_time / omp_time,
|
||||||
|
pthread_time, seq_time / pthread_time);
|
||||||
|
size *= inc_factor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
}
|
|
@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MBytes\n",
|
" %10.2f MBytes %10.6f sec\n",
|
||||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -191,8 +191,8 @@ int main(int argc, char *argv[]){
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6, time1);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -184,8 +184,8 @@ int main(int argc, char *argv[]){
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
65
c_check
65
c_check
|
@ -1,5 +1,8 @@
|
||||||
#!/usr/bin/perl
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
use File::Basename;
|
||||||
|
use File::Temp qw(tempfile);
|
||||||
|
|
||||||
# Checking cross compile
|
# Checking cross compile
|
||||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||||
|
@ -7,7 +10,9 @@ $hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
||||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||||
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
|
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
|
||||||
|
$hostarch = "zarch" if ($hostarch eq "s390x");
|
||||||
|
|
||||||
|
$tmpf = new File::Temp( UNLINK => 1 );
|
||||||
$binary = $ENV{"BINARY"};
|
$binary = $ENV{"BINARY"};
|
||||||
|
|
||||||
$makefile = shift(@ARGV);
|
$makefile = shift(@ARGV);
|
||||||
|
@ -26,14 +31,12 @@ if ($?) {
|
||||||
|
|
||||||
$cross_suffix = "";
|
$cross_suffix = "";
|
||||||
|
|
||||||
if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
|
if (dirname($compiler_name) ne ".") {
|
||||||
if ($1 =~ /(.*-)(.*)/) {
|
$cross_suffix .= dirname($compiler_name) . "/";
|
||||||
$cross_suffix = $1;
|
}
|
||||||
}
|
|
||||||
} else {
|
if (basename($compiler_name) =~ /([^\s]*-)(.*)/) {
|
||||||
if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) {
|
$cross_suffix .= $1;
|
||||||
$cross_suffix = $1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$compiler = "";
|
$compiler = "";
|
||||||
|
@ -63,13 +66,14 @@ $os = Android if ($data =~ /OS_ANDROID/);
|
||||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||||
$architecture = power if ($data =~ /ARCH_POWER/);
|
$architecture = power if ($data =~ /ARCH_POWER/);
|
||||||
$architecture = mips32 if ($data =~ /ARCH_MIPS32/);
|
$architecture = mips if ($data =~ /ARCH_MIPS/);
|
||||||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||||
|
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
||||||
|
|
||||||
$defined = 0;
|
$defined = 0;
|
||||||
|
|
||||||
|
@ -79,7 +83,12 @@ if ($os eq "AIX") {
|
||||||
$defined = 1;
|
$defined = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (($architecture eq "mips32") || ($architecture eq "mips64")) {
|
if ($architecture eq "mips") {
|
||||||
|
$compiler_name .= " -mabi=32";
|
||||||
|
$defined = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($architecture eq "mips64") {
|
||||||
$compiler_name .= " -mabi=n32" if ($binary eq "32");
|
$compiler_name .= " -mabi=n32" if ($binary eq "32");
|
||||||
$compiler_name .= " -mabi=64" if ($binary eq "64");
|
$compiler_name .= " -mabi=64" if ($binary eq "64");
|
||||||
$defined = 1;
|
$defined = 1;
|
||||||
|
@ -89,6 +98,11 @@ if (($architecture eq "arm") || ($architecture eq "arm64")) {
|
||||||
$defined = 1;
|
$defined = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($architecture eq "zarch") {
|
||||||
|
$defined = 1;
|
||||||
|
$binary = 64;
|
||||||
|
}
|
||||||
|
|
||||||
if ($architecture eq "alpha") {
|
if ($architecture eq "alpha") {
|
||||||
$defined = 1;
|
$defined = 1;
|
||||||
$binary = 64;
|
$binary = 64;
|
||||||
|
@ -152,16 +166,35 @@ if ($?) {
|
||||||
die 1;
|
die 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$have_msa = 0;
|
||||||
|
if (($architecture eq "mips") || ($architecture eq "mips64")) {
|
||||||
|
$code = '"addvi.b $w0, $w1, 1"';
|
||||||
|
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
|
||||||
|
print $tmpf "#include <msa.h>\n\n";
|
||||||
|
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
|
||||||
|
|
||||||
|
$args = "$msa_flags -o $tmpf.o -x c $tmpf";
|
||||||
|
my @cmd = ("$compiler_name $args");
|
||||||
|
system(@cmd) == 0;
|
||||||
|
if ($? != 0) {
|
||||||
|
$have_msa = 0;
|
||||||
|
} else {
|
||||||
|
$have_msa = 1;
|
||||||
|
}
|
||||||
|
unlink("$tmpf.o");
|
||||||
|
}
|
||||||
|
|
||||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||||
$architecture = power if ($data =~ /ARCH_POWER/);
|
$architecture = power if ($data =~ /ARCH_POWER/);
|
||||||
$architecture = mips32 if ($data =~ /ARCH_MIPS32/);
|
$architecture = mips if ($data =~ /ARCH_MIPS/);
|
||||||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||||
|
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
||||||
|
|
||||||
$binformat = bin32;
|
$binformat = bin32;
|
||||||
$binformat = bin64 if ($data =~ /BINARY_64/);
|
$binformat = bin64 if ($data =~ /BINARY_64/);
|
||||||
|
@ -209,6 +242,11 @@ $linker_a = "";
|
||||||
$linker_L .= "-Wl,". $flags . " "
|
$linker_L .= "-Wl,". $flags . " "
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($flags =~ /^\--exclude-libs/) {
|
||||||
|
$linker_L .= "-Wl,". $flags . " ";
|
||||||
|
$flags="";
|
||||||
|
}
|
||||||
|
|
||||||
if (
|
if (
|
||||||
($flags =~ /^\-l/)
|
($flags =~ /^\-l/)
|
||||||
&& ($flags !~ /gfortranbegin/)
|
&& ($flags !~ /gfortranbegin/)
|
||||||
|
@ -243,9 +281,11 @@ print MAKEFILE "BINARY64=\n" if $binformat ne bin64;
|
||||||
print MAKEFILE "BINARY32=1\n" if $binformat eq bin32;
|
print MAKEFILE "BINARY32=1\n" if $binformat eq bin32;
|
||||||
print MAKEFILE "BINARY64=1\n" if $binformat eq bin64;
|
print MAKEFILE "BINARY64=1\n" if $binformat eq bin64;
|
||||||
print MAKEFILE "FU=$need_fu\n" if $need_fu ne "";
|
print MAKEFILE "FU=$need_fu\n" if $need_fu ne "";
|
||||||
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross_suffix ne "";
|
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne "";
|
||||||
print MAKEFILE "CROSS=1\n" if $cross != 0;
|
print MAKEFILE "CROSS=1\n" if $cross != 0;
|
||||||
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
|
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
|
||||||
|
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
|
||||||
|
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
|
||||||
|
|
||||||
$os =~ tr/[a-z]/[A-Z]/;
|
$os =~ tr/[a-z]/[A-Z]/;
|
||||||
$architecture =~ tr/[a-z]/[A-Z]/;
|
$architecture =~ tr/[a-z]/[A-Z]/;
|
||||||
|
@ -257,6 +297,7 @@ print CONFFILE "#define C_$compiler\t1\n";
|
||||||
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
|
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
|
||||||
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
|
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
|
||||||
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
||||||
|
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
|
||||||
|
|
||||||
if ($os eq "LINUX") {
|
if ($os eq "LINUX") {
|
||||||
|
|
||||||
|
|
|
@ -14,12 +14,12 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
|
||||||
if (NOT NO_EXPRECISION)
|
if (NOT NO_EXPRECISION)
|
||||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||||
# N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
|
# N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
|
||||||
set(EXPRECISION 1)
|
set(EXPRECISION 1)
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
|
||||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||||
endif ()
|
endif ()
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "Clang")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||||
set(EXPRECISION 1)
|
set(EXPRECISION 1)
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
|
||||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||||
|
@ -28,35 +28,35 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "Intel")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (USE_OPENMP)
|
if (USE_OPENMP)
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "Clang")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||||
message(WARNING "Clang doesn't support OpenMP yet.")
|
message(WARNING "Clang doesn't support OpenMP yet.")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "Intel")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "PGI")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "OPEN64")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||||
set(CEXTRALIB "${CEXTRALIB} -lstdc++")
|
set(CEXTRALIB "${CEXTRALIB} -lstdc++")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -87,7 +87,7 @@ if (${ARCH} STREQUAL "ia64")
|
||||||
set(BINARY_DEFINED 1)
|
set(BINARY_DEFINED 1)
|
||||||
|
|
||||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||||
# EXPRECISION = 1
|
# EXPRECISION = 1
|
||||||
# CCOMMON_OPT += -DEXPRECISION
|
# CCOMMON_OPT += -DEXPRECISION
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -53,7 +53,7 @@ endif()
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
TARGET ${OpenBLAS_LIBNAME} PRE_LINK
|
TARGET ${OpenBLAS_LIBNAME} PRE_LINK
|
||||||
COMMAND perl
|
COMMAND perl
|
||||||
ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
|
ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
|
||||||
COMMENT "Create openblas.def file"
|
COMMENT "Create openblas.def file"
|
||||||
VERBATIM)
|
VERBATIM)
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
set(ALLAUX
|
set(ALLAUX
|
||||||
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
|
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
|
||||||
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
|
ilaprec.f ilatrans.f ilauplo.f iladiag.f iparam2stage.F chla_transtype.f
|
||||||
../INSTALL/ilaver.f ../INSTALL/slamch.f
|
../INSTALL/ilaver.f ../INSTALL/slamch.f
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ set(SCLAUX
|
||||||
)
|
)
|
||||||
|
|
||||||
set(DZLAUX
|
set(DZLAUX
|
||||||
dbdsdc.f
|
dbdsdc.f dbdsvdx.f
|
||||||
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
|
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
|
||||||
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
|
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
|
||||||
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
|
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
|
||||||
|
@ -42,24 +42,32 @@ set(DZLAUX
|
||||||
dsteqr.f dsterf.f dlaisnan.f disnan.f
|
dsteqr.f dsterf.f dlaisnan.f disnan.f
|
||||||
dlartgp.f dlartgs.f
|
dlartgp.f dlartgs.f
|
||||||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
|
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
|
||||||
|
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f
|
||||||
|
dgetsls.f dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f
|
||||||
|
dtplqt2.f dtpmlqt.f dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f
|
||||||
|
dsytf2_rk.f dlasyf_rk.f dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f
|
||||||
|
dsytri_3x.f dsysv_rk.f dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f
|
||||||
|
dsbevx_2stage.f dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f
|
||||||
|
dsyevx_2stage.f dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F
|
||||||
|
dsytrd_sy2sb.f dlarfy.f
|
||||||
)
|
)
|
||||||
|
|
||||||
set(SLASRC
|
set(SLASRC
|
||||||
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
||||||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
||||||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
||||||
sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
||||||
sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
|
sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
|
||||||
sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
|
sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
|
||||||
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
|
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f
|
||||||
sgetc2.f sgetri.f
|
sgetc2.f sgetri.f sgetrf2.f
|
||||||
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
|
sggbak.f sggbal.f sgghd3.f sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f
|
||||||
sggglm.f sgghrd.f sgglse.f sggqrf.f
|
sggglm.f sgghrd.f sgglse.f sggqrf.f
|
||||||
sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
|
sggrqf.f DEPRECATED/sggsvd.f sggsvd3.f DEPRECATED/sggsvp.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
|
||||||
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
||||||
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
||||||
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
||||||
slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
||||||
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
||||||
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
||||||
slapll.f slapmt.f
|
slapll.f slapmt.f
|
||||||
|
@ -69,10 +77,10 @@ set(SLASRC
|
||||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
|
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
|
||||||
slarrv.f slartv.f
|
slarrv.f slartv.f
|
||||||
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
|
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
|
||||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f
|
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f
|
||||||
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
||||||
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
||||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f
|
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f
|
||||||
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
|
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
|
||||||
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
|
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
|
||||||
spbstf.f spbsv.f spbsvx.f
|
spbstf.f spbsv.f spbsvx.f
|
||||||
|
@ -96,8 +104,8 @@ set(SLASRC
|
||||||
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
|
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
|
||||||
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
||||||
stptrs.f
|
stptrs.f
|
||||||
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
||||||
strtrs.f stzrqf.f stzrzf.f sstemr.f
|
strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f
|
||||||
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
||||||
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
||||||
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
||||||
|
@ -106,22 +114,29 @@ set(SLASRC
|
||||||
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
|
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
|
||||||
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
|
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
|
||||||
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
|
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
|
||||||
|
sgelq.f sgelqt.f sgelqt3.f sgemlq.f sgemlqt.f sgemqr.f sgeqr.f sgetsls.f
|
||||||
|
slamswlq.f slamtsqr.f slaswlq.f slatsqr.f stplqt.f stplqt2.f stpmlqt.f
|
||||||
|
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f slasyf_aa.f ssytf2_rk.f slasyf_rk.f
|
||||||
|
ssytrf_rk.f ssytrs_3.f ssycon_3.f ssytri_3.f ssytri_3x.f ssysv_rk.f
|
||||||
|
ssb2st_kernels.f ssbev_2stage.f ssbevd_2stage.f ssbevx_2stage.f
|
||||||
|
ssyev_2stage.f ssyevd_2stage.f ssyevr_2stage.f ssyevx_2stage.f
|
||||||
|
ssygv_2stage.f ssytrd_2stage.f ssytrd_sb2st.F ssytrd_sy2sb.f slarfy.f
|
||||||
)
|
)
|
||||||
|
|
||||||
set(DSLASRC spotrs.f)
|
set(DSLASRC spotrs.f spotrf2.f)
|
||||||
|
|
||||||
set(CLASRC
|
set(CLASRC
|
||||||
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
||||||
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
||||||
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
||||||
cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
||||||
cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
||||||
cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
|
DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
|
||||||
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
|
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
|
||||||
cgesvx.f cgetc2.f cgetri.f
|
cgesvx.f cgetc2.f cgetri.f
|
||||||
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
|
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
|
||||||
cgghrd.f cgglse.f cggqrf.f cggrqf.f
|
cgghrd.f cgglse.f cggqrf.f cggrqf.f
|
||||||
cggsvd.f cggsvp.f
|
DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f
|
||||||
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
||||||
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
||||||
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
||||||
|
@ -138,7 +153,7 @@ set(CLASRC
|
||||||
claed0.f claed7.f claed8.f
|
claed0.f claed7.f claed8.f
|
||||||
claein.f claesy.f claev2.f clags2.f clagtm.f
|
claein.f claesy.f claev2.f clags2.f clagtm.f
|
||||||
clahef.f clahef_rook.f clahqr.f
|
clahef.f clahef_rook.f clahqr.f
|
||||||
clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
||||||
clanhb.f clanhe.f
|
clanhb.f clanhe.f
|
||||||
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
||||||
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
||||||
|
@ -149,7 +164,7 @@ set(CLASRC
|
||||||
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
||||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
||||||
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
||||||
clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
||||||
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
||||||
cposv.f cposvx.f cpstrf.f cpstf2.f
|
cposv.f cposvx.f cpstrf.f cpstf2.f
|
||||||
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
||||||
|
@ -165,8 +180,8 @@ set(CLASRC
|
||||||
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
|
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
|
||||||
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
||||||
ctprfs.f ctptri.f
|
ctprfs.f ctptri.f
|
||||||
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
||||||
ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f
|
ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f
|
||||||
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
||||||
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
|
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
|
||||||
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
||||||
|
@ -178,6 +193,14 @@ set(CLASRC
|
||||||
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
|
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
|
||||||
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
|
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
|
||||||
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
|
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
|
||||||
|
cgelq.f cgelqt.f cgelqt3.f cgemlq.f cgemlqt.f cgemqr.f cgeqr.f cgetsls.f
|
||||||
|
clamswlq.f clamtsqr.f claswlq.f clatsqr.f ctplqt.f ctplqt2.f ctpmlqt.f
|
||||||
|
chesv_aa.f chetrf_aa.f chetrs_aa.f clahef_aa.f csytf2_rk.f clasyf_rk.f
|
||||||
|
csytrf_rk.f csytrs_3.f csycon_3.f csytri_3.f csytri_3x.f csysv_rk.f
|
||||||
|
chetf2_rk.f clahef_rk.f chetrf_rk.f chetrs_3.f checon_3.f chetri_3.f
|
||||||
|
chetri_3x.f chesv_rk.f chb2st_kernels.f chbev_2stage.f chbevd_2stage.f
|
||||||
|
chbevx_2stage.f cheev_2stage.f cheevd_2stage.f cheevr_2stage.f cheevx_2stage.f
|
||||||
|
chegv_2stage.f chetrd_2stage.f chetrd_hb2st.F chetrd_he2hb.f clarfy.f
|
||||||
)
|
)
|
||||||
|
|
||||||
set(ZCLASRC cpotrs.f)
|
set(ZCLASRC cpotrs.f)
|
||||||
|
@ -186,18 +209,18 @@ set(DLASRC
|
||||||
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
||||||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
||||||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
||||||
dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
||||||
dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
|
dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
|
||||||
dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
|
dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
|
||||||
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
|
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f
|
||||||
dgetc2.f dgetri.f
|
dgetc2.f dgetri.f dgetrf2.f
|
||||||
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
|
dggbak.f dggbal.f dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f
|
||||||
dggglm.f dgghrd.f dgglse.f dggqrf.f
|
dggglm.f dgghd3.f dgghrd.f dgglse.f dggqrf.f
|
||||||
dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
|
dggrqf.f dggsvd3.f dggsvp3.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
|
||||||
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
||||||
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
||||||
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
||||||
dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
||||||
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
||||||
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
||||||
dlapll.f dlapmt.f
|
dlapll.f dlapmt.f
|
||||||
|
@ -207,15 +230,15 @@ set(DLASRC
|
||||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
|
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
|
||||||
dlargv.f dlarrv.f dlartv.f
|
dlargv.f dlarrv.f dlartv.f
|
||||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
|
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
|
||||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f
|
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f
|
||||||
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
||||||
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
||||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f
|
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f
|
||||||
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
|
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
|
||||||
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
|
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
|
||||||
dpbstf.f dpbsv.f dpbsvx.f
|
dpbstf.f dpbsv.f dpbsvx.f
|
||||||
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
|
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
|
||||||
dposvx.f dpotrs.f dpstrf.f dpstf2.f
|
dposvx.f dpotrf2.f dpotrs.f dpstrf.f dpstf2.f
|
||||||
dppcon.f dppequ.f
|
dppcon.f dppequ.f
|
||||||
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
|
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
|
||||||
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
|
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
|
||||||
|
@ -234,8 +257,8 @@ set(DLASRC
|
||||||
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
|
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
|
||||||
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
||||||
dtptrs.f
|
dtptrs.f
|
||||||
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
||||||
dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f
|
dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f
|
||||||
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
||||||
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
||||||
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
||||||
|
@ -245,20 +268,28 @@ set(DLASRC
|
||||||
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
|
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
|
||||||
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
|
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
|
||||||
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
|
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
|
||||||
|
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f dgetsls.f
|
||||||
|
dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f dtplqt2.f dtpmlqt.f
|
||||||
|
dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f dsytf2_rk.f dlasyf_rk.f
|
||||||
|
dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f dsytri_3x.f dsysv_rk.f
|
||||||
|
dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f dsbevx_2stage.f
|
||||||
|
dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f dsyevx_2stage.f
|
||||||
|
dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F dsytrd_sy2sb.f dlarfy.f
|
||||||
)
|
)
|
||||||
|
|
||||||
set(ZLASRC
|
set(ZLASRC
|
||||||
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
||||||
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
||||||
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
||||||
zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
|
DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgejsv.f zgelq2.f zgelqf.f
|
||||||
zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
||||||
zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
||||||
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
|
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvj.f zgesvx.f zgetc2.f
|
||||||
zgetri.f
|
zgetri.f zgetrf2.f
|
||||||
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
|
zggbak.f zggbal.f zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f zggglm.f
|
||||||
zgghrd.f zgglse.f zggqrf.f zggrqf.f
|
zgghd3.f zgghrd.f zgglse.f zggqrf.f zggrqf.f
|
||||||
zggsvd.f zggsvp.f
|
DEPRECATED/zggsvd.f zggsvd3.f DEPRECATED/zggsvp.f zggsvp3.f
|
||||||
|
zgsvj0.f zgsvj1.f
|
||||||
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
||||||
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
||||||
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
||||||
|
@ -275,7 +306,7 @@ set(ZLASRC
|
||||||
zlaed0.f zlaed7.f zlaed8.f
|
zlaed0.f zlaed7.f zlaed8.f
|
||||||
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
||||||
zlahef.f zlahef_rook.f zlahqr.f
|
zlahef.f zlahef_rook.f zlahqr.f
|
||||||
zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
||||||
zlangt.f zlanhb.f
|
zlangt.f zlanhb.f
|
||||||
zlanhe.f
|
zlanhe.f
|
||||||
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
||||||
|
@ -287,28 +318,28 @@ set(ZLASRC
|
||||||
zlarfg.f zlarft.f zlarfgp.f
|
zlarfg.f zlarft.f zlarfgp.f
|
||||||
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
||||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
||||||
zlassq.f zlasyf.f zlasyf_rook.f
|
zlassq.f zlasyf.f zlasyf_rook.f zlasy_aa.f
|
||||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f
|
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f
|
||||||
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
||||||
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
||||||
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
|
zposv.f zposvx.f zpotrf2.f zpotrs.f zpstrf.f zpstf2.f
|
||||||
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
|
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
|
||||||
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
|
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
|
||||||
zrot.f zspcon.f zsprfs.f zspsv.f
|
zrot.f zspcon.f zsprfs.f zspsv.f
|
||||||
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
|
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
|
||||||
zstegr.f zstein.f zsteqr.f
|
zstegr.f zstein.f zsteqr.f
|
||||||
zsycon.f
|
zsycon.f zsysv_aa.f
|
||||||
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
|
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
|
||||||
zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
|
zsyswapr.f zsytrs.f zsytrs_aa.f zsytrs2.f zsyconv.f
|
||||||
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
|
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
|
||||||
zsytri_rook.f zsycon_rook.f zsysv_rook.f
|
zsytri_rook.f zsycon_rook.f zsysv_rook.f
|
||||||
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
|
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
|
||||||
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
||||||
ztprfs.f ztptri.f
|
ztprfs.f ztptri.f
|
||||||
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
||||||
ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f
|
ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f
|
||||||
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
||||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
|
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunm22.f zunml2.f
|
||||||
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
||||||
zunmtr.f zupgtr.f
|
zunmtr.f zupgtr.f
|
||||||
zupmtr.f izmax1.f dzsum1.f zstemr.f
|
zupmtr.f izmax1.f dzsum1.f zstemr.f
|
||||||
|
@ -320,6 +351,15 @@ set(ZLASRC
|
||||||
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
|
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
|
||||||
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
|
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
|
||||||
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
|
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
|
||||||
|
zgelq.f zgelqt.f zgelqt3.f zgemlq.f zgemlqt.f zgemqr.f zgeqr.f zgetsls.f
|
||||||
|
zlamswlq.f zlamtsqr.f zlaswlq.f zlatsqr.f ztplqt.f ztplqt2.f ztpmlqt.f
|
||||||
|
zhesv_aa.f zhetrf_aa.f zhetrs_aa.f zlahef_aa.f zsytf2_rk.f zlasyf_rk.f
|
||||||
|
zsytrf_aa.f zsytrf_rk.f zsytrs_3.f zsycon_3.f zsytri_3.f zsytri_3x.f zsysv_rk.f
|
||||||
|
zhetf2_rk.f zlahef_rk.f zhetrf_rk.f zhetrs_3.f zhecon_3.f zhetri_3.f
|
||||||
|
zhetri_3x.f zhesv_rk.f zhb2st_kernels.f zhbev_2stage.f zhbevd_2stage.f
|
||||||
|
zhbevx_2stage.f zheev_2stage.f zheevd_2stage.f zheevr_2stage.f
|
||||||
|
zheevx_2stage.f zhegv_2stage.f zhetrd_2stage.f zhetrd_hb2st.F zhetrd_he2hb.f
|
||||||
|
zlarfy.f
|
||||||
)
|
)
|
||||||
|
|
||||||
set(LA_REL_SRC ${ALLAUX})
|
set(LA_REL_SRC ${ALLAUX})
|
||||||
|
|
4285
cmake/lapacke.cmake
4285
cmake/lapacke.cmake
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,10 @@
|
||||||
|
prefix=@prefix@
|
||||||
|
libdir=@libdir@
|
||||||
|
includedir=@includedir@
|
||||||
|
|
||||||
|
Name: OpenBLAS
|
||||||
|
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||||
|
Version: @OPENBLAS_VERSION@
|
||||||
|
URL: https://github.com/xianyi/OpenBLAS
|
||||||
|
Libs: -L${libdir} -lopenblas
|
||||||
|
Cflags: -I${includedir}
|
|
@ -50,20 +50,20 @@ else()
|
||||||
set(TARGET_CONF "config.h")
|
set(TARGET_CONF "config.h")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/c_check.cmake")
|
||||||
|
|
||||||
if (NOT NOFORTRAN)
|
if (NOT NOFORTRAN)
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
# compile getarch
|
# compile getarch
|
||||||
set(GETARCH_SRC
|
set(GETARCH_SRC
|
||||||
${CMAKE_SOURCE_DIR}/getarch.c
|
${PROJECT_SOURCE_DIR}/getarch.c
|
||||||
${CPUIDEMO}
|
${CPUIDEMO}
|
||||||
)
|
)
|
||||||
|
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
|
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
|
@ -76,7 +76,7 @@ set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
||||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
||||||
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||||
SOURCES ${GETARCH_SRC}
|
SOURCES ${GETARCH_SRC}
|
||||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
|
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${PROJECT_SOURCE_DIR}
|
||||||
OUTPUT_VARIABLE GETARCH_LOG
|
OUTPUT_VARIABLE GETARCH_LOG
|
||||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||||
)
|
)
|
||||||
|
@ -97,8 +97,8 @@ set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
|
||||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
|
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
|
||||||
file(MAKE_DIRECTORY ${GETARCH2_DIR})
|
file(MAKE_DIRECTORY ${GETARCH2_DIR})
|
||||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||||
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
|
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
|
||||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
|
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${PROJECT_SOURCE_DIR}
|
||||||
OUTPUT_VARIABLE GETARCH2_LOG
|
OUTPUT_VARIABLE GETARCH2_LOG
|
||||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||||
)
|
)
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
## Description: Ported from OpenBLAS/Makefile.system
|
## Description: Ported from OpenBLAS/Makefile.system
|
||||||
##
|
##
|
||||||
|
|
||||||
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")
|
set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib")
|
||||||
|
|
||||||
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
|
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
|
||||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||||
|
@ -78,7 +78,7 @@ else ()
|
||||||
set(ONLY_CBLAS 0)
|
set(ONLY_CBLAS 0)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||||
|
|
||||||
if (NOT DEFINED NUM_THREADS)
|
if (NOT DEFINED NUM_THREADS)
|
||||||
set(NUM_THREADS ${NUM_CORES})
|
set(NUM_THREADS ${NUM_CORES})
|
||||||
|
@ -124,17 +124,17 @@ set(OBJCOPY "${CROSS_SUFFIX}objcopy")
|
||||||
set(OBJCONV "${CROSS_SUFFIX}objconv")
|
set(OBJCONV "${CROSS_SUFFIX}objconv")
|
||||||
|
|
||||||
# OS dependent settings
|
# OS dependent settings
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/os.cmake")
|
||||||
|
|
||||||
# Architecture dependent settings
|
# Architecture dependent settings
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/arch.cmake")
|
||||||
|
|
||||||
# C Compiler dependent settings
|
# C Compiler dependent settings
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/cc.cmake")
|
||||||
|
|
||||||
if (NOT NOFORTRAN)
|
if (NOT NOFORTRAN)
|
||||||
# Fortran Compiler dependent settings
|
# Fortran Compiler dependent settings
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/fc.cmake")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (BINARY64)
|
if (BINARY64)
|
||||||
|
@ -247,10 +247,10 @@ if (NOT DEFINED SYMBOLSUFFIX)
|
||||||
set(SYMBOLSUFFIX "")
|
set(SYMBOLSUFFIX "")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}")
|
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
|
||||||
|
|
||||||
# TODO: nead to convert these Makefiles
|
# TODO: nead to convert these Makefiles
|
||||||
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake
|
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
|
||||||
|
|
||||||
if (${CORE} STREQUAL "PPC440")
|
if (${CORE} STREQUAL "PPC440")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
|
||||||
|
@ -410,8 +410,8 @@ set(LIBDEFNAME "${LIBNAME}.${LIBSUFFIX}.def")
|
||||||
set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp")
|
set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp")
|
||||||
set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip")
|
set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip")
|
||||||
|
|
||||||
set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
|
set(LIBS "${PROJECT_SOURCE_DIR}/${LIBNAME}")
|
||||||
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")
|
set(LIBS_P "${PROJECT_SOURCE_DIR}/${LIBNAME_P}")
|
||||||
|
|
||||||
|
|
||||||
set(LIB_COMPONENTS BLAS)
|
set(LIB_COMPONENTS BLAS)
|
||||||
|
|
26
common.h
26
common.h
|
@ -93,7 +93,7 @@ extern "C" {
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
|
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID)
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -332,12 +332,20 @@ typedef int blasint;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef POWER8
|
||||||
|
#ifndef YIELDING
|
||||||
|
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
#ifdef PILEDRIVER
|
#ifdef PILEDRIVER
|
||||||
#ifndef YIELDING
|
#ifndef YIELDING
|
||||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#ifdef STEAMROLLER
|
#ifdef STEAMROLLER
|
||||||
|
@ -396,6 +404,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||||
#include "common_sparc.h"
|
#include "common_sparc.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ARCH_MIPS
|
||||||
|
#include "common_mips.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ARCH_MIPS64
|
#ifdef ARCH_MIPS64
|
||||||
#include "common_mips64.h"
|
#include "common_mips64.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -408,10 +420,14 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||||
#include "common_arm64.h"
|
#include "common_arm64.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ARCH_ZARCH
|
||||||
|
#include "common_zarch.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
#ifdef OS_WINDOWS
|
#ifdef OS_WINDOWS
|
||||||
typedef char env_var_t[MAX_PATH];
|
typedef char env_var_t[MAX_PATH];
|
||||||
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
|
#define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
|
||||||
#else
|
#else
|
||||||
typedef char* env_var_t;
|
typedef char* env_var_t;
|
||||||
#define readenv(p, n) ((p)=getenv(n))
|
#define readenv(p, n) ((p)=getenv(n))
|
||||||
|
@ -614,9 +630,14 @@ void gotoblas_profile_init(void);
|
||||||
void gotoblas_profile_quit(void);
|
void gotoblas_profile_quit(void);
|
||||||
|
|
||||||
#ifdef USE_OPENMP
|
#ifdef USE_OPENMP
|
||||||
|
#ifndef C_MSVC
|
||||||
int omp_in_parallel(void);
|
int omp_in_parallel(void);
|
||||||
int omp_get_num_procs(void);
|
int omp_get_num_procs(void);
|
||||||
#else
|
#else
|
||||||
|
__declspec(dllimport) int __cdecl omp_in_parallel(void);
|
||||||
|
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
#ifdef __ELF__
|
#ifdef __ELF__
|
||||||
int omp_in_parallel (void) __attribute__ ((weak));
|
int omp_in_parallel (void) __attribute__ ((weak));
|
||||||
int omp_get_num_procs(void) __attribute__ ((weak));
|
int omp_get_num_procs(void) __attribute__ ((weak));
|
||||||
|
@ -727,6 +748,7 @@ typedef struct {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
#include "common_stackalloc.h"
|
||||||
#if 0
|
#if 0
|
||||||
#include "symcopy.h"
|
#include "symcopy.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -105,7 +105,6 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
#define PROLOGUE \
|
#define PROLOGUE \
|
||||||
.arm ;\
|
.arm ;\
|
||||||
.global REALNAME ;\
|
.global REALNAME ;\
|
||||||
.func REALNAME ;\
|
|
||||||
REALNAME:
|
REALNAME:
|
||||||
|
|
||||||
#define EPILOGUE
|
#define EPILOGUE
|
||||||
|
|
|
@ -43,28 +43,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
|
||||||
static void __inline blas_lock(volatile BLASULONG *address){
|
static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
|
||||||
long register ret;
|
BLASULONG ret;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
while (*address) {YIELDING;};
|
while (*address) {YIELDING;};
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
"ldaxr %0, [%1] \n\t"
|
"mov x4, #1 \n\t"
|
||||||
"stlxr w2, %2, [%1] \n\t"
|
"1: \n\t"
|
||||||
"orr %0, %0, x2 \n\t"
|
"ldaxr x2, [%1] \n\t"
|
||||||
: "=r"(ret)
|
"cbnz x2, 1b \n\t"
|
||||||
: "r"(address), "r"(1l)
|
"2: \n\t"
|
||||||
: "memory", "x2"
|
"stxr w3, x4, [%1] \n\t"
|
||||||
|
"cbnz w3, 1b \n\t"
|
||||||
|
"mov %0, #0 \n\t"
|
||||||
|
: "=r"(ret), "=r"(address)
|
||||||
|
: "1"(address)
|
||||||
|
: "memory", "x2" , "x3", "x4"
|
||||||
|
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
} while (ret);
|
} while (ret);
|
||||||
MB;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define BLAS_LOCK_DEFINED
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static inline int blas_quickdivide(blasint x, blasint y){
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
return x / y;
|
return x / y;
|
||||||
}
|
}
|
||||||
|
@ -110,7 +121,7 @@ REALNAME:
|
||||||
#define HUGE_PAGESIZE ( 4 << 20)
|
#define HUGE_PAGESIZE ( 4 << 20)
|
||||||
|
|
||||||
#if defined(CORTEXA57)
|
#if defined(CORTEXA57)
|
||||||
#define BUFFER_SIZE (40 << 20)
|
#define BUFFER_SIZE (20 << 20)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE (16 << 20)
|
#define BUFFER_SIZE (16 << 20)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -70,7 +70,7 @@ extern long int syscall (long int __sysno, ...);
|
||||||
static inline int my_mbind(void *addr, unsigned long len, int mode,
|
static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||||
unsigned long *nodemask, unsigned long maxnode,
|
unsigned long *nodemask, unsigned long maxnode,
|
||||||
unsigned flags) {
|
unsigned flags) {
|
||||||
#if defined (__LSB_VERSION__)
|
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
|
||||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -90,7 +90,7 @@ static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
||||||
#if defined (__LSB_VERSION__)
|
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
|
||||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -2193,7 +2193,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
|
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
|
||||||
extern BLASLONG gemm_offset_a;
|
extern BLASLONG gemm_offset_a;
|
||||||
extern BLASLONG gemm_offset_b;
|
extern BLASLONG gemm_offset_b;
|
||||||
extern BLASLONG sgemm_p;
|
extern BLASLONG sgemm_p;
|
||||||
|
|
|
@ -0,0 +1,108 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#ifndef COMMON_MIPS
|
||||||
|
#define COMMON_MIPS
|
||||||
|
|
||||||
|
#define MB
|
||||||
|
#define WMB
|
||||||
|
|
||||||
|
#define INLINE inline
|
||||||
|
|
||||||
|
#define RETURN_BY_COMPLEX
|
||||||
|
|
||||||
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
static void INLINE blas_lock(volatile unsigned long *address){
|
||||||
|
|
||||||
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
|
static inline unsigned int rpcc(void){
|
||||||
|
unsigned long ret;
|
||||||
|
|
||||||
|
__asm__ __volatile__(".set push \n"
|
||||||
|
"rdhwr %0, $30 \n"
|
||||||
|
".set pop" : "=r"(ret) : : "memory");
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
|
return x / y;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define GET_IMAGE(res)
|
||||||
|
|
||||||
|
#define GET_IMAGE_CANCEL
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef F_INTERFACE
|
||||||
|
#define REALNAME ASMNAME
|
||||||
|
#else
|
||||||
|
#define REALNAME ASMFNAME
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||||
|
|
||||||
|
#define PROLOGUE \
|
||||||
|
.arm ;\
|
||||||
|
.global REALNAME ;\
|
||||||
|
REALNAME:
|
||||||
|
|
||||||
|
#define EPILOGUE
|
||||||
|
|
||||||
|
#define PROFCODE
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define SEEK_ADDRESS
|
||||||
|
|
||||||
|
#ifndef PAGESIZE
|
||||||
|
#define PAGESIZE ( 4 << 10)
|
||||||
|
#endif
|
||||||
|
#define HUGE_PAGESIZE ( 4 << 20)
|
||||||
|
|
||||||
|
#define BUFFER_SIZE (16 << 20)
|
||||||
|
|
||||||
|
|
||||||
|
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||||
|
|
||||||
|
#ifndef MAP_ANONYMOUS
|
||||||
|
#define MAP_ANONYMOUS MAP_ANON
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -102,7 +102,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
|
||||||
|
|
||||||
static inline unsigned int rpcc(void){
|
static inline unsigned int rpcc(void){
|
||||||
unsigned long ret;
|
unsigned long ret;
|
||||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
|
||||||
// unsigned long long tmp;
|
// unsigned long long tmp;
|
||||||
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
||||||
//ret=tmp;
|
//ret=tmp;
|
||||||
|
@ -111,17 +111,10 @@ static inline unsigned int rpcc(void){
|
||||||
"rdhwr %0, $2\n"
|
"rdhwr %0, $2\n"
|
||||||
".set pop": "=r"(ret):: "memory");
|
".set pop": "=r"(ret):: "memory");
|
||||||
|
|
||||||
#else
|
|
||||||
__asm__ __volatile__(".set push \n"
|
|
||||||
".set mips32r2\n"
|
|
||||||
"rdhwr %0, $30 \n"
|
|
||||||
".set pop" : "=r"(ret) : : "memory");
|
|
||||||
#endif
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
#define RPCC_DEFINED
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
|
||||||
#ifndef NO_AFFINITY
|
#ifndef NO_AFFINITY
|
||||||
#define WHEREAMI
|
#define WHEREAMI
|
||||||
static inline int WhereAmI(void){
|
static inline int WhereAmI(void){
|
||||||
|
@ -134,7 +127,6 @@ static inline int WhereAmI(void){
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline int blas_quickdivide(blasint x, blasint y){
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
return x / y;
|
return x / y;
|
||||||
|
|
|
@ -39,8 +39,13 @@
|
||||||
#ifndef COMMON_POWER
|
#ifndef COMMON_POWER
|
||||||
#define COMMON_POWER
|
#define COMMON_POWER
|
||||||
|
|
||||||
|
#if defined(POWER8)
|
||||||
|
#define MB __asm__ __volatile__ ("eieio":::"memory")
|
||||||
|
#define WMB __asm__ __volatile__ ("eieio":::"memory")
|
||||||
|
#else
|
||||||
#define MB __asm__ __volatile__ ("sync")
|
#define MB __asm__ __volatile__ ("sync")
|
||||||
#define WMB __asm__ __volatile__ ("sync")
|
#define WMB __asm__ __volatile__ ("sync")
|
||||||
|
#endif
|
||||||
|
|
||||||
#define INLINE inline
|
#define INLINE inline
|
||||||
|
|
||||||
|
@ -236,7 +241,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
#define HAVE_PREFETCH
|
#define HAVE_PREFETCH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL)
|
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8)
|
||||||
#define DCBT_ARG 0
|
#define DCBT_ARG 0
|
||||||
#else
|
#else
|
||||||
#define DCBT_ARG 8
|
#define DCBT_ARG 8
|
||||||
|
@ -258,6 +263,13 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
#define L1_PREFETCH dcbtst
|
#define L1_PREFETCH dcbtst
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(POWER8)
|
||||||
|
#define L1_DUALFETCH
|
||||||
|
#define L1_PREFETCHSIZE (16 + 128 * 100)
|
||||||
|
#define L1_PREFETCH dcbtst
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#
|
||||||
#ifndef L1_PREFETCH
|
#ifndef L1_PREFETCH
|
||||||
#define L1_PREFETCH dcbt
|
#define L1_PREFETCH dcbt
|
||||||
#endif
|
#endif
|
||||||
|
@ -790,6 +802,8 @@ Lmcount$lazy_ptr:
|
||||||
#define BUFFER_SIZE ( 2 << 20)
|
#define BUFFER_SIZE ( 2 << 20)
|
||||||
#elif defined(PPC440FP2)
|
#elif defined(PPC440FP2)
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
|
#elif defined(POWER8)
|
||||||
|
#define BUFFER_SIZE ( 64 << 20)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
Copyright (c) 2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#define STACK_ALLOC_PROTECT
|
||||||
|
#ifdef STACK_ALLOC_PROTECT
|
||||||
|
// Try to detect stack smashing
|
||||||
|
#include <assert.h>
|
||||||
|
#define STACK_ALLOC_PROTECT_SET volatile int stack_check = 0x7fc01234;
|
||||||
|
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
|
||||||
|
#else
|
||||||
|
#define STACK_ALLOC_PROTECT_SET
|
||||||
|
#define STACK_ALLOC_PROTECT_CHECK
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate a buffer on the stack if the size is smaller than MAX_STACK_ALLOC.
|
||||||
|
* Stack allocation is much faster than blas_memory_alloc or malloc, particularly
|
||||||
|
* when OpenBLAS is used from a multi-threaded application.
|
||||||
|
* SIZE must be carefully chosen to be:
|
||||||
|
* - as small as possible to maximize the number of stack allocation
|
||||||
|
* - large enough to support all architectures and kernel
|
||||||
|
* Chosing a too small SIZE will lead to a stack smashing.
|
||||||
|
*/
|
||||||
|
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
||||||
|
/* make it volatile because some function (ex: dgemv_n.S) */ \
|
||||||
|
/* do not restore all register */ \
|
||||||
|
volatile int stack_alloc_size = SIZE; \
|
||||||
|
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \
|
||||||
|
stack_alloc_size = 0; \
|
||||||
|
STACK_ALLOC_PROTECT_SET \
|
||||||
|
TYPE stack_buffer[stack_alloc_size] __attribute__((aligned(0x20))); \
|
||||||
|
BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
|
||||||
|
#else
|
||||||
|
//Original OpenBLAS/GotoBLAS codes.
|
||||||
|
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
|
||||||
|
#define STACK_FREE(BUFFER) \
|
||||||
|
STACK_ALLOC_PROTECT_CHECK \
|
||||||
|
if(!stack_alloc_size) \
|
||||||
|
blas_memory_free(BUFFER);
|
||||||
|
#else
|
||||||
|
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
|
||||||
|
#endif
|
||||||
|
|
15
common_x86.h
15
common_x86.h
|
@ -41,6 +41,10 @@
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
#ifdef C_MSVC
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define MB
|
#define MB
|
||||||
#define WMB
|
#define WMB
|
||||||
|
|
||||||
|
@ -58,7 +62,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
// use intrinsic instead of inline assembly
|
// use intrinsic instead of inline assembly
|
||||||
ret = _InterlockedExchange(address, 1);
|
ret = _InterlockedExchange((volatile LONG *)address, 1);
|
||||||
// inline assembly
|
// inline assembly
|
||||||
/*__asm {
|
/*__asm {
|
||||||
mov eax, address
|
mov eax, address
|
||||||
|
@ -170,12 +174,13 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||||
|
|
||||||
if (y <= 1) return x;
|
if (y <= 1) return x;
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
|
result = x/y;
|
||||||
|
return result;
|
||||||
|
#else
|
||||||
|
|
||||||
y = blas_quick_divide_table[y];
|
y = blas_quick_divide_table[y];
|
||||||
|
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
|
||||||
(void*)result;
|
|
||||||
return x*y;
|
|
||||||
#else
|
|
||||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -396,7 +396,7 @@ REALNAME:
|
||||||
|
|
||||||
#define PROFCODE
|
#define PROFCODE
|
||||||
|
|
||||||
#define EPILOGUE .end REALNAME
|
#define EPILOGUE .end
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
|
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
|
||||||
|
|
|
@ -0,0 +1,140 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#ifndef COMMON_ZARCH
|
||||||
|
#define COMMON_ZARCH
|
||||||
|
|
||||||
|
#define MB
|
||||||
|
//__asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||||
|
#define WMB
|
||||||
|
//__asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||||
|
|
||||||
|
|
||||||
|
#define INLINE inline
|
||||||
|
|
||||||
|
#define RETURN_BY_COMPLEX
|
||||||
|
|
||||||
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
/*
|
||||||
|
static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
|
||||||
|
BLASULONG ret;
|
||||||
|
|
||||||
|
do {
|
||||||
|
while (*address) {YIELDING;};
|
||||||
|
|
||||||
|
__asm__ __volatile__(
|
||||||
|
"mov x4, #1 \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
"ldaxr x2, [%1] \n\t"
|
||||||
|
"cbnz x2, 1b \n\t"
|
||||||
|
"2: \n\t"
|
||||||
|
"stxr w3, x4, [%1] \n\t"
|
||||||
|
"cbnz w3, 1b \n\t"
|
||||||
|
"mov %0, #0 \n\t"
|
||||||
|
: "=r"(ret), "=r"(address)
|
||||||
|
: "1"(address)
|
||||||
|
: "memory", "x2" , "x3", "x4"
|
||||||
|
|
||||||
|
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
} while (ret);
|
||||||
|
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
//#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
|
return x / y;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(DOUBLE)
|
||||||
|
#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
|
||||||
|
#else
|
||||||
|
#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GET_IMAGE_CANCEL
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef F_INTERFACE
|
||||||
|
#define REALNAME ASMNAME
|
||||||
|
#else
|
||||||
|
#define REALNAME ASMFNAME
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||||
|
|
||||||
|
#define PROLOGUE \
|
||||||
|
.text ;\
|
||||||
|
.align 256 ;\
|
||||||
|
.global REALNAME ;\
|
||||||
|
.type REALNAME, %function ;\
|
||||||
|
REALNAME:
|
||||||
|
|
||||||
|
|
||||||
|
#define EPILOGUE
|
||||||
|
|
||||||
|
#define PROFCODE
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define SEEK_ADDRESS
|
||||||
|
|
||||||
|
#ifndef PAGESIZE
|
||||||
|
#define PAGESIZE ( 4 << 10)
|
||||||
|
#endif
|
||||||
|
#define HUGE_PAGESIZE ( 4 << 20)
|
||||||
|
|
||||||
|
#if defined(CORTEXA57)
|
||||||
|
#define BUFFER_SIZE (20 << 20)
|
||||||
|
#else
|
||||||
|
#define BUFFER_SIZE (16 << 20)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||||
|
|
||||||
|
#ifndef MAP_ANONYMOUS
|
||||||
|
#define MAP_ANONYMOUS MAP_ANON
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
26
cpuid_arm.c
26
cpuid_arm.c
|
@ -74,7 +74,7 @@ int get_feature(char *search)
|
||||||
fclose(infile);
|
fclose(infile);
|
||||||
|
|
||||||
|
|
||||||
if( p == NULL ) return;
|
if( p == NULL ) return 0;
|
||||||
|
|
||||||
t = strtok(p," ");
|
t = strtok(p," ");
|
||||||
while( t = strtok(NULL," "))
|
while( t = strtok(NULL," "))
|
||||||
|
@ -115,6 +115,9 @@ int detect(void)
|
||||||
if (strstr(p, "0xc0f")) {
|
if (strstr(p, "0xc0f")) {
|
||||||
return CPU_CORTEXA15;
|
return CPU_CORTEXA15;
|
||||||
}
|
}
|
||||||
|
if (strstr(p, "0xd07")) {
|
||||||
|
return CPU_ARMV7; //ARMV8 on 32-bit
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -158,6 +161,27 @@ int detect(void)
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
p = (char *) NULL ;
|
||||||
|
infile = fopen("/proc/cpuinfo", "r");
|
||||||
|
|
||||||
|
while (fgets(buffer, sizeof(buffer), infile))
|
||||||
|
{
|
||||||
|
|
||||||
|
if ((!strncmp("CPU architecture", buffer, 16)))
|
||||||
|
{
|
||||||
|
p = strchr(buffer, ':') + 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fclose(infile);
|
||||||
|
if(p != NULL) {
|
||||||
|
if (strstr(p, "8")) {
|
||||||
|
return CPU_ARMV7; //ARMV8 on 32-bit
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return CPU_UNKNOWN;
|
return CPU_UNKNOWN;
|
||||||
|
|
106
cpuid_arm64.c
106
cpuid_arm64.c
|
@ -30,17 +30,26 @@
|
||||||
#define CPU_UNKNOWN 0
|
#define CPU_UNKNOWN 0
|
||||||
#define CPU_ARMV8 1
|
#define CPU_ARMV8 1
|
||||||
#define CPU_CORTEXA57 2
|
#define CPU_CORTEXA57 2
|
||||||
|
#define CPU_VULCAN 3
|
||||||
|
#define CPU_THUNDERX 4
|
||||||
|
#define CPU_THUNDERX2T99 5
|
||||||
|
|
||||||
static char *cpuname[] = {
|
static char *cpuname[] = {
|
||||||
"UNKNOWN",
|
"UNKNOWN",
|
||||||
"ARMV8" ,
|
"ARMV8" ,
|
||||||
"CORTEXA57"
|
"CORTEXA57",
|
||||||
|
"VULCAN",
|
||||||
|
"THUNDERX",
|
||||||
|
"THUNDERX2T99"
|
||||||
};
|
};
|
||||||
|
|
||||||
static char *cpuname_lower[] = {
|
static char *cpuname_lower[] = {
|
||||||
"unknown",
|
"unknown",
|
||||||
"armv8" ,
|
"armv8" ,
|
||||||
"cortexa57"
|
"cortexa57",
|
||||||
|
"vulcan",
|
||||||
|
"thunderx",
|
||||||
|
"thunderx2t99"
|
||||||
};
|
};
|
||||||
|
|
||||||
int get_feature(char *search)
|
int get_feature(char *search)
|
||||||
|
@ -85,25 +94,34 @@ int detect(void)
|
||||||
#ifdef linux
|
#ifdef linux
|
||||||
|
|
||||||
FILE *infile;
|
FILE *infile;
|
||||||
char buffer[512], *p;
|
char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
|
||||||
p = (char *) NULL ;
|
p = (char *) NULL ;
|
||||||
|
|
||||||
infile = fopen("/proc/cpuinfo", "r");
|
infile = fopen("/proc/cpuinfo", "r");
|
||||||
while (fgets(buffer, sizeof(buffer), infile))
|
while (fgets(buffer, sizeof(buffer), infile)) {
|
||||||
{
|
if ((cpu_part != NULL) && (cpu_implementer != NULL)) {
|
||||||
|
|
||||||
if (!strncmp("CPU part", buffer, 8))
|
|
||||||
{
|
|
||||||
p = strchr(buffer, ':') + 2;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) {
|
||||||
|
cpu_part = strchr(buffer, ':') + 2;
|
||||||
|
cpu_part = strdup(cpu_part);
|
||||||
|
} else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) {
|
||||||
|
cpu_implementer = strchr(buffer, ':') + 2;
|
||||||
|
cpu_implementer = strdup(cpu_implementer);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose(infile);
|
fclose(infile);
|
||||||
if(p != NULL) {
|
if(cpu_part != NULL && cpu_implementer != NULL) {
|
||||||
if (strstr(p, "0xd07")) {
|
if (strstr(cpu_part, "0xd07") && strstr(cpu_implementer, "0x41"))
|
||||||
return CPU_CORTEXA57;
|
return CPU_CORTEXA57;
|
||||||
}
|
else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42"))
|
||||||
|
return CPU_VULCAN;
|
||||||
|
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43"))
|
||||||
|
return CPU_THUNDERX;
|
||||||
|
else if (strstr(cpu_part, "0xFFF") && strstr(cpu_implementer, "0x43")) /* TODO */
|
||||||
|
return CPU_THUNDERX2T99;
|
||||||
}
|
}
|
||||||
|
|
||||||
p = (char *) NULL ;
|
p = (char *) NULL ;
|
||||||
|
@ -176,6 +194,28 @@ void get_cpuconfig(void)
|
||||||
printf("#define L2_ASSOCIATIVE 4\n");
|
printf("#define L2_ASSOCIATIVE 4\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case CPU_VULCAN:
|
||||||
|
printf("#define VULCAN \n");
|
||||||
|
printf("#define HAVE_VFP \n");
|
||||||
|
printf("#define HAVE_VFPV3 \n");
|
||||||
|
printf("#define HAVE_NEON \n");
|
||||||
|
printf("#define HAVE_VFPV4 \n");
|
||||||
|
printf("#define L1_CODE_SIZE 32768 \n");
|
||||||
|
printf("#define L1_CODE_LINESIZE 64 \n");
|
||||||
|
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
|
||||||
|
printf("#define L1_DATA_SIZE 32768 \n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 64 \n");
|
||||||
|
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
|
||||||
|
printf("#define L2_SIZE 262144 \n");
|
||||||
|
printf("#define L2_LINESIZE 64 \n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 8 \n");
|
||||||
|
printf("#define L3_SIZE 33554432 \n");
|
||||||
|
printf("#define L3_LINESIZE 64 \n");
|
||||||
|
printf("#define L3_ASSOCIATIVE 32 \n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||||
|
printf("#define DTB_SIZE 4096 \n");
|
||||||
|
break;
|
||||||
|
|
||||||
case CPU_CORTEXA57:
|
case CPU_CORTEXA57:
|
||||||
printf("#define CORTEXA57\n");
|
printf("#define CORTEXA57\n");
|
||||||
printf("#define HAVE_VFP\n");
|
printf("#define HAVE_VFP\n");
|
||||||
|
@ -191,6 +231,42 @@ void get_cpuconfig(void)
|
||||||
printf("#define L2_SIZE 2097152\n");
|
printf("#define L2_SIZE 2097152\n");
|
||||||
printf("#define L2_LINESIZE 64\n");
|
printf("#define L2_LINESIZE 64\n");
|
||||||
printf("#define L2_ASSOCIATIVE 16\n");
|
printf("#define L2_ASSOCIATIVE 16\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CPU_THUNDERX:
|
||||||
|
printf("#define ARMV8\n");
|
||||||
|
printf("#define THUNDERX\n");
|
||||||
|
printf("#define L1_DATA_SIZE 32768\n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 128\n");
|
||||||
|
printf("#define L2_SIZE 16777216\n");
|
||||||
|
printf("#define L2_LINESIZE 128\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 16\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CPU_THUNDERX2T99:
|
||||||
|
printf("#define VULCAN \n");
|
||||||
|
printf("#define HAVE_VFP \n");
|
||||||
|
printf("#define HAVE_VFPV3 \n");
|
||||||
|
printf("#define HAVE_NEON \n");
|
||||||
|
printf("#define HAVE_VFPV4 \n");
|
||||||
|
printf("#define L1_CODE_SIZE 32768 \n");
|
||||||
|
printf("#define L1_CODE_LINESIZE 64 \n");
|
||||||
|
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
|
||||||
|
printf("#define L1_DATA_SIZE 32768 \n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 64 \n");
|
||||||
|
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
|
||||||
|
printf("#define L2_SIZE 262144 \n");
|
||||||
|
printf("#define L2_LINESIZE 64 \n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 8 \n");
|
||||||
|
printf("#define L3_SIZE 33554432 \n");
|
||||||
|
printf("#define L3_LINESIZE 64 \n");
|
||||||
|
printf("#define L3_ASSOCIATIVE 32 \n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||||
|
printf("#define DTB_SIZE 4096 \n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
60
cpuid_mips.c
60
cpuid_mips.c
|
@ -71,15 +71,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#define CPU_UNKNOWN 0
|
#define CPU_UNKNOWN 0
|
||||||
#define CPU_SICORTEX 1
|
#define CPU_P5600 1
|
||||||
#define CPU_LOONGSON3A 2
|
|
||||||
#define CPU_LOONGSON3B 3
|
|
||||||
|
|
||||||
static char *cpuname[] = {
|
static char *cpuname[] = {
|
||||||
"UNKOWN",
|
"UNKOWN",
|
||||||
"SICORTEX",
|
"P5600"
|
||||||
"LOONGSON3A",
|
|
||||||
"LOONGSON3B"
|
|
||||||
};
|
};
|
||||||
|
|
||||||
int detect(void){
|
int detect(void){
|
||||||
|
@ -120,7 +116,7 @@ int detect(void){
|
||||||
if (strstr(p, "loongson3a"))
|
if (strstr(p, "loongson3a"))
|
||||||
return CPU_LOONGSON3A;
|
return CPU_LOONGSON3A;
|
||||||
}else{
|
}else{
|
||||||
return CPU_SICORTEX;
|
return CPU_UNKNOWN;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//Check model name for Loongson3
|
//Check model name for Loongson3
|
||||||
|
@ -149,64 +145,40 @@ char *get_corename(void){
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_architecture(void){
|
void get_architecture(void){
|
||||||
printf("MIPS64");
|
printf("MIPS");
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_subarchitecture(void){
|
void get_subarchitecture(void){
|
||||||
if(detect()==CPU_LOONGSON3A) {
|
if(detect()==CPU_P5600){
|
||||||
printf("LOONGSON3A");
|
printf("P5600");
|
||||||
}else if(detect()==CPU_LOONGSON3B){
|
|
||||||
printf("LOONGSON3B");
|
|
||||||
}else{
|
}else{
|
||||||
printf("SICORTEX");
|
printf("UNKNOWN");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_subdirname(void){
|
void get_subdirname(void){
|
||||||
printf("mips64");
|
printf("mips");
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_cpuconfig(void){
|
void get_cpuconfig(void){
|
||||||
if(detect()==CPU_LOONGSON3A) {
|
if(detect()==CPU_P5600){
|
||||||
printf("#define LOONGSON3A\n");
|
printf("#define P5600\n");
|
||||||
printf("#define L1_DATA_SIZE 65536\n");
|
printf("#define L1_DATA_SIZE 65536\n");
|
||||||
printf("#define L1_DATA_LINESIZE 32\n");
|
printf("#define L1_DATA_LINESIZE 32\n");
|
||||||
printf("#define L2_SIZE 512488\n");
|
printf("#define L2_SIZE 1048576\n");
|
||||||
printf("#define L2_LINESIZE 32\n");
|
printf("#define L2_LINESIZE 32\n");
|
||||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
printf("#define DTB_SIZE 4096\n");
|
printf("#define DTB_SIZE 4096\n");
|
||||||
printf("#define L2_ASSOCIATIVE 4\n");
|
|
||||||
}else if(detect()==CPU_LOONGSON3B){
|
|
||||||
printf("#define LOONGSON3B\n");
|
|
||||||
printf("#define L1_DATA_SIZE 65536\n");
|
|
||||||
printf("#define L1_DATA_LINESIZE 32\n");
|
|
||||||
printf("#define L2_SIZE 512488\n");
|
|
||||||
printf("#define L2_LINESIZE 32\n");
|
|
||||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
|
||||||
printf("#define DTB_SIZE 4096\n");
|
|
||||||
printf("#define L2_ASSOCIATIVE 4\n");
|
|
||||||
}else{
|
|
||||||
printf("#define SICORTEX\n");
|
|
||||||
printf("#define L1_DATA_SIZE 32768\n");
|
|
||||||
printf("#define L1_DATA_LINESIZE 32\n");
|
|
||||||
printf("#define L2_SIZE 512488\n");
|
|
||||||
printf("#define L2_LINESIZE 32\n");
|
|
||||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
|
||||||
printf("#define DTB_SIZE 4096\n");
|
|
||||||
printf("#define L2_ASSOCIATIVE 8\n");
|
printf("#define L2_ASSOCIATIVE 8\n");
|
||||||
|
}else{
|
||||||
|
printf("#define UNKNOWN\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_libname(void){
|
void get_libname(void){
|
||||||
if(detect()==CPU_LOONGSON3A) {
|
if(detect()==CPU_P5600) {
|
||||||
printf("loongson3a\n");
|
printf("p5600\n");
|
||||||
}else if(detect()==CPU_LOONGSON3B) {
|
|
||||||
printf("loongson3b\n");
|
|
||||||
}else{
|
}else{
|
||||||
#ifdef __mips64
|
printf("mips\n");
|
||||||
printf("mips64\n");
|
|
||||||
#else
|
|
||||||
printf("mips32\n");
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,238 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************************************************/
|
||||||
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* All rights reserved. */
|
||||||
|
/* */
|
||||||
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
/* without modification, are permitted provided that the following */
|
||||||
|
/* conditions are met: */
|
||||||
|
/* */
|
||||||
|
/* 1. Redistributions of source code must retain the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer. */
|
||||||
|
/* */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer in the documentation and/or other materials */
|
||||||
|
/* provided with the distribution. */
|
||||||
|
/* */
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||||
|
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||||
|
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||||
|
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||||
|
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||||
|
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||||
|
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||||
|
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||||
|
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||||
|
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/* */
|
||||||
|
/* The views and conclusions contained in the software and */
|
||||||
|
/* documentation are those of the authors and should not be */
|
||||||
|
/* interpreted as representing official policies, either expressed */
|
||||||
|
/* or implied, of The University of Texas at Austin. */
|
||||||
|
/*********************************************************************/
|
||||||
|
|
||||||
|
/* Identifiers returned by detect(). Each value doubles as the index of the
   matching entry in cpuname[] below. */
#define CPU_UNKNOWN     0
#define CPU_SICORTEX    1
#define CPU_LOONGSON3A  2
#define CPU_LOONGSON3B  3
#define CPU_I6400       4
#define CPU_P6600       5

/* Core names handed out by get_corename(), keyed by the CPU_* identifiers.
   The "UNKOWN" spelling is emitted at runtime and is intentionally left
   untouched here. */
static char *cpuname[] = {
  [CPU_UNKNOWN]    = "UNKOWN",
  [CPU_SICORTEX]   = "SICORTEX",
  [CPU_LOONGSON3A] = "LOONGSON3A",
  [CPU_LOONGSON3B] = "LOONGSON3B",
  [CPU_I6400]      = "I6400",
  [CPU_P6600]      = "P6600"
};
|
||||||
|
|
||||||
|
int detect(void){
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
FILE *infile;
|
||||||
|
char buffer[512], *p;
|
||||||
|
|
||||||
|
p = (char *)NULL;
|
||||||
|
infile = fopen("/proc/cpuinfo", "r");
|
||||||
|
while (fgets(buffer, sizeof(buffer), infile)){
|
||||||
|
if (!strncmp("cpu", buffer, 3)){
|
||||||
|
p = strchr(buffer, ':') + 2;
|
||||||
|
#if 0
|
||||||
|
fprintf(stderr, "%s\n", p);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(infile);
|
||||||
|
|
||||||
|
if(p != NULL){
|
||||||
|
if (strstr(p, "Loongson-3A")){
|
||||||
|
return CPU_LOONGSON3A;
|
||||||
|
}else if(strstr(p, "Loongson-3B")){
|
||||||
|
return CPU_LOONGSON3B;
|
||||||
|
}else if (strstr(p, "Loongson-3")){
|
||||||
|
infile = fopen("/proc/cpuinfo", "r");
|
||||||
|
p = (char *)NULL;
|
||||||
|
while (fgets(buffer, sizeof(buffer), infile)){
|
||||||
|
if (!strncmp("system type", buffer, 11)){
|
||||||
|
p = strchr(buffer, ':') + 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fclose(infile);
|
||||||
|
if (strstr(p, "loongson3a"))
|
||||||
|
return CPU_LOONGSON3A;
|
||||||
|
}else{
|
||||||
|
return CPU_SICORTEX;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//Check model name for Loongson3
|
||||||
|
infile = fopen("/proc/cpuinfo", "r");
|
||||||
|
p = (char *)NULL;
|
||||||
|
while (fgets(buffer, sizeof(buffer), infile)){
|
||||||
|
if (!strncmp("model name", buffer, 10)){
|
||||||
|
p = strchr(buffer, ':') + 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fclose(infile);
|
||||||
|
if(p != NULL){
|
||||||
|
if (strstr(p, "Loongson-3A")){
|
||||||
|
return CPU_LOONGSON3A;
|
||||||
|
}else if(strstr(p, "Loongson-3B")){
|
||||||
|
return CPU_LOONGSON3B;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return CPU_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *get_corename(void){
|
||||||
|
return cpuname[detect()];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write the architecture name ("MIPS64", no trailing newline) to stdout. */
void get_architecture(void){
  fputs("MIPS64", stdout);
}
|
||||||
|
|
||||||
|
void get_subarchitecture(void){
|
||||||
|
if(detect()==CPU_LOONGSON3A) {
|
||||||
|
printf("LOONGSON3A");
|
||||||
|
}else if(detect()==CPU_LOONGSON3B){
|
||||||
|
printf("LOONGSON3B");
|
||||||
|
}else if(detect()==CPU_I6400){
|
||||||
|
printf("I6400");
|
||||||
|
}else if(detect()==CPU_P6600){
|
||||||
|
printf("P6600");
|
||||||
|
}else{
|
||||||
|
printf("SICORTEX");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write the kernel sub-directory name ("mips64", no trailing newline) to
   stdout. */
void get_subdirname(void){
  fputs("mips64", stdout);
}
|
||||||
|
|
||||||
|
void get_cpuconfig(void){
|
||||||
|
if(detect()==CPU_LOONGSON3A) {
|
||||||
|
printf("#define LOONGSON3A\n");
|
||||||
|
printf("#define L1_DATA_SIZE 65536\n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 32\n");
|
||||||
|
printf("#define L2_SIZE 512488\n");
|
||||||
|
printf("#define L2_LINESIZE 32\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 4\n");
|
||||||
|
}else if(detect()==CPU_LOONGSON3B){
|
||||||
|
printf("#define LOONGSON3B\n");
|
||||||
|
printf("#define L1_DATA_SIZE 65536\n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 32\n");
|
||||||
|
printf("#define L2_SIZE 512488\n");
|
||||||
|
printf("#define L2_LINESIZE 32\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 4\n");
|
||||||
|
}else if(detect()==CPU_I6400){
|
||||||
|
printf("#define I6400\n");
|
||||||
|
printf("#define L1_DATA_SIZE 65536\n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 32\n");
|
||||||
|
printf("#define L2_SIZE 1048576\n");
|
||||||
|
printf("#define L2_LINESIZE 32\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 8\n");
|
||||||
|
}else if(detect()==CPU_P6600){
|
||||||
|
printf("#define P6600\n");
|
||||||
|
printf("#define L1_DATA_SIZE 65536\n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 32\n");
|
||||||
|
printf("#define L2_SIZE 1048576\n");
|
||||||
|
printf("#define L2_LINESIZE 32\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 8\n");
|
||||||
|
}else{
|
||||||
|
printf("#define SICORTEX\n");
|
||||||
|
printf("#define L1_DATA_SIZE 32768\n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 32\n");
|
||||||
|
printf("#define L2_SIZE 512488\n");
|
||||||
|
printf("#define L2_LINESIZE 32\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 8\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void get_libname(void){
|
||||||
|
if(detect()==CPU_LOONGSON3A) {
|
||||||
|
printf("loongson3a\n");
|
||||||
|
}else if(detect()==CPU_LOONGSON3B) {
|
||||||
|
printf("loongson3b\n");
|
||||||
|
}else if(detect()==CPU_I6400) {
|
||||||
|
printf("i6400\n");
|
||||||
|
}else if(detect()==CPU_P6600) {
|
||||||
|
printf("p6600\n");
|
||||||
|
}else{
|
||||||
|
printf("mips64\n");
|
||||||
|
}
|
||||||
|
}
|
|
@ -55,6 +55,7 @@
|
||||||
#define CPUTYPE_POWER6 5
|
#define CPUTYPE_POWER6 5
|
||||||
#define CPUTYPE_CELL 6
|
#define CPUTYPE_CELL 6
|
||||||
#define CPUTYPE_PPCG4 7
|
#define CPUTYPE_PPCG4 7
|
||||||
|
#define CPUTYPE_POWER8 8
|
||||||
|
|
||||||
char *cpuname[] = {
|
char *cpuname[] = {
|
||||||
"UNKNOWN",
|
"UNKNOWN",
|
||||||
|
@ -65,6 +66,7 @@ char *cpuname[] = {
|
||||||
"POWER6",
|
"POWER6",
|
||||||
"CELL",
|
"CELL",
|
||||||
"PPCG4",
|
"PPCG4",
|
||||||
|
"POWER8"
|
||||||
};
|
};
|
||||||
|
|
||||||
char *lowercpuname[] = {
|
char *lowercpuname[] = {
|
||||||
|
@ -76,6 +78,7 @@ char *lowercpuname[] = {
|
||||||
"power6",
|
"power6",
|
||||||
"cell",
|
"cell",
|
||||||
"ppcg4",
|
"ppcg4",
|
||||||
|
"power8"
|
||||||
};
|
};
|
||||||
|
|
||||||
char *corename[] = {
|
char *corename[] = {
|
||||||
|
@ -87,6 +90,7 @@ char *corename[] = {
|
||||||
"POWER6",
|
"POWER6",
|
||||||
"CELL",
|
"CELL",
|
||||||
"PPCG4",
|
"PPCG4",
|
||||||
|
"POWER8"
|
||||||
};
|
};
|
||||||
|
|
||||||
int detect(void){
|
int detect(void){
|
||||||
|
@ -115,7 +119,7 @@ int detect(void){
|
||||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
||||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||||
|
|
||||||
|
|
92
cpuid_x86.c
92
cpuid_x86.c
|
@ -636,6 +636,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||||
LD1.associative = 8;
|
LD1.associative = 8;
|
||||||
LD1.linesize = 64;
|
LD1.linesize = 64;
|
||||||
break;
|
break;
|
||||||
|
case 0x63 :
|
||||||
|
DTB.size = 2048;
|
||||||
|
DTB.associative = 4;
|
||||||
|
DTB.linesize = 32;
|
||||||
|
LDTB.size = 4096;
|
||||||
|
LDTB.associative= 4;
|
||||||
|
LDTB.linesize = 32;
|
||||||
case 0x66 :
|
case 0x66 :
|
||||||
LD1.size = 8;
|
LD1.size = 8;
|
||||||
LD1.associative = 4;
|
LD1.associative = 4;
|
||||||
|
@ -667,6 +674,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||||
LC1.size = 64;
|
LC1.size = 64;
|
||||||
LC1.associative = 8;
|
LC1.associative = 8;
|
||||||
break;
|
break;
|
||||||
|
case 0x76 :
|
||||||
|
ITB.size = 2048;
|
||||||
|
ITB.associative = 0;
|
||||||
|
ITB.linesize = 8;
|
||||||
|
LITB.size = 4096;
|
||||||
|
LITB.associative= 0;
|
||||||
|
LITB.linesize = 8;
|
||||||
case 0x77 :
|
case 0x77 :
|
||||||
LC1.size = 16;
|
LC1.size = 16;
|
||||||
LC1.associative = 4;
|
LC1.associative = 4;
|
||||||
|
@ -1110,6 +1124,9 @@ int get_cpuname(void){
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
switch (model) {
|
switch (model) {
|
||||||
|
case 7:
|
||||||
|
// Bay Trail
|
||||||
|
return CPUTYPE_ATOM;
|
||||||
case 10:
|
case 10:
|
||||||
case 14:
|
case 14:
|
||||||
// Ivy Bridge
|
// Ivy Bridge
|
||||||
|
@ -1172,6 +1189,11 @@ int get_cpuname(void){
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
return CPUTYPE_NEHALEM;
|
return CPUTYPE_NEHALEM;
|
||||||
|
case 12:
|
||||||
|
// Braswell
|
||||||
|
case 13:
|
||||||
|
// Avoton
|
||||||
|
return CPUTYPE_NEHALEM;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
|
@ -1197,8 +1219,35 @@ int get_cpuname(void){
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
return CPUTYPE_NEHALEM;
|
return CPUTYPE_NEHALEM;
|
||||||
|
case 7:
|
||||||
|
// Xeon Phi Knights Landing
|
||||||
|
if(support_avx())
|
||||||
|
#ifndef NO_AVX2
|
||||||
|
return CPUTYPE_HASWELL;
|
||||||
|
#else
|
||||||
|
return CPUTYPE_SANDYBRIDGE;
|
||||||
|
#endif
|
||||||
|
else
|
||||||
|
return CPUTYPE_NEHALEM;
|
||||||
|
case 12:
|
||||||
|
// Apollo Lake
|
||||||
|
return CPUTYPE_NEHALEM;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 9:
|
||||||
|
case 8:
|
||||||
|
switch (model) {
|
||||||
|
case 14: // Kaby Lake
|
||||||
|
if(support_avx())
|
||||||
|
#ifndef NO_AVX2
|
||||||
|
return CPUTYPE_HASWELL;
|
||||||
|
#else
|
||||||
|
return CPUTYPE_SANDYBRIDGE;
|
||||||
|
#endif
|
||||||
|
else
|
||||||
|
return CPUTYPE_NEHALEM;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 0x7:
|
case 0x7:
|
||||||
|
@ -1229,6 +1278,7 @@ int get_cpuname(void){
|
||||||
case 2:
|
case 2:
|
||||||
return CPUTYPE_OPTERON;
|
return CPUTYPE_OPTERON;
|
||||||
case 1:
|
case 1:
|
||||||
|
case 3:
|
||||||
case 10:
|
case 10:
|
||||||
return CPUTYPE_BARCELONA;
|
return CPUTYPE_BARCELONA;
|
||||||
case 6:
|
case 6:
|
||||||
|
@ -1245,6 +1295,11 @@ int get_cpuname(void){
|
||||||
return CPUTYPE_PILEDRIVER;
|
return CPUTYPE_PILEDRIVER;
|
||||||
else
|
else
|
||||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||||
|
case 5: // New EXCAVATOR CPUS
|
||||||
|
if(support_avx())
|
||||||
|
return CPUTYPE_EXCAVATOR;
|
||||||
|
else
|
||||||
|
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||||
case 0:
|
case 0:
|
||||||
switch(exmodel){
|
switch(exmodel){
|
||||||
case 1: //AMD Trinity
|
case 1: //AMD Trinity
|
||||||
|
@ -1674,6 +1729,11 @@ int get_coretype(void){
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
return CORE_NEHALEM;
|
return CORE_NEHALEM;
|
||||||
|
case 12:
|
||||||
|
// Braswell
|
||||||
|
case 13:
|
||||||
|
// Avoton
|
||||||
|
return CORE_NEHALEM;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
|
@ -1699,8 +1759,32 @@ int get_coretype(void){
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
return CORE_NEHALEM;
|
return CORE_NEHALEM;
|
||||||
}
|
case 7:
|
||||||
|
// Phi Knights Landing
|
||||||
|
if(support_avx())
|
||||||
|
#ifndef NO_AVX2
|
||||||
|
return CORE_HASWELL;
|
||||||
|
#else
|
||||||
|
return CORE_SANDYBRIDGE;
|
||||||
|
#endif
|
||||||
|
else
|
||||||
|
return CORE_NEHALEM;
|
||||||
|
case 12:
|
||||||
|
// Apollo Lake
|
||||||
|
return CORE_NEHALEM;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
case 9:
|
||||||
|
case 8:
|
||||||
|
if (model == 14) // Kaby Lake
|
||||||
|
if(support_avx())
|
||||||
|
#ifndef NO_AVX2
|
||||||
|
return CORE_HASWELL;
|
||||||
|
#else
|
||||||
|
return CORE_SANDYBRIDGE;
|
||||||
|
#endif
|
||||||
|
else
|
||||||
|
return CORE_NEHALEM;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1730,7 +1814,11 @@ int get_coretype(void){
|
||||||
return CORE_PILEDRIVER;
|
return CORE_PILEDRIVER;
|
||||||
else
|
else
|
||||||
return CORE_BARCELONA; //OS don't support AVX.
|
return CORE_BARCELONA; //OS don't support AVX.
|
||||||
|
case 5: // New EXCAVATOR
|
||||||
|
if(support_avx())
|
||||||
|
return CORE_EXCAVATOR;
|
||||||
|
else
|
||||||
|
return CORE_BARCELONA; //OS don't support AVX.
|
||||||
case 0:
|
case 0:
|
||||||
switch(exmodel){
|
switch(exmodel){
|
||||||
case 1: //AMD Trinity
|
case 1: //AMD Trinity
|
||||||
|
|
|
@ -0,0 +1,93 @@
|
||||||
|
/**************************************************************************
|
||||||
|
Copyright (c) 2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* Identifiers returned by detect(). Each value doubles as the index into
   the two name tables below. */
#define CPU_GENERIC  0
#define CPU_Z13      1

/* Upper-case core names, keyed by the CPU_* identifiers. */
static char *cpuname[] = {
  [CPU_GENERIC] = "ZARCH_GENERIC",
  [CPU_Z13]     = "Z13"
};

/* Lower-case counterparts of cpuname[], same indexing. */
static char *cpuname_lower[] = {
  [CPU_GENERIC] = "zarch_generic",
  [CPU_Z13]     = "z13"
};
|
||||||
|
|
||||||
|
int detect(void)
|
||||||
|
{
|
||||||
|
// return CPU_GENERIC;
|
||||||
|
return CPU_Z13;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void get_libname(void)
|
||||||
|
{
|
||||||
|
|
||||||
|
int d = detect();
|
||||||
|
printf("%s", cpuname_lower[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
char *get_corename(void)
|
||||||
|
{
|
||||||
|
return cpuname[detect()];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write the architecture name ("ZARCH", no trailing newline) to stdout. */
void get_architecture(void)
{
  fputs("ZARCH", stdout);
}
|
||||||
|
|
||||||
|
void get_subarchitecture(void)
|
||||||
|
{
|
||||||
|
int d = detect();
|
||||||
|
printf("%s", cpuname[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write the kernel sub-directory name ("zarch", no trailing newline) to
   stdout. */
void get_subdirname(void)
{
  fputs("zarch", stdout);
}
|
||||||
|
|
||||||
|
|
||||||
|
void get_cpuconfig(void)
|
||||||
|
{
|
||||||
|
|
||||||
|
int d = detect();
|
||||||
|
switch (d){
|
||||||
|
case CPU_GENERIC:
|
||||||
|
printf("#define ZARCH_GENERIC\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
break;
|
||||||
|
case CPU_Z13:
|
||||||
|
printf("#define Z13\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
6
ctest.c
6
ctest.c
|
@ -105,12 +105,16 @@ ARCH_X86_64
|
||||||
ARCH_POWER
|
ARCH_POWER
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__s390x__) || defined(__zarch__)
|
||||||
|
ARCH_ZARCH
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __mips64
|
#ifdef __mips64
|
||||||
ARCH_MIPS64
|
ARCH_MIPS64
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__mips32) || defined(__mips)
|
#if defined(__mips32) || defined(__mips)
|
||||||
ARCH_MIPS32
|
ARCH_MIPS
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __alpha
|
#ifdef __alpha
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
include_directories(${CMAKE_SOURCE_DIR})
|
include_directories(${PROJECT_SOURCE_DIR})
|
||||||
|
|
||||||
enable_language(Fortran)
|
enable_language(Fortran)
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,7 @@ ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o
|
||||||
all :: all1 all2 all3
|
all :: all1 all2 all3
|
||||||
|
|
||||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
||||||
|
ifndef CROSS
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
OMP_NUM_THREADS=2 ./xscblat1
|
OMP_NUM_THREADS=2 ./xscblat1
|
||||||
OMP_NUM_THREADS=2 ./xdcblat1
|
OMP_NUM_THREADS=2 ./xdcblat1
|
||||||
|
@ -53,8 +54,10 @@ else
|
||||||
OPENBLAS_NUM_THREADS=2 ./xccblat1
|
OPENBLAS_NUM_THREADS=2 ./xccblat1
|
||||||
OPENBLAS_NUM_THREADS=2 ./xzcblat1
|
OPENBLAS_NUM_THREADS=2 ./xzcblat1
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
all2: xscblat2 xdcblat2 xccblat2 xzcblat2
|
all2: xscblat2 xdcblat2 xccblat2 xzcblat2
|
||||||
|
ifndef CROSS
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
OMP_NUM_THREADS=2 ./xscblat2 < sin2
|
OMP_NUM_THREADS=2 ./xscblat2 < sin2
|
||||||
OMP_NUM_THREADS=2 ./xdcblat2 < din2
|
OMP_NUM_THREADS=2 ./xdcblat2 < din2
|
||||||
|
@ -66,8 +69,10 @@ else
|
||||||
OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
|
OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
|
||||||
OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
|
OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
all3: xscblat3 xdcblat3 xccblat3 xzcblat3
|
all3: xscblat3 xdcblat3 xccblat3 xzcblat3
|
||||||
|
ifndef CROSS
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
OMP_NUM_THREADS=2 ./xscblat3 < sin3
|
OMP_NUM_THREADS=2 ./xscblat3 < sin3
|
||||||
OMP_NUM_THREADS=2 ./xdcblat3 < din3
|
OMP_NUM_THREADS=2 ./xdcblat3 < din3
|
||||||
|
@ -88,6 +93,7 @@ else
|
||||||
OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
|
OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
|
||||||
OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
|
OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1365,8 +1365,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1365,8 +1365,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1335,8 +1335,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1339,8 +1339,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1350,7 +1350,7 @@
|
||||||
*
|
*
|
||||||
* Call the subroutine.
|
* Call the subroutine.
|
||||||
*
|
*
|
||||||
IF( SNAME( 4: 5 ).EQ.'mv' )THEN
|
IF( SNAME( 10: 11 ).EQ.'mv' )THEN
|
||||||
IF( FULL )THEN
|
IF( FULL )THEN
|
||||||
IF( TRACE )
|
IF( TRACE )
|
||||||
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
|
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
|
||||||
|
@ -1376,7 +1376,7 @@
|
||||||
CALL CZTPMV( IORDER, UPLO, TRANS, DIAG,
|
CALL CZTPMV( IORDER, UPLO, TRANS, DIAG,
|
||||||
$ N, AA, XX, INCX )
|
$ N, AA, XX, INCX )
|
||||||
END IF
|
END IF
|
||||||
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN
|
ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN
|
||||||
IF( FULL )THEN
|
IF( FULL )THEN
|
||||||
IF( TRACE )
|
IF( TRACE )
|
||||||
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
|
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
|
||||||
|
@ -1465,7 +1465,7 @@
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
IF( .NOT.NULL )THEN
|
IF( .NOT.NULL )THEN
|
||||||
IF( SNAME( 4: 5 ).EQ.'mv' )THEN
|
IF( SNAME( 10: 11 ).EQ.'mv' )THEN
|
||||||
*
|
*
|
||||||
* Check the result.
|
* Check the result.
|
||||||
*
|
*
|
||||||
|
@ -1473,7 +1473,7 @@
|
||||||
$ INCX, ZERO, Z, INCX, XT, G,
|
$ INCX, ZERO, Z, INCX, XT, G,
|
||||||
$ XX, EPS, ERR, FATAL, NOUT,
|
$ XX, EPS, ERR, FATAL, NOUT,
|
||||||
$ .TRUE. )
|
$ .TRUE. )
|
||||||
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN
|
ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN
|
||||||
*
|
*
|
||||||
* Compute approximation to original vector.
|
* Compute approximation to original vector.
|
||||||
*
|
*
|
||||||
|
@ -1611,7 +1611,7 @@
|
||||||
* .. Common blocks ..
|
* .. Common blocks ..
|
||||||
COMMON /INFOC/INFOT, NOUTC, OK
|
COMMON /INFOC/INFOT, NOUTC, OK
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
CONJ = SNAME( 5: 5 ).EQ.'c'
|
CONJ = SNAME( 11: 11 ).EQ.'c'
|
||||||
* Define the number of arguments.
|
* Define the number of arguments.
|
||||||
NARGS = 9
|
NARGS = 9
|
||||||
*
|
*
|
||||||
|
|
|
@ -1366,8 +1366,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1366,8 +1366,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
include_directories(${CMAKE_SOURCE_DIR})
|
include_directories(${PROJECT_SOURCE_DIR})
|
||||||
|
|
||||||
# sources that need to be compiled twice, once with no flags and once with LOWER
|
# sources that need to be compiled twice, once with no flags and once with LOWER
|
||||||
set(UL_SOURCES
|
set(UL_SOURCES
|
||||||
|
|
|
@ -119,7 +119,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
x = buffer;
|
x = buffer;
|
||||||
buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
|
buffer += ((COMPSIZE * args -> m + 3) & ~3);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef TRANS
|
#ifndef TRANS
|
||||||
|
@ -403,7 +403,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
|
||||||
|
|
||||||
if (num_cpu) {
|
if (num_cpu) {
|
||||||
queue[0].sa = NULL;
|
queue[0].sa = NULL;
|
||||||
queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE;
|
queue[0].sb = buffer + num_cpu * (((m + 3) & ~3) + 16) * COMPSIZE;
|
||||||
|
|
||||||
queue[num_cpu - 1].next = NULL;
|
queue[num_cpu - 1].next = NULL;
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15);
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15);
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
include_directories(${CMAKE_SOURCE_DIR})
|
include_directories(${PROJECT_SOURCE_DIR})
|
||||||
|
|
||||||
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa
|
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa
|
||||||
|
|
||||||
|
@ -48,8 +48,7 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
# TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination
|
# TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination
|
||||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type})
|
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type})
|
||||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type})
|
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type})
|
||||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type})
|
|
||||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type})
|
|
||||||
# Need to set CONJ for trmm and trsm
|
# Need to set CONJ for trmm and trsm
|
||||||
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type})
|
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type})
|
||||||
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type})
|
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type})
|
||||||
|
@ -72,6 +71,10 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
|
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
|
||||||
|
|
||||||
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||||
|
#herk
|
||||||
|
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type})
|
||||||
|
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type})
|
||||||
|
|
||||||
#hemm
|
#hemm
|
||||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
|
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
|
||||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
|
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
|
||||||
|
@ -96,6 +99,17 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
endif()
|
endif()
|
||||||
endif ()
|
endif ()
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
|
# for gemm3m
|
||||||
|
if(USE_GEMM3M)
|
||||||
|
foreach (GEMM_DEFINE ${GEMM_DEFINES})
|
||||||
|
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
|
||||||
|
GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE}" "gemm3m_${GEMM_DEFINE_LC}" false "" "" false ${float_type})
|
||||||
|
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||||
|
GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm3m_thread_${GEMM_DEFINE_LC}" false "" "" false ${float_type})
|
||||||
|
endif ()
|
||||||
|
endforeach ()
|
||||||
|
endif()
|
||||||
endif ()
|
endif ()
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
|
|
|
@ -316,7 +316,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
if (min_l > GEMM3M_Q) {
|
if (min_l > GEMM3M_Q) {
|
||||||
min_l = (min_l + 1) / 2;
|
min_l = (min_l + 1) / 2;
|
||||||
#ifdef UNROLL_X
|
#ifdef UNROLL_X
|
||||||
min_l = (min_l + UNROLL_X - 1) & ~(UNROLL_X - 1);
|
min_l = ((min_l + UNROLL_X - 1)/UNROLL_X) * UNROLL_X;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -326,7 +326,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else {
|
} else {
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -365,7 +365,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
@ -386,7 +386,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else {
|
} else {
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -429,7 +429,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
@ -451,7 +451,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else {
|
} else {
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -494,7 +494,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
|
|
@ -297,9 +297,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_l = GEMM_Q;
|
min_l = GEMM_Q;
|
||||||
} else {
|
} else {
|
||||||
if (min_l > GEMM_Q) {
|
if (min_l > GEMM_Q) {
|
||||||
min_l = (min_l / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
|
min_l = ((min_l / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
|
||||||
}
|
}
|
||||||
gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1));
|
gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
|
||||||
while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M;
|
while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -311,7 +311,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else {
|
} else {
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
|
||||||
} else {
|
} else {
|
||||||
l1stride = 0;
|
l1stride = 0;
|
||||||
}
|
}
|
||||||
|
@ -335,7 +335,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||||
else
|
else
|
||||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -367,7 +369,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
|
|
@ -365,7 +365,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
buffer[0] = sb;
|
buffer[0] = sb;
|
||||||
for (i = 1; i < DIVIDE_RATE; i++) {
|
for (i = 1; i < DIVIDE_RATE; i++) {
|
||||||
buffer[i] = buffer[i - 1] + GEMM3M_Q * ((div_n + GEMM3M_UNROLL_N - 1) & ~(GEMM3M_UNROLL_N - 1));
|
buffer[i] = buffer[i - 1] + GEMM3M_Q * (((div_n + GEMM3M_UNROLL_N - 1)/GEMM3M_UNROLL_N) * GEMM3M_UNROLL_N);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(ls = 0; ls < k; ls += min_l){
|
for(ls = 0; ls < k; ls += min_l){
|
||||||
|
@ -384,7 +384,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else {
|
} else {
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -482,7 +482,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
@ -618,7 +618,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
@ -754,7 +754,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM3M_P;
|
min_i = GEMM3M_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM3M_P) {
|
if (min_i > GEMM3M_P) {
|
||||||
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
|
min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
|
|
@ -189,7 +189,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef LOWER
|
#ifndef LOWER
|
||||||
|
@ -230,7 +230,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
|
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
|
||||||
|
@ -245,7 +245,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_start >= js) {
|
if (m_start >= js) {
|
||||||
|
@ -284,7 +284,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa);
|
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa);
|
||||||
|
@ -322,7 +322,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
aa = sb + min_l * (is - js) * COMPSIZE;
|
aa = sb + min_l * (is - js) * COMPSIZE;
|
||||||
|
@ -353,7 +353,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
aa = sb + min_l * (m_start - js) * COMPSIZE;
|
aa = sb + min_l * (m_start - js) * COMPSIZE;
|
||||||
|
@ -383,7 +383,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
aa = sb + min_l * (is - js) * COMPSIZE;
|
aa = sb + min_l * (is - js) * COMPSIZE;
|
||||||
|
|
|
@ -198,7 +198,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef LOWER
|
#ifndef LOWER
|
||||||
|
@ -239,7 +239,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
aa = sb + min_l * (is - js) * COMPSIZE;
|
aa = sb + min_l * (is - js) * COMPSIZE;
|
||||||
|
@ -303,7 +303,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
@ -375,7 +375,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is < js + min_j) {
|
if (is < js + min_j) {
|
||||||
|
@ -460,7 +460,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
|
|
@ -210,8 +210,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", mypos, m_from, m_to, n_from, n_to);
|
fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", mypos, m_from, m_to, n_from, n_to);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE
|
div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
|
||||||
|
|
||||||
buffer[0] = sb;
|
buffer[0] = sb;
|
||||||
for (i = 1; i < DIVIDE_RATE; i++) {
|
for (i = 1; i < DIVIDE_RATE; i++) {
|
||||||
|
@ -233,7 +232,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else {
|
} else {
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -253,8 +252,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
STOP_RPCC(copy_A);
|
STOP_RPCC(copy_A);
|
||||||
|
|
||||||
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE
|
div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
|
||||||
|
|
||||||
for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) {
|
for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) {
|
||||||
|
|
||||||
|
@ -353,9 +351,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
while (current >= 0) {
|
while (current >= 0) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE
|
div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
|
||||||
|
|
||||||
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
|
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
@ -412,7 +409,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
min_i = (((min_i + 1) / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
@ -425,8 +422,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
|
||||||
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE
|
div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
|
|
||||||
|
|
||||||
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
|
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
|
||||||
|
|
||||||
|
@ -602,9 +598,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
|
|
||||||
double di = (double)i;
|
double di = (double)i;
|
||||||
|
|
||||||
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
|
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) );
|
||||||
|
|
||||||
if (num_cpu == 0) width = n - ((n - width) & ~mask);
|
if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) );
|
||||||
|
|
||||||
if ((width > n - i) || (width < mask)) width = n - i;
|
if ((width > n - i) || (width < mask)) width = n - i;
|
||||||
|
|
||||||
|
@ -644,7 +640,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
|
|
||||||
double di = (double)i;
|
double di = (double)i;
|
||||||
|
|
||||||
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
|
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1));
|
||||||
|
|
||||||
if ((width > n - i) || (width < mask)) width = n - i;
|
if ((width > n - i) || (width < mask)) width = n - i;
|
||||||
|
|
||||||
|
|
|
@ -310,7 +310,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
buffer[0] = sb;
|
buffer[0] = sb;
|
||||||
for (i = 1; i < DIVIDE_RATE; i++) {
|
for (i = 1; i < DIVIDE_RATE; i++) {
|
||||||
buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1)) * COMPSIZE;
|
buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N * COMPSIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -331,7 +331,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else {
|
} else {
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
|
min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
|
||||||
} else {
|
} else {
|
||||||
if (args -> nthreads == 1) l1stride = 0;
|
if (args -> nthreads == 1) l1stride = 0;
|
||||||
}
|
}
|
||||||
|
@ -367,7 +367,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||||
else
|
else
|
||||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||||
|
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
@ -441,7 +443,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
min_i = GEMM_P;
|
min_i = GEMM_P;
|
||||||
} else
|
} else
|
||||||
if (min_i > GEMM_P) {
|
if (min_i > GEMM_P) {
|
||||||
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
|
min_i = (((min_i + 1) / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
|
|
@ -158,7 +158,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
|
||||||
|
|
||||||
int mm, nn;
|
int mm, nn;
|
||||||
|
|
||||||
mm = (loop & ~(GEMM_UNROLL_MN - 1));
|
mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
nn = MIN(GEMM_UNROLL_MN, n - loop);
|
nn = MIN(GEMM_UNROLL_MN, n - loop);
|
||||||
|
|
||||||
#ifndef LOWER
|
#ifndef LOWER
|
||||||
|
|
|
@ -149,7 +149,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
|
|
||||||
int mm, nn;
|
int mm, nn;
|
||||||
|
|
||||||
mm = (loop & ~(GEMM_UNROLL_MN - 1));
|
mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
nn = MIN(GEMM_UNROLL_MN, n - loop);
|
nn = MIN(GEMM_UNROLL_MN, n - loop);
|
||||||
|
|
||||||
#ifndef LOWER
|
#ifndef LOWER
|
||||||
|
|
|
@ -132,7 +132,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
|
||||||
|
|
||||||
int mm, nn;
|
int mm, nn;
|
||||||
|
|
||||||
mm = (loop & ~(GEMM_UNROLL_MN - 1));
|
mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
|
||||||
nn = MIN(GEMM_UNROLL_MN, n - loop);
|
nn = MIN(GEMM_UNROLL_MN, n - loop);
|
||||||
|
|
||||||
#ifndef LOWER
|
#ifndef LOWER
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue