Merge branch 'develop' into arm_soft_fp_abi

This commit is contained in:
Zhang Xianyi 2017-03-06 13:53:56 +08:00
commit 503dcbfde6
6787 changed files with 544722 additions and 88939 deletions

18
.gitignore vendored
View File

@ -14,6 +14,21 @@ lapack-3.4.2.tgz
lapack-netlib/make.inc lapack-netlib/make.inc
lapack-netlib/lapacke/include/lapacke_mangling.h lapack-netlib/lapacke/include/lapacke_mangling.h
lapack-netlib/TESTING/testing_results.txt lapack-netlib/TESTING/testing_results.txt
lapack-netlib/INSTALL/test*
lapack-netlib/TESTING/xeigtstc
lapack-netlib/TESTING/xeigtstd
lapack-netlib/TESTING/xeigtsts
lapack-netlib/TESTING/xeigtstz
lapack-netlib/TESTING/xlintstc
lapack-netlib/TESTING/xlintstd
lapack-netlib/TESTING/xlintstds
lapack-netlib/TESTING/xlintstrfc
lapack-netlib/TESTING/xlintstrfd
lapack-netlib/TESTING/xlintstrfs
lapack-netlib/TESTING/xlintstrfz
lapack-netlib/TESTING/xlintsts
lapack-netlib/TESTING/xlintstz
lapack-netlib/TESTING/xlintstzc
*.so *.so
*.so.* *.so.*
*.a *.a
@ -69,3 +84,6 @@ test/zblat3
build build
build.* build.*
*.swp *.swp
benchmark/*.goto
benchmark/smallscaling

View File

@ -24,7 +24,12 @@ before_install:
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi - if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi - if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE script:
- set -e
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
# whitelist # whitelist
branches: branches:

View File

@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4)
project(OpenBLAS) project(OpenBLAS)
set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 2) set(OpenBLAS_MINOR_VERSION 2)
set(OpenBLAS_PATCH_VERSION 16.dev) set(OpenBLAS_PATCH_VERSION 20.dev)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
enable_language(ASM) enable_language(ASM)
@ -30,10 +30,20 @@ set(NO_LAPACK 1)
set(NO_LAPACKE 1) set(NO_LAPACKE 1)
endif() endif()
if(BUILD_DEBUG) if(CMAKE_CONFIGURATION_TYPES) # multiconfig generator?
set(CMAKE_BUILD_TYPE Debug) set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
set(CMAKE_BUILD_TYPE
Debug Debug
Release Release
)
else() else()
set(CMAKE_BUILD_TYPE Release) if( NOT CMAKE_BUILD_TYPE )
if(BUILD_DEBUG)
set(CMAKE_BUILD_TYPE Debug)
else()
set(CMAKE_BUILD_TYPE Release)
endif()
endif()
endif() endif()
if(BUILD_WITHOUT_CBLAS) if(BUILD_WITHOUT_CBLAS)
@ -45,8 +55,8 @@ endif()
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.") message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake") include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
set(BLASDIRS interface driver/level2 driver/level3 driver/others) set(BLASDIRS interface driver/level2 driver/level3 driver/others)
@ -54,10 +64,6 @@ if (NOT DYNAMIC_ARCH)
list(APPEND BLASDIRS kernel) list(APPEND BLASDIRS kernel)
endif () endif ()
if (DEFINED UTEST_CHECK)
set(SANITY_CHECK 1)
endif ()
if (DEFINED SANITY_CHECK) if (DEFINED SANITY_CHECK)
list(APPEND BLASDIRS reference) list(APPEND BLASDIRS reference)
endif () endif ()
@ -110,6 +116,10 @@ if (${NO_STATIC} AND ${NO_SHARED})
message(FATAL_ERROR "Neither static nor shared are enabled.") message(FATAL_ERROR "Neither static nor shared are enabled.")
endif () endif ()
#Set default output directory
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) # get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
set(TARGET_OBJS "") set(TARGET_OBJS "")
foreach (SUBDIR ${SUBDIRS}) foreach (SUBDIR ${SUBDIRS})
@ -123,9 +133,9 @@ endforeach ()
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
if (NOT NOFORTRAN AND NOT NO_LAPACK) if (NOT NOFORTRAN AND NOT NO_LAPACK)
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE) if (NOT NO_LAPACKE)
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
endif () endif ()
endif () endif ()
@ -137,22 +147,36 @@ endif()
# add objects to the openblas lib # add objects to the openblas lib
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake") include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
# Set output for libopenblas
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
endforeach()
enable_testing()
add_subdirectory(utest)
if(NOT MSVC) if(NOT MSVC)
#only build shared library for MSVC #only build shared library for MSVC
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
if(SMP) add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
target_link_libraries(${OpenBLAS_LIBNAME} pthread) set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread) set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
if(SMP)
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
endif() endif()
#build test and ctest #build test and ctest
enable_testing()
add_subdirectory(test) add_subdirectory(test)
if(NOT NO_CBLAS) if(NOT NO_CBLAS)
add_subdirectory(ctest) add_subdirectory(ctest)
@ -188,3 +212,27 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
#endif #endif
# @touch lib.grd # @touch lib.grd
# Install project
# Install libraries
install(TARGETS ${OpenBLAS_LIBNAME}
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib )
# Install include files
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.h")
install (FILES ${INCLUDE_FILES} DESTINATION include)
if(NOT MSVC)
install (TARGETS ${OpenBLAS_LIBNAME}_static DESTINATION lib)
endif()
include(FindPkgConfig QUIET)
if(PKG_CONFIG_FOUND)
set(prefix ${CMAKE_INSTALL_PREFIX})
set(libdir ${CMAKE_INSTALL_PREFIX}/lib)
set(includedir ${CMAKE_INSTALL_PREFIX}/include)
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION lib/pkgconfig/)
endif()

View File

@ -121,6 +121,17 @@ In chronological order:
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). * [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
ARMv8 support. ARMv8 support.
* Jerome Robert <jeromerobert@gmx.com>
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
* [2015-12-28] Allow to force the number of parallel make job
* [2015-12-28] Fix detection of AMD E2-3200 detection
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what expected
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)
* Dan Kortschak * Dan Kortschak
* [2015-01-07] Added test for drotmg bug #484. * [2015-01-07] Added test for drotmg bug #484.
@ -130,5 +141,29 @@ In chronological order:
* Martin Koehler <https://github.com/grisuthedragon/> * Martin Koehler <https://github.com/grisuthedragon/>
* [2015-09-07] Improved imatcopy * [2015-09-07] Improved imatcopy
* [Your name or handle] <[email or website]> * Ashwin Sekhar T K <https://github.com/ashwinyes/>
* [Date] [Brief summary of your changes] * [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
* [2015-11-20] lapack-test fixes for Cortex-A57
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
* theoractice <https://github.com/theoractice/>
* [2016-03-20] Fix compiler error in VisualStudio with CMake
* [2016-03-22] Fix access violation on Windows while static linking
* Paul Mustière <https://github.com/buffer51/>
* [2016-02-04] Fix Android build on ARMV7
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8
* Shivraj Patil <https://github.com/sva-img/>
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA
* Kaustubh Raste <https://github.com/ksraste/>
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA
* Abdelrauf <https://github.com/quickwritereader>
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
* [2017-02-26] ztrmm kernel for IBM z13

View File

@ -1,4 +1,99 @@
OpenBLAS ChangeLog OpenBLAS ChangeLog
====================================================================
Version 0.2.19
1-Sep-2016
common:
* Improved cross compiling.
* Fix the bug on musl libc.
POWER:
* Optimize BLAS on Power8
* Fixed Julia+OpenBLAS bugs on Power8
MIPS:
* Optimize BLAS on MIPS P5600 and I6400 (Thanks, Shivraj Patil, Kaustubh Raste)
ARM:
* Improved on ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
====================================================================
Version 0.2.18
12-Apr-2016
common:
* If you set MAKE_NB_JOBS flag less or equal than zero,
make will be without -j.
x86/x86_64:
* Support building Visual Studio static library. (#813, Thanks, theoractice)
* Fix bugs to pass buidbot CI tests (http://build.openblas.net)
ARM:
* Provide DGEMM 8x4 kernel for Cortex-A57 (Thanks, Ashwin Sekhar T K)
POWER:
* Optimize S and C BLAS3 on Power8
* Optimize BLAS2/1 on Power8
====================================================================
Version 0.2.17
20-Mar-2016
common:
* Enable BUILD_LAPACK_DEPRECATED=1 by default.
====================================================================
Version 0.2.16
15-Mar-2016
common:
* Avoid potential getenv segfault. (#716)
* Import LAPACK svn bugfix #142-#147,#150-#155
x86/x86_64:
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller
* Fix bug with scipy linalg test.
ARM:
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
POWER:
* Optimize D and Z BLAS3 functions for Power8.
====================================================================
Version 0.2.16.rc1
23-Feb-2016
common:
* Upgrade LAPACK to 3.6.0 version.
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build
LAPACK deprecated functions.
* Add MAKE_NB_JOBS option in Makefile.
Force number of make jobs.This is particularly
useful when using distcc. (#735. Thanks, Jerome Robert.)
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor).
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert)
* Improve small zger, zgemv, ztrmv using stack alloction (#727. Thanks, Jerome Robert)
* Let openblas_get_num_threads return the number of active threads.
(#760. Thanks, Jerome Robert)
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen)
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey)
* Update scipy benchmark script. (#745. Thanks, John Kirkham)
x86/x86_64:
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller.
* Detect Intel Avoton.
* Detect AMD Trinity, Richland, E2-3200.
* Fix gemv performance bug on Mac OSX Intel Haswell.
* Fix some bugs with CMake and Visual Studio
ARM:
* Support and optimize Cortex-A57 AArch64.
(#686. Thanks, Ashwin Sekhar TK)
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere)
* Update ARMV6 kernels.
POWER:
* Fix detection of POWER architecture
(#684. Thanks, Sebastien Villemot)
==================================================================== ====================================================================
Version 0.2.15 Version 0.2.15
27-Oct-2015 27-Oct-2015

View File

@ -7,10 +7,6 @@ ifneq ($(DYNAMIC_ARCH), 1)
BLASDIRS += kernel BLASDIRS += kernel
endif endif
ifdef UTEST_CHECK
SANITY_CHECK = 1
endif
ifdef SANITY_CHECK ifdef SANITY_CHECK
BLASDIRS += reference BLASDIRS += reference
endif endif
@ -85,22 +81,22 @@ endif
shared : shared :
ifndef NO_SHARED ifndef NO_SHARED
ifeq ($(OSNAME), Linux) ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
@$(MAKE) -C exports so @$(MAKE) -C exports so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so @ln -fs $(LIBSONAME) $(LIBPREFIX).so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) @ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif endif
ifeq ($(OSNAME), FreeBSD) ifeq ($(OSNAME), FreeBSD)
@$(MAKE) -C exports so @$(MAKE) -C exports so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so @ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif endif
ifeq ($(OSNAME), NetBSD) ifeq ($(OSNAME), NetBSD)
@$(MAKE) -C exports so @$(MAKE) -C exports so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so @ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif endif
ifeq ($(OSNAME), Darwin) ifeq ($(OSNAME), Darwin)
@$(MAKE) -C exports dyn @$(MAKE) -C exports dyn
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib @ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
endif endif
ifeq ($(OSNAME), WINNT) ifeq ($(OSNAME), WINNT)
@$(MAKE) -C exports dll @$(MAKE) -C exports dll
@ -112,21 +108,15 @@ endif
tests : tests :
ifndef NOFORTRAN ifndef NOFORTRAN
ifndef TARGET
ifndef CROSS
touch $(LIBNAME) touch $(LIBNAME)
ifndef NO_FBLAS ifndef NO_FBLAS
$(MAKE) -C test all $(MAKE) -C test all
ifdef UTEST_CHECK
$(MAKE) -C utest all $(MAKE) -C utest all
endif endif
endif
ifndef NO_CBLAS ifndef NO_CBLAS
$(MAKE) -C ctest all $(MAKE) -C ctest all
endif endif
endif endif
endif
endif
libs : libs :
ifeq ($(CORE), UNKOWN) ifeq ($(CORE), UNKOWN)
@ -249,16 +239,23 @@ ifndef NOFORTRAN
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(FC), gfortran) ifeq ($(F_COMPILER), GFORTRAN)
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
ifdef SMP ifdef SMP
ifeq ($(OSNAME), WINNT)
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
else else
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif endif
else else
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif endif
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif endif
@ -281,18 +278,28 @@ lapack-timing : large.tgz timing.tgz
ifndef NOFORTRAN ifndef NOFORTRAN
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING) (cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz ) (cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
make -C $(NETLIB_LAPACK_DIR)/TIMING $(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING
endif endif
lapack-test : lapack-test :
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) (cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
endif
lapack-runtest:
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
blas-test: blas-test:
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out) (cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out) (cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)

View File

@ -1,4 +1,4 @@
# ifeq logical or ifeq logical or
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15)) ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
ifeq ($(OSNAME), Android) ifeq ($(OSNAME), Android)
CCOMMON_OPT += -mfpu=neon -march=armv7-a CCOMMON_OPT += -mfpu=neon -march=armv7-a
@ -11,9 +11,14 @@ endif
ifeq ($(CORE), ARMV7) ifeq ($(CORE), ARMV7)
ifeq ($(OSNAME), Android) ifeq ($(OSNAME), Android)
ifeq ($(ARM_SOFTFP), 1)
CCOMMON_OPT += -mfpu=neon -march=armv7-a CCOMMON_OPT += -mfpu=neon -march=armv7-a
FCOMMON_OPT += -mfpu=neon -march=armv7-a FCOMMON_OPT += -mfpu=neon -march=armv7-a
else else
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
endif
else
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
endif endif
@ -29,5 +34,3 @@ ifeq ($(CORE), ARMV5)
CCOMMON_OPT += -marm -march=armv5 CCOMMON_OPT += -marm -march=armv5
FCOMMON_OPT += -marm -march=armv5 FCOMMON_OPT += -marm -march=armv5
endif endif

View File

@ -9,3 +9,17 @@ CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
endif endif
ifeq ($(CORE), VULCAN)
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
endif
ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
endif
ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
endif

View File

@ -12,6 +12,7 @@ OPENBLAS_BUILD_DIR := $(CURDIR)
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
.PHONY : install .PHONY : install
.NOTPARALLEL : install .NOTPARALLEL : install
@ -20,110 +21,122 @@ lib.grd :
$(error OpenBLAS: Please run "make" firstly) $(error OpenBLAS: Please run "make" firstly)
install : lib.grd install : lib.grd
@-mkdir -p $(DESTDIR)$(PREFIX) @-mkdir -p "$(DESTDIR)$(PREFIX)"
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) @-mkdir -p "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)"
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @-mkdir -p "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR) @-mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR) @-mkdir -p "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) @echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
#for inc #for inc
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h @echo \#ifndef OPENBLAS_CONFIG_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h @echo \#define OPENBLAS_CONFIG_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h @$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h @echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h @cat openblas_config_template.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h @echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) @echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h @echo \#ifndef OPENBLAS_F77BLAS_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h @echo \#define OPENBLAS_F77BLAS_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h @echo \#include \"openblas_config.h\" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h @cat common_interface.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h @echo \#endif >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
ifndef NO_CBLAS ifndef NO_CBLAS
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) @echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h @sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
endif endif
ifndef NO_LAPACKE ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
endif endif
#for install static library #for install static library
ifndef NO_STATIC ifndef NO_STATIC
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif endif
#for install shared library #for install shared library
ifndef NO_SHARED ifndef NO_SHARED
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
ifeq ($(OSNAME), Linux) ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif endif
ifeq ($(OSNAME), FreeBSD) ifeq ($(OSNAME), FreeBSD)
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif endif
ifeq ($(OSNAME), NetBSD) ifeq ($(OSNAME), NetBSD)
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif endif
ifeq ($(OSNAME), Darwin) ifeq ($(OSNAME), Darwin)
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) @-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
endif endif
ifeq ($(OSNAME), WINNT) ifeq ($(OSNAME), WINNT)
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR) @-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @-cp $(LIBDLLNAME).a "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
endif endif
ifeq ($(OSNAME), CYGWIN_NT) ifeq ($(OSNAME), CYGWIN_NT)
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR) @-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
endif endif
endif endif
#Generating openblas.pc
@echo Generating openblas.pc in $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@echo 'version='$(VERSION) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@echo 'extralib='$(EXTRALIB) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@cat openblas.pc.in >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
#Generating OpenBLASConfig.cmake #Generating OpenBLASConfig.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) @echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) @echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) @echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
ifndef NO_SHARED ifndef NO_SHARED
#ifeq logical or #ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD)) ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif endif
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT)) ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif endif
ifeq ($(OSNAME), Darwin) ifeq ($(OSNAME), Darwin)
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif endif
else else
#only static #only static
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif endif
#Generating OpenBLASConfigVersion.cmake #Generating OpenBLASConfigVersion.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) @echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo "set (PACKAGE_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo "else ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo " set (PACKAGE_VERSION_EXACT TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo Install OK! @echo Install OK!

3
Makefile.mips Normal file
View File

@ -0,0 +1,3 @@
ifdef BINARY64
else
endif

View File

@ -1,4 +1,26 @@
# CCOMMON_OPT += -DALLOC_SHM
ifdef USE_THREAD
ifeq ($(USE_THREAD), 0)
USE_OPENMP = 0
else
USE_OPENMP = 1
endif
else
USE_OPENMP = 1
endif
ifeq ($(CORE), POWER8)
ifeq ($(USE_OPENMP), 1)
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
else
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
endif
endif
FLAMEPATH = $(HOME)/flame/lib FLAMEPATH = $(HOME)/flame/lib
@ -16,6 +38,16 @@ else
endif endif
endif endif
#Either uncomment below line or run make with `USE_MASS=1` to enable support of MASS library
#USE_MASS = 1
ifeq ($(USE_MASS), 1)
# Path to MASS libs, change it if the libs are installed at any other location
MASSPATH = /opt/ibm/xlmass/8.1.3/lib
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS
EXTRALIB += -L$(MASSPATH) -lmass -lmassvp8 -lmass_simdp8
endif
ifdef BINARY64 ifdef BINARY64

View File

@ -17,14 +17,26 @@ ifdef CPUIDEMU
EXFLAGS = -DCPUIDEMU -DVENDOR=99 EXFLAGS = -DCPUIDEMU -DVENDOR=99
endif endif
ifeq ($(TARGET), P5600)
TARGET_FLAGS = -mips32r5
endif
ifeq ($(TARGET), I6400)
TARGET_FLAGS = -mips64r6
endif
ifeq ($(TARGET), P6600)
TARGET_FLAGS = -mips64r6
endif
all: getarch_2nd all: getarch_2nd
./getarch_2nd 0 >> $(TARGET_MAKE) ./getarch_2nd 0 >> $(TARGET_MAKE)
./getarch_2nd 1 >> $(TARGET_CONF) ./getarch_2nd 1 >> $(TARGET_CONF)
config.h : c_check f_check getarch config.h : c_check f_check getarch
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS)
ifneq ($(ONLY_CBLAS), 1) ifneq ($(ONLY_CBLAS), 1)
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
else else
#When we only build CBLAS, we set NOFORTRAN=2 #When we only build CBLAS, we set NOFORTRAN=2
echo "NOFORTRAN=2" >> $(TARGET_MAKE) echo "NOFORTRAN=2" >> $(TARGET_MAKE)

View File

@ -3,7 +3,7 @@
# #
# This library's version # This library's version
VERSION = 0.2.16.dev VERSION = 0.2.20.dev
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@ -52,6 +52,7 @@ VERSION = 0.2.16.dev
# USE_THREAD = 0 # USE_THREAD = 0
# If you're going to use this library with OpenMP, please comment it in. # If you're going to use this library with OpenMP, please comment it in.
# This flag is always set for POWER8. Don't modify the flag
# USE_OPENMP = 1 # USE_OPENMP = 1
# You can define maximum number of threads. Basically it should be # You can define maximum number of threads. Basically it should be
@ -79,6 +80,9 @@ VERSION = 0.2.16.dev
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in. # If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
# NO_LAPACKE = 1 # NO_LAPACKE = 1
# Build LAPACK Deprecated functions since LAPACK 3.6.0
BUILD_LAPACK_DEPRECATED = 1
# If you want to use legacy threaded Level 3 implementation. # If you want to use legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1 # USE_SIMPLE_THREADED_LEVEL3 = 1
@ -108,6 +112,13 @@ NO_AFFINITY = 1
# Don't use parallel make. # Don't use parallel make.
# NO_PARALLEL_MAKE = 1 # NO_PARALLEL_MAKE = 1
# Force number of make jobs. The default is the number of logical CPU of the host.
# This is particularly useful when using distcc.
# A negative value will disable adding a -j flag to make, allowing to use a parent
# make -j value. This is useful to call OpenBLAS make from an other project
# makefile
# MAKE_NB_JOBS = 2
# If you would like to know minute performance report of GotoBLAS. # If you would like to know minute performance report of GotoBLAS.
# FUNCTION_PROFILE = 1 # FUNCTION_PROFILE = 1
@ -138,19 +149,17 @@ NO_AFFINITY = 1
# slow (Not implemented yet). # slow (Not implemented yet).
# SANITY_CHECK = 1 # SANITY_CHECK = 1
# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable
# SANITY_CHECK to compare the result with reference BLAS.
# UTEST_CHECK = 1
# The installation directory. # The installation directory.
# PREFIX = /opt/OpenBLAS # PREFIX = /opt/OpenBLAS
# Common Optimization Flag; # Common Optimization Flag;
# The default -O2 is enough. # The default -O2 is enough.
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
# COMMON_OPT = -O2 # COMMON_OPT = -O2
# gfortran option for LAPACK # gfortran option for LAPACK
# enable this flag only on 64bit Linux and if you need a thread safe lapack library # enable this flag only on 64bit Linux and if you need a thread safe lapack library
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
# FCOMMON_OPT = -frecursive # FCOMMON_OPT = -frecursive
# Profiling flags # Profiling flags
@ -159,10 +168,11 @@ COMMON_PROF = -pg
# Build Debug version # Build Debug version
# DEBUG = 1 # DEBUG = 1
# Improve GEMV and GER for small matrices by stack allocation. # Set maximum stack allocation.
# For details, https://github.com/xianyi/OpenBLAS/pull/482 # The default value is 2048. 0 disable stack allocation a may reduce GER and GEMV
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
# #
MAX_STACK_ALLOC=2048 # MAX_STACK_ALLOC = 0
# Add a prefix or suffix to all exported symbol names in the shared library. # Add a prefix or suffix to all exported symbol names in the shared library.
# Avoid conflicts with other BLAS libraries, especially when using # Avoid conflicts with other BLAS libraries, especially when using

View File

@ -139,6 +139,10 @@ NO_PARALLEL_MAKE=0
endif endif
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE) GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
ifdef MAKE_NB_JOBS
GETARCH_FLAGS += -DMAKE_NB_JOBS=$(MAKE_NB_JOBS)
endif
ifeq ($(HOSTCC), loongcc) ifeq ($(HOSTCC), loongcc)
GETARCH_FLAGS += -static GETARCH_FLAGS += -static
endif endif
@ -155,7 +159,7 @@ ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1 export GOTOBLAS_MAKEFILE = 1
# Generating Makefile.conf and config.h # Generating Makefile.conf and config.h
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all) DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
ifndef TARGET_CORE ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf include $(TOPDIR)/Makefile.conf
@ -213,7 +217,9 @@ endif
# #
ifeq ($(OSNAME), Darwin) ifeq ($(OSNAME), Darwin)
ifndef MACOSX_DEPLOYMENT_TARGET
export MACOSX_DEPLOYMENT_TARGET=10.6 export MACOSX_DEPLOYMENT_TARGET=10.6
endif
MD5SUM = md5 -r MD5SUM = md5 -r
endif endif
@ -292,12 +298,14 @@ endif
ifneq ($(OSNAME), WINNT) ifneq ($(OSNAME), WINNT)
ifneq ($(OSNAME), CYGWIN_NT) ifneq ($(OSNAME), CYGWIN_NT)
ifneq ($(OSNAME), Interix) ifneq ($(OSNAME), Interix)
ifneq ($(OSNAME), Android)
ifdef SMP ifdef SMP
EXTRALIB += -lpthread EXTRALIB += -lpthread
endif endif
endif endif
endif endif
endif endif
endif
# ifeq logical or # ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix)) ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
@ -324,7 +332,8 @@ ifdef SANITY_CHECK
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU) CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
endif endif
ifdef MAX_STACK_ALLOC MAX_STACK_ALLOC ?= 2048
ifneq ($(MAX_STACK_ALLOC), 0)
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC) CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
endif endif
@ -374,7 +383,7 @@ FCOMMON_OPT += -m128bit-long-double
endif endif
ifeq ($(C_COMPILER), CLANG) ifeq ($(C_COMPILER), CLANG)
EXPRECISION = 1 EXPRECISION = 1
CCOMMON_OPT += -DEXPRECISION CCOMMON_OPT += -DEXPRECISION
FCOMMON_OPT += -m128bit-long-double FCOMMON_OPT += -m128bit-long-double
endif endif
endif endif
@ -388,7 +397,7 @@ endif
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
#check #check
ifeq ($(USE_THREAD), 0) ifeq ($(USE_THREAD), 0)
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.) $(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
endif endif
@ -455,7 +464,7 @@ endif
endif endif
endif endif
ifeq ($(ARCH), mips64) ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
NO_BINARY_MODE = 1 NO_BINARY_MODE = 1
endif endif
@ -506,13 +515,16 @@ endif
ifdef NO_BINARY_MODE ifdef NO_BINARY_MODE
ifeq ($(ARCH), mips64) ifeq ($(ARCH), $(filter $(ARCH),mips64))
ifdef BINARY64 ifdef BINARY64
CCOMMON_OPT += -mabi=64 CCOMMON_OPT += -mabi=64
else else
CCOMMON_OPT += -mabi=n32 CCOMMON_OPT += -mabi=n32
endif endif
BINARY_DEFINED = 1 BINARY_DEFINED = 1
else ifeq ($(ARCH), $(filter $(ARCH),mips))
CCOMMON_OPT += -mabi=32
BINARY_DEFINED = 1
endif endif
ifeq ($(CORE), LOONGSON3A) ifeq ($(CORE), LOONGSON3A)
@ -525,6 +537,21 @@ CCOMMON_OPT += -march=mips64
FCOMMON_OPT += -march=mips64 FCOMMON_OPT += -march=mips64
endif endif
ifeq ($(CORE), P5600)
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
endif
ifeq ($(CORE), I6400)
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
endif
ifeq ($(CORE), P6600)
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
endif
ifeq ($(OSNAME), AIX) ifeq ($(OSNAME), AIX)
BINARY_DEFINED = 1 BINARY_DEFINED = 1
endif endif
@ -593,12 +620,14 @@ ifneq ($(NO_LAPACK), 1)
EXTRALIB += -lgfortran EXTRALIB += -lgfortran
endif endif
ifdef NO_BINARY_MODE ifdef NO_BINARY_MODE
ifeq ($(ARCH), mips64) ifeq ($(ARCH), $(filter $(ARCH),mips64))
ifdef BINARY64 ifdef BINARY64
FCOMMON_OPT += -mabi=64 FCOMMON_OPT += -mabi=64
else else
FCOMMON_OPT += -mabi=n32 FCOMMON_OPT += -mabi=n32
endif endif
else ifeq ($(ARCH), $(filter $(ARCH),mips))
FCOMMON_OPT += -mabi=32
endif endif
else else
ifdef BINARY64 ifdef BINARY64
@ -681,21 +710,7 @@ FCOMMON_OPT += -i8
endif endif
endif endif
endif endif
ifneq ($(ARCH), mips64)
ifndef BINARY64
FCOMMON_OPT += -m32
else
FCOMMON_OPT += -m64
endif
else
ifdef BINARY64
FCOMMON_OPT += -mabi=64
else
FCOMMON_OPT += -mabi=n32
endif
endif
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -mp FCOMMON_OPT += -mp
endif endif
@ -711,7 +726,7 @@ endif
endif endif
endif endif
ifeq ($(ARCH), mips64) ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
ifndef BINARY64 ifndef BINARY64
FCOMMON_OPT += -n32 FCOMMON_OPT += -n32
else else
@ -741,7 +756,7 @@ endif
ifeq ($(C_COMPILER), OPEN64) ifeq ($(C_COMPILER), OPEN64)
ifeq ($(ARCH), mips64) ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
ifndef BINARY64 ifndef BINARY64
CCOMMON_OPT += -n32 CCOMMON_OPT += -n32
else else
@ -963,17 +978,18 @@ ifeq ($(OSNAME), SunOS)
TAR = gtar TAR = gtar
PATCH = gpatch PATCH = gpatch
GREP = ggrep GREP = ggrep
AWK = nawk
else else
TAR = tar TAR = tar
PATCH = patch PATCH = patch
GREP = grep GREP = grep
AWK = awk
endif endif
ifndef MD5SUM ifndef MD5SUM
MD5SUM = md5sum MD5SUM = md5sum
endif endif
AWK = awk
REVISION = -r$(VERSION) REVISION = -r$(VERSION)
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION))) MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
@ -982,16 +998,25 @@ ifeq ($(DEBUG), 1)
COMMON_OPT += -g COMMON_OPT += -g
endif endif
ifeq ($(DEBUG), 1)
FCOMMON_OPT += -g
endif
ifndef COMMON_OPT ifndef COMMON_OPT
COMMON_OPT = -O2 COMMON_OPT = -O2
endif endif
ifndef FCOMMON_OPT
FCOMMON_OPT = -O2 -frecursive
endif
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF) override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF) override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES = #MAKEOVERRIDES =
#For LAPACK Fortran codes. #For LAPACK Fortran codes.
@ -1120,6 +1145,8 @@ export HAVE_VFP
export HAVE_VFPV3 export HAVE_VFPV3
export HAVE_VFPV4 export HAVE_VFPV4
export HAVE_NEON export HAVE_NEON
export HAVE_MSA
export MSA_FLAGS
export KERNELDIR export KERNELDIR
export FUNCTION_PROFILE export FUNCTION_PROFILE
export TARGET_CORE export TARGET_CORE
@ -1181,4 +1208,3 @@ SUNPATH = /opt/sunstudio12.1
else else
SUNPATH = /opt/SUNWspro SUNPATH = /opt/SUNWspro
endif endif

6
Makefile.zarch Normal file
View File

@ -0,0 +1,6 @@
ifeq ($(CORE), Z13)
CCOMMON_OPT += -march=z13 -mzvector
FCOMMON_OPT += -march=z13 -mzvector
endif

View File

@ -43,6 +43,35 @@ On X86 box, compile this library for loongson3a CPU with loongcc (based on Open6
make DEBUG=1 make DEBUG=1
### Compile with MASS Support on Power CPU (Optional dependency)
[IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library consists of a set of mathematical functions for C, C++, and
Fortran-language applications that are tuned for optimum performance on POWER architectures. OpenBLAS with MASS requires 64-bit, little-endian OS on POWER.
The library can be installed as below -
* On Ubuntu:
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list
sudo apt-get update
sudo apt-get install libxlmass-devel.8.1.3
* On RHEL/CentOS:
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key
sudo rpm --import repomd.xml.key
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/
sudo yum install libxlmass-devel.8.1.3
After installing MASS library, compile openblas with USE_MASS=1.
Example:
Compiling on Power8 with MASS support -
make USE_MASS=1 TARGET=POWER8
### Install to the directory (optional) ### Install to the directory (optional)
Example: Example:
@ -75,12 +104,18 @@ Please read GotoBLAS_01Readme.txt
#### ARM64: #### ARM64:
- **ARMV8**: Experimental - **ARMV8**: Experimental
- **ARM Cortex-A57**: Experimental
#### IBM zEnterprise System:
- **Z13**: blas3 for double
### Support OS: ### Support OS:
- **GNU/Linux** - **GNU/Linux**
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>. - **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X. - **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
- **FreeBSD**: Supported by community. We didn't test the library on this OS. - **FreeBSD**: Supported by community. We didn't test the library on this OS.
- **Android**: Supported by community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
## Usages ## Usages
Link with libopenblas.a or -lopenblas for shared library. Link with libopenblas.a or -lopenblas for shared library.

View File

@ -53,26 +53,34 @@ PPC440
PPC440FP2 PPC440FP2
CELL CELL
3.MIPS64 CPU: 3.MIPS CPU:
P5600
4.MIPS64 CPU:
SICORTEX SICORTEX
LOONGSON3A LOONGSON3A
LOONGSON3B LOONGSON3B
I6400
P6600
4.IA64 CPU: 5.IA64 CPU:
ITANIUM2 ITANIUM2
5.SPARC CPU: 6.SPARC CPU:
SPARC SPARC
SPARCV7 SPARCV7
6.ARM CPU: 7.ARM CPU:
CORTEXA15 CORTEXA15
CORTEXA9 CORTEXA9
ARMV7 ARMV7
ARMV6 ARMV6
ARMV5 ARMV5
7.ARM 64-bit CPU: 8.ARM 64-bit CPU:
ARMV8 ARMV8
CORTEXA57 CORTEXA57
VULCAN
THUNDERX
THUNDERX2T99

199
USAGE.md Normal file
View File

@ -0,0 +1,199 @@
# Notes on OpenBLAS usage
## Usage
#### Program is Terminated. Because you tried to allocate too many memory regions
In OpenBLAS, we mange a pool of memory buffers and allocate the number of
buffers as the following.
```
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
```
This error indicates that the program exceeded the number of buffers.
Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
`MAX_CPU_NUMBER=NUM_THREADS`.
#### How can I use OpenBLAS in multi-threaded applications?
If your application is already multi-threaded, it will conflict with OpenBLAS
multi-threading. Thus, you must set OpenBLAS to use single thread in any of the
following ways:
* `export OPENBLAS_NUM_THREADS=1` in the environment variables.
* Call `openblas_set_num_threads(1)` in the application on runtime.
* Build OpenBLAS single thread version, e.g. `make USE_THREAD=0`
If the application is parallelized by OpenMP, please use OpenBLAS built with
`USE_OPENMP=1`
#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH
The environment variable which control the kernel selection is
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`) e.g. `export
OPENBLAS_CORETYPE=Haswell` and the function `char* openblas_get_corename()`
returns the used target.
#### How could I disable OpenBLAS threading affinity on runtime?
You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment
variable to disable threading affinity on runtime. For example, before the
running,
```
export OPENBLAS_MAIN_FREE=1
```
Alternatively, you can disable affinity feature with enabling `NO_AFFINITY=1`
in `Makefile.rule`.
## Linking with the library
* Link with shared library
`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas`
If the library is multithreaded, please add `-lpthread`. If the library
contains LAPACK functions, please add `-lgfortran` or other Fortran libs.
* Link with static library
`gcc -o test test.c /your/path/libopenblas.a`
You can download `test.c` from https://gist.github.com/xianyi/5780018
On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by
default), custom programs statically linked against `libopenblas.a` should also
link with the pthread library e.g.:
```
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread
```
Failing to add the `-lpthread` flag will cause errors such as:
```
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory':
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock'
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock'
...
```
## Code examples
#### Call CBLAS interface
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656
```
#include <cblas.h>
#include <stdio.h>
void main()
{
int i=0;
double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);
for(i=0; i<9; i++)
printf("%lf ", C[i]);
printf("\n");
}
```
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran`
#### Call BLAS Fortran interface
This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018
```
#include "stdio.h"
#include "stdlib.h"
#include "sys/time.h"
#include "time.h"
extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);
int main(int argc, char* argv[])
{
int i;
printf("test!\n");
if(argc<4){
printf("Input Error\n");
return 1;
}
int m = atoi(argv[1]);
int n = atoi(argv[2]);
int k = atoi(argv[3]);
int sizeofa = m * k;
int sizeofb = k * n;
int sizeofc = m * n;
char ta = 'N';
char tb = 'N';
double alpha = 1.2;
double beta = 0.001;
struct timeval start,finish;
double duration;
double* A = (double*)malloc(sizeof(double) * sizeofa);
double* B = (double*)malloc(sizeof(double) * sizeofb);
double* C = (double*)malloc(sizeof(double) * sizeofc);
srand((unsigned)time(NULL));
for (i=0; i<sizeofa; i++)
A[i] = i%3+1;//(rand()%100)/10.0;
for (i=0; i<sizeofb; i++)
B[i] = i%3+1;//(rand()%100)/10.0;
for (i=0; i<sizeofc; i++)
C[i] = i%3+1;//(rand()%100)/10.0;
//#if 0
printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);
gettimeofday(&start, NULL);
dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
gettimeofday(&finish, NULL);
duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;
double gflops = 2.0 * m *n*k;
gflops = gflops/duration*1.0e-6;
FILE *fp;
fp = fopen("timeDGEMM.txt", "a");
fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, gflops);
fclose(fp);
free(A);
free(B);
free(C);
return 0;
}
```
` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a`
` ./time_dgemm <m> <n> <k> `
## Troubleshooting
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
* The number of CPUs/Cores should less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
## BLAS reference manual
If you want to understand every BLAS function and definition, please read
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm)
or [netlib.org](http://netlib.org/blas/)
Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions)
## How to reference OpenBLAS.
You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications).
Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly.

View File

@ -1,4 +1,4 @@
version: 0.2.15.{build} version: 0.2.19.{build}
#environment: #environment:
@ -39,4 +39,6 @@ before_build:
- cmake -G "Visual Studio 12 Win64" . - cmake -G "Visual Studio 12 Win64" .
test_script: test_script:
- echo Build OK! - echo Running Test
- cd c:\projects\OpenBLAS\utest
- openblas_utest

View File

@ -33,6 +33,22 @@ LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread
# Apple vecLib # Apple vecLib
LIBVECLIB = -framework Accelerate LIBVECLIB = -framework Accelerate
ESSL=/opt/ibm/lib
#LIBESSL = -lesslsmp $(ESSL)/libxlomp_ser.so.1 $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
LIBESSL = -lesslsmp $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
ifneq ($(NO_LAPACK), 1)
GOTO_LAPACK_TARGETS=slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
csymv.goto zsymv.goto \
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto
else
GOTO_LAPACK_TARGETS=
endif
ifeq ($(OSNAME), WINNT) ifeq ($(OSNAME), WINNT)
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
@ -44,6 +60,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \ ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
sger.goto dger.goto cger.goto zger.goto \ sger.goto dger.goto cger.goto zger.goto \
sdot.goto ddot.goto \ sdot.goto ddot.goto \
srot.goto drot.goto \
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \ saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
scopy.goto dcopy.goto ccopy.goto zcopy.goto \ scopy.goto dcopy.goto ccopy.goto zcopy.goto \
sswap.goto dswap.goto cswap.goto zswap.goto \ sswap.goto dswap.goto cswap.goto zswap.goto \
@ -142,31 +159,29 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
else else
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \ strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \ strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \ ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \ ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
sger.goto dger.goto cger.goto zger.goto \ sger.goto dger.goto cger.goto zger.goto \
sdot.goto ddot.goto cdot.goto zdot.goto \ sdot.goto ddot.goto cdot.goto zdot.goto \
srot.goto drot.goto \
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \ saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
scopy.goto dcopy.goto ccopy.goto zcopy.goto \ scopy.goto dcopy.goto ccopy.goto zcopy.goto \
sswap.goto dswap.goto cswap.goto zswap.goto \ sswap.goto dswap.goto cswap.goto zswap.goto \
sscal.goto dscal.goto cscal.goto zscal.goto \ sscal.goto dscal.goto cscal.goto zscal.goto \
sasum.goto dasum.goto casum.goto zasum.goto \ sasum.goto dasum.goto casum.goto zasum.goto \
ssymv.goto dsymv.goto csymv.goto zsymv.goto \ ssymv.goto dsymv.goto \
chemv.goto zhemv.goto \ chemv.goto zhemv.goto \
chemm.goto zhemm.goto \ chemm.goto zhemm.goto \
cherk.goto zherk.goto \ cherk.goto zherk.goto \
cher2k.goto zher2k.goto \ cher2k.goto zher2k.goto \
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \ sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \ ssymm.goto dsymm.goto csymm.goto zsymm.goto \
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \ smallscaling \
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \ isamax.goto idamax.goto icamax.goto izamax.goto \
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \ snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS)
ssymm.goto dsymm.goto csymm.goto zsymm.goto
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \ scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
@ -219,7 +234,9 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \ sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \ sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \ spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas \
isamax.atlas idamax.atlas icamax.atlas izamax.atlas \
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto
mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \ scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \
@ -252,7 +269,11 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
endif endif
essl :: sgemm.essl strmm.essl dgemm.essl dtrmm.essl \
cgemm.essl ctrmm.essl zgemm.essl ztrmm.essl \
slinpack.essl clinpack.essl dlinpack.essl zlinpack.essl \
scholesky.essl ccholesky.essl dcholesky.essl zcholesky.essl \
strsm.essl dtrsm.essl ctrsm.essl ztrsm.essl
veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \ veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
scholesky.veclib dcholesky.veclib ccholesky.veclib zcholesky.veclib \ scholesky.veclib dcholesky.veclib ccholesky.veclib zcholesky.veclib \
@ -305,6 +326,9 @@ slinpack.mkl : slinpack.$(SUFFIX)
slinpack.veclib : slinpack.$(SUFFIX) slinpack.veclib : slinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
slinpack.essl : slinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Dlinpack #################################################### ##################################### Dlinpack ####################################################
dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME) dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@ -321,6 +345,9 @@ dlinpack.mkl : dlinpack.$(SUFFIX)
dlinpack.veclib : dlinpack.$(SUFFIX) dlinpack.veclib : dlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dlinpack.essl : dlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Clinpack #################################################### ##################################### Clinpack ####################################################
clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME) clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME)
@ -338,6 +365,9 @@ clinpack.mkl : clinpack.$(SUFFIX)
clinpack.veclib : clinpack.$(SUFFIX) clinpack.veclib : clinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
clinpack.essl : clinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Zlinpack #################################################### ##################################### Zlinpack ####################################################
zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME) zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME)
@ -355,6 +385,9 @@ zlinpack.mkl : zlinpack.$(SUFFIX)
zlinpack.veclib : zlinpack.$(SUFFIX) zlinpack.veclib : zlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
zlinpack.essl : zlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Scholesky ################################################### ##################################### Scholesky ###################################################
scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME) scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME)
@ -372,6 +405,9 @@ scholesky.mkl : scholesky.$(SUFFIX)
scholesky.veclib : scholesky.$(SUFFIX) scholesky.veclib : scholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
scholesky.essl : scholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Dcholesky ################################################### ##################################### Dcholesky ###################################################
dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME) dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME)
@ -389,6 +425,9 @@ dcholesky.mkl : dcholesky.$(SUFFIX)
dcholesky.veclib : dcholesky.$(SUFFIX) dcholesky.veclib : dcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dcholesky.essl : dcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ccholesky ################################################### ##################################### Ccholesky ###################################################
ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME) ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME)
@ -406,6 +445,9 @@ ccholesky.mkl : ccholesky.$(SUFFIX)
ccholesky.veclib : ccholesky.$(SUFFIX) ccholesky.veclib : ccholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ccholesky.essl : ccholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Zcholesky ################################################### ##################################### Zcholesky ###################################################
@ -424,6 +466,9 @@ zcholesky.mkl : zcholesky.$(SUFFIX)
zcholesky.veclib : zcholesky.$(SUFFIX) zcholesky.veclib : zcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
zcholesky.essl : zcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Sgemm #################################################### ##################################### Sgemm ####################################################
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME) sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@ -440,6 +485,9 @@ sgemm.mkl : sgemm.$(SUFFIX)
sgemm.veclib : sgemm.$(SUFFIX) sgemm.veclib : sgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
sgemm.essl : sgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Dgemm #################################################### ##################################### Dgemm ####################################################
dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME) dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@ -456,6 +504,9 @@ dgemm.mkl : dgemm.$(SUFFIX)
dgemm.veclib : dgemm.$(SUFFIX) dgemm.veclib : dgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dgemm.essl : dgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Cgemm #################################################### ##################################### Cgemm ####################################################
cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME) cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME)
@ -473,6 +524,9 @@ cgemm.mkl : cgemm.$(SUFFIX)
cgemm.veclib : cgemm.$(SUFFIX) cgemm.veclib : cgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
cgemm.essl : cgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Zgemm #################################################### ##################################### Zgemm ####################################################
zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME) zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME)
@ -490,6 +544,9 @@ zgemm.mkl : zgemm.$(SUFFIX)
zgemm.veclib : zgemm.$(SUFFIX) zgemm.veclib : zgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
zgemm.essl : zgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ssymm #################################################### ##################################### Ssymm ####################################################
ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME) ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@ -572,6 +629,9 @@ strmm.mkl : strmm.$(SUFFIX)
strmm.veclib : strmm.$(SUFFIX) strmm.veclib : strmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
strmm.essl : strmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Dtrmm #################################################### ##################################### Dtrmm ####################################################
dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME) dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@ -588,6 +648,9 @@ dtrmm.mkl : dtrmm.$(SUFFIX)
dtrmm.veclib : dtrmm.$(SUFFIX) dtrmm.veclib : dtrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dtrmm.essl : dtrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ctrmm #################################################### ##################################### Ctrmm ####################################################
ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME) ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME)
@ -605,6 +668,9 @@ ctrmm.mkl : ctrmm.$(SUFFIX)
ctrmm.veclib : ctrmm.$(SUFFIX) ctrmm.veclib : ctrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ctrmm.essl : ctrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ztrmm #################################################### ##################################### Ztrmm ####################################################
ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME) ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME)
@ -622,6 +688,9 @@ ztrmm.mkl : ztrmm.$(SUFFIX)
ztrmm.veclib : ztrmm.$(SUFFIX) ztrmm.veclib : ztrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ztrmm.essl : ztrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Strsm #################################################### ##################################### Strsm ####################################################
strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME) strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@ -638,6 +707,9 @@ strsm.mkl : strsm.$(SUFFIX)
strsm.veclib : strsm.$(SUFFIX) strsm.veclib : strsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
strsm.essl : strsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Dtrsm #################################################### ##################################### Dtrsm ####################################################
dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME) dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@ -654,6 +726,9 @@ dtrsm.mkl : dtrsm.$(SUFFIX)
dtrsm.veclib : dtrsm.$(SUFFIX) dtrsm.veclib : dtrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dtrsm.essl : dtrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ctrsm #################################################### ##################################### Ctrsm ####################################################
ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME) ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME)
@ -671,6 +746,9 @@ ctrsm.mkl : ctrsm.$(SUFFIX)
ctrsm.veclib : ctrsm.$(SUFFIX) ctrsm.veclib : ctrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ctrsm.essl : ctrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ztrsm #################################################### ##################################### Ztrsm ####################################################
ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME) ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME)
@ -688,6 +766,9 @@ ztrsm.mkl : ztrsm.$(SUFFIX)
ztrsm.veclib : ztrsm.$(SUFFIX) ztrsm.veclib : ztrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ztrsm.essl : ztrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ssyrk #################################################### ##################################### Ssyrk ####################################################
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME) ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@ -1412,6 +1493,39 @@ zdot.mkl : zdot-intel.$(SUFFIX)
zdot.veclib : zdot-intel.$(SUFFIX) zdot.veclib : zdot-intel.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) $(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Srot ####################################################
srot.goto : srot.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
srot.acml : srot.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
srot.atlas : srot.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
srot.mkl : srot.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
srot.veclib : srot.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Drot ####################################################
drot.goto : drot.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
drot.acml : drot.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
drot.atlas : drot.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
drot.mkl : drot.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
drot.veclib : drot.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Saxpy #################################################### ##################################### Saxpy ####################################################
saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME) saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@ -1833,6 +1947,63 @@ zgemm3m.mkl : zgemm3m.$(SUFFIX)
zgemm3m.veclib : zgemm3m.$(SUFFIX) zgemm3m.veclib : zgemm3m.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## ISAMAX ##############################################
isamax.goto : isamax.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
isamax.atlas : isamax.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## IDAMAX ##############################################
idamax.goto : idamax.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
idamax.atlas : idamax.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## ICAMAX ##############################################
icamax.goto : icamax.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
icamax.atlas : icamax.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## IZAMAX ##############################################
izamax.goto : izamax.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
izamax.atlas : izamax.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## SNRM2 ##############################################
snrm2.goto : snrm2.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
snrm2.atlas : snrm2.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## DNRM2 ##############################################
dnrm2.goto : dnrm2.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
dnrm2.atlas : dnrm2.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## Sscnrm2 ##############################################
scnrm2.goto : scnrm2.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
scnrm2.atlas : scnrm2.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
############################################## Ddznrm2 ##############################################
dznrm2.goto : dznrm2.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
dznrm2.atlas : dznrm2.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
################################################################################################### ###################################################################################################
slinpack.$(SUFFIX) : linpack.c slinpack.$(SUFFIX) : linpack.c
@ -2123,6 +2294,13 @@ cgesv.$(SUFFIX) : gesv.c
zgesv.$(SUFFIX) : gesv.c zgesv.$(SUFFIX) : gesv.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
srot.$(SUFFIX) : rot.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
drot.$(SUFFIX) : rot.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
@ -2133,8 +2311,37 @@ zgemm3m.$(SUFFIX) : gemm3m.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
isamax.$(SUFFIX) : iamax.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
idamax.$(SUFFIX) : iamax.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
icamax.$(SUFFIX) : iamax.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
izamax.$(SUFFIX) : iamax.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
snrm2.$(SUFFIX) : nrm2.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
dnrm2.$(SUFFIX) : nrm2.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
scnrm2.$(SUFFIX) : nrm2.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
dznrm2.$(SUFFIX) : nrm2.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
smallscaling: smallscaling.c ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm -lpthread
clean :: clean ::
@rm -f *.goto *.mkl *.acml *.atlas *.veclib @rm -f *.goto *.mkl *.acml *.atlas *.veclib *.essl smallscaling
include $(TOPDIR)/Makefile.tail include $(TOPDIR)/Makefile.tail

View File

@ -183,9 +183,9 @@ int main(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
#ifdef COMPLEX #ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
#else #else
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
#endif #endif
} }

View File

@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6); COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
} }

View File

@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MBytes\n", " %10.2f MBytes %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }

View File

@ -184,8 +184,8 @@ int main(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6); COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
} }

View File

@ -221,7 +221,7 @@ int main(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
} }
} }
@ -258,7 +258,7 @@ int main(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
} }
} }

192
benchmark/iamax.c Normal file
View File

@ -0,0 +1,192 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IAMAX
#ifdef COMPLEX
#ifdef DOUBLE
#define IAMAX BLASFUNC(izamax)
#else
#define IAMAX BLASFUNC(icamax)
#endif
#else
#ifdef DOUBLE
#define IAMAX BLASFUNC(idamax)
#else
#define IAMAX BLASFUNC(isamax)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
IAMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

193
benchmark/nrm2.c Normal file
View File

@ -0,0 +1,193 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef NRM2
#ifdef COMPLEX
#ifdef DOUBLE
#define NRM2 BLASFUNC(dznrm2)
#else
#define NRM2 BLASFUNC(scnrm2)
#endif
#else
#ifdef DOUBLE
#define NRM2 BLASFUNC(dnrm2)
#else
#define NRM2 BLASFUNC(snrm2)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
NRM2 (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

197
benchmark/rot.c Normal file
View File

@ -0,0 +1,197 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef DOT
#ifdef DOUBLE
#define ROT BLASFUNC(drot)
#else
#define ROT BLASFUNC(srot)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
// FLOAT result;
blasint m, i;
blasint inc_x=1,inc_y=1;
FLOAT c[1] = { 2.0 };
FLOAT s[1] = { 2.0 };
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
ROT (&m, x, &inc_x, y, &inc_y, c, s);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -189,9 +189,9 @@ int main(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
#ifdef COMPLEX #ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);
#else #else
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 1. * (double)m / timeg * 1.e-6, timeg);
#endif #endif
} }

View File

@ -2,61 +2,54 @@
argv <- commandArgs(trailingOnly = TRUE) argv <- commandArgs(trailingOnly = TRUE)
nfrom = 128 nfrom <- 128
nto = 2048 nto <- 2048
nstep = 128 nstep <- 128
loops = 1 loops <- 1
if ( length(argv) > 0 ) { if (length(argv) > 0) {
for (z in 1:length(argv)) {
for ( z in 1:length(argv) ) { if (z == 1) {
nfrom <- as.numeric(argv[z])
if ( z == 1 ) { } else if (z == 2) {
nfrom <- as.numeric(argv[z]) nto <- as.numeric(argv[z])
} else if ( z==2 ) { } else if (z == 3) {
nto <- as.numeric(argv[z]) nstep <- as.numeric(argv[z])
} else if ( z==3 ) { } else if (z == 4) {
nstep <- as.numeric(argv[z]) loops <- as.numeric(argv[z])
} else if ( z==4 ) { }
loops <- as.numeric(argv[z]) }
}
}
} }
p=Sys.getenv("OPENBLAS_LOOPS") p <- Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) { if (p != "") {
loops <- as.numeric(p) loops <- as.numeric(p)
} }
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops)) cat(sprintf(
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
nfrom,
nto,
nstep,
loops
))
cat(sprintf(" SIZE Flops Time\n")) cat(sprintf(" SIZE Flops Time\n"))
n = nfrom n <- nfrom
while ( n <= nto ) { while (n <= nto) {
A <- matrix(rnorm(n * n), ncol = n, nrow = n)
ev <- 0
z <- system.time(for (l in 1:loops) {
ev <- eigen(A)
})
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) mflops <- (26.66 * n * n * n) * loops / (z[3] * 1.0e6)
l = 1
start <- proc.time()[3] st <- sprintf("%.0fx%.0f :", n, n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
while ( l <= loops ) { n <- n + nstep
ev <- eigen(A)
l = l + 1
}
end <- proc.time()[3]
timeg = end - start
mflops = (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 )
st = sprintf("%.0fx%.0f :",n , n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
n = n + nstep
} }

View File

@ -2,62 +2,63 @@
argv <- commandArgs(trailingOnly = TRUE) argv <- commandArgs(trailingOnly = TRUE)
nfrom = 128 nfrom <- 128
nto = 2048 nto <- 2048
nstep = 128 nstep <- 128
loops = 1 loops <- 1
if ( length(argv) > 0 ) { if (length(argv) > 0) {
for (z in 1:length(argv)) {
for ( z in 1:length(argv) ) { if (z == 1) {
nfrom <- as.numeric(argv[z])
if ( z == 1 ) { } else if (z == 2) {
nfrom <- as.numeric(argv[z]) nto <- as.numeric(argv[z])
} else if ( z==2 ) { } else if (z == 3) {
nto <- as.numeric(argv[z]) nstep <- as.numeric(argv[z])
} else if ( z==3 ) { } else if (z == 4) {
nstep <- as.numeric(argv[z]) loops <- as.numeric(argv[z])
} else if ( z==4 ) { }
loops <- as.numeric(argv[z]) }
}
}
} }
p=Sys.getenv("OPENBLAS_LOOPS") p <- Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) { if (p != "") {
loops <- as.numeric(p) loops <- as.numeric(p)
} }
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops)) cat(sprintf(
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
nfrom,
nto,
nstep,
loops
))
cat(sprintf(" SIZE Flops Time\n")) cat(sprintf(" SIZE Flops Time\n"))
n = nfrom n <- nfrom
while ( n <= nto ) { while (n <= nto) {
A <- matrix(runif(n * n),
ncol = n,
nrow = n,
byrow = TRUE)
B <- matrix(runif(n * n),
ncol = n,
nrow = n,
byrow = TRUE)
C <- 1
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) z <- system.time(for (l in 1:loops) {
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) C <- A %*% B
l <- l + 1
l = 1 })
start <- proc.time()[3] mflops <- (2.0 * n * n * n) * loops / (z[3] * 1.0e6)
while ( l <= loops ) { st <- sprintf("%.0fx%.0f :", n, n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
C <- A %*% B n <- n + nstep
l = l + 1
}
end <- proc.time()[3]
timeg = end - start
mflops = ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
st = sprintf("%.0fx%.0f :",n , n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
n = n + nstep
} }

View File

@ -2,62 +2,56 @@
argv <- commandArgs(trailingOnly = TRUE) argv <- commandArgs(trailingOnly = TRUE)
nfrom = 128 nfrom <- 128
nto = 2048 nto <- 2048
nstep = 128 nstep <- 128
loops = 1 loops <- 1
if ( length(argv) > 0 ) { if (length(argv) > 0) {
for (z in 1:length(argv)) {
for ( z in 1:length(argv) ) { if (z == 1) {
nfrom <- as.numeric(argv[z])
if ( z == 1 ) { } else if (z == 2) {
nfrom <- as.numeric(argv[z]) nto <- as.numeric(argv[z])
} else if ( z==2 ) { } else if (z == 3) {
nto <- as.numeric(argv[z]) nstep <- as.numeric(argv[z])
} else if ( z==3 ) { } else if (z == 4) {
nstep <- as.numeric(argv[z]) loops <- as.numeric(argv[z])
} else if ( z==4 ) { }
loops <- as.numeric(argv[z]) }
}
}
} }
p=Sys.getenv("OPENBLAS_LOOPS") p <- Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) { if (p != "") {
loops <- as.numeric(p) loops <- as.numeric(p)
} }
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops)) cat(sprintf(
"From %.0f To %.0f Step=%.0f Loops=%.0f\n",
nfrom,
nto,
nstep,
loops
))
cat(sprintf(" SIZE Flops Time\n")) cat(sprintf(" SIZE Flops Time\n"))
n = nfrom n <- nfrom
while ( n <= nto ) { while (n <= nto) {
A <- matrix(rnorm(n * n), ncol = n, nrow = n)
B <- matrix(rnorm(n * n), ncol = n, nrow = n)
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) z <- system.time(for (l in 1:loops) {
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) solve(A, B)
})
l = 1
start <- proc.time()[3] mflops <-
(2.0 / 3.0 * n * n * n + 2.0 * n * n * n) * loops / (z[3] * 1.0e6)
while ( l <= loops ) { st <- sprintf("%.0fx%.0f :", n, n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
solve(A,B) n <- n + nstep
l = l + 1
}
end <- proc.time()[3]
timeg = end - start
mflops = (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
st = sprintf("%.0fx%.0f :",n , n)
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
n = n + nstep
} }

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
import os
import sys
import time
import numpy
from numpy import zeros
from numpy.random import randn
from scipy.linalg import blas
def run_dsyrk(N, l):
A = randn(N, N).astype('float64', order='F')
C = zeros((N, N), dtype='float64', order='F')
start = time.time()
for i in range(0, l):
blas.dsyrk(1.0, A, c=C, overwrite_c=True)
end = time.time()
timediff = (end - start)
mflops = (N * N * N) * l / timediff
mflops *= 1e-6
size = "%dx%d" % (N, N)
print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
N = 128
NMAX = 2048
NINC = 128
LOOPS = 1
z = 0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p)
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range(N, NMAX + NINC, NINC):
run_dsyrk(i, LOOPS)

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
import os
import sys
import time
import numpy
from numpy import zeros
from numpy.random import randn
from scipy.linalg import blas
def run_ssyrk(N, l):
A = randn(N, N).astype('float32', order='F')
C = zeros((N, N), dtype='float32', order='F')
start = time.time()
for i in range(0, l):
blas.ssyrk(1.0, A, c=C, overwrite_c=True)
end = time.time()
timediff = (end - start)
mflops = (N * N * N) * l / timediff
mflops *= 1e-6
size = "%dx%d" % (N, N)
print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
N = 128
NMAX = 2048
NINC = 128
LOOPS = 1
z = 0
for arg in sys.argv:
if z == 1:
N = int(arg)
elif z == 2:
NMAX = int(arg)
elif z == 3:
NINC = int(arg)
elif z == 4:
LOOPS = int(arg)
z = z + 1
if 'OPENBLAS_LOOPS' in os.environ:
p = os.environ['OPENBLAS_LOOPS']
if p:
LOOPS = int(p)
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
for i in range(N, NMAX + NINC, NINC):
run_ssyrk(i, LOOPS)

197
benchmark/smallscaling.c Normal file
View File

@ -0,0 +1,197 @@
// run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <cblas.h>
#include <omp.h>
#include <pthread.h>
#define MIN_SIZE 5
#define MAX_SIZE 60
#define NB_SIZE 10
// number of loop for a 1x1 matrix. Lower it if the test is
// too slow on you computer.
#define NLOOP 2e7
typedef struct {
int matrix_size;
int n_loop;
void (* bench_func)();
void (* blas_func)();
void * (* create_matrix)(int size);
} BenchParam;
void * s_create_matrix(int size) {
float * r = malloc(size * sizeof(double));
int i;
for(i = 0; i < size; i++)
r[i] = 1e3 * i / size;
return r;
}
void * c_create_matrix(int size) {
float * r = malloc(size * 2 * sizeof(double));
int i;
for(i = 0; i < 2 * size; i++)
r[i] = 1e3 * i / size;
return r;
}
void * z_create_matrix(int size) {
double * r = malloc(size * 2 * sizeof(double));
int i;
for(i = 0; i < 2 * size; i++)
r[i] = 1e3 * i / size;
return r;
}
void * d_create_matrix(int size) {
double * r = malloc(size * sizeof(double));
int i;
for(i = 0; i < size; i++)
r[i] = 1e3 * i / size;
return r;
}
void trmv_bench(BenchParam * param)
{
int i, n;
int size = param->matrix_size;
n = param->n_loop / size;
int one = 1;
void * A = param->create_matrix(size * size);
void * y = param->create_matrix(size);
for(i = 0; i < n; i++) {
param->blas_func("U", "N", "N", &size, A, &size, y, &one);
}
free(A);
free(y);
}
void gemv_bench(BenchParam * param)
{
int i, n;
int size = param->matrix_size;
n = param->n_loop / size;
double v = 1.01;
int one = 1;
void * A = param->create_matrix(size * size);
void * y = param->create_matrix(size);
for(i = 0; i < n; i++) {
param->blas_func("N", &size, &size, &v, A, &size, y, &one, &v, y, &one);
}
free(A);
free(y);
}
void ger_bench(BenchParam * param) {
int i, n;
int size = param->matrix_size;
n = param->n_loop / size;
double v = 1.01;
int one = 1;
void * A = param->create_matrix(size * size);
void * y = param->create_matrix(size);
for(i = 0; i < n; i++) {
param->blas_func(&size, &size, &v, y, &one, y, &one, A, &size);
}
free(A);
free(y);
}
#ifndef _WIN32
void * pthread_func_wrapper(void * param) {
((BenchParam *)param)->bench_func(param);
pthread_exit(NULL);
}
#endif
#define NB_TESTS 5
void * TESTS[4 * NB_TESTS] = {
trmv_bench, ztrmv_, z_create_matrix, "ztrmv",
gemv_bench, dgemv_, d_create_matrix, "dgemv",
gemv_bench, zgemv_, z_create_matrix, "zgemv",
ger_bench, dger_, d_create_matrix, "dger",
ger_bench, zgerc_, z_create_matrix, "zgerc",
};
inline static double delta_time(struct timespec tick) {
struct timespec tock;
clock_gettime(CLOCK_MONOTONIC, &tock);
return (tock.tv_sec - tick.tv_sec) + (tock.tv_nsec - tick.tv_nsec) / 1e9;
}
double pthread_bench(BenchParam * param, int nb_threads)
{
#ifdef _WIN32
return 0;
#else
BenchParam threaded_param = *param;
pthread_t threads[nb_threads];
int t, rc;
struct timespec tick;
threaded_param.n_loop /= nb_threads;
clock_gettime(CLOCK_MONOTONIC, &tick);
for(t=0; t<nb_threads; t++){
rc = pthread_create(&threads[t], NULL, pthread_func_wrapper, &threaded_param);
if (rc){
printf("ERROR; return code from pthread_create() is %d\n", rc);
exit(-1);
}
}
for(t=0; t<nb_threads; t++){
pthread_join(threads[t], NULL);
}
return delta_time(tick);
#endif
}
double seq_bench(BenchParam * param) {
struct timespec tick;
clock_gettime(CLOCK_MONOTONIC, &tick);
param->bench_func(param);
return delta_time(tick);
}
double omp_bench(BenchParam * param) {
BenchParam threaded_param = *param;
struct timespec tick;
int t;
int nb_threads = omp_get_max_threads();
threaded_param.n_loop /= nb_threads;
clock_gettime(CLOCK_MONOTONIC, &tick);
#pragma omp parallel for
for(t = 0; t < nb_threads; t ++){
param->bench_func(&threaded_param);
}
return delta_time(tick);
}
int main(int argc, char * argv[]) {
double inc_factor = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE);
BenchParam param;
int test_id;
printf ("Running on %d threads\n", omp_get_max_threads());
for(test_id = 0; test_id < NB_TESTS; test_id ++) {
double size = MIN_SIZE;
param.bench_func = TESTS[test_id * 4];
param.blas_func = TESTS[test_id * 4 + 1];
param.create_matrix = TESTS[test_id * 4 + 2];
printf("\nBenchmark of %s\n", (char*)TESTS[test_id * 4 + 3]);
param.n_loop = NLOOP;
while(size <= MAX_SIZE) {
param.matrix_size = (int)(size + 0.5);
double seq_time = seq_bench(&param);
double omp_time = omp_bench(&param);
double pthread_time = pthread_bench(&param, omp_get_max_threads());
printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, "
"pthread %gs, speedup %g\n",
param.matrix_size, seq_time,
omp_time, seq_time / omp_time,
pthread_time, seq_time / pthread_time);
size *= inc_factor;
}
}
return(0);
}

View File

@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MBytes\n", " %10.2f MBytes %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }

View File

@ -191,8 +191,8 @@ int main(int argc, char *argv[]){
gettimeofday( &start, (struct timezone *)0); gettimeofday( &start, (struct timezone *)0);
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6, time1);
} }

View File

@ -184,8 +184,8 @@ int main(int argc, char *argv[]){
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6); COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
} }

65
c_check
View File

@ -1,5 +1,8 @@
#!/usr/bin/perl #!/usr/bin/perl
use File::Basename;
use File::Temp qw(tempfile);
# Checking cross compile # Checking cross compile
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
@ -7,7 +10,9 @@ $hostarch = "x86_64" if ($hostarch eq "amd64");
$hostarch = "arm" if ($hostarch =~ /^arm.*/); $hostarch = "arm" if ($hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64"); $hostarch = "arm64" if ($hostarch eq "aarch64");
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); $hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
$hostarch = "zarch" if ($hostarch eq "s390x");
$tmpf = new File::Temp( UNLINK => 1 );
$binary = $ENV{"BINARY"}; $binary = $ENV{"BINARY"};
$makefile = shift(@ARGV); $makefile = shift(@ARGV);
@ -26,14 +31,12 @@ if ($?) {
$cross_suffix = ""; $cross_suffix = "";
if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { if (dirname($compiler_name) ne ".") {
if ($1 =~ /(.*-)(.*)/) { $cross_suffix .= dirname($compiler_name) . "/";
$cross_suffix = $1; }
}
} else { if (basename($compiler_name) =~ /([^\s]*-)(.*)/) {
if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) { $cross_suffix .= $1;
$cross_suffix = $1;
}
} }
$compiler = ""; $compiler = "";
@ -63,13 +66,14 @@ $os = Android if ($data =~ /OS_ANDROID/);
$architecture = x86 if ($data =~ /ARCH_X86/); $architecture = x86 if ($data =~ /ARCH_X86/);
$architecture = x86_64 if ($data =~ /ARCH_X86_64/); $architecture = x86_64 if ($data =~ /ARCH_X86_64/);
$architecture = power if ($data =~ /ARCH_POWER/); $architecture = power if ($data =~ /ARCH_POWER/);
$architecture = mips32 if ($data =~ /ARCH_MIPS32/); $architecture = mips if ($data =~ /ARCH_MIPS/);
$architecture = mips64 if ($data =~ /ARCH_MIPS64/); $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
$architecture = alpha if ($data =~ /ARCH_ALPHA/); $architecture = alpha if ($data =~ /ARCH_ALPHA/);
$architecture = sparc if ($data =~ /ARCH_SPARC/); $architecture = sparc if ($data =~ /ARCH_SPARC/);
$architecture = ia64 if ($data =~ /ARCH_IA64/); $architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/); $architecture = arm if ($data =~ /ARCH_ARM/);
$architecture = arm64 if ($data =~ /ARCH_ARM64/); $architecture = arm64 if ($data =~ /ARCH_ARM64/);
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
$defined = 0; $defined = 0;
@ -79,7 +83,12 @@ if ($os eq "AIX") {
$defined = 1; $defined = 1;
} }
if (($architecture eq "mips32") || ($architecture eq "mips64")) { if ($architecture eq "mips") {
$compiler_name .= " -mabi=32";
$defined = 1;
}
if ($architecture eq "mips64") {
$compiler_name .= " -mabi=n32" if ($binary eq "32"); $compiler_name .= " -mabi=n32" if ($binary eq "32");
$compiler_name .= " -mabi=64" if ($binary eq "64"); $compiler_name .= " -mabi=64" if ($binary eq "64");
$defined = 1; $defined = 1;
@ -89,6 +98,11 @@ if (($architecture eq "arm") || ($architecture eq "arm64")) {
$defined = 1; $defined = 1;
} }
if ($architecture eq "zarch") {
$defined = 1;
$binary = 64;
}
if ($architecture eq "alpha") { if ($architecture eq "alpha") {
$defined = 1; $defined = 1;
$binary = 64; $binary = 64;
@ -152,16 +166,35 @@ if ($?) {
die 1; die 1;
} }
$have_msa = 0;
if (($architecture eq "mips") || ($architecture eq "mips64")) {
$code = '"addvi.b $w0, $w1, 1"';
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
print $tmpf "#include <msa.h>\n\n";
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
$args = "$msa_flags -o $tmpf.o -x c $tmpf";
my @cmd = ("$compiler_name $args");
system(@cmd) == 0;
if ($? != 0) {
$have_msa = 0;
} else {
$have_msa = 1;
}
unlink("$tmpf.o");
}
$architecture = x86 if ($data =~ /ARCH_X86/); $architecture = x86 if ($data =~ /ARCH_X86/);
$architecture = x86_64 if ($data =~ /ARCH_X86_64/); $architecture = x86_64 if ($data =~ /ARCH_X86_64/);
$architecture = power if ($data =~ /ARCH_POWER/); $architecture = power if ($data =~ /ARCH_POWER/);
$architecture = mips32 if ($data =~ /ARCH_MIPS32/); $architecture = mips if ($data =~ /ARCH_MIPS/);
$architecture = mips64 if ($data =~ /ARCH_MIPS64/); $architecture = mips64 if ($data =~ /ARCH_MIPS64/);
$architecture = alpha if ($data =~ /ARCH_ALPHA/); $architecture = alpha if ($data =~ /ARCH_ALPHA/);
$architecture = sparc if ($data =~ /ARCH_SPARC/); $architecture = sparc if ($data =~ /ARCH_SPARC/);
$architecture = ia64 if ($data =~ /ARCH_IA64/); $architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/); $architecture = arm if ($data =~ /ARCH_ARM/);
$architecture = arm64 if ($data =~ /ARCH_ARM64/); $architecture = arm64 if ($data =~ /ARCH_ARM64/);
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
$binformat = bin32; $binformat = bin32;
$binformat = bin64 if ($data =~ /BINARY_64/); $binformat = bin64 if ($data =~ /BINARY_64/);
@ -209,6 +242,11 @@ $linker_a = "";
$linker_L .= "-Wl,". $flags . " " $linker_L .= "-Wl,". $flags . " "
} }
if ($flags =~ /^\--exclude-libs/) {
$linker_L .= "-Wl,". $flags . " ";
$flags="";
}
if ( if (
($flags =~ /^\-l/) ($flags =~ /^\-l/)
&& ($flags !~ /gfortranbegin/) && ($flags !~ /gfortranbegin/)
@ -243,9 +281,11 @@ print MAKEFILE "BINARY64=\n" if $binformat ne bin64;
print MAKEFILE "BINARY32=1\n" if $binformat eq bin32; print MAKEFILE "BINARY32=1\n" if $binformat eq bin32;
print MAKEFILE "BINARY64=1\n" if $binformat eq bin64; print MAKEFILE "BINARY64=1\n" if $binformat eq bin64;
print MAKEFILE "FU=$need_fu\n" if $need_fu ne ""; print MAKEFILE "FU=$need_fu\n" if $need_fu ne "";
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross_suffix ne ""; print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne "";
print MAKEFILE "CROSS=1\n" if $cross != 0; print MAKEFILE "CROSS=1\n" if $cross != 0;
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
$os =~ tr/[a-z]/[A-Z]/; $os =~ tr/[a-z]/[A-Z]/;
$architecture =~ tr/[a-z]/[A-Z]/; $architecture =~ tr/[a-z]/[A-Z]/;
@ -257,6 +297,7 @@ print CONFFILE "#define C_$compiler\t1\n";
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32; print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64; print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
if ($os eq "LINUX") { if ($os eq "LINUX") {

View File

@ -14,12 +14,12 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
if (NOT NO_EXPRECISION) if (NOT NO_EXPRECISION)
if (${F_COMPILER} MATCHES "GFORTRAN") if (${F_COMPILER} MATCHES "GFORTRAN")
# N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
set(EXPRECISION 1) set(EXPRECISION 1)
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
endif () endif ()
if (${CMAKE_C_COMPILER} STREQUAL "Clang") if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
set(EXPRECISION 1) set(EXPRECISION 1)
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
@ -28,35 +28,35 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
endif () endif ()
endif () endif ()
if (${CMAKE_C_COMPILER} STREQUAL "Intel") if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
endif () endif ()
if (USE_OPENMP) if (USE_OPENMP)
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
endif () endif ()
if (${CMAKE_C_COMPILER} STREQUAL "Clang") if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
message(WARNING "Clang doesn't support OpenMP yet.") message(WARNING "Clang doesn't support OpenMP yet.")
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
endif () endif ()
if (${CMAKE_C_COMPILER} STREQUAL "Intel") if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
endif () endif ()
if (${CMAKE_C_COMPILER} STREQUAL "PGI") if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
set(CCOMMON_OPT "${CCOMMON_OPT} -mp") set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
endif () endif ()
if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
set(CCOMMON_OPT "${CCOMMON_OPT} -mp") set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
set(CEXTRALIB "${CEXTRALIB} -lstdc++") set(CEXTRALIB "${CEXTRALIB} -lstdc++")
endif () endif ()
if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
set(CCOMMON_OPT "${CCOMMON_OPT} -mp") set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
endif () endif ()
endif () endif ()
@ -87,7 +87,7 @@ if (${ARCH} STREQUAL "ia64")
set(BINARY_DEFINED 1) set(BINARY_DEFINED 1)
if (${F_COMPILER} MATCHES "GFORTRAN") if (${F_COMPILER} MATCHES "GFORTRAN")
if (${CMAKE_C_COMPILER} STREQUAL "GNU") if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
# EXPRECISION = 1 # EXPRECISION = 1
# CCOMMON_OPT += -DEXPRECISION # CCOMMON_OPT += -DEXPRECISION
endif () endif ()

View File

@ -53,7 +53,7 @@ endif()
add_custom_command( add_custom_command(
TARGET ${OpenBLAS_LIBNAME} PRE_LINK TARGET ${OpenBLAS_LIBNAME} PRE_LINK
COMMAND perl COMMAND perl
ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def" ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
COMMENT "Create openblas.def file" COMMENT "Create openblas.def file"
VERBATIM) VERBATIM)

View File

@ -2,7 +2,7 @@
set(ALLAUX set(ALLAUX
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f ilaprec.f ilatrans.f ilauplo.f iladiag.f iparam2stage.F chla_transtype.f
../INSTALL/ilaver.f ../INSTALL/slamch.f ../INSTALL/ilaver.f ../INSTALL/slamch.f
) )
@ -26,7 +26,7 @@ set(SCLAUX
) )
set(DZLAUX set(DZLAUX
dbdsdc.f dbdsdc.f dbdsvdx.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
@ -42,24 +42,32 @@ set(DZLAUX
dsteqr.f dsterf.f dlaisnan.f disnan.f dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f
dgetsls.f dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f
dtplqt2.f dtpmlqt.f dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f
dsytf2_rk.f dlasyf_rk.f dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f
dsytri_3x.f dsysv_rk.f dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f
dsbevx_2stage.f dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f
dsyevx_2stage.f dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F
dsytrd_sy2sb.f dlarfy.f
) )
set(SLASRC set(SLASRC
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f
sgetc2.f sgetri.f sgetc2.f sgetri.f sgetrf2.f
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f sggbak.f sggbal.f sgghd3.f sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f
sggglm.f sgghrd.f sgglse.f sggqrf.f sggglm.f sgghrd.f sgglse.f sggqrf.f
sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f sggrqf.f DEPRECATED/sggsvd.f sggsvd3.f DEPRECATED/sggsvp.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
shsein.f shseqr.f slabrd.f slacon.f slacn2.f shsein.f shseqr.f slabrd.f slacon.f slacn2.f
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
slansy.f slantb.f slantp.f slantr.f slanv2.f slansy.f slantb.f slantp.f slantr.f slanv2.f
slapll.f slapmt.f slapll.f slapmt.f
@ -69,10 +77,10 @@ set(SLASRC
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
slarrv.f slartv.f slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f
sopgtr.f sopmtr.f sorg2l.f sorg2r.f sopgtr.f sopmtr.f sorg2l.f sorg2r.f
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
spbstf.f spbsv.f spbsvx.f spbstf.f spbsv.f spbsvx.f
@ -96,8 +104,8 @@ set(SLASRC
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
stptrs.f stptrs.f
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
strtrs.f stzrqf.f stzrzf.f sstemr.f strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
stfttr.f stpttf.f stpttr.f strttf.f strttp.f stfttr.f stpttf.f stpttr.f strttf.f strttp.f
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
@ -106,22 +114,29 @@ set(SLASRC
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
sgelq.f sgelqt.f sgelqt3.f sgemlq.f sgemlqt.f sgemqr.f sgeqr.f sgetsls.f
slamswlq.f slamtsqr.f slaswlq.f slatsqr.f stplqt.f stplqt2.f stpmlqt.f
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f slasyf_aa.f ssytf2_rk.f slasyf_rk.f
ssytrf_rk.f ssytrs_3.f ssycon_3.f ssytri_3.f ssytri_3x.f ssysv_rk.f
ssb2st_kernels.f ssbev_2stage.f ssbevd_2stage.f ssbevx_2stage.f
ssyev_2stage.f ssyevd_2stage.f ssyevr_2stage.f ssyevx_2stage.f
ssygv_2stage.f ssytrd_2stage.f ssytrd_sb2st.F ssytrd_sy2sb.f slarfy.f
) )
set(DSLASRC spotrs.f) set(DSLASRC spotrs.f spotrf2.f)
set(CLASRC set(CLASRC
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
cgesvx.f cgetc2.f cgetri.f cgesvx.f cgetc2.f cgetri.f
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
cgghrd.f cgglse.f cggqrf.f cggrqf.f cgghrd.f cgglse.f cggqrf.f cggrqf.f
cggsvd.f cggsvp.f DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
@ -138,7 +153,7 @@ set(CLASRC
claed0.f claed7.f claed8.f claed0.f claed7.f claed8.f
claein.f claesy.f claev2.f clags2.f clagtm.f claein.f claesy.f claev2.f clags2.f clagtm.f
clahef.f clahef_rook.f clahqr.f clahef.f clahef_rook.f clahqr.f
clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
clanhb.f clanhe.f clanhb.f clanhe.f
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
@ -149,7 +164,7 @@ set(CLASRC
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
cposv.f cposvx.f cpstrf.f cpstf2.f cposv.f cposvx.f cpstrf.f cpstf2.f
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
@ -165,8 +180,8 @@ set(CLASRC
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
ctprfs.f ctptri.f ctprfs.f ctptri.f
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
@ -178,6 +193,14 @@ set(CLASRC
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
cgelq.f cgelqt.f cgelqt3.f cgemlq.f cgemlqt.f cgemqr.f cgeqr.f cgetsls.f
clamswlq.f clamtsqr.f claswlq.f clatsqr.f ctplqt.f ctplqt2.f ctpmlqt.f
chesv_aa.f chetrf_aa.f chetrs_aa.f clahef_aa.f csytf2_rk.f clasyf_rk.f
csytrf_rk.f csytrs_3.f csycon_3.f csytri_3.f csytri_3x.f csysv_rk.f
chetf2_rk.f clahef_rk.f chetrf_rk.f chetrs_3.f checon_3.f chetri_3.f
chetri_3x.f chesv_rk.f chb2st_kernels.f chbev_2stage.f chbevd_2stage.f
chbevx_2stage.f cheev_2stage.f cheevd_2stage.f cheevr_2stage.f cheevx_2stage.f
chegv_2stage.f chetrd_2stage.f chetrd_hb2st.F chetrd_he2hb.f clarfy.f
) )
set(ZCLASRC cpotrs.f) set(ZCLASRC cpotrs.f)
@ -186,18 +209,18 @@ set(DLASRC
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f
dgetc2.f dgetri.f dgetc2.f dgetri.f dgetrf2.f
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f dggbak.f dggbal.f dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f
dggglm.f dgghrd.f dgglse.f dggqrf.f dggglm.f dgghd3.f dgghrd.f dgglse.f dggqrf.f
dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f dggrqf.f dggsvd3.f dggsvp3.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
dlapll.f dlapmt.f dlapll.f dlapmt.f
@ -207,15 +230,15 @@ set(DLASRC
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
dlargv.f dlarrv.f dlartv.f dlargv.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f
dopgtr.f dopmtr.f dorg2l.f dorg2r.f dopgtr.f dopmtr.f dorg2l.f dorg2r.f
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
dpbstf.f dpbsv.f dpbsvx.f dpbstf.f dpbsv.f dpbsvx.f
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
dposvx.f dpotrs.f dpstrf.f dpstf2.f dposvx.f dpotrf2.f dpotrs.f dpstrf.f dpstf2.f
dppcon.f dppequ.f dppcon.f dppequ.f
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
@ -234,8 +257,8 @@ set(DLASRC
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
dtptrs.f dtptrs.f
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
@ -245,20 +268,28 @@ set(DLASRC
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f dgetsls.f
dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f dtplqt2.f dtpmlqt.f
dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f dsytf2_rk.f dlasyf_rk.f
dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f dsytri_3x.f dsysv_rk.f
dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f dsbevx_2stage.f
dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f dsyevx_2stage.f
dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F dsytrd_sy2sb.f dlarfy.f
) )
set(ZLASRC set(ZLASRC
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgejsv.f zgelq2.f zgelqf.f
zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvj.f zgesvx.f zgetc2.f
zgetri.f zgetri.f zgetrf2.f
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f zggbak.f zggbal.f zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f zggglm.f
zgghrd.f zgglse.f zggqrf.f zggrqf.f zgghd3.f zgghrd.f zgglse.f zggqrf.f zggrqf.f
zggsvd.f zggsvp.f DEPRECATED/zggsvd.f zggsvd3.f DEPRECATED/zggsvp.f zggsvp3.f
zgsvj0.f zgsvj1.f
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
@ -275,7 +306,7 @@ set(ZLASRC
zlaed0.f zlaed7.f zlaed8.f zlaed0.f zlaed7.f zlaed8.f
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
zlahef.f zlahef_rook.f zlahqr.f zlahef.f zlahef_rook.f zlahqr.f
zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
zlangt.f zlanhb.f zlangt.f zlanhb.f
zlanhe.f zlanhe.f
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
@ -287,28 +318,28 @@ set(ZLASRC
zlarfg.f zlarft.f zlarfgp.f zlarfg.f zlarft.f zlarfgp.f
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
zlassq.f zlasyf.f zlasyf_rook.f zlassq.f zlasyf.f zlasyf_rook.f zlasy_aa.f
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f zposv.f zposvx.f zpotrf2.f zpotrs.f zpstrf.f zpstf2.f
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
zrot.f zspcon.f zsprfs.f zspsv.f zrot.f zspcon.f zsprfs.f zspsv.f
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
zstegr.f zstein.f zsteqr.f zstegr.f zstein.f zsteqr.f
zsycon.f zsycon.f zsysv_aa.f
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f zsyswapr.f zsytrs.f zsytrs_aa.f zsytrs2.f zsyconv.f
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
zsytri_rook.f zsycon_rook.f zsysv_rook.f zsytri_rook.f zsycon_rook.f zsysv_rook.f
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
ztprfs.f ztptri.f ztprfs.f ztptri.f
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunm22.f zunml2.f
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
zunmtr.f zupgtr.f zunmtr.f zupgtr.f
zupmtr.f izmax1.f dzsum1.f zstemr.f zupmtr.f izmax1.f dzsum1.f zstemr.f
@ -320,6 +351,15 @@ set(ZLASRC
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
zgelq.f zgelqt.f zgelqt3.f zgemlq.f zgemlqt.f zgemqr.f zgeqr.f zgetsls.f
zlamswlq.f zlamtsqr.f zlaswlq.f zlatsqr.f ztplqt.f ztplqt2.f ztpmlqt.f
zhesv_aa.f zhetrf_aa.f zhetrs_aa.f zlahef_aa.f zsytf2_rk.f zlasyf_rk.f
zsytrf_aa.f zsytrf_rk.f zsytrs_3.f zsycon_3.f zsytri_3.f zsytri_3x.f zsysv_rk.f
zhetf2_rk.f zlahef_rk.f zhetrf_rk.f zhetrs_3.f zhecon_3.f zhetri_3.f
zhetri_3x.f zhesv_rk.f zhb2st_kernels.f zhbev_2stage.f zhbevd_2stage.f
zhbevx_2stage.f zheev_2stage.f zheevd_2stage.f zheevr_2stage.f
zheevx_2stage.f zhegv_2stage.f zhetrd_2stage.f zhetrd_hb2st.F zhetrd_he2hb.f
zlarfy.f
) )
set(LA_REL_SRC ${ALLAUX}) set(LA_REL_SRC ${ALLAUX})

File diff suppressed because it is too large Load Diff

10
cmake/openblas.pc.in Normal file
View File

@ -0,0 +1,10 @@
prefix=@prefix@
libdir=@libdir@
includedir=@includedir@
Name: OpenBLAS
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: @OPENBLAS_VERSION@
URL: https://github.com/xianyi/OpenBLAS
Libs: -L${libdir} -lopenblas
Cflags: -I${includedir}

View File

@ -50,20 +50,20 @@ else()
set(TARGET_CONF "config.h") set(TARGET_CONF "config.h")
endif () endif ()
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") include("${PROJECT_SOURCE_DIR}/cmake/c_check.cmake")
if (NOT NOFORTRAN) if (NOT NOFORTRAN)
include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
endif () endif ()
# compile getarch # compile getarch
set(GETARCH_SRC set(GETARCH_SRC
${CMAKE_SOURCE_DIR}/getarch.c ${PROJECT_SOURCE_DIR}/getarch.c
${CPUIDEMO} ${CPUIDEMO}
) )
if (NOT MSVC) if (NOT MSVC)
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
endif () endif ()
if (MSVC) if (MSVC)
@ -76,7 +76,7 @@ set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR}) file(MAKE_DIRECTORY ${GETARCH_DIR})
try_compile(GETARCH_RESULT ${GETARCH_DIR} try_compile(GETARCH_RESULT ${GETARCH_DIR}
SOURCES ${GETARCH_SRC} SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH_LOG OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
) )
@ -97,8 +97,8 @@ set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH2_DIR}) file(MAKE_DIRECTORY ${GETARCH2_DIR})
try_compile(GETARCH2_RESULT ${GETARCH2_DIR} try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH2_LOG OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
) )

View File

@ -3,7 +3,7 @@
## Description: Ported from OpenBLAS/Makefile.system ## Description: Ported from OpenBLAS/Makefile.system
## ##
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib") set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib")
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa # TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
# http://stackoverflow.com/questions/714100/os-detecting-makefile # http://stackoverflow.com/questions/714100/os-detecting-makefile
@ -78,7 +78,7 @@ else ()
set(ONLY_CBLAS 0) set(ONLY_CBLAS 0)
endif () endif ()
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake") include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
if (NOT DEFINED NUM_THREADS) if (NOT DEFINED NUM_THREADS)
set(NUM_THREADS ${NUM_CORES}) set(NUM_THREADS ${NUM_CORES})
@ -124,17 +124,17 @@ set(OBJCOPY "${CROSS_SUFFIX}objcopy")
set(OBJCONV "${CROSS_SUFFIX}objconv") set(OBJCONV "${CROSS_SUFFIX}objconv")
# OS dependent settings # OS dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake") include("${PROJECT_SOURCE_DIR}/cmake/os.cmake")
# Architecture dependent settings # Architecture dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake") include("${PROJECT_SOURCE_DIR}/cmake/arch.cmake")
# C Compiler dependent settings # C Compiler dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake") include("${PROJECT_SOURCE_DIR}/cmake/cc.cmake")
if (NOT NOFORTRAN) if (NOT NOFORTRAN)
# Fortran Compiler dependent settings # Fortran Compiler dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake") include("${PROJECT_SOURCE_DIR}/cmake/fc.cmake")
endif () endif ()
if (BINARY64) if (BINARY64)
@ -247,10 +247,10 @@ if (NOT DEFINED SYMBOLSUFFIX)
set(SYMBOLSUFFIX "") set(SYMBOLSUFFIX "")
endif () endif ()
set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}") set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
# TODO: nead to convert these Makefiles # TODO: nead to convert these Makefiles
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake # include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
if (${CORE} STREQUAL "PPC440") if (${CORE} STREQUAL "PPC440")
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC") set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
@ -410,8 +410,8 @@ set(LIBDEFNAME "${LIBNAME}.${LIBSUFFIX}.def")
set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp") set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp")
set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip") set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip")
set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}") set(LIBS "${PROJECT_SOURCE_DIR}/${LIBNAME}")
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}") set(LIBS_P "${PROJECT_SOURCE_DIR}/${LIBNAME_P}")
set(LIB_COMPONENTS BLAS) set(LIB_COMPONENTS BLAS)

View File

@ -93,7 +93,7 @@ extern "C" {
#include <sched.h> #include <sched.h>
#endif #endif
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID)
#include <sched.h> #include <sched.h>
#endif #endif
@ -332,12 +332,20 @@ typedef int blasint;
#endif #endif
#endif #endif
#ifdef POWER8
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif
/*
#ifdef PILEDRIVER #ifdef PILEDRIVER
#ifndef YIELDING #ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif #endif
#endif #endif
*/
/* /*
#ifdef STEAMROLLER #ifdef STEAMROLLER
@ -396,6 +404,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#include "common_sparc.h" #include "common_sparc.h"
#endif #endif
#ifdef ARCH_MIPS
#include "common_mips.h"
#endif
#ifdef ARCH_MIPS64 #ifdef ARCH_MIPS64
#include "common_mips64.h" #include "common_mips64.h"
#endif #endif
@ -408,10 +420,14 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#include "common_arm64.h" #include "common_arm64.h"
#endif #endif
#ifdef ARCH_ZARCH
#include "common_zarch.h"
#endif
#ifndef ASSEMBLER #ifndef ASSEMBLER
#ifdef OS_WINDOWS #ifdef OS_WINDOWS
typedef char env_var_t[MAX_PATH]; typedef char env_var_t[MAX_PATH];
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p)) #define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
#else #else
typedef char* env_var_t; typedef char* env_var_t;
#define readenv(p, n) ((p)=getenv(n)) #define readenv(p, n) ((p)=getenv(n))
@ -614,9 +630,14 @@ void gotoblas_profile_init(void);
void gotoblas_profile_quit(void); void gotoblas_profile_quit(void);
#ifdef USE_OPENMP #ifdef USE_OPENMP
#ifndef C_MSVC
int omp_in_parallel(void); int omp_in_parallel(void);
int omp_get_num_procs(void); int omp_get_num_procs(void);
#else #else
__declspec(dllimport) int __cdecl omp_in_parallel(void);
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
#endif
#else
#ifdef __ELF__ #ifdef __ELF__
int omp_in_parallel (void) __attribute__ ((weak)); int omp_in_parallel (void) __attribute__ ((weak));
int omp_get_num_procs(void) __attribute__ ((weak)); int omp_get_num_procs(void) __attribute__ ((weak));
@ -727,6 +748,7 @@ typedef struct {
#endif #endif
#ifndef ASSEMBLER #ifndef ASSEMBLER
#include "common_stackalloc.h"
#if 0 #if 0
#include "symcopy.h" #include "symcopy.h"
#endif #endif

View File

@ -105,7 +105,6 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define PROLOGUE \ #define PROLOGUE \
.arm ;\ .arm ;\
.global REALNAME ;\ .global REALNAME ;\
.func REALNAME ;\
REALNAME: REALNAME:
#define EPILOGUE #define EPILOGUE

View File

@ -43,28 +43,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ASSEMBLER #ifndef ASSEMBLER
static void __inline blas_lock(volatile BLASULONG *address){ static void __inline blas_lock(volatile BLASULONG *address){
long register ret; BLASULONG ret;
do { do {
while (*address) {YIELDING;}; while (*address) {YIELDING;};
__asm__ __volatile__( __asm__ __volatile__(
"ldaxr %0, [%1] \n\t" "mov x4, #1 \n\t"
"stlxr w2, %2, [%1] \n\t" "1: \n\t"
"orr %0, %0, x2 \n\t" "ldaxr x2, [%1] \n\t"
: "=r"(ret) "cbnz x2, 1b \n\t"
: "r"(address), "r"(1l) "2: \n\t"
: "memory", "x2" "stxr w3, x4, [%1] \n\t"
"cbnz w3, 1b \n\t"
"mov %0, #0 \n\t"
: "=r"(ret), "=r"(address)
: "1"(address)
: "memory", "x2" , "x3", "x4"
); );
} while (ret); } while (ret);
MB;
} }
#define BLAS_LOCK_DEFINED #define BLAS_LOCK_DEFINED
static inline int blas_quickdivide(blasint x, blasint y){ static inline int blas_quickdivide(blasint x, blasint y){
return x / y; return x / y;
} }
@ -110,7 +121,7 @@ REALNAME:
#define HUGE_PAGESIZE ( 4 << 20) #define HUGE_PAGESIZE ( 4 << 20)
#if defined(CORTEXA57) #if defined(CORTEXA57)
#define BUFFER_SIZE (40 << 20) #define BUFFER_SIZE (20 << 20)
#else #else
#define BUFFER_SIZE (16 << 20) #define BUFFER_SIZE (16 << 20)
#endif #endif

View File

@ -70,7 +70,7 @@ extern long int syscall (long int __sysno, ...);
static inline int my_mbind(void *addr, unsigned long len, int mode, static inline int my_mbind(void *addr, unsigned long len, int mode,
unsigned long *nodemask, unsigned long maxnode, unsigned long *nodemask, unsigned long maxnode,
unsigned flags) { unsigned flags) {
#if defined (__LSB_VERSION__) #if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
// So far, LSB (Linux Standard Base) don't support syscall(). // So far, LSB (Linux Standard Base) don't support syscall().
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482 // https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
return 0; return 0;
@ -90,7 +90,7 @@ static inline int my_mbind(void *addr, unsigned long len, int mode,
} }
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) { static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
#if defined (__LSB_VERSION__) #if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
// So far, LSB (Linux Standard Base) don't support syscall(). // So far, LSB (Linux Standard Base) don't support syscall().
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482 // https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
return 0; return 0;

View File

@ -2193,7 +2193,7 @@
#endif #endif
#ifndef ASSEMBLER #ifndef ASSEMBLER
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
extern BLASLONG gemm_offset_a; extern BLASLONG gemm_offset_a;
extern BLASLONG gemm_offset_b; extern BLASLONG gemm_offset_b;
extern BLASLONG sgemm_p; extern BLASLONG sgemm_p;

108
common_mips.h Normal file
View File

@ -0,0 +1,108 @@
/*****************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#ifndef COMMON_MIPS
#define COMMON_MIPS
#define MB
#define WMB
#define INLINE inline
#define RETURN_BY_COMPLEX
#ifndef ASSEMBLER
static void INLINE blas_lock(volatile unsigned long *address){
}
#define BLAS_LOCK_DEFINED
static inline unsigned int rpcc(void){
unsigned long ret;
__asm__ __volatile__(".set push \n"
"rdhwr %0, $30 \n"
".set pop" : "=r"(ret) : : "memory");
return ret;
}
#define RPCC_DEFINED
static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
}
#define GET_IMAGE(res)
#define GET_IMAGE_CANCEL
#endif
#ifndef F_INTERFACE
#define REALNAME ASMNAME
#else
#define REALNAME ASMFNAME
#endif
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
#define PROLOGUE \
.arm ;\
.global REALNAME ;\
REALNAME:
#define EPILOGUE
#define PROFCODE
#endif
#define SEEK_ADDRESS
#ifndef PAGESIZE
#define PAGESIZE ( 4 << 10)
#endif
#define HUGE_PAGESIZE ( 4 << 20)
#define BUFFER_SIZE (16 << 20)
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif

View File

@ -102,7 +102,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
static inline unsigned int rpcc(void){ static inline unsigned int rpcc(void){
unsigned long ret; unsigned long ret;
#if defined(LOONGSON3A) || defined(LOONGSON3B)
// unsigned long long tmp; // unsigned long long tmp;
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); //__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
//ret=tmp; //ret=tmp;
@ -111,17 +111,10 @@ static inline unsigned int rpcc(void){
"rdhwr %0, $2\n" "rdhwr %0, $2\n"
".set pop": "=r"(ret):: "memory"); ".set pop": "=r"(ret):: "memory");
#else
__asm__ __volatile__(".set push \n"
".set mips32r2\n"
"rdhwr %0, $30 \n"
".set pop" : "=r"(ret) : : "memory");
#endif
return ret; return ret;
} }
#define RPCC_DEFINED #define RPCC_DEFINED
#if defined(LOONGSON3A) || defined(LOONGSON3B)
#ifndef NO_AFFINITY #ifndef NO_AFFINITY
#define WHEREAMI #define WHEREAMI
static inline int WhereAmI(void){ static inline int WhereAmI(void){
@ -134,7 +127,6 @@ static inline int WhereAmI(void){
} }
#endif #endif
#endif
static inline int blas_quickdivide(blasint x, blasint y){ static inline int blas_quickdivide(blasint x, blasint y){
return x / y; return x / y;

View File

@ -39,8 +39,13 @@
#ifndef COMMON_POWER #ifndef COMMON_POWER
#define COMMON_POWER #define COMMON_POWER
#if defined(POWER8)
#define MB __asm__ __volatile__ ("eieio":::"memory")
#define WMB __asm__ __volatile__ ("eieio":::"memory")
#else
#define MB __asm__ __volatile__ ("sync") #define MB __asm__ __volatile__ ("sync")
#define WMB __asm__ __volatile__ ("sync") #define WMB __asm__ __volatile__ ("sync")
#endif
#define INLINE inline #define INLINE inline
@ -236,7 +241,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define HAVE_PREFETCH #define HAVE_PREFETCH
#endif #endif
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8)
#define DCBT_ARG 0 #define DCBT_ARG 0
#else #else
#define DCBT_ARG 8 #define DCBT_ARG 8
@ -258,6 +263,13 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define L1_PREFETCH dcbtst #define L1_PREFETCH dcbtst
#endif #endif
#if defined(POWER8)
#define L1_DUALFETCH
#define L1_PREFETCHSIZE (16 + 128 * 100)
#define L1_PREFETCH dcbtst
#endif
#
#ifndef L1_PREFETCH #ifndef L1_PREFETCH
#define L1_PREFETCH dcbt #define L1_PREFETCH dcbt
#endif #endif
@ -790,6 +802,8 @@ Lmcount$lazy_ptr:
#define BUFFER_SIZE ( 2 << 20) #define BUFFER_SIZE ( 2 << 20)
#elif defined(PPC440FP2) #elif defined(PPC440FP2)
#define BUFFER_SIZE ( 16 << 20) #define BUFFER_SIZE ( 16 << 20)
#elif defined(POWER8)
#define BUFFER_SIZE ( 64 << 20)
#else #else
#define BUFFER_SIZE ( 16 << 20) #define BUFFER_SIZE ( 16 << 20)
#endif #endif

73
common_stackalloc.h Normal file
View File

@ -0,0 +1,73 @@
/*******************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#define STACK_ALLOC_PROTECT
#ifdef STACK_ALLOC_PROTECT
// Try to detect stack smashing
#include <assert.h>
#define STACK_ALLOC_PROTECT_SET volatile int stack_check = 0x7fc01234;
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
#else
#define STACK_ALLOC_PROTECT_SET
#define STACK_ALLOC_PROTECT_CHECK
#endif
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
/*
* Allocate a buffer on the stack if the size is smaller than MAX_STACK_ALLOC.
* Stack allocation is much faster than blas_memory_alloc or malloc, particularly
* when OpenBLAS is used from a multi-threaded application.
* SIZE must be carefully chosen to be:
* - as small as possible to maximize the number of stack allocation
* - large enough to support all architectures and kernel
* Chosing a too small SIZE will lead to a stack smashing.
*/
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
/* make it volatile because some function (ex: dgemv_n.S) */ \
/* do not restore all register */ \
volatile int stack_alloc_size = SIZE; \
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \
stack_alloc_size = 0; \
STACK_ALLOC_PROTECT_SET \
TYPE stack_buffer[stack_alloc_size] __attribute__((aligned(0x20))); \
BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
#else
//Original OpenBLAS/GotoBLAS codes.
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
#endif
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
#define STACK_FREE(BUFFER) \
STACK_ALLOC_PROTECT_CHECK \
if(!stack_alloc_size) \
blas_memory_free(BUFFER);
#else
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
#endif

View File

@ -41,6 +41,10 @@
#ifndef ASSEMBLER #ifndef ASSEMBLER
#ifdef C_MSVC
#include <intrin.h>
#endif
#define MB #define MB
#define WMB #define WMB
@ -58,7 +62,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
#if defined(_MSC_VER) && !defined(__clang__) #if defined(_MSC_VER) && !defined(__clang__)
// use intrinsic instead of inline assembly // use intrinsic instead of inline assembly
ret = _InterlockedExchange(address, 1); ret = _InterlockedExchange((volatile LONG *)address, 1);
// inline assembly // inline assembly
/*__asm { /*__asm {
mov eax, address mov eax, address
@ -170,12 +174,13 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
if (y <= 1) return x; if (y <= 1) return x;
#if defined(_MSC_VER) && !defined(__clang__)
result = x/y;
return result;
#else
y = blas_quick_divide_table[y]; y = blas_quick_divide_table[y];
#if defined(_MSC_VER) && !defined(__clang__)
(void*)result;
return x*y;
#else
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
return result; return result;

View File

@ -396,7 +396,7 @@ REALNAME:
#define PROFCODE #define PROFCODE
#define EPILOGUE .end REALNAME #define EPILOGUE .end
#endif #endif
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI) #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)

140
common_zarch.h Normal file
View File

@ -0,0 +1,140 @@
/*****************************************************************************
Copyright (c) 2011-2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#ifndef COMMON_ZARCH
#define COMMON_ZARCH
#define MB
//__asm__ __volatile__ ("dmb ish" : : : "memory")
#define WMB
//__asm__ __volatile__ ("dmb ishst" : : : "memory")
#define INLINE inline
#define RETURN_BY_COMPLEX
#ifndef ASSEMBLER
/*
static void __inline blas_lock(volatile BLASULONG *address){
BLASULONG ret;
do {
while (*address) {YIELDING;};
__asm__ __volatile__(
"mov x4, #1 \n\t"
"1: \n\t"
"ldaxr x2, [%1] \n\t"
"cbnz x2, 1b \n\t"
"2: \n\t"
"stxr w3, x4, [%1] \n\t"
"cbnz w3, 1b \n\t"
"mov %0, #0 \n\t"
: "=r"(ret), "=r"(address)
: "1"(address)
: "memory", "x2" , "x3", "x4"
);
} while (ret);
}
*/
//#define BLAS_LOCK_DEFINED
static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
}
#if defined(DOUBLE)
#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
#else
#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
#endif
#define GET_IMAGE_CANCEL
#endif
#ifndef F_INTERFACE
#define REALNAME ASMNAME
#else
#define REALNAME ASMFNAME
#endif
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
#define PROLOGUE \
.text ;\
.align 256 ;\
.global REALNAME ;\
.type REALNAME, %function ;\
REALNAME:
#define EPILOGUE
#define PROFCODE
#endif
#define SEEK_ADDRESS
#ifndef PAGESIZE
#define PAGESIZE ( 4 << 10)
#endif
#define HUGE_PAGESIZE ( 4 << 20)
#if defined(CORTEXA57)
#define BUFFER_SIZE (20 << 20)
#else
#define BUFFER_SIZE (16 << 20)
#endif
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif

View File

@ -74,7 +74,7 @@ int get_feature(char *search)
fclose(infile); fclose(infile);
if( p == NULL ) return; if( p == NULL ) return 0;
t = strtok(p," "); t = strtok(p," ");
while( t = strtok(NULL," ")) while( t = strtok(NULL," "))
@ -115,6 +115,9 @@ int detect(void)
if (strstr(p, "0xc0f")) { if (strstr(p, "0xc0f")) {
return CPU_CORTEXA15; return CPU_CORTEXA15;
} }
if (strstr(p, "0xd07")) {
return CPU_ARMV7; //ARMV8 on 32-bit
}
} }
@ -158,6 +161,27 @@ int detect(void)
} }
p = (char *) NULL ;
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile))
{
if ((!strncmp("CPU architecture", buffer, 16)))
{
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if(p != NULL) {
if (strstr(p, "8")) {
return CPU_ARMV7; //ARMV8 on 32-bit
}
}
#endif #endif
return CPU_UNKNOWN; return CPU_UNKNOWN;

View File

@ -30,17 +30,26 @@
#define CPU_UNKNOWN 0 #define CPU_UNKNOWN 0
#define CPU_ARMV8 1 #define CPU_ARMV8 1
#define CPU_CORTEXA57 2 #define CPU_CORTEXA57 2
#define CPU_VULCAN 3
#define CPU_THUNDERX 4
#define CPU_THUNDERX2T99 5
static char *cpuname[] = { static char *cpuname[] = {
"UNKNOWN", "UNKNOWN",
"ARMV8" , "ARMV8" ,
"CORTEXA57" "CORTEXA57",
"VULCAN",
"THUNDERX",
"THUNDERX2T99"
}; };
static char *cpuname_lower[] = { static char *cpuname_lower[] = {
"unknown", "unknown",
"armv8" , "armv8" ,
"cortexa57" "cortexa57",
"vulcan",
"thunderx",
"thunderx2t99"
}; };
int get_feature(char *search) int get_feature(char *search)
@ -85,25 +94,34 @@ int detect(void)
#ifdef linux #ifdef linux
FILE *infile; FILE *infile;
char buffer[512], *p; char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
p = (char *) NULL ; p = (char *) NULL ;
infile = fopen("/proc/cpuinfo", "r"); infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile)) while (fgets(buffer, sizeof(buffer), infile)) {
{ if ((cpu_part != NULL) && (cpu_implementer != NULL)) {
if (!strncmp("CPU part", buffer, 8))
{
p = strchr(buffer, ':') + 2;
break; break;
} }
if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) {
cpu_part = strchr(buffer, ':') + 2;
cpu_part = strdup(cpu_part);
} else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) {
cpu_implementer = strchr(buffer, ':') + 2;
cpu_implementer = strdup(cpu_implementer);
}
} }
fclose(infile); fclose(infile);
if(p != NULL) { if(cpu_part != NULL && cpu_implementer != NULL) {
if (strstr(p, "0xd07")) { if (strstr(cpu_part, "0xd07") && strstr(cpu_implementer, "0x41"))
return CPU_CORTEXA57; return CPU_CORTEXA57;
} else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42"))
return CPU_VULCAN;
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43"))
return CPU_THUNDERX;
else if (strstr(cpu_part, "0xFFF") && strstr(cpu_implementer, "0x43")) /* TODO */
return CPU_THUNDERX2T99;
} }
p = (char *) NULL ; p = (char *) NULL ;
@ -176,6 +194,28 @@ void get_cpuconfig(void)
printf("#define L2_ASSOCIATIVE 4\n"); printf("#define L2_ASSOCIATIVE 4\n");
break; break;
case CPU_VULCAN:
printf("#define VULCAN \n");
printf("#define HAVE_VFP \n");
printf("#define HAVE_VFPV3 \n");
printf("#define HAVE_NEON \n");
printf("#define HAVE_VFPV4 \n");
printf("#define L1_CODE_SIZE 32768 \n");
printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
printf("#define L1_DATA_SIZE 32768 \n");
printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
printf("#define L2_SIZE 262144 \n");
printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define L3_SIZE 33554432 \n");
printf("#define L3_LINESIZE 64 \n");
printf("#define L3_ASSOCIATIVE 32 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break;
case CPU_CORTEXA57: case CPU_CORTEXA57:
printf("#define CORTEXA57\n"); printf("#define CORTEXA57\n");
printf("#define HAVE_VFP\n"); printf("#define HAVE_VFP\n");
@ -191,6 +231,42 @@ void get_cpuconfig(void)
printf("#define L2_SIZE 2097152\n"); printf("#define L2_SIZE 2097152\n");
printf("#define L2_LINESIZE 64\n"); printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 16\n"); printf("#define L2_ASSOCIATIVE 16\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
case CPU_THUNDERX:
printf("#define ARMV8\n");
printf("#define THUNDERX\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 128\n");
printf("#define L2_SIZE 16777216\n");
printf("#define L2_LINESIZE 128\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 16\n");
break;
case CPU_THUNDERX2T99:
printf("#define VULCAN \n");
printf("#define HAVE_VFP \n");
printf("#define HAVE_VFPV3 \n");
printf("#define HAVE_NEON \n");
printf("#define HAVE_VFPV4 \n");
printf("#define L1_CODE_SIZE 32768 \n");
printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
printf("#define L1_DATA_SIZE 32768 \n");
printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
printf("#define L2_SIZE 262144 \n");
printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define L3_SIZE 33554432 \n");
printf("#define L3_LINESIZE 64 \n");
printf("#define L3_ASSOCIATIVE 32 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break; break;
} }
} }

View File

@ -71,15 +71,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*********************************************************************/ /*********************************************************************/
#define CPU_UNKNOWN 0 #define CPU_UNKNOWN 0
#define CPU_SICORTEX 1 #define CPU_P5600 1
#define CPU_LOONGSON3A 2
#define CPU_LOONGSON3B 3
static char *cpuname[] = { static char *cpuname[] = {
"UNKOWN", "UNKOWN",
"SICORTEX", "P5600"
"LOONGSON3A",
"LOONGSON3B"
}; };
int detect(void){ int detect(void){
@ -120,7 +116,7 @@ int detect(void){
if (strstr(p, "loongson3a")) if (strstr(p, "loongson3a"))
return CPU_LOONGSON3A; return CPU_LOONGSON3A;
}else{ }else{
return CPU_SICORTEX; return CPU_UNKNOWN;
} }
} }
//Check model name for Loongson3 //Check model name for Loongson3
@ -149,64 +145,40 @@ char *get_corename(void){
} }
void get_architecture(void){ void get_architecture(void){
printf("MIPS64"); printf("MIPS");
} }
void get_subarchitecture(void){ void get_subarchitecture(void){
if(detect()==CPU_LOONGSON3A) { if(detect()==CPU_P5600){
printf("LOONGSON3A"); printf("P5600");
}else if(detect()==CPU_LOONGSON3B){
printf("LOONGSON3B");
}else{ }else{
printf("SICORTEX"); printf("UNKNOWN");
} }
} }
void get_subdirname(void){ void get_subdirname(void){
printf("mips64"); printf("mips");
} }
void get_cpuconfig(void){ void get_cpuconfig(void){
if(detect()==CPU_LOONGSON3A) { if(detect()==CPU_P5600){
printf("#define LOONGSON3A\n"); printf("#define P5600\n");
printf("#define L1_DATA_SIZE 65536\n"); printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n"); printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 32\n"); printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
}else if(detect()==CPU_LOONGSON3B){
printf("#define LOONGSON3B\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
}else{
printf("#define SICORTEX\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 32\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n"); printf("#define L2_ASSOCIATIVE 8\n");
}else{
printf("#define UNKNOWN\n");
} }
} }
void get_libname(void){ void get_libname(void){
if(detect()==CPU_LOONGSON3A) { if(detect()==CPU_P5600) {
printf("loongson3a\n"); printf("p5600\n");
}else if(detect()==CPU_LOONGSON3B) {
printf("loongson3b\n");
}else{ }else{
#ifdef __mips64 printf("mips\n");
printf("mips64\n");
#else
printf("mips32\n");
#endif
} }
} }

238
cpuid_mips64.c Normal file
View File

@ -0,0 +1,238 @@
/*****************************************************************************
Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define CPU_UNKNOWN 0
#define CPU_SICORTEX 1
#define CPU_LOONGSON3A 2
#define CPU_LOONGSON3B 3
#define CPU_I6400 4
#define CPU_P6600 5
static char *cpuname[] = {
"UNKOWN",
"SICORTEX",
"LOONGSON3A",
"LOONGSON3B",
"I6400",
"P6600"
};
int detect(void){
#ifdef linux
FILE *infile;
char buffer[512], *p;
p = (char *)NULL;
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("cpu", buffer, 3)){
p = strchr(buffer, ':') + 2;
#if 0
fprintf(stderr, "%s\n", p);
#endif
break;
}
}
fclose(infile);
if(p != NULL){
if (strstr(p, "Loongson-3A")){
return CPU_LOONGSON3A;
}else if(strstr(p, "Loongson-3B")){
return CPU_LOONGSON3B;
}else if (strstr(p, "Loongson-3")){
infile = fopen("/proc/cpuinfo", "r");
p = (char *)NULL;
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("system type", buffer, 11)){
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if (strstr(p, "loongson3a"))
return CPU_LOONGSON3A;
}else{
return CPU_SICORTEX;
}
}
//Check model name for Loongson3
infile = fopen("/proc/cpuinfo", "r");
p = (char *)NULL;
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("model name", buffer, 10)){
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if(p != NULL){
if (strstr(p, "Loongson-3A")){
return CPU_LOONGSON3A;
}else if(strstr(p, "Loongson-3B")){
return CPU_LOONGSON3B;
}
}
#endif
return CPU_UNKNOWN;
}
char *get_corename(void){
return cpuname[detect()];
}
void get_architecture(void){
printf("MIPS64");
}
void get_subarchitecture(void){
if(detect()==CPU_LOONGSON3A) {
printf("LOONGSON3A");
}else if(detect()==CPU_LOONGSON3B){
printf("LOONGSON3B");
}else if(detect()==CPU_I6400){
printf("I6400");
}else if(detect()==CPU_P6600){
printf("P6600");
}else{
printf("SICORTEX");
}
}
void get_subdirname(void){
printf("mips64");
}
void get_cpuconfig(void){
if(detect()==CPU_LOONGSON3A) {
printf("#define LOONGSON3A\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
}else if(detect()==CPU_LOONGSON3B){
printf("#define LOONGSON3B\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
}else if(detect()==CPU_I6400){
printf("#define I6400\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n");
}else if(detect()==CPU_P6600){
printf("#define P6600\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n");
}else{
printf("#define SICORTEX\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 32\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n");
}
}
void get_libname(void){
if(detect()==CPU_LOONGSON3A) {
printf("loongson3a\n");
}else if(detect()==CPU_LOONGSON3B) {
printf("loongson3b\n");
}else if(detect()==CPU_I6400) {
printf("i6400\n");
}else if(detect()==CPU_P6600) {
printf("p6600\n");
}else{
printf("mips64\n");
}
}

View File

@ -55,6 +55,7 @@
#define CPUTYPE_POWER6 5 #define CPUTYPE_POWER6 5
#define CPUTYPE_CELL 6 #define CPUTYPE_CELL 6
#define CPUTYPE_PPCG4 7 #define CPUTYPE_PPCG4 7
#define CPUTYPE_POWER8 8
char *cpuname[] = { char *cpuname[] = {
"UNKNOWN", "UNKNOWN",
@ -65,6 +66,7 @@ char *cpuname[] = {
"POWER6", "POWER6",
"CELL", "CELL",
"PPCG4", "PPCG4",
"POWER8"
}; };
char *lowercpuname[] = { char *lowercpuname[] = {
@ -76,6 +78,7 @@ char *lowercpuname[] = {
"power6", "power6",
"cell", "cell",
"ppcg4", "ppcg4",
"power8"
}; };
char *corename[] = { char *corename[] = {
@ -87,6 +90,7 @@ char *corename[] = {
"POWER6", "POWER6",
"CELL", "CELL",
"PPCG4", "PPCG4",
"POWER8"
}; };
int detect(void){ int detect(void){
@ -115,7 +119,7 @@ int detect(void){
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6; if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;

View File

@ -636,6 +636,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LD1.associative = 8; LD1.associative = 8;
LD1.linesize = 64; LD1.linesize = 64;
break; break;
case 0x63 :
DTB.size = 2048;
DTB.associative = 4;
DTB.linesize = 32;
LDTB.size = 4096;
LDTB.associative= 4;
LDTB.linesize = 32;
case 0x66 : case 0x66 :
LD1.size = 8; LD1.size = 8;
LD1.associative = 4; LD1.associative = 4;
@ -667,6 +674,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LC1.size = 64; LC1.size = 64;
LC1.associative = 8; LC1.associative = 8;
break; break;
case 0x76 :
ITB.size = 2048;
ITB.associative = 0;
ITB.linesize = 8;
LITB.size = 4096;
LITB.associative= 0;
LITB.linesize = 8;
case 0x77 : case 0x77 :
LC1.size = 16; LC1.size = 16;
LC1.associative = 4; LC1.associative = 4;
@ -1110,6 +1124,9 @@ int get_cpuname(void){
break; break;
case 3: case 3:
switch (model) { switch (model) {
case 7:
// Bay Trail
return CPUTYPE_ATOM;
case 10: case 10:
case 14: case 14:
// Ivy Bridge // Ivy Bridge
@ -1172,6 +1189,11 @@ int get_cpuname(void){
#endif #endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 12:
// Braswell
case 13:
// Avoton
return CPUTYPE_NEHALEM;
} }
break; break;
case 5: case 5:
@ -1197,8 +1219,35 @@ int get_cpuname(void){
#endif #endif
else else
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 7:
// Xeon Phi Knights Landing
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 12:
// Apollo Lake
return CPUTYPE_NEHALEM;
} }
break; break;
case 9:
case 8:
switch (model) {
case 14: // Kaby Lake
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
}
break;
} }
break; break;
case 0x7: case 0x7:
@ -1229,6 +1278,7 @@ int get_cpuname(void){
case 2: case 2:
return CPUTYPE_OPTERON; return CPUTYPE_OPTERON;
case 1: case 1:
case 3:
case 10: case 10:
return CPUTYPE_BARCELONA; return CPUTYPE_BARCELONA;
case 6: case 6:
@ -1245,6 +1295,11 @@ int get_cpuname(void){
return CPUTYPE_PILEDRIVER; return CPUTYPE_PILEDRIVER;
else else
return CPUTYPE_BARCELONA; //OS don't support AVX. return CPUTYPE_BARCELONA; //OS don't support AVX.
case 5: // New EXCAVATOR CPUS
if(support_avx())
return CPUTYPE_EXCAVATOR;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 0: case 0:
switch(exmodel){ switch(exmodel){
case 1: //AMD Trinity case 1: //AMD Trinity
@ -1674,6 +1729,11 @@ int get_coretype(void){
#endif #endif
else else
return CORE_NEHALEM; return CORE_NEHALEM;
case 12:
// Braswell
case 13:
// Avoton
return CORE_NEHALEM;
} }
break; break;
case 5: case 5:
@ -1699,8 +1759,32 @@ int get_coretype(void){
#endif #endif
else else
return CORE_NEHALEM; return CORE_NEHALEM;
} case 7:
// Phi Knights Landing
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
case 12:
// Apollo Lake
return CORE_NEHALEM;
}
break; break;
case 9:
case 8:
if (model == 14) // Kaby Lake
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
} }
break; break;
@ -1730,7 +1814,11 @@ int get_coretype(void){
return CORE_PILEDRIVER; return CORE_PILEDRIVER;
else else
return CORE_BARCELONA; //OS don't support AVX. return CORE_BARCELONA; //OS don't support AVX.
case 5: // New EXCAVATOR
if(support_avx())
return CORE_EXCAVATOR;
else
return CORE_BARCELONA; //OS don't support AVX.
case 0: case 0:
switch(exmodel){ switch(exmodel){
case 1: //AMD Trinity case 1: //AMD Trinity

93
cpuid_zarch.c Normal file
View File

@ -0,0 +1,93 @@
/**************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <string.h>
#define CPU_GENERIC 0
#define CPU_Z13 1
static char *cpuname[] = {
"ZARCH_GENERIC",
"Z13"
};
static char *cpuname_lower[] = {
"zarch_generic",
"z13"
};
int detect(void)
{
// return CPU_GENERIC;
return CPU_Z13;
}
void get_libname(void)
{
int d = detect();
printf("%s", cpuname_lower[d]);
}
char *get_corename(void)
{
return cpuname[detect()];
}
void get_architecture(void)
{
printf("ZARCH");
}
void get_subarchitecture(void)
{
int d = detect();
printf("%s", cpuname[d]);
}
void get_subdirname(void)
{
printf("zarch");
}
void get_cpuconfig(void)
{
int d = detect();
switch (d){
case CPU_GENERIC:
printf("#define ZARCH_GENERIC\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
break;
case CPU_Z13:
printf("#define Z13\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
break;
}
}

View File

@ -105,12 +105,16 @@ ARCH_X86_64
ARCH_POWER ARCH_POWER
#endif #endif
#if defined(__s390x__) || defined(__zarch__)
ARCH_ZARCH
#endif
#ifdef __mips64 #ifdef __mips64
ARCH_MIPS64 ARCH_MIPS64
#endif #endif
#if defined(__mips32) || defined(__mips) #if defined(__mips32) || defined(__mips)
ARCH_MIPS32 ARCH_MIPS
#endif #endif
#ifdef __alpha #ifdef __alpha

View File

@ -1,4 +1,4 @@
include_directories(${CMAKE_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR})
enable_language(Fortran) enable_language(Fortran)

View File

@ -42,6 +42,7 @@ ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o
all :: all1 all2 all3 all :: all1 all2 all3
all1: xscblat1 xdcblat1 xccblat1 xzcblat1 all1: xscblat1 xdcblat1 xccblat1 xzcblat1
ifndef CROSS
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
OMP_NUM_THREADS=2 ./xscblat1 OMP_NUM_THREADS=2 ./xscblat1
OMP_NUM_THREADS=2 ./xdcblat1 OMP_NUM_THREADS=2 ./xdcblat1
@ -53,8 +54,10 @@ else
OPENBLAS_NUM_THREADS=2 ./xccblat1 OPENBLAS_NUM_THREADS=2 ./xccblat1
OPENBLAS_NUM_THREADS=2 ./xzcblat1 OPENBLAS_NUM_THREADS=2 ./xzcblat1
endif endif
endif
all2: xscblat2 xdcblat2 xccblat2 xzcblat2 all2: xscblat2 xdcblat2 xccblat2 xzcblat2
ifndef CROSS
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
OMP_NUM_THREADS=2 ./xscblat2 < sin2 OMP_NUM_THREADS=2 ./xscblat2 < sin2
OMP_NUM_THREADS=2 ./xdcblat2 < din2 OMP_NUM_THREADS=2 ./xdcblat2 < din2
@ -66,8 +69,10 @@ else
OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2 OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2 OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
endif endif
endif
all3: xscblat3 xdcblat3 xccblat3 xzcblat3 all3: xscblat3 xdcblat3 xccblat3 xzcblat3
ifndef CROSS
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
OMP_NUM_THREADS=2 ./xscblat3 < sin3 OMP_NUM_THREADS=2 ./xscblat3 < sin3
OMP_NUM_THREADS=2 ./xdcblat3 < din3 OMP_NUM_THREADS=2 ./xdcblat3 < din3
@ -88,6 +93,7 @@ else
OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif endif
endif

View File

@ -1365,8 +1365,9 @@
* *
150 CONTINUE 150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9996 )SNAME
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, IF( TRACE )
$ M, N, ALPHA, LDA, LDB) $ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
* *
160 CONTINUE 160 CONTINUE
RETURN RETURN

View File

@ -1365,8 +1365,9 @@
* *
150 CONTINUE 150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9996 )SNAME
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, IF( TRACE )
$ M, N, ALPHA, LDA, LDB) $ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
* *
160 CONTINUE 160 CONTINUE
RETURN RETURN

View File

@ -1335,8 +1335,9 @@
* *
150 CONTINUE 150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9996 )SNAME
CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, IF( TRACE )
$ M, N, ALPHA, LDA, LDB) $ CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
* *
160 CONTINUE 160 CONTINUE
RETURN RETURN

View File

@ -1339,8 +1339,9 @@
* *
150 CONTINUE 150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9996 )SNAME
CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, IF( TRACE )
$ M, N, ALPHA, LDA, LDB) $ CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
* *
160 CONTINUE 160 CONTINUE
RETURN RETURN

View File

@ -1350,7 +1350,7 @@
* *
* Call the subroutine. * Call the subroutine.
* *
IF( SNAME( 4: 5 ).EQ.'mv' )THEN IF( SNAME( 10: 11 ).EQ.'mv' )THEN
IF( FULL )THEN IF( FULL )THEN
IF( TRACE ) IF( TRACE )
$ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ WRITE( NTRA, FMT = 9993 )NC, SNAME,
@ -1376,7 +1376,7 @@
CALL CZTPMV( IORDER, UPLO, TRANS, DIAG, CALL CZTPMV( IORDER, UPLO, TRANS, DIAG,
$ N, AA, XX, INCX ) $ N, AA, XX, INCX )
END IF END IF
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN
IF( FULL )THEN IF( FULL )THEN
IF( TRACE ) IF( TRACE )
$ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ WRITE( NTRA, FMT = 9993 )NC, SNAME,
@ -1465,7 +1465,7 @@
END IF END IF
* *
IF( .NOT.NULL )THEN IF( .NOT.NULL )THEN
IF( SNAME( 4: 5 ).EQ.'mv' )THEN IF( SNAME( 10: 11 ).EQ.'mv' )THEN
* *
* Check the result. * Check the result.
* *
@ -1473,7 +1473,7 @@
$ INCX, ZERO, Z, INCX, XT, G, $ INCX, ZERO, Z, INCX, XT, G,
$ XX, EPS, ERR, FATAL, NOUT, $ XX, EPS, ERR, FATAL, NOUT,
$ .TRUE. ) $ .TRUE. )
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN
* *
* Compute approximation to original vector. * Compute approximation to original vector.
* *
@ -1611,7 +1611,7 @@
* .. Common blocks .. * .. Common blocks ..
COMMON /INFOC/INFOT, NOUTC, OK COMMON /INFOC/INFOT, NOUTC, OK
* .. Executable Statements .. * .. Executable Statements ..
CONJ = SNAME( 5: 5 ).EQ.'c' CONJ = SNAME( 11: 11 ).EQ.'c'
* Define the number of arguments. * Define the number of arguments.
NARGS = 9 NARGS = 9
* *

View File

@ -1366,8 +1366,9 @@
* *
150 CONTINUE 150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9996 )SNAME
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, IF( TRACE )
$ M, N, ALPHA, LDA, LDB) $ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
* *
160 CONTINUE 160 CONTINUE
RETURN RETURN

View File

@ -1366,8 +1366,9 @@
* *
150 CONTINUE 150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9996 )SNAME
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, IF( TRACE )
$ M, N, ALPHA, LDA, LDB) $ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
* *
160 CONTINUE 160 CONTINUE
RETURN RETURN

View File

@ -1,7 +1,7 @@
'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,7 +1,7 @@
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,7 +1,7 @@
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,7 +1,7 @@
'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,7 +1,7 @@
'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,7 +1,7 @@
'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,7 +1,7 @@
'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,7 +1,7 @@
'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,7 +1,7 @@
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,7 +1,7 @@
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS. T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO 16.0 THRESHOLD VALUE OF TEST RATIO

View File

@ -1,5 +1,5 @@
include_directories(${CMAKE_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR})
# sources that need to be compiled twice, once with no flags and once with LOWER # sources that need to be compiled twice, once with no flags and once with LOWER
set(UL_SOURCES set(UL_SOURCES

View File

@ -119,7 +119,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#endif #endif
x = buffer; x = buffer;
buffer += ((COMPSIZE * args -> m + 1023) & ~1023); buffer += ((COMPSIZE * args -> m + 3) & ~3);
} }
#ifndef TRANS #ifndef TRANS
@ -403,7 +403,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
if (num_cpu) { if (num_cpu) {
queue[0].sa = NULL; queue[0].sa = NULL;
queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE; queue[0].sb = buffer + num_cpu * (((m + 3) & ~3) + 16) * COMPSIZE;
queue[num_cpu - 1].next = NULL; queue[num_cpu - 1].next = NULL;

View File

@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
if (incb != 1) { if (incb != 1) {
B = buffer; B = buffer;
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15);
COPY_K(m, b, incb, buffer, 1); COPY_K(m, b, incb, buffer, 1);
} }

View File

@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
if (incb != 1) { if (incb != 1) {
B = buffer; B = buffer;
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15);
COPY_K(m, b, incb, buffer, 1); COPY_K(m, b, incb, buffer, 1);
} }

View File

@ -1,4 +1,4 @@
include_directories(${CMAKE_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR})
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa
@ -48,8 +48,7 @@ foreach (float_type ${FLOAT_TYPES})
# TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination # TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type}) GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type})
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type}) GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type})
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type})
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type})
# Need to set CONJ for trmm and trsm # Need to set CONJ for trmm and trsm
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type}) GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type})
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type}) GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type})
@ -72,6 +71,10 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
#herk
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type})
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type})
#hemm #hemm
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
@ -96,6 +99,17 @@ foreach (float_type ${FLOAT_TYPES})
endif() endif()
endif () endif ()
endforeach () endforeach ()
# for gemm3m
if(USE_GEMM3M)
foreach (GEMM_DEFINE ${GEMM_DEFINES})
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE}" "gemm3m_${GEMM_DEFINE_LC}" false "" "" false ${float_type})
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm3m_thread_${GEMM_DEFINE_LC}" false "" "" false ${float_type})
endif ()
endforeach ()
endif()
endif () endif ()
endforeach () endforeach ()

View File

@ -316,7 +316,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
if (min_l > GEMM3M_Q) { if (min_l > GEMM3M_Q) {
min_l = (min_l + 1) / 2; min_l = (min_l + 1) / 2;
#ifdef UNROLL_X #ifdef UNROLL_X
min_l = (min_l + UNROLL_X - 1) & ~(UNROLL_X - 1); min_l = ((min_l + UNROLL_X - 1)/UNROLL_X) * UNROLL_X;
#endif #endif
} }
} }
@ -326,7 +326,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else { } else {
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
} }
@ -365,7 +365,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else } else
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
START_RPCC(); START_RPCC();
@ -386,7 +386,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else { } else {
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
} }
@ -429,7 +429,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else } else
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
START_RPCC(); START_RPCC();
@ -451,7 +451,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else { } else {
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
} }
@ -494,7 +494,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else } else
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
START_RPCC(); START_RPCC();

View File

@ -297,9 +297,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_l = GEMM_Q; min_l = GEMM_Q;
} else { } else {
if (min_l > GEMM_Q) { if (min_l > GEMM_Q) {
min_l = (min_l / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); min_l = ((min_l / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
} }
gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1)); gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M; while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M;
} }
@ -311,7 +311,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P; min_i = GEMM_P;
} else { } else {
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
} else { } else {
l1stride = 0; l1stride = 0;
} }
@ -335,7 +335,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
else else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
@ -367,7 +369,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
} }
START_RPCC(); START_RPCC();

View File

@ -365,7 +365,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
buffer[0] = sb; buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) { for (i = 1; i < DIVIDE_RATE; i++) {
buffer[i] = buffer[i - 1] + GEMM3M_Q * ((div_n + GEMM3M_UNROLL_N - 1) & ~(GEMM3M_UNROLL_N - 1)); buffer[i] = buffer[i - 1] + GEMM3M_Q * (((div_n + GEMM3M_UNROLL_N - 1)/GEMM3M_UNROLL_N) * GEMM3M_UNROLL_N);
} }
for(ls = 0; ls < k; ls += min_l){ for(ls = 0; ls < k; ls += min_l){
@ -384,7 +384,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else { } else {
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
} }
@ -482,7 +482,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else } else
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
START_RPCC(); START_RPCC();
@ -618,7 +618,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else } else
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
START_RPCC(); START_RPCC();
@ -754,7 +754,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM3M_P; min_i = GEMM3M_P;
} else } else
if (min_i > GEMM3M_P) { if (min_i > GEMM3M_P) {
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M;
} }
START_RPCC(); START_RPCC();

View File

@ -189,7 +189,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
#ifndef LOWER #ifndef LOWER
@ -230,7 +230,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa); ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
@ -245,7 +245,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
if (m_start >= js) { if (m_start >= js) {
@ -284,7 +284,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa); ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa);
@ -322,7 +322,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
aa = sb + min_l * (is - js) * COMPSIZE; aa = sb + min_l * (is - js) * COMPSIZE;
@ -353,7 +353,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
aa = sb + min_l * (m_start - js) * COMPSIZE; aa = sb + min_l * (m_start - js) * COMPSIZE;
@ -383,7 +383,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
aa = sb + min_l * (is - js) * COMPSIZE; aa = sb + min_l * (is - js) * COMPSIZE;

View File

@ -198,7 +198,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
#ifndef LOWER #ifndef LOWER
@ -239,7 +239,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
aa = sb + min_l * (is - js) * COMPSIZE; aa = sb + min_l * (is - js) * COMPSIZE;
@ -303,7 +303,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
START_RPCC(); START_RPCC();
@ -375,7 +375,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
if (is < js + min_j) { if (is < js + min_j) {
@ -460,7 +460,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
START_RPCC(); START_RPCC();

View File

@ -210,8 +210,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", mypos, m_from, m_to, n_from, n_to); fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", mypos, m_from, m_to, n_from, n_to);
#endif #endif
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
buffer[0] = sb; buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) { for (i = 1; i < DIVIDE_RATE; i++) {
@ -233,7 +232,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P; min_i = GEMM_P;
} else { } else {
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
} }
@ -253,8 +252,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
STOP_RPCC(copy_A); STOP_RPCC(copy_A);
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) { for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) {
@ -353,9 +351,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
while (current >= 0) { while (current >= 0) {
#endif #endif
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
START_RPCC(); START_RPCC();
@ -412,7 +409,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); min_i = (((min_i + 1) / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
} }
START_RPCC(); START_RPCC();
@ -425,8 +422,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
do { do {
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
@ -602,9 +598,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
double di = (double)i; double di = (double)i;
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask); width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) );
if (num_cpu == 0) width = n - ((n - width) & ~mask); if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) );
if ((width > n - i) || (width < mask)) width = n - i; if ((width > n - i) || (width < mask)) width = n - i;
@ -644,7 +640,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
double di = (double)i; double di = (double)i;
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask); width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1));
if ((width > n - i) || (width < mask)) width = n - i; if ((width > n - i) || (width < mask)) width = n - i;

View File

@ -310,7 +310,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
buffer[0] = sb; buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) { for (i = 1; i < DIVIDE_RATE; i++) {
buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1)) * COMPSIZE; buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N * COMPSIZE;
} }
@ -331,7 +331,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P; min_i = GEMM_P;
} else { } else {
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
} else { } else {
if (args -> nthreads == 1) l1stride = 0; if (args -> nthreads == 1) l1stride = 0;
} }
@ -367,7 +367,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
else else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
START_RPCC(); START_RPCC();
@ -441,7 +443,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = GEMM_P; min_i = GEMM_P;
} else } else
if (min_i > GEMM_P) { if (min_i > GEMM_P) {
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); min_i = (((min_i + 1) / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M;
} }
START_RPCC(); START_RPCC();

View File

@ -158,7 +158,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
int mm, nn; int mm, nn;
mm = (loop & ~(GEMM_UNROLL_MN - 1)); mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
nn = MIN(GEMM_UNROLL_MN, n - loop); nn = MIN(GEMM_UNROLL_MN, n - loop);
#ifndef LOWER #ifndef LOWER

View File

@ -149,7 +149,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
int mm, nn; int mm, nn;
mm = (loop & ~(GEMM_UNROLL_MN - 1)); mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
nn = MIN(GEMM_UNROLL_MN, n - loop); nn = MIN(GEMM_UNROLL_MN, n - loop);
#ifndef LOWER #ifndef LOWER

View File

@ -132,7 +132,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
int mm, nn; int mm, nn;
mm = (loop & ~(GEMM_UNROLL_MN - 1)); mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN;
nn = MIN(GEMM_UNROLL_MN, n - loop); nn = MIN(GEMM_UNROLL_MN, n - loop);
#ifndef LOWER #ifndef LOWER

Some files were not shown because too many files have changed in this diff Show More