Compare commits
198 Commits
integer_da
...
v0.2.15
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
53e849f4fc | ||
|
|
8447498b50 | ||
|
|
b6519159f5 | ||
|
|
63c56d3da9 | ||
|
|
718d0f18e3 | ||
|
|
6040858b22 | ||
|
|
70642fe4ed | ||
|
|
79d4a62e10 | ||
|
|
1ac8c32f1d | ||
|
|
0b2ad98e48 | ||
|
|
69363622a8 | ||
|
|
e6d754fddc | ||
|
|
f74ff6da38 | ||
|
|
2feef49fa8 | ||
|
|
53b6023a6c | ||
|
|
309875de3c | ||
|
|
b809f99cee | ||
|
|
5a291606ad | ||
|
|
1ce054fcb3 | ||
|
|
8fade093aa | ||
|
|
96f0bbe067 | ||
|
|
d8392c1245 | ||
|
|
aca7d7e953 | ||
|
|
94b125255f | ||
|
|
3684706a12 | ||
|
|
90aa8e24b9 | ||
|
|
11ac4665c8 | ||
|
|
c666158b79 | ||
|
|
ccf581f94d | ||
|
|
e9493f69eb | ||
|
|
88bef3bffc | ||
|
|
f27942a68a | ||
|
|
0cc2b3de0b | ||
|
|
b9534bbd76 | ||
|
|
45c8b5e756 | ||
|
|
a96a4cb012 | ||
|
|
baec8f5cac | ||
|
|
d6e8459f20 | ||
|
|
dfe1eef33b | ||
|
|
cc7cab8a45 | ||
|
|
61ae47eb99 | ||
|
|
22353b1727 | ||
|
|
efffd28739 | ||
|
|
62cabef857 | ||
|
|
711ca33bc6 | ||
|
|
40a3fed6b8 | ||
|
|
2297a2d989 | ||
|
|
5408074941 | ||
|
|
bbcdf63bb4 | ||
|
|
43eabab62f | ||
|
|
50901943fd | ||
|
|
7df0820160 | ||
|
|
17ee2237c3 | ||
|
|
4b7381b7a4 | ||
|
|
abade3f896 | ||
|
|
d1349e7a11 | ||
|
|
3efeaed0d8 | ||
|
|
d38a1ddc7a | ||
|
|
6b92204a7c | ||
|
|
f2ac1a5cee | ||
|
|
e12cf1123e | ||
|
|
d3e2f0a1af | ||
|
|
c2323dd4d2 | ||
|
|
f8eba3d548 | ||
|
|
40ab5cfc50 | ||
|
|
b7a8f9ad47 | ||
|
|
f874465bb8 | ||
|
|
bb6e050509 | ||
|
|
87336b9acf | ||
|
|
19664f3ef4 | ||
|
|
c50661e5b7 | ||
|
|
b8d64a856a | ||
|
|
898fc7552a | ||
|
|
ab0a0a75fc | ||
|
|
1cf2b10224 | ||
|
|
7ac7e147d4 | ||
|
|
7ba4fe5afb | ||
|
|
a55377e9a4 | ||
|
|
dcd5ba4443 | ||
|
|
d0c51c4de9 | ||
|
|
1d183dcda8 | ||
|
|
e19bf3a28b | ||
|
|
3649cfbd7b | ||
|
|
5ae8993752 | ||
|
|
84d90d6ed8 | ||
|
|
518e2424a8 | ||
|
|
00e373aea6 | ||
|
|
9eaea02f33 | ||
|
|
ab7043373f | ||
|
|
504cdb10ed | ||
|
|
a8002b0c5f | ||
|
|
0553476fba | ||
|
|
2416d9dbac | ||
|
|
0d8e227ea7 | ||
|
|
12d1fb2e40 | ||
|
|
1b7f427401 | ||
|
|
b2284647a3 | ||
|
|
a6116e5859 | ||
|
|
fb5d5bb971 | ||
|
|
371071d461 | ||
|
|
8a143516e3 | ||
|
|
e5897ecb9b | ||
|
|
714638c187 | ||
|
|
e27c372e53 | ||
|
|
f3f2b3d768 | ||
|
|
9492298048 | ||
|
|
43725b82c5 | ||
|
|
14fd3d35de | ||
|
|
cebc07cebd | ||
|
|
33c5e8db7f | ||
|
|
67e39bd8fb | ||
|
|
9eb1499095 | ||
|
|
4662a0b13a | ||
|
|
e74462a3f5 | ||
|
|
056ba26755 | ||
|
|
a0d9a7fd83 | ||
|
|
5d3fc092e9 | ||
|
|
c94fe71278 | ||
|
|
d60b49e5c5 | ||
|
|
64b5a0ef84 | ||
|
|
162791e30e | ||
|
|
8743093bd7 | ||
|
|
96cf6779ca | ||
|
|
3b20b62423 | ||
|
|
6ddbfea700 | ||
|
|
c0624a26be | ||
|
|
4bfaf1ce66 | ||
|
|
e8c39138c6 | ||
|
|
f992799226 | ||
|
|
4c65afcce1 | ||
|
|
7fa5c4e2fd | ||
|
|
fa0e6a6c93 | ||
|
|
2f59135eb6 | ||
|
|
38681fb1c6 | ||
|
|
6b5d26e07b | ||
|
|
13d2d48e67 | ||
|
|
189fadfde0 | ||
|
|
627d5e7401 | ||
|
|
943fa2fb58 | ||
|
|
1b62a4f3c9 | ||
|
|
461e691127 | ||
|
|
cfaf1c678f | ||
|
|
0d7bad1f35 | ||
|
|
373a1bdadb | ||
|
|
2828f6630c | ||
|
|
58cff2fed8 | ||
|
|
5690cf3f0e | ||
|
|
a0aeda6187 | ||
|
|
84b3d760c4 | ||
|
|
0beea3a5a5 | ||
|
|
560c96a9a7 | ||
|
|
0ccfa60a53 | ||
|
|
30be551502 | ||
|
|
be1ce38f24 | ||
|
|
e818ace11a | ||
|
|
e4bfbd8258 | ||
|
|
2d5b442f5b | ||
|
|
af11aff309 | ||
|
|
e66aa5f3b7 | ||
|
|
31cf22cb4b | ||
|
|
20e593a44a | ||
|
|
7194424fef | ||
|
|
d11bde60d0 | ||
|
|
9e154aba58 | ||
|
|
5057a4b4df | ||
|
|
3e8ea7a351 | ||
|
|
d3dcdddf75 | ||
|
|
e5e7595bf9 | ||
|
|
7693887d61 | ||
|
|
8d9b196e0d | ||
|
|
a6cf8aafc0 | ||
|
|
dbdca7bf0c | ||
|
|
dabaecb2bc | ||
|
|
8c23965da3 | ||
|
|
61f21b5d03 | ||
|
|
8ede4a8da4 | ||
|
|
1c5b6bb4f7 | ||
|
|
c5f5c7a076 | ||
|
|
9a508abdc7 | ||
|
|
5eefe18ae4 | ||
|
|
1e8bb0e0e0 | ||
|
|
864b8b31de | ||
|
|
d2d15e522f | ||
|
|
f4d1e7a265 | ||
|
|
0f6bec0a32 | ||
|
|
92cdac5f87 | ||
|
|
1a41022e3e | ||
|
|
e5c47e44f6 | ||
|
|
51ce5ef447 | ||
|
|
37aee1f9b1 | ||
|
|
7e4e195e82 | ||
|
|
a7126c2ce4 | ||
|
|
f20c0f9819 | ||
|
|
21b5347fbe | ||
|
|
f9991fd5f6 | ||
|
|
da3d70420a | ||
|
|
f773f492f3 | ||
|
|
3e068e78e2 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -15,6 +15,7 @@ lapack-netlib/make.inc
|
||||
lapack-netlib/lapacke/include/lapacke_mangling.h
|
||||
lapack-netlib/TESTING/testing_results.txt
|
||||
*.so
|
||||
*.so.*
|
||||
*.a
|
||||
.svn
|
||||
*~
|
||||
@@ -65,3 +66,5 @@ test/sblat3
|
||||
test/zblat1
|
||||
test/zblat2
|
||||
test/zblat3
|
||||
build
|
||||
build.*
|
||||
|
||||
@@ -1,4 +1,13 @@
|
||||
language: c
|
||||
|
||||
notifications:
|
||||
webhooks:
|
||||
urls:
|
||||
- https://webhooks.gitter.im/e/8a6e4470a0cebd090344
|
||||
on_success: change # options: [always|never|change] default: always
|
||||
on_failure: always # options: [always|never|change] default: always
|
||||
on_start: never # options: [always|never|change] default: always
|
||||
|
||||
compiler:
|
||||
- gcc
|
||||
|
||||
|
||||
190
CMakeLists.txt
Normal file
190
CMakeLists.txt
Normal file
@@ -0,0 +1,190 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
##
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.4)
|
||||
project(OpenBLAS)
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 2)
|
||||
set(OpenBLAS_PATCH_VERSION 14)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
enable_language(ASM)
|
||||
enable_language(C)
|
||||
|
||||
if(MSVC)
|
||||
set(OpenBLAS_LIBNAME libopenblas)
|
||||
else()
|
||||
set(OpenBLAS_LIBNAME openblas)
|
||||
endif()
|
||||
|
||||
#######
|
||||
if(MSVC)
|
||||
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||
endif()
|
||||
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
|
||||
option(BUILD_DEBUG "Build Debug Version" OFF)
|
||||
#######
|
||||
if(BUILD_WITHOUT_LAPACK)
|
||||
set(NO_LAPACK 1)
|
||||
set(NO_LAPACKE 1)
|
||||
endif()
|
||||
|
||||
if(BUILD_DEBUG)
|
||||
set(CMAKE_BUILD_TYPE Debug)
|
||||
else()
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
|
||||
if(BUILD_WITHOUT_CBLAS)
|
||||
set(NO_CBLAS 1)
|
||||
endif()
|
||||
|
||||
#######
|
||||
|
||||
|
||||
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
|
||||
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake")
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake")
|
||||
|
||||
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
|
||||
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
list(APPEND BLASDIRS kernel)
|
||||
endif ()
|
||||
|
||||
if (DEFINED UTEST_CHECK)
|
||||
set(SANITY_CHECK 1)
|
||||
endif ()
|
||||
|
||||
if (DEFINED SANITY_CHECK)
|
||||
list(APPEND BLASDIRS reference)
|
||||
endif ()
|
||||
|
||||
set(SUBDIRS ${BLASDIRS})
|
||||
if (NOT NO_LAPACK)
|
||||
list(APPEND SUBDIRS lapack)
|
||||
endif ()
|
||||
|
||||
# set which float types we want to build for
|
||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
||||
# if none are defined, build for all
|
||||
set(BUILD_SINGLE true)
|
||||
set(BUILD_DOUBLE true)
|
||||
set(BUILD_COMPLEX true)
|
||||
set(BUILD_COMPLEX16 true)
|
||||
endif ()
|
||||
|
||||
set(FLOAT_TYPES "")
|
||||
if (BUILD_SINGLE)
|
||||
message(STATUS "Building Single Precision")
|
||||
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
|
||||
endif ()
|
||||
|
||||
if (BUILD_DOUBLE)
|
||||
message(STATUS "Building Double Precision")
|
||||
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX)
|
||||
message(STATUS "Building Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX16)
|
||||
message(STATUS "Building Double Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||
endif ()
|
||||
|
||||
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
|
||||
|
||||
# all :: libs netlib tests shared
|
||||
|
||||
# libs :
|
||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
|
||||
endif ()
|
||||
|
||||
if (${NO_STATIC} AND ${NO_SHARED})
|
||||
message(FATAL_ERROR "Neither static nor shared are enabled.")
|
||||
endif ()
|
||||
|
||||
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
|
||||
set(TARGET_OBJS "")
|
||||
foreach (SUBDIR ${SUBDIRS})
|
||||
add_subdirectory(${SUBDIR})
|
||||
string(REPLACE "/" "_" subdir_obj ${SUBDIR})
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
|
||||
endforeach ()
|
||||
|
||||
# netlib:
|
||||
|
||||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
|
||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
||||
if (NOT NOFORTRAN AND NOT NO_LAPACK)
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
|
||||
if (NOT NO_LAPACKE)
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
#Only generate .def for dll on MSVC
|
||||
if(MSVC)
|
||||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
|
||||
endif()
|
||||
|
||||
# add objects to the openblas lib
|
||||
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
|
||||
|
||||
|
||||
if(NOT MSVC)
|
||||
#only build shared library for MSVC
|
||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
||||
|
||||
if(SMP)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
|
||||
endif()
|
||||
|
||||
#build test and ctest
|
||||
enable_testing()
|
||||
add_subdirectory(test)
|
||||
if(NOT NO_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
|
||||
SOVERSION ${OpenBLAS_MAJOR_VERSION}
|
||||
)
|
||||
|
||||
|
||||
# TODO: Why is the config saved here? Is this necessary with CMake?
|
||||
#Save the config files for installation
|
||||
# @cp Makefile.conf Makefile.conf_last
|
||||
# @cp config.h config_last.h
|
||||
#ifdef QUAD_PRECISION
|
||||
# @echo "#define QUAD_PRECISION">> config_last.h
|
||||
#endif
|
||||
#ifeq ($(EXPRECISION), 1)
|
||||
# @echo "#define EXPRECISION">> config_last.h
|
||||
#endif
|
||||
###
|
||||
#ifeq ($(DYNAMIC_ARCH), 1)
|
||||
# @$(MAKE) -C kernel commonlibs || exit 1
|
||||
# @for d in $(DYNAMIC_CORE) ; \
|
||||
# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
# done
|
||||
# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
#endif
|
||||
#ifdef USE_THREAD
|
||||
# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
#endif
|
||||
# @touch lib.grd
|
||||
|
||||
@@ -127,5 +127,8 @@ In chronological order:
|
||||
* Ton van den Heuvel <https://github.com/ton>
|
||||
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
|
||||
|
||||
* Martin Koehler <https://github.com/grisuthedragon/>
|
||||
* [2015-09-07] Improved imatcopy
|
||||
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
|
||||
@@ -1,4 +1,57 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.15
|
||||
27-Oct-2015
|
||||
common:
|
||||
* Support cmake on x86/x86-64. Natively compiling on MS Visual Studio.
|
||||
(experimental. Thanks to Hank Anderson for the initial cmake porting work.)
|
||||
|
||||
On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels.
|
||||
e.g. cmake .
|
||||
make
|
||||
make test (Optional)
|
||||
|
||||
On Windows MS Visual Studio, OpenBLAS cmake only supports C kernels.
|
||||
(OpenBLAS uses AT&T style assembly, which is not supported by MSVC.)
|
||||
e.g. cmake -G "Visual Studio 12 Win64" .
|
||||
Open OpenBLAS.sln and build.
|
||||
|
||||
* Enable MAX_STACK_ALLOC flags by default.
|
||||
Improve ger and gemv for small matrices.
|
||||
* Improve gemv parallel with small m and large n case.
|
||||
* Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler)
|
||||
* Add vecLib benchmarks (#565. Thanks, Andreas Noack.)
|
||||
* Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak)
|
||||
* Fix LAPACKE lansy (#640. Thanks, Dan Kortschak)
|
||||
* Import bug fixes for LAPACKE s/dormlq, c/zunmlq
|
||||
* Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden)
|
||||
* Remove g77 from compiler list.
|
||||
* Enable AppVeyor Windows CI.
|
||||
|
||||
x86/x86-64:
|
||||
* Support pure C generic kernels for x86/x86-64.
|
||||
* Support Intel Broadwell and Skylake by Haswell kernels.
|
||||
* Support AMD Excavator by Steamroller kernels.
|
||||
* Optimize s/d/c/zdot for Intel SandyBridge and Haswell.
|
||||
* Optimize s/d/c/zdot for AMD Piledriver and Steamroller.
|
||||
* Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell.
|
||||
* Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller.
|
||||
* Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge.
|
||||
* Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller.
|
||||
* Optimize s/dger for Intel SandyBridge.
|
||||
* Optimize s/dsymv for Intel SandyBridge.
|
||||
* Optimize ssymv for Intel Haswell.
|
||||
* Optimize dgemv for Intel Nehalem and Haswell.
|
||||
* Optimize dtrmm for Intel Haswell.
|
||||
|
||||
ARM:
|
||||
* Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard)
|
||||
e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7
|
||||
* Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas)
|
||||
POWER:
|
||||
* Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.)
|
||||
* Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.14
|
||||
24-Mar-2015
|
||||
|
||||
@@ -26,8 +26,8 @@ endif
|
||||
|
||||
|
||||
ifeq ($(CORE), ARMV5)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
CCOMMON_OPT += -marm -march=armv5
|
||||
FCOMMON_OPT += -marm -march=armv5
|
||||
endif
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
@@ -97,6 +98,7 @@ endif
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
|
||||
ifndef NO_SHARED
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||
@@ -112,5 +114,16 @@ else
|
||||
#only static
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
#Generating OpenBLASConfigVersion.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo Install OK!
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.14
|
||||
VERSION = 0.2.15
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
@@ -114,9 +114,6 @@ NO_AFFINITY = 1
|
||||
# Support for IEEE quad precision (it's *real* REAL*16) (under testing)
|
||||
# QUAD_PRECISION = 1
|
||||
|
||||
# Support for integer matrix and vector (e.g. iaxpy)
|
||||
# INTEGER_PRECISION = 1
|
||||
|
||||
# Threads are still working for a while after finishing BLAS operation
|
||||
# to reduce thread activate/deactivate overhead. You can determine
|
||||
# time out to improve performance. This number should be from 4 to 30
|
||||
@@ -172,6 +169,9 @@ COMMON_PROF = -pg
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459
|
||||
#
|
||||
# The same prefix and suffix are also added to the library name,
|
||||
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas
|
||||
#
|
||||
# SYMBOLPREFIX=
|
||||
# SYMBOLSUFFIX=
|
||||
|
||||
|
||||
@@ -309,10 +309,6 @@ CCOMMON_OPT += -DQUAD_PRECISION
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifdef INTEGER_PRECISION
|
||||
CCOMMON_OPT += -DINTEGER_PRECISION
|
||||
endif
|
||||
|
||||
ifneq ($(ARCH), x86)
|
||||
ifneq ($(ARCH), x86_64)
|
||||
NO_EXPRECISION = 1
|
||||
@@ -340,6 +336,11 @@ ifeq ($(ARCH), x86)
|
||||
ifndef BINARY
|
||||
NO_BINARY_MODE = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), generic)
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
# ifeq logical or. GCC or LSB
|
||||
@@ -358,6 +359,11 @@ endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
|
||||
ifeq ($(CORE), generic)
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifndef NO_EXPRECISION
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
# ifeq logical or. GCC or LSB
|
||||
@@ -874,12 +880,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
|
||||
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
|
||||
endif
|
||||
|
||||
ifndef LIBNAMESUFFIX
|
||||
LIBPREFIX = libopenblas
|
||||
else
|
||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
ifndef SYMBOLPREFIX
|
||||
SYMBOLPREFIX =
|
||||
endif
|
||||
@@ -888,6 +888,12 @@ ifndef SYMBOLSUFFIX
|
||||
SYMBOLSUFFIX =
|
||||
endif
|
||||
|
||||
ifndef LIBNAMESUFFIX
|
||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
|
||||
else
|
||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
|
||||
include $(TOPDIR)/Makefile.$(ARCH)
|
||||
|
||||
@@ -4,7 +4,6 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
@@ -23,18 +22,12 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
ifdef INTEGER_PRECISION
|
||||
BLASOBJS += $(IBLASOBJS)
|
||||
BLASOBJS_P += $(IBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
|
||||
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
@@ -42,7 +35,6 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
||||
libs :: $(BLASOBJS) $(COMMONOBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
# OpenBLAS
|
||||
|
||||
[](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
[](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
|
||||
Travis CI: [](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
AppVeyor: [](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
|
||||
## Introduction
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
|
||||
@@ -44,6 +44,8 @@ NANO
|
||||
POWER4
|
||||
POWER5
|
||||
POWER6
|
||||
POWER7
|
||||
POWER8
|
||||
PPCG4
|
||||
PPC970
|
||||
PPC970MP
|
||||
|
||||
42
appveyor.yml
Normal file
42
appveyor.yml
Normal file
@@ -0,0 +1,42 @@
|
||||
version: 0.2.15.{build}
|
||||
|
||||
#environment:
|
||||
|
||||
platform:
|
||||
- x64
|
||||
|
||||
configuration: Release
|
||||
|
||||
clone_folder: c:\projects\OpenBLAS
|
||||
|
||||
init:
|
||||
- git config --global core.autocrlf input
|
||||
|
||||
build:
|
||||
project: OpenBLAS.sln
|
||||
|
||||
clone_depth: 5
|
||||
|
||||
#branches to build
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
- cmake
|
||||
|
||||
skip_tags: true
|
||||
|
||||
matrix:
|
||||
fast_finish: true
|
||||
|
||||
skip_commits:
|
||||
# Add [av skip] to commit messages
|
||||
message: /\[av skip\]/
|
||||
|
||||
before_build:
|
||||
- echo Running cmake...
|
||||
- cd c:\projects\OpenBLAS
|
||||
- cmake -G "Visual Studio 12 Win64" .
|
||||
|
||||
test_script:
|
||||
- echo Build OK!
|
||||
2
c_check
2
c_check
@@ -30,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
|
||||
$cross_suffix = $1;
|
||||
}
|
||||
} else {
|
||||
if ($ARGV[0] =~ /(.*-)(.*)/) {
|
||||
if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) {
|
||||
$cross_suffix = $1;
|
||||
}
|
||||
}
|
||||
|
||||
350
cblas_noconst.h
350
cblas_noconst.h
@@ -1,350 +0,0 @@
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*Set the number of threads on runtime.*/
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
|
||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
|
||||
float cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
||||
|
||||
float cblas_sasum (blasint n, float *x, blasint incx);
|
||||
double cblas_dasum (blasint n, double *x, blasint incx);
|
||||
float cblas_scasum(blasint n, float *x, blasint incx);
|
||||
double cblas_dzasum(blasint n, double *x, blasint incx);
|
||||
|
||||
float cblas_snrm2 (blasint N, float *X, blasint incX);
|
||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
|
||||
float cblas_scnrm2(blasint N, float *X, blasint incX);
|
||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
|
||||
|
||||
void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
||||
|
||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
|
||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
|
||||
|
||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
|
||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
|
||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
|
||||
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
|
||||
|
||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
|
||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
|
||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
|
||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
|
||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
|
||||
|
||||
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
|
||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
|
||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
|
||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
|
||||
|
||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
||||
|
||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
|
||||
blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
|
||||
blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
|
||||
float *Y, blasint incY, float *A, blasint lda);
|
||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
|
||||
double *Y, blasint incY, double *A, blasint lda);
|
||||
|
||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
||||
|
||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, float *Ap, float *X, blasint incX);
|
||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
||||
blasint N, double *Ap, double *X, blasint incX);
|
||||
|
||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
|
||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
|
||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
|
||||
float *X, blasint incX, float beta, float *Y, blasint incY);
|
||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
|
||||
double *X, blasint incX, double beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
|
||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
|
||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
|
||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
|
||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
|
||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
||||
|
||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
||||
|
||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
/*** BLAS extensions ***/
|
||||
|
||||
void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy);
|
||||
|
||||
void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy);
|
||||
|
||||
void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy);
|
||||
|
||||
void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy);
|
||||
|
||||
void cblas_somatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
|
||||
blasint clda, float *b, blasint cldb);
|
||||
void cblas_domatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
|
||||
blasint clda, double *b, blasint cldb);
|
||||
void cblas_comatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
|
||||
blasint clda, void *b, blasint cldb);
|
||||
void cblas_zomatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
|
||||
blasint clda, void *b, blasint cldb);
|
||||
|
||||
void cblas_simatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_dimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float* calpha, float* a,
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
|
||||
blasint clda, blasint cldb);
|
||||
|
||||
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
|
||||
double *c, blasint cldc);
|
||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
|
||||
double *c, blasint cldc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
115
cmake/arch.cmake
Normal file
115
cmake/arch.cmake
Normal file
@@ -0,0 +1,115 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets various variables based on architecture.
|
||||
|
||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
|
||||
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
if (NOT BINARY)
|
||||
set(NO_BINARY_MODE 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT NO_EXPRECISION)
|
||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||
# N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
|
||||
# Compare the compiler ID; CMAKE_C_COMPILER holds the path of the compiler executable and never equals "GNU".
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
|
||||
set(EXPRECISION 1)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||
endif ()
|
||||
# Compare the compiler ID; CMAKE_C_COMPILER holds the path of the compiler executable and never equals "Clang".
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
set(EXPRECISION 1)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Compare the compiler ID; CMAKE_C_COMPILER holds the path of the compiler executable and never equals "Intel".
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
|
||||
# Select the OpenMP flag by compiler ID; CMAKE_C_COMPILER is the executable path, not a vendor name.
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
|
||||
# Select the OpenMP flag by compiler ID; CMAKE_C_COMPILER is the executable path, not a vendor name.
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
message(WARNING "Clang doesn't support OpenMP yet.")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
|
||||
# Select the OpenMP flag by compiler ID; CMAKE_C_COMPILER is the executable path, not a vendor name.
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
|
||||
endif ()
|
||||
|
||||
# Select the OpenMP flag by compiler ID; CMAKE_C_COMPILER is the executable path, not a vendor name.
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
|
||||
# Select the OpenMP flag by compiler ID; CMAKE_C_COMPILER is the executable path, not a vendor name.
# NOTE(review): "OPEN64" is not a documented CMAKE_C_COMPILER_ID value - confirm how this compiler is reported.
if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||
set(CEXTRALIB "${CEXTRALIB} -lstdc++")
|
||||
endif ()
|
||||
|
||||
# Select the OpenMP flag by compiler ID; CMAKE_C_COMPILER is the executable path, not a vendor name.
# NOTE(review): CMake documents this compiler's ID as "PathScale" - confirm the expected spelling.
if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "x86_64")
|
||||
set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
|
||||
if (NOT NO_AVX)
|
||||
set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
|
||||
endif ()
|
||||
if (NOT NO_AVX2)
|
||||
set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT DYNAMIC_CORE)
|
||||
unset(DYNAMIC_ARCH)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "ia64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
|
||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||
# Compare the compiler ID; CMAKE_C_COMPILER holds the path of the compiler executable and never equals "GNU".
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
# EXPRECISION = 1
|
||||
# CCOMMON_OPT += -DEXPRECISION
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "alpha")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "arm")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "arm64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
89
cmake/c_check.cmake
Normal file
89
cmake/c_check.cmake
Normal file
@@ -0,0 +1,89 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from the OpenBLAS/c_check perl script.
|
||||
## This is triggered by prebuild.cmake and runs before any of the code is built.
|
||||
## Creates config.h and Makefile.conf.
|
||||
|
||||
# CMake vars set by this file:
|
||||
# OSNAME (use CMAKE_SYSTEM_NAME)
|
||||
# ARCH
|
||||
# C_COMPILER (use CMAKE_C_COMPILER)
|
||||
# BINARY32
|
||||
# BINARY64
|
||||
# FU
|
||||
# CROSS_SUFFIX
|
||||
# CROSS
|
||||
# CEXTRALIB
|
||||
|
||||
# Defines set by this file:
|
||||
# OS_
|
||||
# ARCH_
|
||||
# C_
|
||||
# __32BIT__
|
||||
# __64BIT__
|
||||
# FUNDERSCORE
|
||||
# PTHREAD_CREATE_FUNC
|
||||
|
||||
# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.
|
||||
set(FU "")
|
||||
if(APPLE)
|
||||
set(FU "_")
|
||||
elseif(MSVC)
|
||||
set(FU "_")
|
||||
elseif(UNIX)
|
||||
set(FU "")
|
||||
endif()
|
||||
|
||||
# Convert CMake vars into the format that OpenBLAS expects
|
||||
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
|
||||
if (${HOST_OS} STREQUAL "WINDOWS")
|
||||
set(HOST_OS WINNT)
|
||||
endif ()
|
||||
|
||||
# added by hpa - check size of void ptr to detect 64-bit compile
|
||||
if (NOT DEFINED BINARY)
|
||||
set(BINARY 32)
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(BINARY 64)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (BINARY EQUAL 64)
|
||||
set(BINARY64 1)
|
||||
else ()
|
||||
set(BINARY32 1)
|
||||
endif ()
|
||||
|
||||
# CMake docs define these:
|
||||
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
|
||||
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
|
||||
#
|
||||
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
|
||||
set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
|
||||
if (${ARCH} STREQUAL "AMD64")
|
||||
set(ARCH "x86_64")
|
||||
endif ()
|
||||
|
||||
# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong
|
||||
if (${ARCH} STREQUAL "x86_64" AND BINARY EQUAL 32)
|
||||
set(ARCH x86)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "X86")
|
||||
set(ARCH x86)
|
||||
endif ()
|
||||
|
||||
# The C_<compiler> define written below describes the C compiler, so take the ID from the C language.
# NOTE(review): CMAKE_CXX_COMPILER_ID is presumably empty when CXX is not enabled for the project - confirm.
set(COMPILER_ID ${CMAKE_C_COMPILER_ID})
|
||||
if (${COMPILER_ID} STREQUAL "GNU")
|
||||
set(COMPILER_ID "GCC")
|
||||
endif ()
|
||||
|
||||
string(TOUPPER ${ARCH} UC_ARCH)
|
||||
|
||||
file(WRITE ${TARGET_CONF}
|
||||
"#define OS_${HOST_OS}\t1\n"
|
||||
"#define ARCH_${UC_ARCH}\t1\n"
|
||||
"#define C_${COMPILER_ID}\t1\n"
|
||||
"#define __${BINARY}BIT__\t1\n"
|
||||
"#define FUNDERSCORE\t${FU}\n")
|
||||
|
||||
103
cmake/cc.cmake
Normal file
103
cmake/cc.cmake
Normal file
@@ -0,0 +1,103 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets C related variables.
|
||||
|
||||
# Compare the compiler ID; CMAKE_C_COMPILER holds the path of the compiler executable, so the GCC-style flags below were never applied.
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB" OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -Wall")
|
||||
set(COMMON_PROF "${COMMON_PROF} -fno-inline")
|
||||
set(NO_UNINITIALIZED_WARN "-Wno-uninitialized")
|
||||
|
||||
if (QUIET_MAKE)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused")
|
||||
endif ()
|
||||
|
||||
if (NO_BINARY_MODE)
|
||||
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
|
||||
else ()
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32")
|
||||
endif ()
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "LOONGSON3A")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "LOONGSON3B")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
|
||||
endif ()
|
||||
|
||||
if (${OSNAME} STREQUAL "AIX")
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT BINARY_DEFINED)
|
||||
if (BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
|
||||
else ()
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Compare the compiler ID; CMAKE_C_COMPILER holds the path of the compiler executable and never equals "PGI".
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
|
||||
if (BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64")
|
||||
else ()
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Compare the compiler ID; CMAKE_C_COMPILER holds the path of the compiler executable, not a vendor name.
# NOTE(review): CMake documents this compiler's ID as "PathScale" - confirm the expected spelling.
if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
|
||||
if (BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
|
||||
else ()
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Compare the compiler ID; CMAKE_C_COMPILER holds the path of the compiler executable, not a vendor name.
# NOTE(review): "OPEN64" is not a documented CMAKE_C_COMPILER_ID value - confirm how this compiler is reported.
if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
|
||||
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
|
||||
if (NOT BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
|
||||
else ()
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -n64")
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "LOONGSON3A")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "LOONGSON3B")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
|
||||
endif ()
|
||||
|
||||
else ()
|
||||
|
||||
# Non-mips64 targets: the first branch adds -m32 and the else branch adds -m64,
# so the first branch must be taken for 32-bit builds (the condition was inverted).
if (NOT BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
|
||||
else ()
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Compare the compiler ID; CMAKE_C_COMPILER holds the path of the compiler executable, not a vendor name.
# NOTE(review): CMake documents the Oracle/Sun compiler ID as "SunPro" - confirm the expected spelling.
if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -w")
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
60
cmake/export.cmake
Normal file
60
cmake/export.cmake
Normal file
@@ -0,0 +1,60 @@
|
||||
|
||||
#Only generate .def for dll on MSVC
|
||||
if(MSVC)
|
||||
|
||||
set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)
|
||||
|
||||
if (NOT DEFINED ARCH)
|
||||
set(ARCH_IN "x86_64")
|
||||
else()
|
||||
set(ARCH_IN ${ARCH})
|
||||
endif()
|
||||
|
||||
if (${CORE} STREQUAL "generic")
|
||||
set(ARCH_IN "GENERIC")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED EXPRECISION)
|
||||
set(EXPRECISION_IN 0)
|
||||
else()
|
||||
set(EXPRECISION_IN ${EXPRECISION})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_CBLAS)
|
||||
set(NO_CBLAS_IN 0)
|
||||
else()
|
||||
set(NO_CBLAS_IN ${NO_CBLAS})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_LAPACK)
|
||||
set(NO_LAPACK_IN 0)
|
||||
else()
|
||||
set(NO_LAPACK_IN ${NO_LAPACK})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_LAPACKE)
|
||||
set(NO_LAPACKE_IN 0)
|
||||
else()
|
||||
set(NO_LAPACKE_IN ${NO_LAPACKE})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NEED2UNDERSCORES)
|
||||
set(NEED2UNDERSCORES_IN 0)
|
||||
else()
|
||||
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED ONLY_CBLAS)
|
||||
set(ONLY_CBLAS_IN 0)
|
||||
else()
|
||||
set(ONLY_CBLAS_IN ${ONLY_CBLAS})
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
TARGET ${OpenBLAS_LIBNAME} PRE_LINK
|
||||
COMMAND perl
|
||||
ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
|
||||
COMMENT "Create openblas.def file"
|
||||
VERBATIM)
|
||||
|
||||
endif()
|
||||
66
cmake/f_check.cmake
Normal file
66
cmake/f_check.cmake
Normal file
@@ -0,0 +1,66 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Copyright: (c) Stat-Ease, Inc.
|
||||
## Created: 12/29/14
|
||||
## Last Modified: 12/29/14
|
||||
## Description: Ported from the OpenBLAS/f_check perl script.
|
||||
## This is triggered by prebuild.cmake and runs before any of the code is built.
|
||||
## Appends Fortran information to config.h and Makefile.conf.
|
||||
|
||||
# CMake vars set by this file:
|
||||
# F_COMPILER
|
||||
# FC
|
||||
# BU
|
||||
# NOFORTRAN
|
||||
# NEED2UNDERSCORES
|
||||
# FEXTRALIB
|
||||
|
||||
# Defines set by this file:
|
||||
# BUNDERSCORE
|
||||
# NEEDBUNDERSCORE
|
||||
# NEED2UNDERSCORES
|
||||
|
||||
if (MSVC)
|
||||
# had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa
|
||||
include(CMakeForceCompiler)
|
||||
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
|
||||
endif ()
|
||||
|
||||
if (NOT NO_LAPACK)
|
||||
enable_language(Fortran)
|
||||
else()
|
||||
include(CMakeForceCompiler)
|
||||
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
|
||||
endif()
|
||||
|
||||
# Fortran symbol-decoration settings appended to ${TARGET_CONF}.
#
# N.B. f_check is not cross-platform, so nothing is probed here; a single
# trailing underscore is assumed in both branches.
#
# The perl invocation this replaces, kept for reference:
#   message(STATUS "Running f_check...")
#   execute_process(COMMAND perl f_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_Fortran_COMPILER}
#     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
#
# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
# TODO: set FEXTRALIB flags a la f_check?
if (ONLY_CBLAS)
  # When we only build CBLAS, we set NOFORTRAN=2
  set(NOFORTRAN 2)
  set(NO_FBLAS 1)
  #set(F_COMPILER GFORTRAN) # CMake handles the fortran compiler
  set(BU "_")
  file(APPEND ${TARGET_CONF}
    "#define BUNDERSCORE _\n"
    "#define NEEDBUNDERSCORE 1\n")
else ()
  set(BU "_")
  # The regular build additionally records that no second underscore is used.
  file(APPEND ${TARGET_CONF}
    "#define BUNDERSCORE _\n"
    "#define NEEDBUNDERSCORE 1\n"
    "#define NEED2UNDERSCORES 0\n")
endif ()
|
||||
|
||||
# Derive F_COMPILER from the base name of the Fortran compiler executable,
# upper-cased (e.g. "gfortran" -> "GFORTRAN").
# Both arguments are quoted so that an empty CMAKE_Fortran_COMPILER produces an
# empty F_COMPILER rather than a wrong-number-of-arguments error.
# NOTE(review): NAME_WE cuts at the first '.', and prefixed or versioned
# executable names will not match the plain names compared against in fc.cmake;
# confirm whether CMAKE_Fortran_COMPILER_ID would be a better source.
get_filename_component(F_COMPILER "${CMAKE_Fortran_COMPILER}" NAME_WE)
string(TOUPPER "${F_COMPILER}" F_COMPILER)
|
||||
|
||||
200
cmake/fc.cmake
Normal file
200
cmake/fc.cmake
Normal file
@@ -0,0 +1,200 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets Fortran related variables.
|
||||
|
||||
# g77: add -DF_INTERFACE_G77 to CCOMMON_OPT, and -Wall plus the word-size flag
# to FCOMMON_OPT.
if (${F_COMPILER} STREQUAL "G77")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  # -m64 / -m32 is only added while NO_BINARY_MODE is off.
  if (NOT NO_BINARY_MODE AND BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  elseif (NOT NO_BINARY_MODE)
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  endif ()
endif ()
|
||||
|
||||
# g95: add -DF_INTERFACE_G95 to CCOMMON_OPT, and -Wall plus the word-size flag
# to FCOMMON_OPT.
if (${F_COMPILER} STREQUAL "G95")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  if (NO_BINARY_MODE)
    # No word-size flag is passed in this mode.
  elseif (BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  endif ()
endif ()
|
||||
|
||||
# gfortran: add -DF_INTERFACE_GFORT to CCOMMON_OPT, link libgfortran when
# LAPACK is built, and add -Wall, the ABI / word-size flags and (optionally)
# -fopenmp to FCOMMON_OPT.
if (${F_COMPILER} STREQUAL "GFORTRAN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
  if (NOT NO_LAPACK)
    # Append to the existing value. The "$" is required: without it the
    # literal text "{EXTRALIB}" is stored and earlier EXTRALIB entries are lost.
    set(EXTRALIB "${EXTRALIB} -lgfortran")
  endif ()
  if (NO_BINARY_MODE)
    # Only mips64 gets an explicit ABI flag in this mode.
    if (${ARCH} STREQUAL "mips64")
      if (BINARY64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
      else ()
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
      endif ()
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
      # 8-byte default integers are only requested for 64-bit builds.
      if (INTERFACE64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
      endif ()
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
  endif ()
endif ()
|
||||
|
||||
# Intel Fortran: add -DF_INTERFACE_INTEL to CCOMMON_OPT; add -i8 for
# INTERFACE64 and -openmp for USE_OPENMP to FCOMMON_OPT.
if (${F_COMPILER} STREQUAL "INTEL")
  if (INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")
endif ()
|
||||
|
||||
# Fujitsu Fortran: add -DF_INTERFACE_FUJITSU to CCOMMON_OPT and, with
# USE_OPENMP, -openmp to FCOMMON_OPT.
if (${F_COMPILER} STREQUAL "FUJITSU")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU")
endif ()
|
||||
|
||||
# IBM Fortran: add -DF_INTERFACE_IBM to CCOMMON_OPT; add the -q32 / -q64 word
# size, -qintsize=8 (64-bit builds with INTERFACE64) and -openmp to FCOMMON_OPT.
if (${F_COMPILER} STREQUAL "IBM")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM")
  # FCOMMON_OPT += -qarch=440
  if (NOT BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -q32")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -q64")
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8")
    endif ()
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
|
||||
|
||||
# PGI Fortran: add -DF_INTERFACE_PGI to CCOMMON_OPT and -DPGICOMPILER to
# COMMON_PROF; add -i8, the -tp target and -mp to FCOMMON_OPT as configured.
if (${F_COMPILER} STREQUAL "PGI")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI")
  set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER")
  # -i8 is only used for 64-bit builds and precedes the -tp option.
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  if (BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
|
||||
|
||||
# PathScale Fortran: add -DF_INTERFACE_PATHSCALE to CCOMMON_OPT; add -i8, the
# ABI / word-size flag and -mp to FCOMMON_OPT as configured.
if (${F_COMPILER} STREQUAL "PATHSCALE")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE")
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  # mips64 selects an ABI with -mabi=; every other ARCH uses -m32 / -m64.
  if (${ARCH} STREQUAL "mips64")
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
|
||||
|
||||
# Open64 Fortran: add -DF_INTERFACE_OPEN64 to CCOMMON_OPT; add -i8, the ABI /
# word-size flags and -mp to FCOMMON_OPT; add -lstdc++ to FEXTRALIB with OpenMP.
if (${F_COMPILER} STREQUAL "OPEN64")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64")
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  if (${ARCH} STREQUAL "mips64")
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -n64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
    endif ()

    # Both Loongson 3 cores receive the same extra flags.
    if (${CORE} STREQUAL "LOONGSON3A" OR ${CORE} STREQUAL "LOONGSON3B")
      set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FEXTRALIB "${FEXTRALIB} -lstdc++")
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
|
||||
|
||||
# Sun Fortran: add -DF_INTERFACE_SUN to CCOMMON_OPT; add -m32 for ARCH x86
# (otherwise -m64) and, with USE_OPENMP, -xopenmp=parallel to FCOMMON_OPT.
if (${F_COMPILER} STREQUAL "SUN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")
  if (NOT ${ARCH} STREQUAL "x86")
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel")
  endif ()
endif ()
|
||||
|
||||
# Compaq Fortran: add -DF_INTERFACE_COMPAQ to CCOMMON_OPT and, with
# USE_OPENMP, -openmp to FCOMMON_OPT.
if (${F_COMPILER} STREQUAL "COMPAQ")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ")
endif ()
|
||||
|
||||
# from the root Makefile - this is for lapack-netlib to compile the correct secnd file.
|
||||
# TIMER is the suffix of the lapack-netlib INSTALL/second_*.f and dsecnd_*.f
# files that get compiled: INT_ETIME for gfortran, NONE for everything else.
if (NOT ${F_COMPILER} STREQUAL "GFORTRAN")
  set(TIMER "NONE")
else ()
  set(TIMER "INT_ETIME")
endif ()
|
||||
|
||||
165
cmake/kernel.cmake
Normal file
165
cmake/kernel.cmake
Normal file
@@ -0,0 +1,165 @@
|
||||
# helper functions for the kernel CMakeLists.txt
|
||||
|
||||
|
||||
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
|
||||
# SetDefaultL1()
# Assigns a default source file to every <prefix><op>KERNEL variable listed
# below. Being a macro, the variables are set directly in the caller's scope.
# Prefixes are S, D, Q, C, Z and X; the C/Z/X entries use the z*-named sources.
macro(SetDefaultL1)
  # amax / amin
  set(SAMAXKERNEL amax.S)
  set(DAMAXKERNEL amax.S)
  set(QAMAXKERNEL amax.S)
  set(CAMAXKERNEL zamax.S)
  set(ZAMAXKERNEL zamax.S)
  set(XAMAXKERNEL zamax.S)
  set(SAMINKERNEL amin.S)
  set(DAMINKERNEL amin.S)
  set(QAMINKERNEL amin.S)
  set(CAMINKERNEL zamin.S)
  set(ZAMINKERNEL zamin.S)
  set(XAMINKERNEL zamin.S)

  # max / min
  set(SMAXKERNEL max.S)
  set(DMAXKERNEL max.S)
  set(QMAXKERNEL max.S)
  set(SMINKERNEL min.S)
  set(DMINKERNEL min.S)
  set(QMINKERNEL min.S)

  # iamax / iamin
  set(ISAMAXKERNEL iamax.S)
  set(IDAMAXKERNEL iamax.S)
  set(IQAMAXKERNEL iamax.S)
  set(ICAMAXKERNEL izamax.S)
  set(IZAMAXKERNEL izamax.S)
  set(IXAMAXKERNEL izamax.S)
  set(ISAMINKERNEL iamin.S)
  set(IDAMINKERNEL iamin.S)
  set(IQAMINKERNEL iamin.S)
  set(ICAMINKERNEL izamin.S)
  set(IZAMINKERNEL izamin.S)
  set(IXAMINKERNEL izamin.S)

  # imax / imin (these default to the iamax.S / iamin.S sources)
  set(ISMAXKERNEL iamax.S)
  set(IDMAXKERNEL iamax.S)
  set(IQMAXKERNEL iamax.S)
  set(ISMINKERNEL iamin.S)
  set(IDMINKERNEL iamin.S)
  set(IQMINKERNEL iamin.S)

  # asum
  set(SASUMKERNEL asum.S)
  set(DASUMKERNEL asum.S)
  set(QASUMKERNEL asum.S)
  set(CASUMKERNEL zasum.S)
  set(ZASUMKERNEL zasum.S)
  set(XASUMKERNEL zasum.S)

  # axpy
  set(SAXPYKERNEL axpy.S)
  set(DAXPYKERNEL axpy.S)
  set(QAXPYKERNEL axpy.S)
  set(CAXPYKERNEL zaxpy.S)
  set(ZAXPYKERNEL zaxpy.S)
  set(XAXPYKERNEL zaxpy.S)

  # copy
  set(SCOPYKERNEL copy.S)
  set(DCOPYKERNEL copy.S)
  set(QCOPYKERNEL copy.S)
  set(CCOPYKERNEL zcopy.S)
  set(ZCOPYKERNEL zcopy.S)
  set(XCOPYKERNEL zcopy.S)

  # dot
  set(SDOTKERNEL dot.S)
  set(DDOTKERNEL dot.S)
  set(QDOTKERNEL dot.S)
  set(CDOTKERNEL zdot.S)
  set(ZDOTKERNEL zdot.S)
  set(XDOTKERNEL zdot.S)

  # nrm2
  set(SNRM2KERNEL nrm2.S)
  set(DNRM2KERNEL nrm2.S)
  set(QNRM2KERNEL nrm2.S)
  set(CNRM2KERNEL znrm2.S)
  set(ZNRM2KERNEL znrm2.S)
  set(XNRM2KERNEL znrm2.S)

  # rot
  set(SROTKERNEL rot.S)
  set(DROTKERNEL rot.S)
  set(QROTKERNEL rot.S)
  set(CROTKERNEL zrot.S)
  set(ZROTKERNEL zrot.S)
  set(XROTKERNEL zrot.S)

  # scal
  set(SSCALKERNEL scal.S)
  set(DSCALKERNEL scal.S)
  set(QSCALKERNEL scal.S)
  set(CSCALKERNEL zscal.S)
  set(ZSCALKERNEL zscal.S)
  set(XSCALKERNEL zscal.S)

  # swap
  set(SSWAPKERNEL swap.S)
  set(DSWAPKERNEL swap.S)
  set(QSWAPKERNEL swap.S)
  set(CSWAPKERNEL zswap.S)
  set(ZSWAPKERNEL zswap.S)
  set(XSWAPKERNEL zswap.S)

  # gemv (the same values are also assigned by SetDefaultL2)
  set(SGEMVNKERNEL gemv_n.S)
  set(SGEMVTKERNEL gemv_t.S)
  set(DGEMVNKERNEL gemv_n.S)
  set(DGEMVTKERNEL gemv_t.S)
  set(QGEMVNKERNEL gemv_n.S)
  set(QGEMVTKERNEL gemv_t.S)
  set(CGEMVNKERNEL zgemv_n.S)
  set(CGEMVTKERNEL zgemv_t.S)
  set(ZGEMVNKERNEL zgemv_n.S)
  set(ZGEMVTKERNEL zgemv_t.S)
  set(XGEMVNKERNEL zgemv_n.S)
  set(XGEMVTKERNEL zgemv_t.S)

  # C implementations taken from other kernel directories
  set(SCABS_KERNEL ../generic/cabs.c)
  set(DCABS_KERNEL ../generic/cabs.c)
  set(QCABS_KERNEL ../generic/cabs.c)
  set(LSAME_KERNEL ../generic/lsame.c)
  set(SAXPBYKERNEL ../arm/axpby.c)
  set(DAXPBYKERNEL ../arm/axpby.c)
  set(CAXPBYKERNEL ../arm/zaxpby.c)
  set(ZAXPBYKERNEL ../arm/zaxpby.c)
endmacro ()
|
||||
|
||||
# SetDefaultL2()
# Assigns the default source files for the gemv, ger, symv and hemv kernel
# variables. Being a macro, the variables are set in the caller's scope.
macro(SetDefaultL2)
  # gemv: assembly sources, N and T variants
  set(SGEMVNKERNEL gemv_n.S)
  set(SGEMVTKERNEL gemv_t.S)
  set(DGEMVNKERNEL gemv_n.S)
  set(DGEMVTKERNEL gemv_t.S)
  set(QGEMVNKERNEL gemv_n.S)
  set(QGEMVTKERNEL gemv_t.S)
  set(CGEMVNKERNEL zgemv_n.S)
  set(CGEMVTKERNEL zgemv_t.S)
  set(ZGEMVNKERNEL zgemv_n.S)
  set(ZGEMVTKERNEL zgemv_t.S)
  set(XGEMVNKERNEL zgemv_n.S)
  set(XGEMVTKERNEL zgemv_t.S)

  # ger: generic C sources
  set(SGERKERNEL ../generic/ger.c)
  set(DGERKERNEL ../generic/ger.c)
  set(QGERKERNEL ../generic/ger.c)
  set(CGERUKERNEL ../generic/zger.c)
  set(CGERCKERNEL ../generic/zger.c)
  set(ZGERUKERNEL ../generic/zger.c)
  set(ZGERCKERNEL ../generic/zger.c)
  set(XGERUKERNEL ../generic/zger.c)
  set(XGERCKERNEL ../generic/zger.c)

  # symv: generic C sources, U and L variants
  set(SSYMV_U_KERNEL ../generic/symv_k.c)
  set(SSYMV_L_KERNEL ../generic/symv_k.c)
  set(DSYMV_U_KERNEL ../generic/symv_k.c)
  set(DSYMV_L_KERNEL ../generic/symv_k.c)
  set(QSYMV_U_KERNEL ../generic/symv_k.c)
  set(QSYMV_L_KERNEL ../generic/symv_k.c)
  set(CSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(CSYMV_L_KERNEL ../generic/zsymv_k.c)
  set(ZSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(ZSYMV_L_KERNEL ../generic/zsymv_k.c)
  set(XSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(XSYMV_L_KERNEL ../generic/zsymv_k.c)

  # hemv: generic C source, U / L / V / M variants
  set(CHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_M_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_M_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
endmacro ()
|
||||
|
||||
# SetDefaultL3()
# Assigns the default geadd sources: S/D use geadd.c, C/Z use zgeadd.c.
# Being a macro, the variables are set in the caller's scope.
macro(SetDefaultL3)
  # complex variants
  set(ZGEADD_KERNEL ../generic/zgeadd.c)
  set(CGEADD_KERNEL ../generic/zgeadd.c)
  # real variants
  set(DGEADD_KERNEL ../generic/geadd.c)
  set(SGEADD_KERNEL ../generic/geadd.c)
endmacro ()
|
||||
347
cmake/lapack.cmake
Normal file
347
cmake/lapack.cmake
Normal file
@@ -0,0 +1,347 @@
|
||||
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
|
||||
|
||||
set(ALLAUX
|
||||
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
|
||||
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
|
||||
../INSTALL/ilaver.f ../INSTALL/slamch.f
|
||||
)
|
||||
|
||||
set(SCLAUX
|
||||
sbdsdc.f
|
||||
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
|
||||
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
|
||||
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
|
||||
slagts.f slamrg.f slanst.f
|
||||
slapy2.f slapy3.f slarnv.f
|
||||
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
|
||||
slarrk.f slarrr.f slaneg.f
|
||||
slartg.f slaruv.f slas2.f slascl.f
|
||||
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
|
||||
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
|
||||
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
|
||||
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
|
||||
ssteqr.f ssterf.f slaisnan.f sisnan.f
|
||||
slartgp.f slartgs.f
|
||||
../INSTALL/second_${TIMER}.f
|
||||
)
|
||||
|
||||
set(DZLAUX
|
||||
dbdsdc.f
|
||||
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
|
||||
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
|
||||
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
|
||||
dlagts.f dlamrg.f dlanst.f
|
||||
dlapy2.f dlapy3.f dlarnv.f
|
||||
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
|
||||
dlarrk.f dlarrr.f dlaneg.f
|
||||
dlartg.f dlaruv.f dlas2.f dlascl.f
|
||||
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
|
||||
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
|
||||
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
|
||||
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
|
||||
dsteqr.f dsterf.f dlaisnan.f disnan.f
|
||||
dlartgp.f dlartgs.f
|
||||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
|
||||
)
|
||||
|
||||
set(SLASRC
|
||||
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
||||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
||||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
||||
sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
||||
sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
|
||||
sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
|
||||
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
|
||||
sgetc2.f sgetri.f
|
||||
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
|
||||
sggglm.f sgghrd.f sgglse.f sggqrf.f
|
||||
sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
|
||||
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
||||
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
||||
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
||||
slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
||||
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
||||
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
||||
slapll.f slapmt.f
|
||||
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
|
||||
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
|
||||
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
|
||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
|
||||
slarrv.f slartv.f
|
||||
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
|
||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f
|
||||
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
||||
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f
|
||||
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
|
||||
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
|
||||
spbstf.f spbsv.f spbsvx.f
|
||||
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
|
||||
sposvx.f spstrf.f spstf2.f
|
||||
sppcon.f sppequ.f
|
||||
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
|
||||
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
|
||||
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
|
||||
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
|
||||
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
|
||||
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
|
||||
sstevx.f
|
||||
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
|
||||
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
|
||||
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
|
||||
ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
|
||||
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
|
||||
ssytri_rook.f ssycon_rook.f ssysv_rook.f
|
||||
stbcon.f
|
||||
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
|
||||
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
||||
stptrs.f
|
||||
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
||||
strtrs.f stzrqf.f stzrzf.f sstemr.f
|
||||
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
||||
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
||||
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
||||
sgeequb.f ssyequb.f spoequb.f sgbequb.f
|
||||
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
|
||||
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
|
||||
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
|
||||
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
|
||||
)
|
||||
|
||||
set(DSLASRC spotrs.f)
|
||||
|
||||
set(CLASRC
|
||||
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
||||
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
||||
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
||||
cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
||||
cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
||||
cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
|
||||
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
|
||||
cgesvx.f cgetc2.f cgetri.f
|
||||
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
|
||||
cgghrd.f cgglse.f cggqrf.f cggrqf.f
|
||||
cggsvd.f cggsvp.f
|
||||
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
||||
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
||||
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
||||
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
|
||||
chetf2.f chetrd.f
|
||||
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
|
||||
chetrs.f chetrs2.f
|
||||
chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
|
||||
chgeqz.f chpcon.f chpev.f chpevd.f
|
||||
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
|
||||
chpsvx.f
|
||||
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
|
||||
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
|
||||
claed0.f claed7.f claed8.f
|
||||
claein.f claesy.f claev2.f clags2.f clagtm.f
|
||||
clahef.f clahef_rook.f clahqr.f
|
||||
clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
||||
clanhb.f clanhe.f
|
||||
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
||||
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
||||
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
|
||||
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
|
||||
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
|
||||
clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
|
||||
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
||||
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
||||
clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
||||
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
||||
cposv.f cposvx.f cpstrf.f cpstf2.f
|
||||
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
||||
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
|
||||
crot.f cspcon.f csprfs.f cspsv.f
|
||||
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
|
||||
cstegr.f cstein.f csteqr.f
|
||||
csycon.f
|
||||
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
|
||||
csyswapr.f csytrs.f csytrs2.f csyconv.f
|
||||
csytf2_rook.f csytrf_rook.f csytrs_rook.f
|
||||
csytri_rook.f csycon_rook.f csysv_rook.f
|
||||
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
|
||||
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
||||
ctprfs.f ctptri.f
|
||||
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
||||
ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f
|
||||
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
||||
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
|
||||
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
||||
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
|
||||
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
|
||||
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
|
||||
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
|
||||
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
|
||||
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
|
||||
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
|
||||
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
|
||||
)
|
||||
|
||||
set(ZCLASRC cpotrs.f)
|
||||
|
||||
set(DLASRC
|
||||
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
||||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
||||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
||||
dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
||||
dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
|
||||
dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
|
||||
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
|
||||
dgetc2.f dgetri.f
|
||||
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
|
||||
dggglm.f dgghrd.f dgglse.f dggqrf.f
|
||||
dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
|
||||
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
||||
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
||||
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
||||
dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
||||
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
||||
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
||||
dlapll.f dlapmt.f
|
||||
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
|
||||
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
|
||||
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
|
||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
|
||||
dlargv.f dlarrv.f dlartv.f
|
||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
|
||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f
|
||||
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
||||
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f
|
||||
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
|
||||
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
|
||||
dpbstf.f dpbsv.f dpbsvx.f
|
||||
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
|
||||
dposvx.f dpotrs.f dpstrf.f dpstf2.f
|
||||
dppcon.f dppequ.f
|
||||
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
|
||||
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
|
||||
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
|
||||
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
|
||||
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
|
||||
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
|
||||
dstevx.f
|
||||
dsycon.f dsyev.f dsyevd.f dsyevr.f
|
||||
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
|
||||
dsysv.f dsysvx.f
|
||||
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
|
||||
dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
|
||||
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
|
||||
dsytri_rook.f dsycon_rook.f dsysv_rook.f
|
||||
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
|
||||
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
||||
dtptrs.f
|
||||
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
||||
dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f
|
||||
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
||||
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
||||
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
||||
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
|
||||
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
|
||||
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
|
||||
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
|
||||
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
|
||||
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
|
||||
)
|
||||
|
||||
set(ZLASRC
|
||||
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
||||
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
||||
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
||||
zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
|
||||
zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
||||
zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
||||
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
|
||||
zgetri.f
|
||||
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
|
||||
zgghrd.f zgglse.f zggqrf.f zggrqf.f
|
||||
zggsvd.f zggsvp.f
|
||||
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
||||
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
||||
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
||||
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
|
||||
zhetf2.f zhetrd.f
|
||||
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
|
||||
zhetrs.f zhetrs2.f
|
||||
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
|
||||
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
|
||||
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
|
||||
zhpsvx.f
|
||||
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
|
||||
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
|
||||
zlaed0.f zlaed7.f zlaed8.f
|
||||
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
||||
zlahef.f zlahef_rook.f zlahqr.f
|
||||
zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
||||
zlangt.f zlanhb.f
|
||||
zlanhe.f
|
||||
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
||||
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
|
||||
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
|
||||
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
|
||||
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
|
||||
zlarcm.f zlarf.f zlarfb.f
|
||||
zlarfg.f zlarft.f zlarfgp.f
|
||||
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
||||
zlassq.f zlasyf.f zlasyf_rook.f
|
||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f
|
||||
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
||||
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
||||
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
|
||||
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
|
||||
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
|
||||
zrot.f zspcon.f zsprfs.f zspsv.f
|
||||
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
|
||||
zstegr.f zstein.f zsteqr.f
|
||||
zsycon.f
|
||||
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
|
||||
zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
|
||||
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
|
||||
zsytri_rook.f zsycon_rook.f zsysv_rook.f
|
||||
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
|
||||
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
||||
ztprfs.f ztptri.f
|
||||
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
||||
ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f
|
||||
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
|
||||
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
||||
zunmtr.f zupgtr.f
|
||||
zupmtr.f izmax1.f dzsum1.f zstemr.f
|
||||
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
|
||||
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
|
||||
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
|
||||
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
|
||||
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
|
||||
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
|
||||
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
|
||||
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
|
||||
)
|
||||
|
||||
# Collect the LAPACK sources (relative to lapack-netlib/SRC) for the enabled
# precisions. ALLAUX is always included; each precision adds its own routines
# plus the routine and auxiliary lists it shares with its companion precision.
set(LA_REL_SRC ${ALLAUX})

if (BUILD_SINGLE)
  set(LA_REL_SRC ${LA_REL_SRC} ${SLASRC} ${DSLASRC} ${SCLAUX})
endif ()
if (BUILD_DOUBLE)
  set(LA_REL_SRC ${LA_REL_SRC} ${DLASRC} ${DSLASRC} ${DZLAUX})
endif ()
if (BUILD_COMPLEX)
  set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${SCLAUX})
endif ()
if (BUILD_COMPLEX16)
  set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${DZLAUX})
endif ()

# Prefix every entry with the lapack-netlib SRC folder.
set(LA_SOURCES "")
foreach (LA_FILE ${LA_REL_SRC})
  set(LA_SOURCES ${LA_SOURCES} "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
endforeach ()

# All LAPACK sources are compiled with LAPACK_FFLAGS.
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
|
||||
2067
cmake/lapacke.cmake
Normal file
2067
cmake/lapacke.cmake
Normal file
File diff suppressed because it is too large
Load Diff
104
cmake/os.cmake
Normal file
104
cmake/os.cmake
Normal file
@@ -0,0 +1,104 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Detects the OS and sets appropriate variables.
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
|
||||
set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
|
||||
set(MD5SUM "md5 -r")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
|
||||
set(MD5SUM "md5 -r")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD")
|
||||
set(MD5SUM "md5 -n")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(EXTRALIB "${EXTRALIB} -lm")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "AIX")
|
||||
set(EXTRALIB "${EXTRALIB} -lm")
|
||||
endif ()
|
||||
|
||||
# TODO: this is probably meant for mingw, not other windows compilers
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
|
||||
set(NEED_PIC 0)
|
||||
set(NO_EXPRECISION 1)
|
||||
|
||||
set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32")
|
||||
|
||||
# probably not going to use these
|
||||
set(SUFFIX "obj")
|
||||
set(PSUFFIX "pobj")
|
||||
set(LIBSUFFIX "a")
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")

  # Test for supporting MS_ABI
  # removed string parsing in favor of CMake's version comparison -hpa
  # The trailing newline printed by -dumpversion is stripped, and the result is
  # quoted below so that an empty answer (e.g. the compiler could not be run)
  # is treated as "too old" instead of turning the if() into a syntax error.
  execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                  OUTPUT_VARIABLE GCC_VERSION
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  if (NOT "${GCC_VERSION}" VERSION_LESS 4.7)
    # GCC Version >=4.7
    # It is compatible with MSVC ABI.
    set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
  endif ()
endif ()
|
||||
|
||||
# Ensure the correct stack alignment on Win32
|
||||
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
if (NOT MSVC AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
|
||||
endif ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Interix")
|
||||
set(NEED_PIC 0)
|
||||
set(NO_EXPRECISION 1)
|
||||
|
||||
set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin")
|
||||
endif ()
|
||||
|
||||
if (CYGWIN)
|
||||
set(NEED_PIC 0)
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix")
|
||||
if (SMP)
|
||||
set(EXTRALIB "${EXTRALIB} -lpthread")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (QUAD_PRECISION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (UTEST_CHECK)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
|
||||
set(SANITY_CHECK 1)
|
||||
endif ()
|
||||
|
||||
if (SANITY_CHECK)
|
||||
# TODO: need some way to get $(*F) (target filename)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}")
|
||||
endif ()
|
||||
|
||||
113
cmake/prebuild.cmake
Normal file
113
cmake/prebuild.cmake
Normal file
@@ -0,0 +1,113 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from OpenBLAS/Makefile.prebuild
|
||||
## This is triggered by system.cmake and runs before any of the code is built.
|
||||
## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
|
||||
## Next it runs f_check and appends some fortran information to the files.
|
||||
## Finally it runs getarch and getarch_2nd for even more environment information.
|
||||
|
||||
# CMake vars set by this file:
|
||||
# CORE
|
||||
# LIBCORE
|
||||
# NUM_CORES
|
||||
# HAVE_MMX
|
||||
# HAVE_SSE
|
||||
# HAVE_SSE2
|
||||
# HAVE_SSE3
|
||||
# MAKE
|
||||
# SGEMM_UNROLL_M
|
||||
# SGEMM_UNROLL_N
|
||||
# DGEMM_UNROLL_M
|
||||
# DGEMM_UNROLL_N
|
||||
# QGEMM_UNROLL_M
|
||||
# QGEMM_UNROLL_N
|
||||
# CGEMM_UNROLL_M
|
||||
# CGEMM_UNROLL_N
|
||||
# ZGEMM_UNROLL_M
|
||||
# ZGEMM_UNROLL_N
|
||||
# XGEMM_UNROLL_M
|
||||
# XGEMM_UNROLL_N
|
||||
# CGEMM3M_UNROLL_M
|
||||
# CGEMM3M_UNROLL_N
|
||||
# ZGEMM3M_UNROLL_M
|
||||
# ZGEMM3M_UNROLL_N
|
||||
# XGEMM3M_UNROLL_M
|
||||
# XGEMM3M_UNROLL_N
|
||||
|
||||
# CPUIDEMU = ../../cpuid/table.o
|
||||
|
||||
if (DEFINED CPUIDEMU)
|
||||
set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
|
||||
endif ()
|
||||
|
||||
if (DEFINED TARGET_CORE)
|
||||
# set the C flags for just this file
|
||||
set(GETARCH2_FLAGS "-DBUILD_KERNEL")
|
||||
set(TARGET_MAKE "Makefile_kernel.conf")
|
||||
set(TARGET_CONF "config_kernel.h")
|
||||
else()
|
||||
set(TARGET_MAKE "Makefile.conf")
|
||||
set(TARGET_CONF "config.h")
|
||||
endif ()
|
||||
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake")
|
||||
|
||||
if (NOT NOFORTRAN)
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake")
|
||||
endif ()
|
||||
|
||||
# compile getarch
|
||||
# Sources for the getarch probe. CPUIDEMU is the optional CPUID emulation
# object tested with `if (DEFINED CPUIDEMU)` earlier in this file; when it is
# not defined the reference expands to nothing.
set(GETARCH_SRC
  ${CMAKE_SOURCE_DIR}/getarch.c
  ${CPUIDEMU}
)
|
||||
|
||||
if (NOT MSVC)
|
||||
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
|
||||
endif ()
|
||||
|
||||
if (MSVC)
|
||||
#Use generic for MSVC now
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
|
||||
endif()
|
||||
|
||||
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
|
||||
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
||||
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||
SOURCES ${GETARCH_SRC}
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GETARCH_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||
)
|
||||
|
||||
message(STATUS "Running getarch")
|
||||
|
||||
# run the freshly built getarch directly: the output of `getarch 0` is parsed into CMake vars below, the output of `getarch 1` is appended to ${TARGET_CONF}
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
|
||||
|
||||
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
|
||||
|
||||
# append config data from getarch to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH_MAKE_OUT})
|
||||
|
||||
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
|
||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH2_DIR})
|
||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GETARCH2_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||
)
|
||||
|
||||
# run the freshly built getarch_2nd directly: the output of `getarch_2nd 0` is parsed into CMake vars below, the output of `getarch_2nd 1` is appended to ${TARGET_CONF}
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
|
||||
|
||||
# append config data from getarch_2nd to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH2_MAKE_OUT})
|
||||
|
||||
552
cmake/system.cmake
Normal file
552
cmake/system.cmake
Normal file
@@ -0,0 +1,552 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from OpenBLAS/Makefile.system
|
||||
##
|
||||
|
||||
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")
|
||||
|
||||
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
|
||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||
|
||||
# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa
|
||||
|
||||
# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
|
||||
if (DEFINED TARGET_CORE)
|
||||
set(TARGET ${TARGET_CORE})
|
||||
endif ()
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
set(NO_AVX 1)
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
|
||||
set(TARGET "NEHALEM")
|
||||
endif ()
|
||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER")
|
||||
set(TARGET "BARCELONA")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
message(STATUS "Targetting the ${TARGET} architecture.")
|
||||
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
||||
endif ()
|
||||
|
||||
if (INTERFACE64)
|
||||
message(STATUS "Using 64-bit integers.")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD)
|
||||
set(GEMM_MULTITHREAD_THRESHOLD 4)
|
||||
endif ()
|
||||
message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}")
|
||||
|
||||
if (NO_AVX)
|
||||
message(STATUS "Disabling Advanced Vector Extensions (AVX).")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
if (NO_AVX2)
|
||||
message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
|
||||
endif ()
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL Debug)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
|
||||
endif ()
|
||||
|
||||
# TODO: let CMake handle this? -hpa
|
||||
#if (${QUIET_MAKE})
|
||||
# set(MAKE "${MAKE} -s")
|
||||
#endif()
|
||||
|
||||
if (NOT DEFINED NO_PARALLEL_MAKE)
|
||||
set(NO_PARALLEL_MAKE 0)
|
||||
endif ()
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")
|
||||
|
||||
if (CMAKE_CXX_COMPILER STREQUAL loongcc)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -static")
|
||||
endif ()
|
||||
|
||||
#if don't use Fortran, it will only compile CBLAS.
|
||||
if (ONLY_CBLAS)
|
||||
set(NO_LAPACK 1)
|
||||
else ()
|
||||
set(ONLY_CBLAS 0)
|
||||
endif ()
|
||||
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||
|
||||
if (NOT DEFINED NUM_THREADS)
|
||||
set(NUM_THREADS ${NUM_CORES})
|
||||
endif ()
|
||||
|
||||
if (${NUM_THREADS} EQUAL 1)
|
||||
set(USE_THREAD 0)
|
||||
endif ()
|
||||
|
||||
if (DEFINED USE_THREAD)
|
||||
if (NOT ${USE_THREAD})
|
||||
unset(SMP)
|
||||
else ()
|
||||
set(SMP 1)
|
||||
endif ()
|
||||
else ()
|
||||
# N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
|
||||
if (${NUM_THREADS} EQUAL 1)
|
||||
unset(SMP)
|
||||
else ()
|
||||
set(SMP 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${SMP})
|
||||
message(STATUS "SMP enabled.")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED NEED_PIC)
|
||||
set(NEED_PIC 1)
|
||||
endif ()
|
||||
|
||||
# TODO: I think CMake should be handling all this stuff -hpa
|
||||
unset(ARFLAGS)
|
||||
set(CPP "${COMPILER} -E")
|
||||
set(AR "${CROSS_SUFFIX}ar")
|
||||
set(AS "${CROSS_SUFFIX}as")
|
||||
set(LD "${CROSS_SUFFIX}ld")
|
||||
set(RANLIB "${CROSS_SUFFIX}ranlib")
|
||||
set(NM "${CROSS_SUFFIX}nm")
|
||||
set(DLLWRAP "${CROSS_SUFFIX}dllwrap")
|
||||
set(OBJCOPY "${CROSS_SUFFIX}objcopy")
|
||||
set(OBJCONV "${CROSS_SUFFIX}objconv")
|
||||
|
||||
# OS dependent settings
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")
|
||||
|
||||
# Architecture dependent settings
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")
|
||||
|
||||
# C Compiler dependent settings
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")
|
||||
|
||||
if (NOT NOFORTRAN)
|
||||
# Fortran Compiler dependent settings
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
|
||||
endif ()
|
||||
|
||||
if (BINARY64)
|
||||
if (INTERFACE64)
|
||||
# CCOMMON_OPT += -DUSE64BITINT
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Add position-independent-code flags for the C and Fortran compilers.
if (NEED_PIC)
  # NOTE(review): CMAKE_C_COMPILER normally holds the compiler's path, so this
  # comparison with "IBM" may never match -- confirm whether a compiler-family
  # variable was intended here.
  if (${CMAKE_C_COMPILER} STREQUAL "IBM")
    set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
  endif ()

  # F_COMPILER is quoted because it can be undefined (the Fortran check and
  # settings files are only included when NOFORTRAN is off); unquoted, an
  # empty expansion would make this if() a syntax error.
  if ("${F_COMPILER}" STREQUAL "SUN")
    set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
  endif ()
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
|
||||
endif ()
|
||||
|
||||
if (NO_LAPACK)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK")
|
||||
#Disable LAPACK C interface
|
||||
set(NO_LAPACKE 1)
|
||||
endif ()
|
||||
|
||||
if (NO_LAPACKE)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE")
|
||||
endif ()
|
||||
|
||||
if (NO_AVX)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
if (NO_AVX2)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
|
||||
endif ()
|
||||
|
||||
if (SMP)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")
|
||||
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
set(USE_SIMPLE_THREADED_LEVEL3 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
# NO_AFFINITY = 1
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP")
|
||||
endif ()
|
||||
|
||||
if (BIGNUMA)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
if (NO_WARMUP)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_WARMUP")
|
||||
endif ()
|
||||
|
||||
if (CONSISTENT_FPCSR)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR")
|
||||
endif ()
|
||||
|
||||
# Only for development
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING")
|
||||
# set(USE_PAPI 1)
|
||||
|
||||
if (USE_PAPI)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI")
|
||||
set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr")
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_THREADS)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS")
|
||||
endif ()
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
|
||||
|
||||
if (USE_SIMPLE_THREADED_LEVEL3)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
|
||||
endif ()
|
||||
|
||||
if (DEFINED LIBNAMESUFFIX)
|
||||
set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
|
||||
else ()
|
||||
set(LIBPREFIX "libopenblas")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED SYMBOLPREFIX)
|
||||
set(SYMBOLPREFIX "")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED SYMBOLSUFFIX)
|
||||
set(SYMBOLSUFFIX "")
|
||||
endif ()
|
||||
|
||||
set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}")
|
||||
|
||||
# TODO: need to convert these Makefiles
|
||||
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake
|
||||
|
||||
if (${CORE} STREQUAL "PPC440")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "PPC440FP2")
|
||||
set(STATIC_ALLOCATION 1)
|
||||
endif ()
|
||||
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(NO_AFFINITY 1)
|
||||
endif ()
|
||||
|
||||
if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
set(NO_AFFINITY 1)
|
||||
endif ()
|
||||
|
||||
if (NO_AFFINITY)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY")
|
||||
endif ()
|
||||
|
||||
if (FUNCTION_PROFILE)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE")
|
||||
endif ()
|
||||
|
||||
if (HUGETLB_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB")
|
||||
endif ()
|
||||
|
||||
# Allocate from a hugetlbfs-backed file; the file name is passed through as a
# preprocessor definition.
if (DEFINED HUGETLBFILE_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
endif ()
|
||||
|
||||
if (STATIC_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC")
|
||||
endif ()
|
||||
|
||||
if (DEVICEDRIVER_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"")
|
||||
endif ()
|
||||
|
||||
if (MIXED_MEMORY_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
|
||||
set(TAR gtar)
|
||||
set(PATCH gpatch)
|
||||
set(GREP ggrep)
|
||||
else ()
|
||||
set(TAR tar)
|
||||
set(PATCH patch)
|
||||
set(GREP grep)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED MD5SUM)
|
||||
set(MD5SUM md5sum)
|
||||
endif ()
|
||||
|
||||
set(AWK awk)
|
||||
|
||||
set(REVISION "-r${OpenBLAS_VERSION}")
|
||||
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
|
||||
|
||||
if (DEBUG)
|
||||
set(COMMON_OPT "${COMMON_OPT} -g")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED COMMON_OPT)
|
||||
set(COMMON_OPT "-O2")
|
||||
endif ()
|
||||
|
||||
#For x86 32-bit
|
||||
if (DEFINED BINARY AND BINARY EQUAL 32)
|
||||
if (NOT MSVC)
|
||||
set(COMMON_OPT "${COMMON_OPT} -m32")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
|
||||
if(NOT MSVC)
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
|
||||
endif()
|
||||
# TODO: not sure what PFLAGS is -hpa
|
||||
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
|
||||
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")
|
||||
# TODO: not sure what FPFLAGS is -hpa
|
||||
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
|
||||
set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")
|
||||
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
# Strip every OpenMP-enabling flag spelling from the LAPACK Fortran flag
# strings on Windows.
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
  set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
  foreach (FILTER_FLAG ${FILTER_FLAGS})
    # plain substring removal; the inputs are quoted so the flag strings are
    # always passed as a single argument
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FFLAGS "${LAPACK_FFLAGS}")
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FPFLAGS "${LAPACK_FPFLAGS}")
  endforeach ()
endif ()
|
||||
|
||||
if ("${F_COMPILER}" STREQUAL "GFORTRAN")
|
||||
# lapack-netlib is rife with uninitialized warnings -hpa
|
||||
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized")
|
||||
endif ()
|
||||
|
||||
set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H")
|
||||
if (INTERFACE64)
|
||||
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED SUFFIX)
|
||||
set(SUFFIX o)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED PSUFFIX)
|
||||
set(PSUFFIX po)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED LIBSUFFIX)
|
||||
set(LIBSUFFIX a)
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (DEFINED SMP)
|
||||
set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}")
|
||||
else ()
|
||||
set(LIBNAME "${LIBPREFIX}${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}")
|
||||
endif ()
|
||||
else ()
|
||||
if (DEFINED SMP)
|
||||
set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}")
|
||||
else ()
|
||||
set(LIBNAME "${LIBPREFIX}_${LIBCORE}${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}${REVISION}_p.${LIBSUFFIX}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
set(LIBDLLNAME "${LIBPREFIX}.dll")

# LIBNAME already ends in ".${LIBSUFFIX}" (see where it is assembled above),
# so the derived names replace that suffix rather than appending to it.
string(REGEX REPLACE "\\.${LIBSUFFIX}$" "" LIBNAME_STEM "${LIBNAME}")
set(LIBSONAME "${LIBNAME_STEM}.so")
set(LIBDYNNAME "${LIBNAME_STEM}.dylib")
set(LIBDEFNAME "${LIBNAME_STEM}.def")
set(LIBEXPNAME "${LIBNAME_STEM}.exp")
set(LIBZIPNAME "${LIBNAME_STEM}.zip")
|
||||
|
||||
set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
|
||||
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")
|
||||
|
||||
|
||||
set(LIB_COMPONENTS BLAS)
|
||||
if (NOT NO_CBLAS)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS")
|
||||
endif ()
|
||||
|
||||
if (NOT NO_LAPACK)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK")
|
||||
if (NOT NO_LAPACKE)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (ONLY_CBLAS)
|
||||
set(LIB_COMPONENTS CBLAS)
|
||||
endif ()
|
||||
|
||||
|
||||
# For GEMM3M
|
||||
set(USE_GEMM3M 0)
|
||||
|
||||
if (DEFINED ARCH)
|
||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
|
||||
set(USE_GEMM3M 1)
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "generic")
|
||||
set(USE_GEMM3M 0)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
#export OSNAME
|
||||
#export ARCH
|
||||
#export CORE
|
||||
#export LIBCORE
|
||||
#export PGCPATH
|
||||
#export CONFIG
|
||||
#export CC
|
||||
#export FC
|
||||
#export BU
|
||||
#export FU
|
||||
#export NEED2UNDERSCORES
|
||||
#export USE_THREAD
|
||||
#export NUM_THREADS
|
||||
#export NUM_CORES
|
||||
#export SMP
|
||||
#export MAKEFILE_RULE
|
||||
#export NEED_PIC
|
||||
#export BINARY
|
||||
#export BINARY32
|
||||
#export BINARY64
|
||||
#export F_COMPILER
|
||||
#export C_COMPILER
|
||||
#export USE_OPENMP
|
||||
#export CROSS
|
||||
#export CROSS_SUFFIX
|
||||
#export NOFORTRAN
|
||||
#export NO_FBLAS
|
||||
#export EXTRALIB
|
||||
#export CEXTRALIB
|
||||
#export FEXTRALIB
|
||||
#export HAVE_SSE
|
||||
#export HAVE_SSE2
|
||||
#export HAVE_SSE3
|
||||
#export HAVE_SSSE3
|
||||
#export HAVE_SSE4_1
|
||||
#export HAVE_SSE4_2
|
||||
#export HAVE_SSE4A
|
||||
#export HAVE_SSE5
|
||||
#export HAVE_AVX
|
||||
#export HAVE_VFP
|
||||
#export HAVE_VFPV3
|
||||
#export HAVE_VFPV4
|
||||
#export HAVE_NEON
|
||||
#export KERNELDIR
|
||||
#export FUNCTION_PROFILE
|
||||
#export TARGET_CORE
|
||||
#
|
||||
#export SGEMM_UNROLL_M
|
||||
#export SGEMM_UNROLL_N
|
||||
#export DGEMM_UNROLL_M
|
||||
#export DGEMM_UNROLL_N
|
||||
#export QGEMM_UNROLL_M
|
||||
#export QGEMM_UNROLL_N
|
||||
#export CGEMM_UNROLL_M
|
||||
#export CGEMM_UNROLL_N
|
||||
#export ZGEMM_UNROLL_M
|
||||
#export ZGEMM_UNROLL_N
|
||||
#export XGEMM_UNROLL_M
|
||||
#export XGEMM_UNROLL_N
|
||||
#export CGEMM3M_UNROLL_M
|
||||
#export CGEMM3M_UNROLL_N
|
||||
#export ZGEMM3M_UNROLL_M
|
||||
#export ZGEMM3M_UNROLL_N
|
||||
#export XGEMM3M_UNROLL_M
|
||||
#export XGEMM3M_UNROLL_N
|
||||
|
||||
|
||||
#if (USE_CUDA)
|
||||
# export CUDADIR
|
||||
# export CUCC
|
||||
# export CUFLAGS
|
||||
# export CULIB
|
||||
#endif
|
||||
|
||||
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
|
||||
#
|
||||
#.f.$(SUFFIX):
|
||||
# $(FC) $(FFLAGS) -c $< -o $(@F)
|
||||
#
|
||||
#.f.$(PSUFFIX):
|
||||
# $(FC) $(FPFLAGS) -pg -c $< -o $(@F)
|
||||
|
||||
# these are not cross-platform
|
||||
#ifdef BINARY64
|
||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1
|
||||
#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib
|
||||
#else
|
||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1/32
|
||||
#PGIPATH = /opt/pgi/linux86/7.1-5/lib
|
||||
#endif
|
||||
|
||||
#ACMLPATH = /opt/acml/4.3.0
|
||||
#ifneq ($(OSNAME), Darwin)
|
||||
#MKLPATH = /opt/intel/mkl/10.2.2.025/lib
|
||||
#else
|
||||
#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
|
||||
#endif
|
||||
#ATLASPATH = /opt/atlas/3.9.17/opteron
|
||||
#FLAMEPATH = $(HOME)/flame/lib
|
||||
#ifneq ($(OSNAME), SunOS)
|
||||
#SUNPATH = /opt/sunstudio12.1
|
||||
#else
|
||||
#SUNPATH = /opt/SUNWspro
|
||||
#endif
|
||||
|
||||
346
cmake/utils.cmake
Normal file
346
cmake/utils.cmake
Normal file
@@ -0,0 +1,346 @@
|
||||
# Functions to help with the OpenBLAS build
|
||||
|
||||
# Turns getarch output into CMake variables in the caller's scope.
# Every NAME=VALUE token found in GETARCH_IN (names and values made of
# letters, digits and underscores) becomes a variable NAME holding VALUE.
function(ParseGetArchVars GETARCH_IN)
  string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" assignments "${GETARCH_IN}")
  foreach (assignment ${assignments})
    # each token contains exactly one '=': cut on it to get key and value
    string(REGEX REPLACE "=.*$" "" key "${assignment}")
    string(REGEX REPLACE "^.*=" "" value "${assignment}")
    set(${key} ${value} PARENT_SCOPE)
  endforeach ()
endfunction ()
|
||||
|
||||
# Reads a Makefile into CMake vars.
# Each line of MAKEFILE_IN of the form "NAME = VALUE" sets the CMake variable
# NAME; Makefile references $(X) inside VALUE are replaced by the current value
# of the CMake variable X. Lines of the form "include $(KERNELDIR)/<file>" are
# followed recursively, relative to ${KERNELDIR}.
# This is a macro, so every variable it sets (including its temporaries) lands
# in the caller's scope.
# NOTE(review): the assignment regex is not anchored to the start of the line,
# so a commented-out line such as "# FOO = bar" is also read as an assignment
# -- confirm whether that is intended.
macro(ParseMakefileVars MAKEFILE_IN)
  message(STATUS "Reading vars from ${MAKEFILE_IN}...")
  file(STRINGS ${MAKEFILE_IN} makefile_contents)
  foreach (makefile_line ${makefile_contents})
    string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
    if (NOT "${line_match}" STREQUAL "")
      set(var_name ${CMAKE_MATCH_1})
      set(var_value ${CMAKE_MATCH_2})
      # check for Makefile variables in the string, e.g. $(TSUFFIX)
      # (var_value is quoted in the string() calls so a list value is passed as
      # one argument and its elements are not glued together)
      string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches "${var_value}")
      foreach (make_var ${make_var_matches})
        # strip out Makefile $() markup
        string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var "${make_var}")
        # now replace the instance of the Makefile variable with the value of the CMake variable (note the double quote)
        string(REPLACE "$(${make_var})" "${${make_var}}" var_value "${var_value}")
      endforeach ()
      set(${var_name} ${var_value})
    else ()
      string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
      if (NOT "${line_match}" STREQUAL "")
        ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1})
      endif ()
    endif ()
  endforeach ()
endmacro ()
|
||||
|
||||
# Returns all combinations of the input list, as a list with colon-separated combinations.
# E.g. input of A;B;C yields " ", A, B, A:B, C, A:C, B:C, A:B:C (the leading " " stands for the empty combination).
# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")).
# @param list_in the list of items to combine
# @param absent_codes_in codes to use when an element is absent from a combination. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present.
# @returns LIST_OUT a list of combinations
#          CODES_OUT a list of codes corresponding to each combination: for each item, its first letter when the item is present, otherwise the matching entry of absent_codes_in
function(AllCombinations list_in absent_codes_in)
  list(LENGTH list_in list_count)
  # one combination per bit pattern: c runs from 0 to 2^list_count - 1
  math(EXPR num_combos "(1 << ${list_count}) - 1")
  math(EXPR last_list_index "${list_count} - 1")
  set(LIST_OUT "")
  set(CODES_OUT "")
  foreach (c RANGE 0 ${num_combos})

    set(current_combo "")
    set(current_code "")

    foreach (list_index RANGE 0 ${last_list_index})
      math(EXPR bit "1 << ${list_index}")
      math(EXPR combo_has_bit "${c} & ${bit}")
      if (combo_has_bit)
        list(GET list_in ${list_index} list_elem)
        # compare against the empty string instead of testing truthiness, so an
        # element such as N, NO, OFF or 0 is not mistaken for "nothing yet"
        if (current_combo STREQUAL "")
          set(current_combo "${list_elem}")
        else ()
          set(current_combo "${current_combo}:${list_elem}")
        endif ()
        string(SUBSTRING "${list_elem}" 0 1 code_char)
      else ()
        list(GET absent_codes_in ${list_index} code_char)
      endif ()
      set(current_code "${current_code}${code_char}")
    endforeach ()

    if (current_combo STREQUAL "")
      list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space
    else ()
      list(APPEND LIST_OUT ${current_combo})
    endif ()
    list(APPEND CODES_OUT ${current_code})

  endforeach ()

  set(LIST_OUT ${LIST_OUT} PARENT_SCOPE)
  set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
endfunction ()
|
||||
|
||||
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
|
||||
# @param sources_in the source files to build from
|
||||
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
|
||||
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
|
||||
# e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
|
||||
# @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
|
||||
# @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
|
||||
# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
|
||||
# @param complex_filename_scheme some routines have separate source files for complex and non-complex float types.
|
||||
# 0 - compiles for all types
|
||||
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
|
||||
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
|
||||
# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
|
||||
# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
|
||||
# STRING - compiles only the given type (e.g. DOUBLE)
|
||||
function(GenerateNamedObjects sources_in)

  # Writes one small wrapper source per (float type, source file) pair into the
  # build tree and appends the wrappers to OPENBLAS_SRC in the caller's scope.
  # Each wrapper #defines the per-object names (ASMNAME, NAME, CNAME, ...) and
  # the requested definitions, then #includes the real source file.
  #
  # Optional positional arguments after sources_in:
  #   ARGV1  preprocessor definitions applied to every generated object
  #   ARGV2  object name to use instead of the source name (extension is
  #          stripped; a "*" in it is replaced by the float type character)
  #   ARGV3  if true, prefix the object name with "cblas_" and define CBLAS
  #   ARGV4  text that replaces the last character of the object name
  #   ARGV5  text appended to the object name (only used when ARGV4 is empty)
  #   ARGV6  if true, loop once without a float type prefix or type defines
  #   ARGV7  complex filename scheme (0-4) or the name of a single float type

  # Start every optional input from a known value. Function scopes begin as a
  # copy of the caller's variables, so anything left unset here could silently
  # pick up a same-named variable from the caller.
  set(defines_in "")
  set(name_in "")
  set(use_cblas false)
  set(replace_last_with "")
  set(append_with "")
  set(no_float_type false)

  # ARGC is the only reliable way to know whether ARGV<n> was passed to *this*
  # call; DEFINED ARGV<n> can be true for values left over from a calling function.
  if (ARGC GREATER 1)
    set(defines_in "${ARGV1}")
  endif ()

  if (ARGC GREATER 2 AND NOT "${ARGV2}" STREQUAL "")
    # strip off extension for kernel files that pass in the object name.
    get_filename_component(name_in "${ARGV2}" NAME_WE)
  endif ()

  if (ARGC GREATER 3)
    set(use_cblas "${ARGV3}")
  endif ()

  if (ARGC GREATER 4)
    set(replace_last_with "${ARGV4}")
  endif ()

  if (ARGC GREATER 5)
    set(append_with "${ARGV5}")
  endif ()

  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  if (no_float_type)
    set(float_list "DUMMY") # still need to loop once
  else ()
    set(float_list "${FLOAT_TYPES}")
  endif ()

  set(real_only false)
  set(complex_only false)
  set(mangle_complex_sources false)
  if (ARGC GREATER 7 AND NOT "${ARGV7}" STREQUAL "")
    # Copy into a local so if() compares the value itself instead of treating
    # the value (e.g. DOUBLE) as the name of another variable.
    set(filename_scheme "${ARGV7}")
    if (filename_scheme EQUAL 1)
      set(real_only true)
    elseif (filename_scheme EQUAL 2)
      set(complex_only true)
    elseif (filename_scheme EQUAL 3)
      set(mangle_complex_sources true)
    elseif (filename_scheme EQUAL 4)
      set(mangle_complex_sources true)
      set(complex_only true)
    elseif (NOT filename_scheme EQUAL 0)
      # not a numeric scheme: build only the given float type(s)
      set(float_list "${filename_scheme}")
    endif ()
  endif ()

  if (complex_only)
    list(REMOVE_ITEM float_list "SINGLE")
    list(REMOVE_ITEM float_list "DOUBLE")
  elseif (real_only)
    list(REMOVE_ITEM float_list "COMPLEX")
    list(REMOVE_ITEM float_list "ZCOMPLEX")
  endif ()

  set(float_char "")
  # Wrappers generated by this call only; reset so a caller variable with the
  # same name cannot leak extra entries into OPENBLAS_SRC.
  set(SRC_LIST_OUT "")
  foreach (float_type ${float_list})

    # The prefix is the lower-cased first letter of the type name (s/d/c/z).
    if (NOT no_float_type)
      string(SUBSTRING "${float_type}" 0 1 float_char)
      string(TOLOWER "${float_char}" float_char)
    endif ()

    foreach (source_file ${sources_in})

      # Compare against "" explicitly: if(<variable>) is false for values such
      # as "N" or "OFF", which are legitimate names/codes here.
      if ("${name_in}" STREQUAL "")
        get_filename_component(source_name "${source_file}" NAME_WE)
        set(obj_name "${float_char}${source_name}")
      elseif ("${name_in}" MATCHES "\\*")
        # replace * with float_char (quoted so an empty float_char stays an argument)
        string(REPLACE "*" "${float_char}" obj_name "${name_in}")
      else ()
        set(obj_name "${float_char}${name_in}")
      endif ()

      if (NOT "${replace_last_with}" STREQUAL "")
        string(REGEX REPLACE ".$" "${replace_last_with}" obj_name "${obj_name}")
      else ()
        set(obj_name "${obj_name}${append_with}")
      endif ()

      # now add the object and set the defines
      set(obj_defines ${defines_in})

      if (use_cblas)
        set(obj_name "cblas_${obj_name}")
        list(APPEND obj_defines "CBLAS")
      endif ()

      list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
      if ("${float_type}" STREQUAL "DOUBLE" OR "${float_type}" STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "DOUBLE")
      endif ()
      if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "COMPLEX")
        if (mangle_complex_sources)
          # add a z to the filename
          get_filename_component(source_name "${source_file}" NAME)
          string(REPLACE "${source_name}" "z${source_name}" source_file "${source_file}")
        endif ()
      endif ()

      if (VERBOSE_GEN)
        message(STATUS "${obj_name}:${source_file}")
        message(STATUS "${obj_defines}")
      endif ()

      # create a copy of the source to avoid duplicate obj filename problem with ar.exe
      get_filename_component(source_extension "${source_file}" EXT)
      set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}")
      if (IS_ABSOLUTE "${source_file}")
        set(old_source_file "${source_file}")
      else ()
        set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}")
      endif ()

      # Turn "A;B=1" into "#define A\n#define B 1" at the top of the wrapper.
      string(REPLACE ";" "\n#define " define_source "${obj_defines}")
      string(REPLACE "=" " " define_source "${define_source}")
      file(WRITE "${new_source_file}" "#define ${define_source}\n#include \"${old_source_file}\"")
      list(APPEND SRC_LIST_OUT "${new_source_file}")

    endforeach ()
  endforeach ()

  list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT})
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
|
||||
|
||||
# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in
# @param sources_in the source files to build from
# @param defines_in the preprocessor definitions that will be combined to create the object files
# @param absent_codes_in forwarded unchanged to AllCombinations together with defines_in
#        (presumably the codes to use when a definition is absent from a combination - confirm against AllCombinations)
# @param all_defines_in preprocessor definitions that will be applied to all objects (pass "" for none)
# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU.
#                  If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
#                  If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU.
#                  If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
#                  If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
# @param alternate_name (optional) replaces the source name as the object name (define codes are still appended)
# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional) see GenerateNamedObjects
function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)

  # ARGC is the only reliable test for an optional argument; DEFINED ARGV<n>
  # may see a value left over from a calling function.
  set(alternate_name_in "")
  if (ARGC GREATER 5)
    set(alternate_name_in "${ARGV5}")
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  set(complex_filename_scheme "")
  if (ARGC GREATER 7)
    set(complex_filename_scheme "${ARGV7}")
  endif ()

  # AllCombinations reports its results through LIST_OUT (colon separated
  # define combinations) and CODES_OUT (the matching name codes).
  AllCombinations("${defines_in}" "${absent_codes_in}")
  set(define_combos ${LIST_OUT})
  set(define_codes ${CODES_OUT})

  list(LENGTH define_combos num_combos)
  if (num_combos EQUAL 0)
    # Nothing to generate; foreach(RANGE 0 -1) below would not be an empty loop.
    set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
    return ()
  endif ()
  math(EXPR last_combo "${num_combos} - 1")

  foreach (c RANGE 0 ${last_combo})

    list(GET define_combos ${c} define_combo)
    list(GET define_codes ${c} define_code)

    # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with.
    # Done once per combination and quoted: string(REPLACE) concatenates all of
    # its input arguments, so re-running it on the already converted list would
    # glue the definitions together (TRANS;UNIT -> TRANSUNIT).
    string(REPLACE ":" ";" define_combo "${define_combo}")

    # the defines for this combination plus the ones common to all objects;
    # a single space marks the combination with no definitions at all
    set(cur_defines ${define_combo})
    if ("${cur_defines}" STREQUAL " ")
      set(cur_defines ${all_defines_in})
    else ()
      list(APPEND cur_defines ${all_defines_in})
    endif ()

    foreach (source_file ${sources_in})

      set(alternate_name "${alternate_name_in}")

      # now work out how the code is merged into the object name
      set(replace_code "")
      set(append_code "")
      if (replace_scheme EQUAL 1)
        set(replace_code "${define_code}")
      elseif (replace_scheme EQUAL 2)
        set(append_code "_${define_code}")
      elseif (replace_scheme EQUAL 3)
        # first extract the last letter of the name
        if ("${alternate_name}" STREQUAL "")
          string(REGEX MATCH "[a-zA-Z]\\." last_letter "${source_file}")
        else ()
          string(REGEX MATCH "[a-zA-Z]$" last_letter "${alternate_name}")
        endif ()
        if (NOT "${last_letter}" STREQUAL "")
          string(SUBSTRING "${last_letter}" 0 1 last_letter) # remove period from match
        endif ()
        # break the code up into the first letter and the remaining (should only be 2 anyway)
        string(SUBSTRING "${define_code}" 0 1 define_code_first)
        string(SUBSTRING "${define_code}" 1 -1 define_code_second)
        set(replace_code "${define_code_first}${last_letter}${define_code_second}")
      elseif (replace_scheme EQUAL 4)
        # insert code before the last underscore and pass that in as the alternate_name
        if ("${alternate_name}" STREQUAL "")
          get_filename_component(alternate_name "${source_file}" NAME_WE)
        endif ()
        set(extra_underscore "")
        # check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel)
        string(REGEX MATCH "_[a-zA-Z]+_" underscores "${alternate_name}")
        string(LENGTH "${underscores}" underscores)
        if (underscores EQUAL 0)
          set(extra_underscore "_")
        endif ()
        string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name "${alternate_name}")
      else ()
        set(append_code "${define_code}") # replace_scheme should be 0
      endif ()

      GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}")
    endforeach ()
  endforeach ()

  # GenerateNamedObjects updated OPENBLAS_SRC in this scope; hand it on to our caller.
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
|
||||
|
||||
110
common.h
110
common.h
@@ -82,7 +82,10 @@ extern "C" {
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include <malloc.h>
|
||||
@@ -95,6 +98,10 @@ extern "C" {
|
||||
|
||||
#ifdef OS_ANDROID
|
||||
#define NO_SYSV_IPC
|
||||
//Android NDK only supports complex.h since Android 5.0
|
||||
#if __ANDROID_API__ < 21
|
||||
#define FORCE_OPENBLAS_COMPLEX_STRUCT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
@@ -114,6 +121,7 @@ extern "C" {
|
||||
#include <sys/shm.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#ifdef SMP
|
||||
@@ -276,11 +284,6 @@ typedef int blasint;
|
||||
#define SIZE 8
|
||||
#define BASE_SHIFT 3
|
||||
#define ZBASE_SHIFT 4
|
||||
#elif defined(INTEGER) //extend for integer matrix
|
||||
#define FLOAT int
|
||||
#define SIZE 4
|
||||
#define BASE_SHIFT 2
|
||||
#define ZBASE_SHIFT 3
|
||||
#else
|
||||
#define FLOAT float
|
||||
#define SIZE 4
|
||||
@@ -298,13 +301,6 @@ typedef int blasint;
|
||||
#define COMPSIZE 2
|
||||
#endif
|
||||
|
||||
#if defined(C_PGI) || defined(C_SUN)
|
||||
#define CREAL(X) (*((FLOAT *)&X + 0))
|
||||
#define CIMAG(X) (*((FLOAT *)&X + 1))
|
||||
#else
|
||||
#define CREAL __real__
|
||||
#define CIMAG __imag__
|
||||
#endif
|
||||
|
||||
#define Address_H(x) (((x)+(1<<15))>>16)
|
||||
#define Address_L(x) ((x)-((Address_H(x))<<16))
|
||||
@@ -318,8 +314,12 @@ typedef int blasint;
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#define YIELDING YieldProcessor()
|
||||
#else
|
||||
#define YIELDING SwitchToThread()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
|
||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||
@@ -415,7 +415,51 @@ typedef char env_var_t[MAX_PATH];
|
||||
typedef char* env_var_t;
|
||||
#define readenv(p, n) ((p)=getenv(n))
|
||||
#endif
|
||||
|
||||
#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
|
||||
#ifdef _POSIX_MONOTONIC_CLOCK
|
||||
#if defined(__GLIBC_PREREQ) // keep the check split across two #if lines; a single combined condition fails to preprocess at __GLIBC_PREREQ(2, 17) when the macro is undefined
|
||||
#if __GLIBC_PREREQ(2, 17) // don't require -lrt
|
||||
#define USE_MONOTONIC
|
||||
#endif
|
||||
#elif defined(OS_ANDROID)
|
||||
#define USE_MONOTONIC
|
||||
#endif
|
||||
#endif
|
||||
/* use similar scale as x86 rdtsc for timeouts to work correctly */
|
||||
static inline unsigned long long rpcc(void){
  /* Both clock sources are scaled to nanoseconds. */
  const unsigned long long ns_per_sec = 1000000000ull;
  unsigned long long ticks;
#ifdef USE_MONOTONIC
  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC, &now);
  ticks = (unsigned long long)now.tv_sec * ns_per_sec;
  ticks += now.tv_nsec;
#else
  /* Fallback: wall-clock time, microseconds widened to nanoseconds. */
  struct timeval now;
  gettimeofday(&now, NULL);
  ticks = (unsigned long long)now.tv_sec * ns_per_sec;
  ticks += now.tv_usec * 1000;
#endif
  return ticks;
}
|
||||
#define RPCC_DEFINED
|
||||
#define RPCC64BIT
|
||||
#endif // !RPCC_DEFINED
|
||||
|
||||
#if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
} while (!__sync_bool_compare_and_swap(address, 0, 1));
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
#endif
|
||||
|
||||
#ifndef RPCC_DEFINED
|
||||
#error "rpcc() implementation is missing for your platform"
|
||||
#endif
|
||||
#ifndef BLAS_LOCK_DEFINED
|
||||
#error "blas_lock() implementation is missing for your platform"
|
||||
#endif
|
||||
#endif // !ASSEMBLER
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include "common_linux.h"
|
||||
@@ -461,18 +505,52 @@ typedef char* env_var_t;
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus)))
|
||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
#ifndef __cplusplus
|
||||
#include <complex.h>
|
||||
#endif
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_double(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
|
||||
#endif
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
|
||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
|
||||
#elif defined(DOUBLE)
|
||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
|
||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
|
||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
|
||||
#endif
|
||||
|
||||
#if defined(C_PGI) || defined(C_SUN)
|
||||
#define CREAL(X) (*((FLOAT *)&X + 0))
|
||||
#define CIMAG(X) (*((FLOAT *)&X + 1))
|
||||
#else
|
||||
#ifdef OPENBLAS_COMPLEX_STRUCT
|
||||
#define CREAL(Z) ((Z).real)
|
||||
#define CIMAG(Z) ((Z).imag)
|
||||
#else
|
||||
#define CREAL __real__
|
||||
#define CIMAG __imag__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif // ASSEMBLER
|
||||
|
||||
#ifndef IFLUSH
|
||||
@@ -489,6 +567,10 @@ typedef char* env_var_t;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(C_MSVC)
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
#ifndef MIN
|
||||
|
||||
@@ -76,6 +76,7 @@ static void __inline blas_lock(unsigned long *address){
|
||||
"30:", address);
|
||||
#endif
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static __inline unsigned int rpcc(void){
|
||||
|
||||
@@ -89,6 +90,7 @@ static __inline unsigned int rpcc(void){
|
||||
|
||||
return r0;
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
|
||||
#define HALT ldq $0, 0($0)
|
||||
|
||||
43
common_arm.h
43
common_arm.h
@@ -51,6 +51,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8)
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
int register ret;
|
||||
@@ -59,40 +61,29 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: \n\t"
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"cmp r3, #0 \n\t"
|
||||
"bne 1b \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"strex %0, %2, [%1] \n\t"
|
||||
"orr %0, r2 \n\t"
|
||||
: "=&r"(ret)
|
||||
: "r"(address), "r"(1)
|
||||
: "memory", "r2"
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
|
||||
MB;
|
||||
}
|
||||
|
||||
|
||||
/* Returns the current gettimeofday() time expressed in milliseconds. */
static inline unsigned long long rpcc(void){
  unsigned long long ret=0;
  double v;
  struct timeval tv;
  gettimeofday(&tv,NULL);
  /* seconds as a double, with the microsecond field as the fraction */
  v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
  /* plain double constant: the "d" suffix is a compiler extension, not standard C */
  ret = (unsigned long long) ( v * 1000.0 );
  return ret;
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
#endif
|
||||
|
||||
/* Integer quotient x / y using the ordinary division operator. */
static inline int blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;
  return quotient;
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#if !defined(HAVE_VFP)
|
||||
/* no FPU, soft float */
|
||||
#define GET_IMAGE(res)
|
||||
#elif defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||
@@ -140,4 +131,8 @@ REALNAME:
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#if !defined(ARMV5) && !defined(ARMV6) && !defined(ARMV7) && !defined(ARMV8)
|
||||
#error "you must define ARMV5, ARMV6, ARMV7 or ARMV8"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -45,42 +45,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
int register ret;
|
||||
int register tmp;
|
||||
long register ret;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: \n\t"
|
||||
"ldaxr %2, [%1] \n\t"
|
||||
"mov %2, #0 \n\t"
|
||||
"stlxr %w0, %2, [%1] \n\t"
|
||||
"cbnz %w0, 1b \n\t"
|
||||
"mov %0 , #0 \n\t"
|
||||
: "=r"(ret), "=r"(address), "=r"(tmp)
|
||||
: "1"(address)
|
||||
: "memory", "%w0"
|
||||
//, "%r2" , "%r3"
|
||||
|
||||
|
||||
"ldaxr %0, [%1] \n\t"
|
||||
"stlxr w2, %2, [%1] \n\t"
|
||||
"orr %0, %0, x2 \n\t"
|
||||
: "=r"(ret)
|
||||
: "r"(address), "r"(1l)
|
||||
: "memory", "x2"
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
|
||||
MB;
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
|
||||
/* Returns the current gettimeofday() time expressed in milliseconds. */
static inline unsigned long long rpcc(void){
  unsigned long long ret=0;
  double v;
  struct timeval tv;
  gettimeofday(&tv,NULL);
  /* seconds as a double, with the microsecond field as the fraction */
  v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
  /* plain double constant: the "d" suffix is a compiler extension, not standard C */
  ret = (unsigned long long) ( v * 1000.0 );
  return ret;
}
|
||||
|
||||
/* Integer quotient x / y using the ordinary division operator. */
static inline int blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;
  return quotient;
}
|
||||
|
||||
19
common_c.h
19
common_c.h
@@ -220,6 +220,15 @@
|
||||
#define COMATCOPY_K_CTC comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC comatcopy_k_rtc
|
||||
|
||||
#define CIMATCOPY_K_CN cimatcopy_k_cn
|
||||
#define CIMATCOPY_K_RN cimatcopy_k_rn
|
||||
#define CIMATCOPY_K_CT cimatcopy_k_ct
|
||||
#define CIMATCOPY_K_RT cimatcopy_k_rt
|
||||
#define CIMATCOPY_K_CNC cimatcopy_k_cnc
|
||||
#define CIMATCOPY_K_RNC cimatcopy_k_rnc
|
||||
#define CIMATCOPY_K_CTC cimatcopy_k_ctc
|
||||
#define CIMATCOPY_K_RTC cimatcopy_k_rtc
|
||||
|
||||
#define CGEADD_K cgeadd_k
|
||||
|
||||
#else
|
||||
@@ -403,6 +412,16 @@
|
||||
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
|
||||
|
||||
#define CIMATCOPY_K_CN gotoblas -> cimatcopy_k_cn
|
||||
#define CIMATCOPY_K_RN gotoblas -> cimatcopy_k_rn
|
||||
#define CIMATCOPY_K_CT gotoblas -> cimatcopy_k_ct
|
||||
#define CIMATCOPY_K_RT gotoblas -> cimatcopy_k_rt
|
||||
#define CIMATCOPY_K_CNC gotoblas -> cimatcopy_k_cnc
|
||||
#define CIMATCOPY_K_RNC gotoblas -> cimatcopy_k_rnc
|
||||
#define CIMATCOPY_K_CTC gotoblas -> cimatcopy_k_ctc
|
||||
#define CIMATCOPY_K_RTC gotoblas -> cimatcopy_k_rtc
|
||||
|
||||
#define CGEADD_K gotoblas -> cgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
@@ -149,6 +149,11 @@
|
||||
#define DOMATCOPY_K_RN domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT domatcopy_k_rt
|
||||
|
||||
#define DIMATCOPY_K_CN dimatcopy_k_cn
|
||||
#define DIMATCOPY_K_RN dimatcopy_k_rn
|
||||
#define DIMATCOPY_K_CT dimatcopy_k_ct
|
||||
#define DIMATCOPY_K_RT dimatcopy_k_rt
|
||||
#define DGEADD_K dgeadd_k
|
||||
|
||||
#else
|
||||
@@ -267,6 +272,10 @@
|
||||
#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
|
||||
#define DIMATCOPY_K_CN gotoblas -> dimatcopy_k_cn
|
||||
#define DIMATCOPY_K_RN gotoblas -> dimatcopy_k_rn
|
||||
#define DIMATCOPY_K_CT gotoblas -> dimatcopy_k_ct
|
||||
#define DIMATCOPY_K_RT gotoblas -> dimatcopy_k_rt
|
||||
|
||||
#define DGEADD_K gotoblas -> dgeadd_k
|
||||
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef COMMON_I_H
|
||||
#define COMMON_I_H
|
||||
|
||||
#ifndef DYNAMIC_ARCH
|
||||
#define IAXPYU_K iaxpy_k
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
#endif
|
||||
@@ -68,6 +68,7 @@ static __inline void blas_lock(volatile unsigned long *address){
|
||||
: "ar.ccv", "memory");
|
||||
} while (ret);
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static __inline unsigned long rpcc(void) {
|
||||
unsigned long clocks;
|
||||
@@ -75,6 +76,7 @@ static __inline unsigned long rpcc(void) {
|
||||
__asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks));
|
||||
return clocks;
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
|
||||
static __inline unsigned long stmxcsr(void){
|
||||
@@ -99,10 +101,12 @@ static __inline void blas_lock(volatile unsigned long *address){
|
||||
while (*address || _InterlockedCompareExchange((volatile int *) address,1,0))
|
||||
;
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static __inline unsigned int rpcc(void) {
|
||||
return __getReg(_IA64_REG_AR_ITC);
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
static __inline unsigned int stmxcsr(void) {
|
||||
return __getReg(_IA64_REG_AR_FPSR);
|
||||
|
||||
@@ -93,7 +93,6 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo
|
||||
|
||||
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *);
|
||||
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
|
||||
@@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
float _Complex cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float _Complex cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
double _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
@@ -60,8 +60,6 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int,
|
||||
int *, BLASLONG, int *, BLASLONG, int *, BLASLONG);
|
||||
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,
|
||||
|
||||
@@ -1736,31 +1736,55 @@ int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLAS
|
||||
int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int simatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG);
|
||||
int simatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG);
|
||||
int simatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG);
|
||||
int simatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG);
|
||||
|
||||
int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int dimatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG);
|
||||
int dimatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG);
|
||||
int dimatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG);
|
||||
int dimatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG);
|
||||
|
||||
int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int cimatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
|
||||
int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int cimatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
int cimatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
|
||||
|
||||
int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zimatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zimatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
int zimatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
|
||||
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
|
||||
|
||||
@@ -47,10 +47,6 @@
|
||||
#include "common_z.h"
|
||||
#include "common_x.h"
|
||||
|
||||
#ifdef INTEGER_PRECISION
|
||||
#include "common_i.h"
|
||||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
|
||||
@@ -638,10 +634,12 @@
|
||||
#define OMATCOPY_K_RN DOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT DOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT DOMATCOPY_K_RT
|
||||
#define GEADD_K DGEADD_K
|
||||
#define IMATCOPY_K_CN DIMATCOPY_K_CN
|
||||
#define IMATCOPY_K_RN DIMATCOPY_K_RN
|
||||
#define IMATCOPY_K_CT DIMATCOPY_K_CT
|
||||
#define IMATCOPY_K_RT DIMATCOPY_K_RT
|
||||
|
||||
#elif defined(INTEGER)
|
||||
#define AXPYU_K IAXPYU_K
|
||||
#define GEADD_K DGEADD_K
|
||||
#else
|
||||
|
||||
#define AMAX_K SAMAX_K
|
||||
@@ -938,6 +936,10 @@
|
||||
#define OMATCOPY_K_RN SOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT SOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT SOMATCOPY_K_RT
|
||||
#define IMATCOPY_K_CN SIMATCOPY_K_CN
|
||||
#define IMATCOPY_K_RN SIMATCOPY_K_RN
|
||||
#define IMATCOPY_K_CT SIMATCOPY_K_CT
|
||||
#define IMATCOPY_K_RT SIMATCOPY_K_RT
|
||||
|
||||
#define GEADD_K SGEADD_K
|
||||
#endif
|
||||
@@ -1754,6 +1756,15 @@
|
||||
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
|
||||
#define IMATCOPY_K_CN ZIMATCOPY_K_CN
|
||||
#define IMATCOPY_K_RN ZIMATCOPY_K_RN
|
||||
#define IMATCOPY_K_CT ZIMATCOPY_K_CT
|
||||
#define IMATCOPY_K_RT ZIMATCOPY_K_RT
|
||||
#define IMATCOPY_K_CNC ZIMATCOPY_K_CNC
|
||||
#define IMATCOPY_K_RNC ZIMATCOPY_K_RNC
|
||||
#define IMATCOPY_K_CTC ZIMATCOPY_K_CTC
|
||||
#define IMATCOPY_K_RTC ZIMATCOPY_K_RTC
|
||||
|
||||
#define GEADD_K ZGEADD_K
|
||||
|
||||
#else
|
||||
@@ -2167,6 +2178,14 @@
|
||||
#define OMATCOPY_K_RNC COMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
|
||||
#define IMATCOPY_K_CN CIMATCOPY_K_CN
|
||||
#define IMATCOPY_K_RN CIMATCOPY_K_RN
|
||||
#define IMATCOPY_K_CT CIMATCOPY_K_CT
|
||||
#define IMATCOPY_K_RT CIMATCOPY_K_RT
|
||||
#define IMATCOPY_K_CNC CIMATCOPY_K_CNC
|
||||
#define IMATCOPY_K_RNC CIMATCOPY_K_RNC
|
||||
#define IMATCOPY_K_CTC CIMATCOPY_K_CTC
|
||||
#define IMATCOPY_K_RTC CIMATCOPY_K_RTC
|
||||
|
||||
#define GEADD_K CGEADD_K
|
||||
|
||||
|
||||
@@ -98,6 +98,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
|
||||
|
||||
} while (ret);
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static inline unsigned int rpcc(void){
|
||||
unsigned long ret;
|
||||
@@ -118,6 +119,7 @@ static inline unsigned int rpcc(void){
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
#ifndef NO_AFFINITY
|
||||
|
||||
@@ -855,6 +855,36 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
|
||||
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
|
||||
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
|
||||
int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
|
||||
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
|
||||
int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
|
||||
@@ -87,6 +87,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
|
||||
#endif
|
||||
} while (ret);
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static inline unsigned long rpcc(void){
|
||||
unsigned long ret;
|
||||
@@ -103,6 +104,7 @@ static inline unsigned long rpcc(void){
|
||||
#endif
|
||||
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
#ifdef __64BIT__
|
||||
#define RPCC64BIT
|
||||
@@ -495,6 +497,15 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
REALNAME:
|
||||
#define EPILOGUE .size REALNAME, .-REALNAME
|
||||
#else
|
||||
#if _CALL_ELF == 2
|
||||
#define PROLOGUE \
|
||||
.section .text;\
|
||||
.align 6;\
|
||||
.globl REALNAME;\
|
||||
.type REALNAME, @function;\
|
||||
REALNAME:
|
||||
#define EPILOGUE .size REALNAME, .-REALNAME
|
||||
#else
|
||||
#define PROLOGUE \
|
||||
.section .text;\
|
||||
.align 5;\
|
||||
@@ -514,6 +525,7 @@ REALNAME:;\
|
||||
.size .REALNAME, .-.REALNAME; \
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PROFILE
|
||||
#ifndef __64BIT__
|
||||
@@ -792,4 +804,25 @@ Lmcount$lazy_ptr:
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#ifndef __64BIT__
|
||||
#define FRAMESLOT(X) (((X) * 4) + 8)
|
||||
#else
|
||||
#if _CALL_ELF == 2
|
||||
#define FRAMESLOT(X) (((X) * 8) + 96)
|
||||
#else
|
||||
#define FRAMESLOT(X) (((X) * 8) + 112)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(OS_AIX) || defined(OS_DARWIN)
|
||||
#ifndef __64BIT__
|
||||
#define FRAMESLOT(X) (((X) * 4) + 56)
|
||||
#else
|
||||
#define FRAMESLOT(X) (((X) * 8) + 112)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -152,6 +152,10 @@
|
||||
#define SOMATCOPY_K_RN somatcopy_k_rn
|
||||
#define SOMATCOPY_K_CT somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT somatcopy_k_rt
|
||||
#define SIMATCOPY_K_CN simatcopy_k_cn
|
||||
#define SIMATCOPY_K_RN simatcopy_k_rn
|
||||
#define SIMATCOPY_K_CT simatcopy_k_ct
|
||||
#define SIMATCOPY_K_RT simatcopy_k_rt
|
||||
|
||||
#define SGEADD_K sgeadd_k
|
||||
|
||||
@@ -274,6 +278,10 @@
|
||||
#define SOMATCOPY_K_RN gotoblas -> somatcopy_k_rn
|
||||
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
|
||||
#define SIMATCOPY_K_CN gotoblas -> simatcopy_k_cn
|
||||
#define SIMATCOPY_K_RN gotoblas -> simatcopy_k_rn
|
||||
#define SIMATCOPY_K_CT gotoblas -> simatcopy_k_ct
|
||||
#define SIMATCOPY_K_RT gotoblas -> simatcopy_k_rt
|
||||
|
||||
#define SGEADD_K gotoblas -> sgeadd_k
|
||||
|
||||
|
||||
@@ -58,6 +58,7 @@ static void __inline blas_lock(volatile unsigned long *address){
|
||||
: "memory");
|
||||
} while (ret);
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static __inline unsigned long rpcc(void){
|
||||
unsigned long clocks;
|
||||
@@ -66,6 +67,7 @@ static __inline unsigned long rpcc(void){
|
||||
|
||||
return clocks;
|
||||
};
|
||||
#define RPCC_DEFINED
|
||||
|
||||
#ifdef __64BIT__
|
||||
#define RPCC64BIT
|
||||
|
||||
@@ -65,7 +65,6 @@ extern int blas_omp_linked;
|
||||
#define BLAS_XDOUBLE 0x0002U
|
||||
#define BLAS_REAL 0x0000U
|
||||
#define BLAS_COMPLEX 0x0004U
|
||||
#define BLAS_INTEGER 0x0008U
|
||||
|
||||
#define BLAS_TRANSA 0x0030U /* 2bit */
|
||||
#define BLAS_TRANSA_N 0x0000U
|
||||
|
||||
37
common_x86.h
37
common_x86.h
@@ -56,41 +56,67 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
// use intrinsic instead of inline assembly
|
||||
ret = _InterlockedExchange(address, 1);
|
||||
// inline assembly
|
||||
/*__asm {
|
||||
mov eax, address
|
||||
mov ebx, 1
|
||||
xchg [eax], ebx
|
||||
mov ret, ebx
|
||||
}*/
|
||||
#else
|
||||
__asm__ __volatile__(
|
||||
"xchgl %0, %1\n"
|
||||
: "=r"(ret), "=m"(*address)
|
||||
: "0"(1), "m"(*address)
|
||||
: "memory");
|
||||
#endif
|
||||
|
||||
} while (ret);
|
||||
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static __inline unsigned long long rpcc(void){
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
return __rdtsc(); // use MSVC intrinsic
|
||||
#else
|
||||
unsigned int a, d;
|
||||
|
||||
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
|
||||
|
||||
return ((unsigned long long)a + ((unsigned long long)d << 32));
|
||||
#endif
|
||||
};
|
||||
#define RPCC_DEFINED
|
||||
|
||||
static __inline unsigned long getstackaddr(void){
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
return (unsigned long)_ReturnAddress(); // use MSVC intrinsic
|
||||
#else
|
||||
unsigned long addr;
|
||||
|
||||
__asm__ __volatile__ ("mov %%esp, %0"
|
||||
: "=r"(addr) : : "memory");
|
||||
|
||||
return addr;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
static __inline long double sqrt_long(long double val) {
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
return sqrt(val); // not sure if this will use fsqrt
|
||||
#else
|
||||
long double result;
|
||||
|
||||
__asm__ __volatile__ ("fldt %1\n"
|
||||
"fsqrt\n"
|
||||
"fstpt %0\n" : "=m" (result) : "m"(val));
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define SQRT(a) sqrt_long(a)
|
||||
@@ -100,7 +126,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
|
||||
|
||||
#define WHEREAMI
|
||||
|
||||
static inline int WhereAmI(void){
|
||||
static __inline int WhereAmI(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
int apicid;
|
||||
|
||||
@@ -146,9 +172,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
|
||||
y = blas_quick_divide_table[y];
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
(void*)result;
|
||||
return x*y;
|
||||
#else
|
||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
||||
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -284,8 +315,12 @@ REALNAME:
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#ifdef __clang__
|
||||
#define EPILOGUE .end
|
||||
#else
|
||||
#define EPILOGUE .end REALNAME
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
|
||||
#define PROLOGUE \
|
||||
|
||||
@@ -41,6 +41,10 @@
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
#ifdef C_MSVC
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef C_SUN
|
||||
#define __asm__ __asm
|
||||
#define __volatile__
|
||||
@@ -61,30 +65,45 @@
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
#ifndef C_MSVC
|
||||
int ret;
|
||||
#else
|
||||
BLASULONG ret;
|
||||
#endif
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
#ifndef C_MSVC
|
||||
__asm__ __volatile__(
|
||||
"xchgl %0, %1\n"
|
||||
: "=r"(ret), "=m"(*address)
|
||||
: "0"(1), "m"(*address)
|
||||
: "memory");
|
||||
|
||||
#else
|
||||
ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
|
||||
#endif
|
||||
} while (ret);
|
||||
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static __inline BLASULONG rpcc(void){
|
||||
#ifdef C_MSVC
|
||||
return __rdtsc();
|
||||
#else
|
||||
BLASULONG a, d;
|
||||
|
||||
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
|
||||
|
||||
return ((BLASULONG)a + ((BLASULONG)d << 32));
|
||||
#endif
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
#define RPCC64BIT
|
||||
|
||||
#ifndef C_MSVC
|
||||
static __inline BLASULONG getstackaddr(void){
|
||||
BLASULONG addr;
|
||||
|
||||
@@ -93,22 +112,32 @@ static __inline BLASULONG getstackaddr(void){
|
||||
|
||||
return addr;
|
||||
}
|
||||
#endif
|
||||
|
||||
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
|
||||
#ifdef C_MSVC
|
||||
int cpuinfo[4];
|
||||
__cpuid(cpuinfo, op);
|
||||
*eax=cpuinfo[0];
|
||||
*ebx=cpuinfo[1];
|
||||
*ecx=cpuinfo[2];
|
||||
*edx=cpuinfo[3];
|
||||
#else
|
||||
__asm__ __volatile__("cpuid"
|
||||
: "=a" (*eax),
|
||||
"=b" (*ebx),
|
||||
"=c" (*ecx),
|
||||
"=d" (*edx)
|
||||
: "0" (op));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
#define WHEREAMI
|
||||
*/
|
||||
|
||||
static inline int WhereAmI(void){
|
||||
static __inline int WhereAmI(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
int apicid;
|
||||
|
||||
@@ -150,10 +179,14 @@ static inline int WhereAmI(void){
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef USE64BITINT
|
||||
#if defined(USE64BITINT)
|
||||
static __inline blasint blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
#elif defined (C_MSVC)
|
||||
static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
|
||||
return x / y;
|
||||
}
|
||||
#else
|
||||
extern unsigned int blas_quick_divide_table[];
|
||||
|
||||
|
||||
18
common_z.h
18
common_z.h
@@ -220,6 +220,15 @@
|
||||
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
|
||||
|
||||
#define ZIMATCOPY_K_CN zimatcopy_k_cn
|
||||
#define ZIMATCOPY_K_RN zimatcopy_k_rn
|
||||
#define ZIMATCOPY_K_CT zimatcopy_k_ct
|
||||
#define ZIMATCOPY_K_RT zimatcopy_k_rt
|
||||
#define ZIMATCOPY_K_CNC zimatcopy_k_cnc
|
||||
#define ZIMATCOPY_K_RNC zimatcopy_k_rnc
|
||||
#define ZIMATCOPY_K_CTC zimatcopy_k_ctc
|
||||
#define ZIMATCOPY_K_RTC zimatcopy_k_rtc
|
||||
|
||||
#define ZGEADD_K zgeadd_k
|
||||
|
||||
#else
|
||||
@@ -404,6 +413,15 @@
|
||||
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
|
||||
|
||||
#define ZIMATCOPY_K_CN gotoblas -> zimatcopy_k_cn
|
||||
#define ZIMATCOPY_K_RN gotoblas -> zimatcopy_k_rn
|
||||
#define ZIMATCOPY_K_CT gotoblas -> zimatcopy_k_ct
|
||||
#define ZIMATCOPY_K_RT gotoblas -> zimatcopy_k_rt
|
||||
#define ZIMATCOPY_K_CNC gotoblas -> zimatcopy_k_cnc
|
||||
#define ZIMATCOPY_K_RNC gotoblas -> zimatcopy_k_rnc
|
||||
#define ZIMATCOPY_K_CTC gotoblas -> zimatcopy_k_ctc
|
||||
#define ZIMATCOPY_K_RTC gotoblas -> zimatcopy_k_rtc
|
||||
|
||||
#define ZGEADD_K gotoblas -> zgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
6
cpuid.h
6
cpuid.h
@@ -39,6 +39,10 @@
|
||||
#ifndef CPUID_H
|
||||
#define CPUID_H
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||
#define INTEL_AMD
|
||||
#endif
|
||||
|
||||
#define VENDOR_INTEL 1
|
||||
#define VENDOR_UMC 2
|
||||
#define VENDOR_AMD 3
|
||||
@@ -59,7 +63,7 @@
|
||||
#define FAMILY_PM 7
|
||||
#define FAMILY_IA64 8
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#ifdef INTEL_AMD
|
||||
#define GET_EXFAMILY 1
|
||||
#define GET_EXMODEL 2
|
||||
#define GET_TYPE 3
|
||||
|
||||
@@ -192,6 +192,7 @@ void get_cpuconfig(void)
|
||||
{
|
||||
case CPU_CORTEXA9:
|
||||
printf("#define CORTEXA9\n");
|
||||
printf("#define ARMV7\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
@@ -207,6 +208,7 @@ void get_cpuconfig(void)
|
||||
|
||||
case CPU_CORTEXA15:
|
||||
printf("#define CORTEXA15\n");
|
||||
printf("#define ARMV7\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
|
||||
@@ -115,6 +115,7 @@ int detect(void){
|
||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
|
||||
|
||||
76
cpuid_x86.c
76
cpuid_x86.c
@@ -40,6 +40,12 @@
|
||||
#include <string.h>
|
||||
#include "cpuid.h"
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#define C_INLINE __inline
|
||||
#else
|
||||
#define C_INLINE inline
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef NO_AVX
|
||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
|
||||
@@ -53,12 +59,26 @@
|
||||
#endif
|
||||
*/
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
|
||||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
|
||||
{
|
||||
int cpuInfo[4] = {-1};
|
||||
__cpuid(cpuInfo, op);
|
||||
*eax = cpuInfo[0];
|
||||
*ebx = cpuInfo[1];
|
||||
*ecx = cpuInfo[2];
|
||||
*edx = cpuInfo[3];
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#ifndef CPUIDEMU
|
||||
|
||||
#if defined(__APPLE__) && defined(__i386__)
|
||||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
|
||||
#else
|
||||
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
__asm__ __volatile__
|
||||
("mov %%ebx, %%edi;"
|
||||
@@ -115,14 +135,16 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
|
||||
|
||||
#endif
|
||||
|
||||
static inline int have_cpuid(void){
|
||||
#endif // _MSC_VER
|
||||
|
||||
static C_INLINE int have_cpuid(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
|
||||
cpuid(0, &eax, &ebx, &ecx, &edx);
|
||||
return eax;
|
||||
}
|
||||
|
||||
static inline int have_excpuid(void){
|
||||
static C_INLINE int have_excpuid(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
|
||||
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
|
||||
@@ -130,10 +152,14 @@ static inline int have_excpuid(void){
|
||||
}
|
||||
|
||||
#ifndef NO_AVX
|
||||
static inline void xgetbv(int op, int * eax, int * edx){
|
||||
static C_INLINE void xgetbv(int op, int * eax, int * edx){
|
||||
//Use binary code for xgetbv
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
*eax = __xgetbv(op);
|
||||
#else
|
||||
__asm__ __volatile__
|
||||
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1133,6 +1159,16 @@ int get_cpuname(void){
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 14:
|
||||
//Skylake
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
@@ -1147,6 +1183,17 @@ int get_cpuname(void){
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 5:
|
||||
case 14:
|
||||
// Skylake
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
@@ -1608,6 +1655,16 @@ int get_coretype(void){
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 14:
|
||||
//Skylake
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
@@ -1622,6 +1679,17 @@ int get_coretype(void){
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 5:
|
||||
case 14:
|
||||
// Skylake
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
|
||||
46
ctest/CMakeLists.txt
Normal file
46
ctest/CMakeLists.txt
Normal file
@@ -0,0 +1,46 @@
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
|
||||
enable_language(Fortran)
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS")
|
||||
|
||||
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh
|
||||
"$1 < $2\n"
|
||||
)
|
||||
|
||||
foreach(float_type ${FLOAT_TYPES})
|
||||
string(SUBSTRING ${float_type} 0 1 float_char_upper)
|
||||
string(TOLOWER ${float_char_upper} float_char)
|
||||
#level1
|
||||
add_executable(x${float_char}cblat1
|
||||
c_${float_char}blat1.f
|
||||
c_${float_char}blas1.c)
|
||||
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static)
|
||||
add_test(NAME "x${float_char}cblat1"
|
||||
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1")
|
||||
|
||||
#level2
|
||||
add_executable(x${float_char}cblat2
|
||||
c_${float_char}blat2.f
|
||||
c_${float_char}blas2.c
|
||||
c_${float_char}2chke.c
|
||||
auxiliary.c
|
||||
c_xerbla.c
|
||||
constant.c)
|
||||
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static)
|
||||
add_test(NAME "x${float_char}cblat2"
|
||||
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2")
|
||||
|
||||
#level3
|
||||
add_executable(x${float_char}cblat3
|
||||
c_${float_char}blat3.f
|
||||
c_${float_char}blas3.c
|
||||
c_${float_char}3chke.c
|
||||
auxiliary.c
|
||||
c_xerbla.c
|
||||
constant.c)
|
||||
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static)
|
||||
add_test(NAME "x${float_char}cblat3"
|
||||
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3")
|
||||
|
||||
endforeach()
|
||||
203
driver/level2/CMakeLists.txt
Normal file
203
driver/level2/CMakeLists.txt
Normal file
@@ -0,0 +1,203 @@
|
||||
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
|
||||
# sources that need to be compiled twice, once with no flags and once with LOWER
|
||||
set(UL_SOURCES
|
||||
sbmv_k.c
|
||||
spmv_k.c
|
||||
spr_k.c
|
||||
spr2_k.c
|
||||
syr_k.c
|
||||
syr2_k.c
|
||||
)
|
||||
|
||||
# sources that need to be compiled several times, for UNIT, TRANSA
|
||||
set(U_SOURCES
|
||||
trmv_U.c
|
||||
tbmv_U.c
|
||||
tbsv_U.c
|
||||
tpmv_U.c
|
||||
tpsv_U.c
|
||||
trsv_U.c
|
||||
)
|
||||
|
||||
set(L_SOURCES
|
||||
trmv_L.c
|
||||
tbmv_L.c
|
||||
tbsv_L.c
|
||||
tpmv_L.c
|
||||
tpsv_L.c
|
||||
trsv_L.c
|
||||
)
|
||||
|
||||
set(UL_SMP_SOURCES
|
||||
symv_thread.c
|
||||
syr_thread.c
|
||||
syr2_thread.c
|
||||
spr_thread.c
|
||||
spr2_thread.c
|
||||
spmv_thread.c
|
||||
sbmv_thread.c
|
||||
)
|
||||
|
||||
set(NU_SMP_SOURCES
|
||||
trmv_thread.c
|
||||
tpmv_thread.c
|
||||
tbmv_thread.c
|
||||
)
|
||||
|
||||
set(ULVM_COMPLEX_SOURCES
|
||||
hbmv_k.c
|
||||
hpmv_k.c
|
||||
hpr_k.c
|
||||
hpr2_k.c
|
||||
her_k.c
|
||||
her2_k.c
|
||||
)
|
||||
|
||||
# objects that need LOWER set
|
||||
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)
|
||||
|
||||
# gbmv uses a lowercase n and t
|
||||
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
|
||||
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
|
||||
# c/zgbmv
|
||||
GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2)
|
||||
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2)
|
||||
GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2)
|
||||
GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2)
|
||||
GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2)
|
||||
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)
|
||||
|
||||
# special defines for complex
|
||||
foreach (float_type ${FLOAT_TYPES})
|
||||
|
||||
if (SMP)
|
||||
GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t" false "" "" false ${float_type})
|
||||
endif ()
|
||||
|
||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||
|
||||
foreach (u_source ${U_SOURCES})
|
||||
string(REGEX MATCH "[a-z]+" op_name ${u_source})
|
||||
GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NU" false ${float_type})
|
||||
GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TL" false ${float_type})
|
||||
GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RU" false ${float_type})
|
||||
GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CL" false ${float_type})
|
||||
endforeach ()
|
||||
|
||||
foreach (l_source ${L_SOURCES})
|
||||
string(REGEX MATCH "[a-z]+" op_name ${l_source})
|
||||
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NL" false ${float_type})
|
||||
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TU" false ${float_type})
|
||||
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RL" false ${float_type})
|
||||
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
|
||||
endforeach ()
|
||||
|
||||
foreach (ulvm_source ${ULVM_COMPLEX_SOURCES})
|
||||
string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source})
|
||||
GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
|
||||
endforeach()
|
||||
|
||||
if (SMP)
|
||||
|
||||
GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gemv_thread.c" "XCONJ" "gemv_thread_o" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gemv_thread.c" "XCONJ;TRANSA" "gemv_thread_u" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ" "gemv_thread_s" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ;TRANSA" "gemv_thread_d" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("gbmv_thread.c" "CONJ" "gbmv_thread_r" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gbmv_thread.c" "CONJ;TRANSA" "gbmv_thread_c" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gbmv_thread.c" "XCONJ" "gbmv_thread_o" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gbmv_thread.c" "XCONJ;TRANSA" "gbmv_thread_u" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ" "gbmv_thread_s" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ;TRANSA" "gbmv_thread_d" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("ger_thread.c" "" "ger_thread_U" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("ger_thread.c" "CONJ" "ger_thread_C" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("syr_thread.c" "HERREV" "her_thread_V" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("syr_thread.c" "LOWER;HERREV" "her_thread_M" false "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("syr2_thread.c" "HER" "her2_thread_U" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("syr2_thread.c" "HER;LOWER" "her2_thread_L" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("syr2_thread.c" "HERREV" "her2_thread_V" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("syr2_thread.c" "LOWER;HERREV" "her2_thread_M" false "" "" false ${float_type})
|
||||
|
||||
foreach (nu_smp_src ${NU_SMP_SOURCES})
|
||||
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
|
||||
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})
|
||||
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=2" 0 "${op_name}_T" false ${float_type})
|
||||
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "${op_name}_R" false ${float_type})
|
||||
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "${op_name}_C" false ${float_type})
|
||||
endforeach ()
|
||||
endif ()
|
||||
|
||||
else ()
|
||||
# For real number functions
|
||||
foreach (u_source ${U_SOURCES})
|
||||
string(REGEX MATCH "[a-z]+" op_name ${u_source})
|
||||
GenerateCombinationObjects("${u_source}" "UNIT" "N" "" 0 "${op_name}_NU" false ${float_type})
|
||||
GenerateCombinationObjects("${u_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TL" false ${float_type})
|
||||
endforeach ()
|
||||
|
||||
foreach (l_source ${L_SOURCES})
|
||||
string(REGEX MATCH "[a-z]+" op_name ${l_source})
|
||||
GenerateCombinationObjects("${l_source}" "UNIT" "N" "" 0 "${op_name}_NL" false ${float_type})
|
||||
GenerateCombinationObjects("${l_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TU" false ${float_type})
|
||||
endforeach ()
|
||||
|
||||
if (SMP)
|
||||
GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type})
|
||||
foreach(nu_smp_source ${NU_SMP_SOURCES})
|
||||
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_source})
|
||||
GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "" 0 "${op_name}_N" false ${float_type})
|
||||
GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "TRANSA" 0 "${op_name}_T" false ${float_type})
|
||||
endforeach()
|
||||
endif ()
|
||||
endif ()
|
||||
endforeach ()
|
||||
|
||||
if (SMP)
|
||||
GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
|
||||
endif ()
|
||||
|
||||
add_library(driver_level2 OBJECT ${OPENBLAS_SRC})
|
||||
@@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
#ifndef COMPLEX
|
||||
FLOAT result;
|
||||
#else
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
#ifndef COMPLEX
|
||||
FLOAT result;
|
||||
#else
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
|
||||
a = (FLOAT *)args -> a;
|
||||
|
||||
@@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
#ifndef COMPLEX
|
||||
FLOAT result;
|
||||
#else
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
|
||||
a = (FLOAT *)args -> a;
|
||||
|
||||
@@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
#ifndef COMPLEX
|
||||
FLOAT result;
|
||||
#else
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
#ifndef COMPLEX
|
||||
FLOAT result;
|
||||
#else
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
#ifndef COMPLEX
|
||||
FLOAT result;
|
||||
#else
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
|
||||
FLOAT *bufferY = gemvbuffer;
|
||||
FLOAT *bufferX = gemvbuffer;
|
||||
#ifdef TRANS
|
||||
FLOAT _Complex temp;
|
||||
OPENBLAS_COMPLEX_FLOAT temp;
|
||||
#endif
|
||||
|
||||
if (incy != 1) {
|
||||
|
||||
@@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
FLOAT *bufferX = sbmvbuffer;
|
||||
FLOAT temp[2];
|
||||
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||
@@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
||||
|
||||
if (length > 0) {
|
||||
FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
||||
result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
@@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
||||
|
||||
if (length > 0) {
|
||||
FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
||||
result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
@@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
||||
|
||||
if (length > 0) {
|
||||
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
||||
result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
@@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
||||
|
||||
if (length > 0) {
|
||||
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
||||
result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
|
||||
@@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||
FLOAT *bufferX = gemvbuffer;
|
||||
FLOAT temp[2];
|
||||
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
@@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||
#ifndef HEMVREV
|
||||
#ifndef LOWER
|
||||
if (i > 0) {
|
||||
FLOAT _Complex result = DOTC_K(i, a, 1, X, 1);
|
||||
result = DOTC_K(i, a, 1, X, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
@@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||
#else
|
||||
|
||||
if (m - i > 1) {
|
||||
FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
|
||||
result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
@@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||
#else
|
||||
#ifndef LOWER
|
||||
if (i > 0) {
|
||||
FLOAT _Complex result = DOTU_K(i, a, 1, X, 1);
|
||||
result = DOTU_K(i, a, 1, X, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
@@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||
#else
|
||||
|
||||
if (m - i > 1) {
|
||||
FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
|
||||
result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
|
||||
@@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
FLOAT *bufferY = sbmvbuffer;
|
||||
FLOAT *bufferX = sbmvbuffer;
|
||||
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||
@@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);
|
||||
|
||||
if (length > 0) {
|
||||
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
||||
result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
@@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
a, 1, Y + i * COMPSIZE, 1, NULL, 0);
|
||||
|
||||
if (length > 0) {
|
||||
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
||||
result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
||||
|
||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||
|
||||
@@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
||||
FLOAT *bufferY = gemvbuffer;
|
||||
FLOAT *bufferX = gemvbuffer;
|
||||
FLOAT _Complex result;
|
||||
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
|
||||
@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||
FLOAT *B = b;
|
||||
BLASLONG length;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex temp;
|
||||
OPENBLAS_COMPLEX_FLOAT temp;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||
|
||||
@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||
FLOAT *B = b;
|
||||
BLASLONG length;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex temp;
|
||||
OPENBLAS_COMPLEX_FLOAT temp;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||
|
||||
@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||
FLOAT *B = b;
|
||||
BLASLONG length;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex temp;
|
||||
OPENBLAS_COMPLEX_FLOAT temp;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT ar, ai, br, bi, ratio, den;
|
||||
|
||||
@@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||
FLOAT *B = b;
|
||||
BLASLONG length;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex temp;
|
||||
OPENBLAS_COMPLEX_FLOAT temp;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT ar, ai, br, bi, ratio, den;
|
||||
|
||||
@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
BLASLONG i;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex temp;
|
||||
OPENBLAS_COMPLEX_FLOAT temp;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||
|
||||
@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
BLASLONG i;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex temp;
|
||||
OPENBLAS_COMPLEX_FLOAT temp;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||
|
||||
@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
BLASLONG i;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT ar, ai, br, bi, ratio, den;
|
||||
|
||||
@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
BLASLONG i;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT ar, ai, br, bi, ratio, den;
|
||||
|
||||
@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
|
||||
|
||||
BLASLONG i, is, min_i;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex temp;
|
||||
OPENBLAS_COMPLEX_FLOAT temp;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||
|
||||
@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
|
||||
|
||||
BLASLONG i, is, min_i;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex temp;
|
||||
OPENBLAS_COMPLEX_FLOAT temp;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||
|
||||
@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
|
||||
|
||||
BLASLONG i, is, min_i;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT ar, ai, br, bi, ratio, den;
|
||||
|
||||
@@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
|
||||
|
||||
BLASLONG i, is, min_i;
|
||||
#if (TRANSA == 2) || (TRANSA == 4)
|
||||
FLOAT _Complex result;
|
||||
OPENBLAS_COMPLEX_FLOAT result;
|
||||
#endif
|
||||
#ifndef UNIT
|
||||
FLOAT ar, ai, br, bi, ratio, den;
|
||||
|
||||
115
driver/level3/CMakeLists.txt
Normal file
115
driver/level3/CMakeLists.txt
Normal file
@@ -0,0 +1,115 @@
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
|
||||
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa
|
||||
|
||||
# loop through gemm.c defines
|
||||
set(GEMM_DEFINES NN NT TN TT)
|
||||
set(GEMM_COMPLEX_DEFINES RN CN RT CT NR TR RR CR NC TC RC CC)
|
||||
foreach (GEMM_DEFINE ${GEMM_DEFINES})
|
||||
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
|
||||
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0)
|
||||
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0)
|
||||
endif ()
|
||||
endforeach ()
|
||||
|
||||
|
||||
set(TRMM_TRSM_SOURCES
|
||||
trmm_L.c
|
||||
trmm_R.c
|
||||
trsm_L.c
|
||||
trsm_R.c)
|
||||
|
||||
foreach(trmm_trsm_source ${TRMM_TRSM_SOURCES})
|
||||
string(REGEX MATCH "[a-z]+_[A-Z]+" op_name ${trmm_trsm_source})
|
||||
GenerateCombinationObjects("${trmm_trsm_source}" "UPPER;UNIT" "L;N" "" 0 "${op_name}N")
|
||||
GenerateCombinationObjects("${trmm_trsm_source}" "UPPER;UNIT" "L;N" "TRANSA" 0 "${op_name}T")
|
||||
endforeach()
|
||||
|
||||
GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "NN" 1)
|
||||
GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "" 1)
|
||||
GenerateCombinationObjects("syr2k_k.c" "LOWER;TRANS" "U;N" "" 1)
|
||||
GenerateCombinationObjects("syrk_kernel.c" "LOWER" "U" "" 2)
|
||||
GenerateCombinationObjects("syr2k_kernel.c" "LOWER" "U" "" 2)
|
||||
if (SMP)
|
||||
|
||||
# N.B. these do NOT have a float type (e.g. DOUBLE) defined!
|
||||
GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" 0 "" "" 1)
|
||||
|
||||
if (NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||
GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "THREADED_LEVEL3" 2 "syrk_thread")
|
||||
GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "THREADED_LEVEL3;NN" 2 "symm_thread")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
foreach (float_type ${FLOAT_TYPES})
|
||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||
GenerateCombinationObjects("zherk_kernel.c" "LOWER;CONJ" "U;N" "HERK" 2 "herk_kernel" false ${float_type})
|
||||
# TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination
|
||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type})
|
||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type})
|
||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type})
|
||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type})
|
||||
# Need to set CONJ for trmm and trsm
|
||||
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type})
|
||||
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type})
|
||||
GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_RR" false ${float_type})
|
||||
GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_RC" false ${float_type})
|
||||
GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_LR" false ${float_type})
|
||||
GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_LC" false ${float_type})
|
||||
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type})
|
||||
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_RC" false ${float_type})
|
||||
|
||||
#hemm
|
||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type})
|
||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type})
|
||||
|
||||
#her2k
|
||||
GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type})
|
||||
GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
|
||||
|
||||
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||
#hemm
|
||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
|
||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
|
||||
#her2k
|
||||
GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
|
||||
endif()
|
||||
|
||||
# special gemm defines for complex
|
||||
foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
|
||||
string(TOLOWER ${gemm_define} gemm_define_LC)
|
||||
GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type})
|
||||
if(USE_GEMM3M)
|
||||
GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type})
|
||||
endif()
|
||||
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type})
|
||||
if(USE_GEMM3M)
|
||||
GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type})
|
||||
endif()
|
||||
endif ()
|
||||
endforeach ()
|
||||
endif ()
|
||||
endforeach ()
|
||||
|
||||
#HPLOBJS =
|
||||
# dgemm_nn.c dgemm_nt.c dgemm_tn.c dgemm_tt.c
|
||||
# dtrsm_LNUU.c dtrsm_LNUN.c dtrsm_LNLU.c dtrsm_LNLN.c
|
||||
# dtrsm_LTUU.c dtrsm_LTUN.c dtrsm_LTLU.c dtrsm_LTLN.c
|
||||
# dtrsm_RNUU.c dtrsm_RNUN.c dtrsm_RNLU.c dtrsm_RNLN.c
|
||||
# dtrsm_RTUU.c dtrsm_RTUN.c dtrsm_RTLU.c dtrsm_RTLN.c
|
||||
#
|
||||
#if (USE_SIMPLE_THREADED_LEVEL3)
|
||||
# HPLOBJS += dgemm_thread_nn.c dgemm_thread_nt.c
|
||||
# dgemm_thread_tn.c dgemm_thread_tt.c
|
||||
#endif
|
||||
#
|
||||
|
||||
add_library(driver_level3 OBJECT ${OPENBLAS_SRC})
|
||||
@@ -47,7 +47,7 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
|
||||
static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
|
||||
|
||||
BLASLONG i;
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
|
||||
static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
|
||||
|
||||
BLASLONG i;
|
||||
|
||||
|
||||
@@ -70,6 +70,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
BLASLONG ls, is, js;
|
||||
BLASLONG min_l, min_i, min_j;
|
||||
BLASLONG jjs, min_jj;
|
||||
#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA)))
|
||||
BLASLONG start_ls;
|
||||
#endif
|
||||
|
||||
m = args -> m;
|
||||
n = args -> n;
|
||||
@@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
}
|
||||
|
||||
#else
|
||||
BLASLONG start_ls;
|
||||
|
||||
for(js = n; js > 0; js -= GEMM_R){
|
||||
min_j = js;
|
||||
if (min_j > GEMM_R) min_j = GEMM_R;
|
||||
|
||||
@@ -76,6 +76,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
BLASLONG ls, is, js;
|
||||
BLASLONG min_l, min_i, min_j;
|
||||
BLASLONG jjs, min_jj;
|
||||
#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA)))
|
||||
BLASLONG start_is;
|
||||
#endif
|
||||
|
||||
m = args -> m;
|
||||
n = args -> n;
|
||||
@@ -178,8 +181,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
}
|
||||
}
|
||||
#else
|
||||
BLASLONG start_is;
|
||||
|
||||
for(ls = m; ls > 0; ls -= GEMM_Q){
|
||||
min_l = ls;
|
||||
if (min_l > GEMM_Q) min_l = GEMM_Q;
|
||||
|
||||
@@ -75,6 +75,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
BLASLONG ls, is, js;
|
||||
BLASLONG min_l, min_i, min_j;
|
||||
BLASLONG jjs, min_jj;
|
||||
#if !((defined(UPPER) && !defined(TRANSA)) || (!defined(UPPER) && defined(TRANSA)))
|
||||
BLASLONG start_ls;
|
||||
#endif
|
||||
|
||||
m = args -> m;
|
||||
n = args -> n;
|
||||
@@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||
}
|
||||
|
||||
#else
|
||||
BLASLONG start_ls;
|
||||
|
||||
for(js = n; js > 0; js -= GEMM_R){
|
||||
min_j = js;
|
||||
if (min_j > GEMM_R) min_j = GEMM_R;
|
||||
|
||||
75
driver/others/CMakeLists.txt
Normal file
75
driver/others/CMakeLists.txt
Normal file
@@ -0,0 +1,75 @@
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
|
||||
if (${CORE} STREQUAL "PPC440")
|
||||
set(MEMORY memory_qalloc.c)
|
||||
else ()
|
||||
set(MEMORY memory.c)
|
||||
endif ()
|
||||
|
||||
if (SMP)
|
||||
|
||||
if (USE_OPENMP)
|
||||
set(BLAS_SERVER blas_server_omp.c)
|
||||
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
set(BLAS_SERVER blas_server_win32.c)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED BLAS_SERVER)
|
||||
set(BLAS_SERVER blas_server.c)
|
||||
endif ()
|
||||
|
||||
set(SMP_SOURCES
|
||||
${BLAS_SERVER}
|
||||
divtable.c # TODO: Makefile has -UDOUBLE
|
||||
blas_l1_thread.c
|
||||
)
|
||||
|
||||
if (NOT NO_AFFINITY)
|
||||
list(APPEND SMP_SOURCES init.c)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
set(COMMON_SOURCES
|
||||
xerbla.c
|
||||
openblas_set_num_threads.c
|
||||
openblas_error_handle.c
|
||||
openblas_get_num_procs.c
|
||||
openblas_get_num_threads.c
|
||||
)
|
||||
|
||||
# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling
|
||||
GenerateNamedObjects("abs.c" "" "c_abs" 0 "" "" 1 )
|
||||
GenerateNamedObjects("abs.c" "DOUBLE" "z_abs" 0 "" "" 1)
|
||||
GenerateNamedObjects("openblas_get_config.c;openblas_get_parallel.c" "" "" 0 "" "" 1)
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
list(APPEND COMMON_SOURCES dynamic.c)
|
||||
else ()
|
||||
list(APPEND COMMON_SOURCES parameter.c)
|
||||
endif ()
|
||||
|
||||
#ifdef EXPRECISION
|
||||
#COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX)
|
||||
#endif
|
||||
#
|
||||
#ifdef QUAD_PRECISION
|
||||
#COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX)
|
||||
#endif
|
||||
#
|
||||
#ifdef USE_CUDA
|
||||
#COMMONOBJS += cuda_init.$(SUFFIX)
|
||||
#endif
|
||||
#
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#COMMONOBJS += profile.$(SUFFIX)
|
||||
#endif
|
||||
|
||||
#LIBOTHERS = libothers.$(LIBSUFFIX)
|
||||
|
||||
#ifeq ($(DYNAMIC_ARCH), 1)
|
||||
#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
|
||||
#else
|
||||
#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
|
||||
#endif
|
||||
|
||||
add_library(driver_others OBJECT ${OPENBLAS_SRC} ${MEMORY} ${SMP_SOURCES} ${COMMON_SOURCES})
|
||||
@@ -70,9 +70,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
/*********************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#ifdef OS_LINUX
|
||||
#if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID)
|
||||
#include <dlfcn.h>
|
||||
#include <signal.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#ifndef likely
|
||||
@@ -189,20 +191,6 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
|
||||
args -> b, args -> ldb,
|
||||
args -> c, args -> ldc, sb);
|
||||
} else
|
||||
#endif
|
||||
#ifdef INTEGER_PRECISION
|
||||
if (mode & BLAS_INTEGER){
|
||||
/* REAL / Extended Double */
|
||||
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, int,
|
||||
int *, BLASLONG, int *, BLASLONG,
|
||||
int *, BLASLONG, void *) = func;
|
||||
|
||||
afunc(args -> m, args -> n, args -> k,
|
||||
((int *)args -> alpha)[0],
|
||||
args -> a, args -> lda,
|
||||
args -> b, args -> ldb,
|
||||
args -> c, args -> ldc, sb);
|
||||
} else
|
||||
#endif
|
||||
if (mode & BLAS_DOUBLE){
|
||||
/* REAL / Double */
|
||||
@@ -279,7 +267,7 @@ int get_node(void);
|
||||
|
||||
static int increased_threads = 0;
|
||||
|
||||
static int blas_thread_server(void *arg){
|
||||
static void* blas_thread_server(void *arg){
|
||||
|
||||
/* Thread identifier */
|
||||
BLASLONG cpu = (BLASLONG)arg;
|
||||
@@ -439,6 +427,10 @@ static int blas_thread_server(void *arg){
|
||||
main_status[cpu] = MAIN_FINISH;
|
||||
#endif
|
||||
|
||||
// arm: make sure all results are written out _before_
|
||||
// thread is marked as done and other threads use them
|
||||
WMB;
|
||||
|
||||
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
|
||||
WMB;
|
||||
|
||||
@@ -468,7 +460,7 @@ static int blas_thread_server(void *arg){
|
||||
|
||||
//pthread_exit(NULL);
|
||||
|
||||
return 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef MONITOR
|
||||
@@ -575,14 +567,23 @@ int blas_thread_init(void){
|
||||
|
||||
#ifdef NEED_STACKATTR
|
||||
ret=pthread_create(&blas_threads[i], &attr,
|
||||
(void *)&blas_thread_server, (void *)i);
|
||||
&blas_thread_server, (void *)i);
|
||||
#else
|
||||
ret=pthread_create(&blas_threads[i], NULL,
|
||||
(void *)&blas_thread_server, (void *)i);
|
||||
&blas_thread_server, (void *)i);
|
||||
#endif
|
||||
if(ret!=0){
|
||||
fprintf(STDERR,"OpenBLAS: pthread_creat error in blas_thread_init function. Error code:%d\n",ret);
|
||||
exit(1);
|
||||
struct rlimit rlim;
|
||||
const char *msg = strerror(ret);
|
||||
fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg);
|
||||
if(0 == getrlimit(RLIMIT_NPROC, &rlim)) {
|
||||
fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC "
|
||||
"%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max));
|
||||
}
|
||||
if(0 != raise(SIGINT)) {
|
||||
fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -789,7 +790,12 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
|
||||
stop = rpcc();
|
||||
#endif
|
||||
|
||||
if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next);
|
||||
if ((num > 1) && queue -> next) {
|
||||
exec_blas_async_wait(num - 1, queue -> next);
|
||||
|
||||
// arm: make sure results from other threads are visible
|
||||
MB;
|
||||
}
|
||||
|
||||
#ifdef TIMING_DEBUG
|
||||
fprintf(STDERR, "Thread[0] : %16lu %16lu (%8lu cycles)\n",
|
||||
@@ -837,10 +843,10 @@ void goto_set_num_threads(int num_threads) {
|
||||
|
||||
#ifdef NEED_STACKATTR
|
||||
pthread_create(&blas_threads[i], &attr,
|
||||
(void *)&blas_thread_server, (void *)i);
|
||||
&blas_thread_server, (void *)i);
|
||||
#else
|
||||
pthread_create(&blas_threads[i], NULL,
|
||||
(void *)&blas_thread_server, (void *)i);
|
||||
&blas_thread_server, (void *)i);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -252,6 +252,15 @@ static gotoblas_t *get_coretype(void){
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
//Intel Skylake
|
||||
if (model == 14) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
case 5:
|
||||
//Intel Broadwell
|
||||
@@ -263,6 +272,15 @@ static gotoblas_t *get_coretype(void){
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
//Intel Skylake
|
||||
if (model == 14 || model == 5) {
|
||||
if(support_avx())
|
||||
return &gotoblas_HASWELL;
|
||||
else{
|
||||
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
case 0xf:
|
||||
|
||||
@@ -139,8 +139,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#define CONSTRUCTOR __cdecl
|
||||
#define DESTRUCTOR __cdecl
|
||||
#elif defined(OS_DARWIN) && defined(C_GCC)
|
||||
#define CONSTRUCTOR __attribute__ ((constructor))
|
||||
#define DESTRUCTOR __attribute__ ((destructor))
|
||||
#else
|
||||
#define CONSTRUCTOR __attribute__ ((constructor(101)))
|
||||
#define DESTRUCTOR __attribute__ ((destructor(101)))
|
||||
#endif
|
||||
|
||||
#ifdef DYNAMIC_ARCH
|
||||
gotoblas_t *gotoblas = NULL;
|
||||
@@ -795,12 +803,12 @@ static void *alloc_hugetlb(void *address){
|
||||
|
||||
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) {
|
||||
CloseHandle(hToken);
|
||||
return -1;
|
||||
return (void*)-1;
|
||||
}
|
||||
|
||||
if (AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) != TRUE) {
|
||||
CloseHandle(hToken);
|
||||
return -1;
|
||||
return (void*)-1;
|
||||
}
|
||||
|
||||
map_address = (void *)VirtualAlloc(address,
|
||||
@@ -1153,6 +1161,9 @@ void blas_memory_free(void *free_area){
|
||||
printf(" Position : %d\n", position);
|
||||
#endif
|
||||
|
||||
// arm: ensure all writes are finished before other thread takes this memory
|
||||
WMB;
|
||||
|
||||
memory[position].used = 0;
|
||||
|
||||
#ifdef DEBUG
|
||||
@@ -1399,6 +1410,28 @@ void DESTRUCTOR gotoblas_quit(void) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
|
||||
{
|
||||
switch (ul_reason_for_call)
|
||||
{
|
||||
case DLL_PROCESS_ATTACH:
|
||||
gotoblas_init();
|
||||
break;
|
||||
case DLL_THREAD_ATTACH:
|
||||
break;
|
||||
case DLL_THREAD_DETACH:
|
||||
break;
|
||||
case DLL_PROCESS_DETACH:
|
||||
gotoblas_quit();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
|
||||
/* Don't call me; this is just work around for PGI / Sun bug */
|
||||
void gotoblas_dummy_for_PGI(void) {
|
||||
|
||||
25
getarch.c
25
getarch.c
@@ -69,10 +69,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__)
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)
|
||||
#define OS_WINDOWS
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||
#define INTEL_AMD
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef OS_WINDOWS
|
||||
@@ -116,6 +120,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
/* #define FORCE_POWER4 */
|
||||
/* #define FORCE_POWER5 */
|
||||
/* #define FORCE_POWER6 */
|
||||
/* #define FORCE_POWER7 */
|
||||
/* #define FORCE_POWER8 */
|
||||
/* #define FORCE_PPCG4 */
|
||||
/* #define FORCE_PPC970 */
|
||||
/* #define FORCE_PPC970MP */
|
||||
@@ -546,7 +552,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define CORENAME "POWER5"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_POWER6
|
||||
#if defined(FORCE_POWER6) || defined(FORCE_POWER7) || defined(FORCE_POWER8)
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "POWER"
|
||||
#define SUBARCHITECTURE "POWER6"
|
||||
@@ -748,7 +754,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHITECTURE "ARM"
|
||||
#define SUBARCHITECTURE "CORTEXA9"
|
||||
#define SUBDIRNAME "arm"
|
||||
#define ARCHCONFIG "-DCORTEXA9 " \
|
||||
#define ARCHCONFIG "-DCORTEXA9 -DARMV7 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
@@ -763,7 +769,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHITECTURE "ARM"
|
||||
#define SUBARCHITECTURE "CORTEXA15"
|
||||
#define SUBDIRNAME "arm"
|
||||
#define ARCHCONFIG "-DCORTEXA15 " \
|
||||
#define ARCHCONFIG "-DCORTEXA15 -DARMV7 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
@@ -796,8 +802,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ARCHCONFIG "-DARMV5 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFP"
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||
#define LIBNAME "armv5"
|
||||
#define CORENAME "ARMV5"
|
||||
#else
|
||||
@@ -829,7 +834,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || (__x86_64__)
|
||||
#ifdef INTEL_AMD
|
||||
#include "cpuid_x86.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
@@ -924,7 +929,7 @@ int main(int argc, char *argv[]){
|
||||
#ifdef FORCE
|
||||
printf("CORE=%s\n", CORENAME);
|
||||
#else
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
|
||||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
|
||||
printf("CORE=%s\n", get_corename());
|
||||
#endif
|
||||
#endif
|
||||
@@ -944,7 +949,7 @@ int main(int argc, char *argv[]){
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#ifdef INTEL_AMD
|
||||
#ifndef FORCE
|
||||
get_sse();
|
||||
#else
|
||||
@@ -1024,7 +1029,7 @@ int main(int argc, char *argv[]){
|
||||
#ifdef FORCE
|
||||
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
|
||||
#else
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
|
||||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
|
||||
printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
|
||||
#endif
|
||||
#endif
|
||||
|
||||
166
interface/CMakeLists.txt
Normal file
166
interface/CMakeLists.txt
Normal file
@@ -0,0 +1,166 @@
|
||||
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
|
||||
|
||||
set(BLAS1_SOURCES
|
||||
copy.c
|
||||
nrm2.c
|
||||
)
|
||||
|
||||
set(BLAS1_REAL_ONLY_SOURCES
|
||||
rotm.c rotmg.c # N.B. these do not have complex counterparts
|
||||
rot.c
|
||||
asum.c
|
||||
)
|
||||
|
||||
# these will have 'z' prepended for the complex version
|
||||
set(BLAS1_MANGLED_SOURCES
|
||||
axpy.c swap.c
|
||||
scal.c
|
||||
dot.c
|
||||
rotg.c
|
||||
axpby.c
|
||||
)
|
||||
|
||||
# TODO: USE_NETLIB_GEMV should switch gemv.c to netlib/*gemv.f
|
||||
# these all have 'z' sources for complex versions
|
||||
set(BLAS2_SOURCES
|
||||
gemv.c ger.c
|
||||
trsv.c trmv.c symv.c
|
||||
syr.c syr2.c gbmv.c
|
||||
sbmv.c spmv.c
|
||||
spr.c spr2.c
|
||||
tbsv.c tbmv.c
|
||||
tpsv.c tpmv.c
|
||||
)
|
||||
|
||||
set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES
|
||||
hemv.c hbmv.c
|
||||
her.c her2.c
|
||||
hpmv.c hpr.c
|
||||
hpr2.c
|
||||
)
|
||||
|
||||
# these do not have separate 'z' sources
|
||||
set(BLAS3_SOURCES
|
||||
gemm.c symm.c
|
||||
trsm.c syrk.c syr2k.c
|
||||
)
|
||||
|
||||
set(BLAS3_MANGLED_SOURCES
|
||||
omatcopy.c imatcopy.c
|
||||
geadd.c
|
||||
)
|
||||
|
||||
# generate the BLAS objs once with and once without cblas
|
||||
set (CBLAS_FLAGS "")
|
||||
|
||||
if (NOT DEFINED NO_FBLAS)
|
||||
list(APPEND CBLAS_FLAGS 0)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED NO_CBLAS)
|
||||
list(APPEND CBLAS_FLAGS 1)
|
||||
endif ()
|
||||
|
||||
foreach (CBLAS_FLAG ${CBLAS_FLAGS})
|
||||
|
||||
# TODO: don't compile complex sources with cblas for now, the naming schemes are all different and they will have to be handled separately from SINGLE/DOUBLE
|
||||
set(DISABLE_COMPLEX 0)
|
||||
set(MANGLE_COMPLEX 3)
|
||||
if (CBLAS_FLAG EQUAL 1)
|
||||
# set(DISABLE_COMPLEX 1)
|
||||
# set(MANGLE_COMPLEX 1)
|
||||
endif ()
|
||||
GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
|
||||
GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1)
|
||||
GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
|
||||
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
|
||||
GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4)
|
||||
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
|
||||
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
|
||||
|
||||
#sdsdot, dsdot
|
||||
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
|
||||
GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
|
||||
|
||||
# trmm is trsm with a compiler flag set
|
||||
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})
|
||||
|
||||
# max and imax are compiled 4 times
|
||||
GenerateNamedObjects("max.c" "" "" ${CBLAS_FLAG})
|
||||
GenerateNamedObjects("max.c" "USE_ABS" "amax" ${CBLAS_FLAG})
|
||||
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG})
|
||||
GenerateNamedObjects("max.c" "USE_MIN" "min" ${CBLAS_FLAG})
|
||||
|
||||
GenerateNamedObjects("imax.c" "" "i*max" ${CBLAS_FLAG})
|
||||
GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" ${CBLAS_FLAG})
|
||||
GenerateNamedObjects("imax.c" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG})
|
||||
GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG})
|
||||
|
||||
|
||||
# complex-specific sources
|
||||
foreach (float_type ${FLOAT_TYPES})
|
||||
|
||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||
GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||
GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||
GenerateNamedObjects("zdot.c" "CONJ" "dotc" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||
GenerateNamedObjects("zdot.c" "" "dotu" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||
|
||||
GenerateNamedObjects("symm.c" "HEMM" "hemm" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||
GenerateNamedObjects("syrk.c" "HEMM" "herk" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||
GenerateNamedObjects("syr2k.c" "HEMM" "her2k" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||
|
||||
if (USE_GEMM3M)
|
||||
GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type})
|
||||
endif()
|
||||
endif ()
|
||||
if (${float_type} STREQUAL "COMPLEX")
|
||||
GenerateNamedObjects("zscal.c" "SSCAL" "sscal" ${CBLAS_FLAG} "" "" false "COMPLEX")
|
||||
GenerateNamedObjects("nrm2.c" "" "scnrm2" ${CBLAS_FLAG} "" "" true "COMPLEX")
|
||||
GenerateNamedObjects("zrot.c" "" "csrot" ${CBLAS_FLAG} "" "" true "COMPLEX")
|
||||
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" ${CBLAS_FLAG} "" "" true "COMPLEX")
|
||||
GenerateNamedObjects("max.c" "USE_ABS" "scamax" ${CBLAS_FLAG} "" "" true "COMPLEX")
|
||||
GenerateNamedObjects("asum.c" "" "scasum" ${CBLAS_FLAG} "" "" true "COMPLEX")
|
||||
endif ()
|
||||
if (${float_type} STREQUAL "ZCOMPLEX")
|
||||
GenerateNamedObjects("zscal.c" "SSCAL" "dscal" ${CBLAS_FLAG} "" "" false "ZCOMPLEX")
|
||||
GenerateNamedObjects("nrm2.c" "" "dznrm2" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
|
||||
GenerateNamedObjects("zrot.c" "" "zdrot" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
|
||||
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
|
||||
GenerateNamedObjects("max.c" "USE_ABS" "dzamax" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
|
||||
GenerateNamedObjects("asum.c" "" "dzasum" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
|
||||
endif ()
|
||||
endforeach ()
|
||||
|
||||
endforeach ()
|
||||
|
||||
#Special functions for CBLAS
|
||||
if (NOT DEFINED NO_CBLAS)
|
||||
foreach (float_type ${FLOAT_TYPES})
|
||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||
#cblas_dotc_sub cblas_dotu_sub
|
||||
GenerateNamedObjects("zdot.c" "FORCE_USE_STACK" "dotu_sub" 1 "" "" false ${float_type})
|
||||
GenerateNamedObjects("zdot.c" "FORCE_USE_STACK;CONJ" "dotc_sub" 1 "" "" false ${float_type})
|
||||
endif()
|
||||
endforeach ()
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_LAPACK)
|
||||
set(LAPACK_SOURCES
|
||||
lapack/gesv.c
|
||||
)
|
||||
|
||||
# prepend z for complex versions
|
||||
set(LAPACK_MANGLED_SOURCES
|
||||
lapack/getrf.c lapack/getrs.c lapack/potrf.c lapack/getf2.c
|
||||
lapack/potf2.c lapack/laswp.c lapack/lauu2.c
|
||||
lapack/lauum.c lapack/trti2.c lapack/trtri.c
|
||||
)
|
||||
|
||||
GenerateNamedObjects("${LAPACK_SOURCES}")
|
||||
GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3)
|
||||
endif ()
|
||||
|
||||
add_library(interface OBJECT ${OPENBLAS_SRC})
|
||||
@@ -253,15 +253,6 @@ XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef INTEGER_PRECISION
|
||||
|
||||
IBLAS1OBJS = \
|
||||
iaxpy.$(SUFFIX)
|
||||
|
||||
IBLAS2OBJS =
|
||||
IBLAS3OBJS =
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \
|
||||
@@ -352,9 +343,6 @@ CZBLAS3OBJS = \
|
||||
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
|
||||
cblas_zgeadd.$(SUFFIX)
|
||||
|
||||
CIBLAS1OBJS = \
|
||||
cblas_iaxpy.$(SUFFIX)
|
||||
|
||||
|
||||
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||
|
||||
@@ -384,10 +372,6 @@ ZBLAS1OBJS += $(CZBLAS1OBJS)
|
||||
ZBLAS2OBJS += $(CZBLAS2OBJS)
|
||||
ZBLAS3OBJS += $(CZBLAS3OBJS)
|
||||
|
||||
IBLAS1OBJS += $(CIBLAS1OBJS)
|
||||
IBLAS2OBJS += $(CIBLAS2OBJS)
|
||||
IBLAS3OBJS += $(CIBLAS3OBJS)
|
||||
|
||||
endif
|
||||
|
||||
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
|
||||
@@ -396,7 +380,6 @@ QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
|
||||
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
||||
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
||||
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
||||
IBLASOBJS = $(IBLAS1OBJS) $(IBLAS2OBJS) $(IBLAS3OBJS)
|
||||
|
||||
#SLAPACKOBJS = \
|
||||
# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
|
||||
@@ -475,10 +458,6 @@ ifdef QUAD_PRECISION
|
||||
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
endif
|
||||
|
||||
ifdef INTEGER_PRECISION
|
||||
FUNCOBJS += $(IBLASOBJS)
|
||||
endif
|
||||
|
||||
FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=)
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
@@ -497,18 +476,17 @@ endif
|
||||
clean ::
|
||||
@rm -f functable.h
|
||||
|
||||
level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) $(IBLAS1OBJS)
|
||||
level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) $(IBLAS2OBJS)
|
||||
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(IBLAS3OBJS)
|
||||
level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
$(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
|
||||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) \
|
||||
$(CIBLASOBJS) $(CIBLASOBJS_P) : override CFLAGS += -DCBLAS
|
||||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
|
||||
|
||||
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
@@ -747,9 +725,6 @@ saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c
|
||||
daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
iaxpy.$(SUFFIX) iaxpy.$(PSUFFIX) : axpy.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
@@ -1462,9 +1437,6 @@ cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c
|
||||
cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c
|
||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||
|
||||
cblas_iaxpy.$(SUFFIX) cblas_iaxpy.$(PSUFFIX) : axpy.c
|
||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||
|
||||
cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c
|
||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||
|
||||
|
||||
@@ -103,8 +103,6 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||
mode = BLAS_XDOUBLE | BLAS_REAL;
|
||||
#elif defined(DOUBLE)
|
||||
mode = BLAS_DOUBLE | BLAS_REAL;
|
||||
#elif defined(INTEGER)
|
||||
mode = BLAS_INTEGER | BLAS_REAL;
|
||||
#else
|
||||
mode = BLAS_SINGLE | BLAS_REAL;
|
||||
#endif
|
||||
|
||||
@@ -121,6 +121,9 @@ void NAME(char *TRANSA, char *TRANSB,
|
||||
FLOAT *sa, *sb;
|
||||
|
||||
#ifdef SMP
|
||||
int nthreads_max;
|
||||
int nthreads_avail;
|
||||
double MNK;
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_REAL;
|
||||
@@ -237,6 +240,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||
XFLOAT *sa, *sb;
|
||||
|
||||
#ifdef SMP
|
||||
int nthreads_max;
|
||||
int nthreads_avail;
|
||||
double MNK;
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
int mode = BLAS_XDOUBLE | BLAS_REAL;
|
||||
@@ -400,15 +406,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||
mode |= (transa << BLAS_TRANSA_SHIFT);
|
||||
mode |= (transb << BLAS_TRANSB_SHIFT);
|
||||
|
||||
int nthreads_max = num_cpu_avail(3);
|
||||
int nthreads_avail = nthreads_max;
|
||||
nthreads_max = num_cpu_avail(3);
|
||||
nthreads_avail = nthreads_max;
|
||||
|
||||
#ifndef COMPLEX
|
||||
double MNK = (double) args.m * (double) args.n * (double) args.k;
|
||||
MNK = (double) args.m * (double) args.n * (double) args.k;
|
||||
if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
|
||||
nthreads_max = 1;
|
||||
#else
|
||||
double MNK = (double) args.m * (double) args.n * (double) args.k;
|
||||
MNK = (double) args.m * (double) args.n * (double) args.k;
|
||||
if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
|
||||
nthreads_max = 1;
|
||||
#endif
|
||||
|
||||
@@ -81,6 +81,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
int nthreads;
|
||||
int nthreads_max;
|
||||
int nthreads_avail;
|
||||
double MNK;
|
||||
#endif
|
||||
|
||||
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
|
||||
@@ -135,6 +138,9 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
blasint info, t;
|
||||
#ifdef SMP
|
||||
int nthreads;
|
||||
int nthreads_max;
|
||||
int nthreads_avail;
|
||||
double MNK;
|
||||
#endif
|
||||
|
||||
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
|
||||
@@ -235,10 +241,10 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
|
||||
#ifdef SMP
|
||||
|
||||
int nthreads_max = num_cpu_avail(2);
|
||||
int nthreads_avail = nthreads_max;
|
||||
nthreads_max = num_cpu_avail(2);
|
||||
nthreads_avail = nthreads_max;
|
||||
|
||||
double MNK = (double) m * (double) n;
|
||||
MNK = (double) m * (double) n;
|
||||
if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) )
|
||||
nthreads_max = 1;
|
||||
|
||||
|
||||
@@ -26,7 +26,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***********************************************************
|
||||
* 2014/06/10 Saar
|
||||
* 2014-06-10 Saar
|
||||
* 2015-09-07 grisuthedragon
|
||||
***********************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
@@ -50,6 +51,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#undef malloc
|
||||
#undef free
|
||||
|
||||
/* Enables the New IMATCOPY code with inplace operation if lda == ldb */
|
||||
#define NEW_IMATCOPY
|
||||
|
||||
#ifndef CBLAS
|
||||
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
|
||||
{
|
||||
@@ -75,7 +79,6 @@ void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha,
|
||||
#else
|
||||
void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, FLOAT calpha, FLOAT *a, blasint clda, blasint cldb)
|
||||
{
|
||||
char Order, Trans;
|
||||
int order=-1,trans=-1;
|
||||
blasint info = -1;
|
||||
FLOAT *b;
|
||||
@@ -117,6 +120,34 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
#ifdef NEW_IMATCOPY
|
||||
if ( *lda == *ldb ) {
|
||||
if ( order == BlasColMajor )
|
||||
{
|
||||
if ( trans == BlasNoTrans )
|
||||
{
|
||||
IMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda );
|
||||
}
|
||||
else
|
||||
{
|
||||
IMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( trans == BlasNoTrans )
|
||||
{
|
||||
IMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda );
|
||||
}
|
||||
else
|
||||
{
|
||||
IMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda );
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if ( *lda > *ldb )
|
||||
msize = (*lda) * (*ldb) * sizeof(FLOAT);
|
||||
|
||||
@@ -136,6 +136,8 @@ blasint NAME(blasint *N, FLOAT *x, blasint *INCX){
|
||||
|
||||
ret = (blasint)MAX_K(n, x, incx);
|
||||
|
||||
if(ret > n) ret=n;
|
||||
|
||||
FUNCTION_PROFILE_END(COMPSIZE, n, 0);
|
||||
|
||||
IDEBUG_END;
|
||||
@@ -159,6 +161,8 @@ CBLAS_INDEX CNAME(blasint n, FLOAT *x, blasint incx){
|
||||
|
||||
ret = MAX_K(n, x, incx);
|
||||
|
||||
if (ret > n) ret=n;
|
||||
|
||||
if (ret) ret --;
|
||||
|
||||
FUNCTION_PROFILE_END(COMPSIZE, n, 0);
|
||||
|
||||
@@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
|
||||
|
||||
long double da = *DA;
|
||||
long double db = *DB;
|
||||
|
||||
@@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *
|
||||
|
||||
#endif
|
||||
|
||||
if (n <= 0) return;
|
||||
|
||||
FLOAT alpha_r = *(ALPHA + 0);
|
||||
FLOAT alpha_i = *(ALPHA + 1);
|
||||
FLOAT beta_r = *(BETA + 0);
|
||||
FLOAT beta_i = *(BETA + 1);
|
||||
|
||||
if (n <= 0) return;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
if (incx < 0) x -= (n - 1) * incx * 2;
|
||||
|
||||
@@ -57,21 +57,25 @@
|
||||
#ifdef RETURN_BY_STRUCT
|
||||
MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
||||
#elif defined RETURN_BY_STACK
|
||||
void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
||||
void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
||||
#else
|
||||
FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
||||
OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
||||
#endif
|
||||
|
||||
BLASLONG n = *N;
|
||||
BLASLONG incx = *INCX;
|
||||
BLASLONG incy = *INCY;
|
||||
#ifndef RETURN_BY_STACK
|
||||
FLOAT _Complex ret;
|
||||
OPENBLAS_COMPLEX_FLOAT ret;
|
||||
#endif
|
||||
#ifdef RETURN_BY_STRUCT
|
||||
MYTYPE myret;
|
||||
#endif
|
||||
|
||||
#ifndef RETURN_BY_STRUCT
|
||||
OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
|
||||
#endif
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
if (n <= 0) {
|
||||
@@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
|
||||
myret.i = 0.;
|
||||
return myret;
|
||||
#elif defined RETURN_BY_STACK
|
||||
*result = ZERO;
|
||||
*result = zero;
|
||||
return;
|
||||
#else
|
||||
return ZERO;
|
||||
return zero;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -144,21 +148,24 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
|
||||
#else
|
||||
|
||||
#ifdef FORCE_USE_STACK
|
||||
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){
|
||||
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){
|
||||
#else
|
||||
FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
||||
OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
||||
|
||||
FLOAT _Complex ret;
|
||||
OPENBLAS_COMPLEX_FLOAT ret;
|
||||
OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
|
||||
#endif
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
if (n <= 0) {
|
||||
#ifdef FORCE_USE_STACK
|
||||
*result = ZERO;
|
||||
//*result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
|
||||
CREAL(*result) = 0.0;
|
||||
CIMAG(*result) = 0.0;
|
||||
return;
|
||||
#else
|
||||
return ZERO;
|
||||
return zero;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
int nthreads;
|
||||
int nthreads_max;
|
||||
int nthreads_avail;
|
||||
double MNK;
|
||||
#endif
|
||||
|
||||
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
|
||||
@@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N,
|
||||
blasint lenx, leny;
|
||||
blasint i;
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
FLOAT alpha_r = *(ALPHA + 0);
|
||||
FLOAT alpha_i = *(ALPHA + 1);
|
||||
|
||||
FLOAT beta_r = *(BETA + 0);
|
||||
FLOAT beta_i = *(BETA + 1);
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
TOUPPER(trans);
|
||||
|
||||
info = 0;
|
||||
@@ -145,6 +148,9 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
blasint info, t;
|
||||
#ifdef SMP
|
||||
int nthreads;
|
||||
int nthreads_max;
|
||||
int nthreads_avail;
|
||||
double MNK;
|
||||
#endif
|
||||
|
||||
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
|
||||
@@ -153,14 +159,14 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
GEMV_O, GEMV_U, GEMV_S, GEMV_D,
|
||||
};
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
FLOAT alpha_r = *(ALPHA + 0);
|
||||
FLOAT alpha_i = *(ALPHA + 1);
|
||||
|
||||
FLOAT beta_r = *(BETA + 0);
|
||||
FLOAT beta_i = *(BETA + 1);
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
trans = -1;
|
||||
info = 0;
|
||||
|
||||
@@ -234,10 +240,10 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
|
||||
#ifdef SMP
|
||||
|
||||
int nthreads_max = num_cpu_avail(2);
|
||||
int nthreads_avail = nthreads_max;
|
||||
nthreads_max = num_cpu_avail(2);
|
||||
nthreads_avail = nthreads_max;
|
||||
|
||||
double MNK = (double) m * (double) n;
|
||||
MNK = (double) m * (double) n;
|
||||
if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) ))
|
||||
nthreads_max = 1;
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user