Compare commits

..

1 Commits

Author SHA1 Message Date
Zhang Xianyi
034ffa93fa Provide iaxpy and cblas_iaxpy for integer vectors. make INTEGER_PRECISION=1 2015-07-01 03:11:27 +08:00
3603 changed files with 31972 additions and 212635 deletions

4
.gitignore vendored
View File

@@ -15,7 +15,6 @@ lapack-netlib/make.inc
lapack-netlib/lapacke/include/lapacke_mangling.h
lapack-netlib/TESTING/testing_results.txt
*.so
*.so.*
*.a
.svn
*~
@@ -66,6 +65,3 @@ test/sblat3
test/zblat1
test/zblat2
test/zblat3
build
build.*
*.swp

View File

@@ -1,13 +1,4 @@
language: c
notifications:
webhooks:
urls:
- https://webhooks.gitter.im/e/8a6e4470a0cebd090344
on_success: change # options: [always|never|change] default: always
on_failure: always # options: [always|never|change] default: always
on_start: never # options: [always|never|change] default: always
compiler:
- gcc
@@ -24,12 +15,7 @@ before_install:
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
script:
- set -e
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
# whitelist
branches:

View File

@@ -1,200 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
##
# Project bootstrap: minimum CMake version, project name, version numbers,
# enabled languages and the name used for the library target.
cmake_minimum_required(VERSION 2.8.4)
project(OpenBLAS)
# Version components; OpenBLAS_VERSION is the dotted "major.minor.patch" string.
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 2)
set(OpenBLAS_PATCH_VERSION 16)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
# Assembly and C are both enabled explicitly.
enable_language(ASM)
enable_language(C)
# Target name of the library: "libopenblas" under MSVC, "openblas" elsewhere.
# NOTE(review): presumably the explicit "lib" is there because MSVC builds get no
# automatic "lib" prefix on the output file - confirm.
if(MSVC)
set(OpenBLAS_LIBNAME libopenblas)
else()
set(OpenBLAS_LIBNAME openblas)
endif()
#######
# User-facing build switches.
#   BUILD_WITHOUT_LAPACK - skip LAPACK and LAPACKE. Declared as an option
#                          (default ON) only under MSVC; with other compilers
#                          it is honoured if the caller defines it.
#   BUILD_WITHOUT_CBLAS  - skip the CBLAS interface (default OFF).
#   BUILD_DEBUG          - force a Debug build (default OFF).
if(MSVC)
  option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
endif()
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
option(BUILD_DEBUG "Build Debug Version" OFF)
#######
# Translate the switches into the NO_* variables tested further down.
if(BUILD_WITHOUT_LAPACK)
  set(NO_LAPACK 1)
  set(NO_LAPACKE 1)
endif()
# BUILD_DEBUG still forces Debug. Otherwise Release is only a default: a build
# type passed by the user (e.g. -DCMAKE_BUILD_TYPE=RelWithDebInfo) is kept
# instead of being silently overwritten.
if(BUILD_DEBUG)
  set(CMAKE_BUILD_TYPE Debug)
elseif(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()
if(BUILD_WITHOUT_CBLAS)
  set(NO_CBLAS 1)
endif()
#######
# Warn at configure time that this CMake build is experimental.
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
# Pull in the project's helper scripts from <source>/cmake.
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake")
# Directories that make up the BLAS part of the library.
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
# The kernel directory is added only when DYNAMIC_ARCH is not enabled.
if (NOT DYNAMIC_ARCH)
list(APPEND BLASDIRS kernel)
endif ()
# Defining SANITY_CHECK (with any value, since DEFINED is tested) adds the
# reference directory.
if (DEFINED SANITY_CHECK)
list(APPEND BLASDIRS reference)
endif ()
# SUBDIRS is the BLAS directory list plus lapack, unless NO_LAPACK is set.
set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack)
endif ()
# Select the floating-point flavours to build.
# If the caller defined none of the four BUILD_* switches, enable all of them.
if (NOT (DEFINED BUILD_SINGLE OR DEFINED BUILD_DOUBLE OR DEFINED BUILD_COMPLEX OR DEFINED BUILD_COMPLEX16))
  set(BUILD_COMPLEX16 true)
  set(BUILD_COMPLEX true)
  set(BUILD_DOUBLE true)
  set(BUILD_SINGLE true)
endif ()

# FLOAT_TYPES collects one tag per enabled flavour, in the fixed order
# SINGLE, DOUBLE, COMPLEX, ZCOMPLEX. The trailing notes record which
# preprocessor symbols each tag stands for.
set(FLOAT_TYPES "")

if (BUILD_SINGLE)
  list(APPEND FLOAT_TYPES "SINGLE") # no symbol defined
  message(STATUS "Building Single Precision")
endif ()

if (BUILD_DOUBLE)
  list(APPEND FLOAT_TYPES "DOUBLE") # DOUBLE
  message(STATUS "Building Double Precision")
endif ()

if (BUILD_COMPLEX)
  list(APPEND FLOAT_TYPES "COMPLEX") # COMPLEX
  message(STATUS "Building Complex Precision")
endif ()

if (BUILD_COMPLEX16)
  list(APPEND FLOAT_TYPES "ZCOMPLEX") # COMPLEX and DOUBLE
  message(STATUS "Building Double Complex Precision")
endif ()
# Full directory list, mirroring the Makefile build. It is not referenced again
# in the visible part of this file.
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
# all :: libs netlib tests shared
# libs :
# Abort when CORE is undefined or has the value "UNKNOWN".
# NOTE(review): CORE is not set in this file; presumably the included
# cmake/system.cmake detects it - confirm. The message text still refers to the
# make-based invocation.
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()
# Refuse to configure when both library flavours have been switched off.
# The variable names are passed bare so that if() dereferences them itself.
# Writing ${NO_STATIC} / ${NO_SHARED} would expand an unset variable to nothing
# and leave "if( AND )", which CMake rejects as a malformed condition.
if (NO_STATIC AND NO_SHARED)
  message(FATAL_ERROR "Neither static nor shared are enabled.")
endif ()
# Set the default output directory: libraries and archives go to <build>/lib.
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
# get obj vars into format that add_library likes: $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
# For every entry in SUBDIRS: add the directory to the build, then append a
# $<TARGET_OBJECTS:...> reference whose target name is the directory path with
# "/" replaced by "_" (e.g. driver/level2 -> driver_level2).
# NOTE(review): this assumes each subdirectory defines an object library with
# exactly that name - confirm against the subdirectory CMakeLists.
set(TARGET_OBJS "")
foreach (SUBDIR ${SUBDIRS})
add_subdirectory(${SUBDIR})
string(REPLACE "/" "_" subdir_obj ${SUBDIR})
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
endforeach ()
# netlib:
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
# The LAPACK script is included only when neither NOFORTRAN nor NO_LAPACK is
# set; the LAPACKE script additionally requires NO_LAPACKE to be unset.
# NOTE(review): presumably these scripts populate LA_SOURCES and
# LAPACKE_SOURCES used below - confirm.
if (NOT NOFORTRAN AND NOT NO_LAPACK)
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE)
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
endif ()
endif ()
#Only generate .def for dll on MSVC
# Outside MSVC, OpenBLAS_DEF_FILE stays unset here and expands to nothing in
# the add_library call.
if(MSVC)
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif()
# add objects to the openblas lib
# The shared library is built from the LAPACK/LAPACKE source lists, the
# collected object-library references and, if set, the .def file.
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
# export.cmake is included after the library target exists.
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
# Send the runtime artifact of the main library to <build>/lib.
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)

# Apply the same location to every configuration listed in
# CMAKE_CONFIGURATION_TYPES, for runtime, library and archive outputs.
# The per-config property suffix is the upper-cased configuration name.
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
  string(TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG)
  set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib
    LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib
    ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
endforeach ()

# Turn on CTest support and add the unit-test directory.
enable_testing()
add_subdirectory(utest)
# Everything in this branch is skipped under MSVC, where only the shared
# library is built.
if(NOT MSVC)
  # Static flavour of the library, built from the same inputs as the shared
  # one (minus the .def file) and given the same output name.
  add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
  set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES
    OUTPUT_NAME ${OpenBLAS_LIBNAME}
    CLEAN_DIRECT_OUTPUT 1)

  # With SMP set, both flavours link against pthread.
  if(SMP)
    target_link_libraries(${OpenBLAS_LIBNAME} pthread)
    target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
  endif()

  # Test directories: test always, ctest only when CBLAS is being built.
  add_subdirectory(test)
  if(NOT NO_CBLAS)
    add_subdirectory(ctest)
  endif()
endif()

# Version metadata of the shared library: VERSION is "major.minor",
# SOVERSION is the major number alone.
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
  VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
  SOVERSION ${OpenBLAS_MAJOR_VERSION})
# TODO: Why is the config saved here? Is this necessary with CMake?
#Save the config files for installation
# @cp Makefile.conf Makefile.conf_last
# @cp config.h config_last.h
#ifdef QUAD_PRECISION
# @echo "#define QUAD_PRECISION">> config_last.h
#endif
#ifeq ($(EXPRECISION), 1)
# @echo "#define EXPRECISION">> config_last.h
#endif
###
#ifeq ($(DYNAMIC_ARCH), 1)
# @$(MAKE) -C kernel commonlibs || exit 1
# @for d in $(DYNAMIC_CORE) ; \
# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
# done
# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last
#endif
#ifdef USE_THREAD
# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
#endif
# @touch lib.grd

View File

@@ -121,31 +121,11 @@ In chronological order:
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
ARMv8 support.
* Jerome Robert <jeromerobert@gmx.com>
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
* [2015-12-28] Allow to force the number of parallel make job
* [2015-12-28] Fix detection of AMD E2-3200
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)
* Dan Kortschak
* [2015-01-07] Added test for drotmg bug #484.
* Ton van den Heuvel <https://github.com/ton>
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
* Martin Koehler <https://github.com/grisuthedragon/>
* [2015-09-07] Improved imatcopy
* Ashwin Sekhar T K <https://github.com/ashwinyes/>
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
* [2015-11-20] lapack-test fixes for Cortex-A57
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
* [Your name or handle] <[email or website]>
* [Date] [Brief summary of your changes]

View File

@@ -1,110 +1,4 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.16
15-Mar-2016
common:
* Avoid potential getenv segfault. (#716)
* Import LAPACK svn bugfix #142-#147,#150-#155
x86/x86_64:
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller
* Fix bug with scipy linalg test.
ARM:
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
POWER:
* Optimize D and Z BLAS3 functions for Power8.
====================================================================
Version 0.2.16.rc1
23-Feb-2016
common:
* Upgrade LAPACK to 3.6.0 version.
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build
LAPACK deprecated functions.
* Add MAKE_NB_JOBS option in Makefile.
Force number of make jobs. This is particularly
useful when using distcc. (#735. Thanks, Jerome Robert.)
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor).
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert)
* Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert)
* Let openblas_get_num_threads return the number of active threads.
(#760. Thanks, Jerome Robert)
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen)
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey)
* Update scipy benchmark script. (#745. Thanks, John Kirkham)
x86/x86_64:
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller.
* Detect Intel Avoton.
* Detect AMD Trinity, Richland, E2-3200.
* Fix gemv performance bug on Mac OSX Intel Haswell.
* Fix some bugs with CMake and Visual Studio
ARM:
* Support and optimize Cortex-A57 AArch64.
(#686. Thanks, Ashwin Sekhar TK)
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere)
* Update ARMV6 kernels.
POWER:
* Fix detection of POWER architecture
(#684. Thanks, Sebastien Villemot)
====================================================================
Version 0.2.15
27-Oct-2015
common:
* Support cmake on x86/x86-64. Natively compiling on MS Visual Studio.
(experimental. Thanks to Hank Anderson for the initial cmake porting work.)
On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels.
e.g. cmake .
make
make test (Optional)
On Windows MS Visual Studio, OpenBLAS cmake only supports C kernels.
(OpenBLAS uses AT&T style assembly, which is not supported by MSVC.)
e.g. cmake -G "Visual Studio 12 Win64" .
Open OpenBLAS.sln and build.
* Enable MAX_STACK_ALLOC flags by default.
Improve ger and gemv for small matrices.
* Improve gemv parallel with small m and large n case.
* Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler)
* Add vecLib benchmarks (#565. Thanks, Andreas Noack.)
* Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak)
* Fix LAPACKE lansy (#640. Thanks, Dan Kortschak)
* Import bug fixes for LAPACKE s/dormlq, c/zunmlq
* Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden)
* Remove g77 from compiler list.
* Enable AppVeyor Windows CI.
x86/x86-64:
* Support pure C generic kernels for x86/x86-64.
* Support Intel Broadwell and Skylake by Haswell kernels.
* Support AMD Excavator by Steamroller kernels.
* Optimize s/d/c/zdot for Intel SandyBridge and Haswell.
* Optimize s/d/c/zdot for AMD Piledriver and Steamroller.
* Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell.
* Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller.
* Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge.
* Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller.
* Optimize s/dger for Intel SandyBridge.
* Optimize s/dsymv for Intel SandyBridge.
* Optimize ssymv for Intel Haswell.
* Optimize dgemv for Intel Nehalem and Haswell.
* Optimize dtrmm for Intel Haswell.
ARM:
* Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard)
e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7
* Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas)
POWER:
* Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.)
* Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.)
====================================================================
Version 0.2.14
24-Mar-2015

View File

@@ -7,6 +7,10 @@ ifneq ($(DYNAMIC_ARCH), 1)
BLASDIRS += kernel
endif
ifdef UTEST_CHECK
SANITY_CHECK = 1
endif
ifdef SANITY_CHECK
BLASDIRS += reference
endif
@@ -81,22 +85,22 @@ endif
shared :
ifndef NO_SHARED
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
ifeq ($(OSNAME), Linux)
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), FreeBSD)
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), NetBSD)
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), Darwin)
@$(MAKE) -C exports dyn
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
endif
ifeq ($(OSNAME), WINNT)
@$(MAKE) -C exports dll
@@ -113,8 +117,10 @@ ifndef CROSS
touch $(LIBNAME)
ifndef NO_FBLAS
$(MAKE) -C test all
ifdef UTEST_CHECK
$(MAKE) -C utest all
endif
endif
ifndef NO_CBLAS
$(MAKE) -C ctest all
endif
@@ -243,23 +249,16 @@ ifndef NOFORTRAN
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(FC), gfortran)
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
ifdef SMP
ifeq ($(OSNAME), WINNT)
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
else
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
else
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif
@@ -289,17 +288,7 @@ endif
lapack-test :
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
endif
lapack-runtest:
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
blas-test:
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)

View File

@@ -11,8 +11,8 @@ endif
ifeq ($(CORE), ARMV7)
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
else
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
@@ -26,6 +26,8 @@ endif
ifeq ($(CORE), ARMV5)
CCOMMON_OPT += -marm -march=armv5
FCOMMON_OPT += -marm -march=armv5
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif

View File

@@ -4,8 +4,4 @@ CCOMMON_OPT += -march=armv8-a
FCOMMON_OPT += -march=armv8-a
endif
ifeq ($(CORE), CORTEXA57)
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
endif

View File

@@ -11,7 +11,6 @@ OPENBLAS_BINARY_DIR := $(PREFIX)/bin
OPENBLAS_BUILD_DIR := $(CURDIR)
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
.PHONY : install
.NOTPARALLEL : install
@@ -29,7 +28,7 @@ install : lib.grd
#for inc
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@@ -48,10 +47,10 @@ endif
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
endif
#for install static library
@@ -64,7 +63,7 @@ endif
#for install shared library
ifndef NO_SHARED
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
ifeq ($(OSNAME), Linux)
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
@@ -98,7 +97,6 @@ endif
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
ifndef NO_SHARED
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
@@ -114,16 +112,5 @@ else
#only static
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
endif
#Generating OpenBLASConfigVersion.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo " endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
@echo Install OK!

View File

@@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.2.16
VERSION = 0.2.14
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -79,9 +79,6 @@ VERSION = 0.2.16
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
# NO_LAPACKE = 1
# Build LAPACK Deprecated functions since LAPACK 3.6.0
# BUILD_LAPACK_DEPRECATED = 1
# If you want to use legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1
@@ -111,16 +108,15 @@ NO_AFFINITY = 1
# Don't use parallel make.
# NO_PARALLEL_MAKE = 1
# Force number of make jobs. The default is the number of logical CPU of the host.
# This is particularly useful when using distcc
# MAKE_NB_JOBS = 2
# If you would like to know minute performance report of GotoBLAS.
# FUNCTION_PROFILE = 1
# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
# QUAD_PRECISION = 1
# Support for integer matrix and vector (e.g. iaxpy)
# INTEGER_PRECISION = 1
# Threads are still working for a while after finishing BLAS operation
# to reduce thread activate/deactivate overhead. You can determine
# time out to improve performance. This number should be from 4 to 30
@@ -145,6 +141,10 @@ NO_AFFINITY = 1
# slow (Not implemented yet).
# SANITY_CHECK = 1
# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable
# SANITY_CHECK to compare the result with reference BLAS.
# UTEST_CHECK = 1
# The installation directory.
# PREFIX = /opt/OpenBLAS
@@ -162,20 +162,16 @@ COMMON_PROF = -pg
# Build Debug version
# DEBUG = 1
# Set maximum stack allocation.
# The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
# Improve GEMV and GER for small matrices by stack allocation.
# For details, https://github.com/xianyi/OpenBLAS/pull/482
#
# MAX_STACK_ALLOC = 0
MAX_STACK_ALLOC=2048
# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoid conflicts with other BLAS libraries, especially when using
# 64 bit integer interfaces in OpenBLAS.
# For details, https://github.com/xianyi/OpenBLAS/pull/459
#
# The same prefix and suffix are also added to the library name,
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas
#
# SYMBOLPREFIX=
# SYMBOLSUFFIX=

View File

@@ -139,10 +139,6 @@ NO_PARALLEL_MAKE=0
endif
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
ifdef MAKE_NB_JOBS
GETARCH_FLAGS += -DMAKE_NB_JOBS=$(MAKE_NB_JOBS)
endif
ifeq ($(HOSTCC), loongcc)
GETARCH_FLAGS += -static
endif
@@ -296,14 +292,12 @@ endif
ifneq ($(OSNAME), WINNT)
ifneq ($(OSNAME), CYGWIN_NT)
ifneq ($(OSNAME), Interix)
ifneq ($(OSNAME), Android)
ifdef SMP
EXTRALIB += -lpthread
endif
endif
endif
endif
endif
# ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
@@ -315,6 +309,10 @@ CCOMMON_OPT += -DQUAD_PRECISION
NO_EXPRECISION = 1
endif
ifdef INTEGER_PRECISION
CCOMMON_OPT += -DINTEGER_PRECISION
endif
ifneq ($(ARCH), x86)
ifneq ($(ARCH), x86_64)
NO_EXPRECISION = 1
@@ -330,8 +328,7 @@ ifdef SANITY_CHECK
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
endif
MAX_STACK_ALLOC ?= 2048
ifneq ($(MAX_STACK_ALLOC), 0)
ifdef MAX_STACK_ALLOC
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
endif
@@ -343,11 +340,6 @@ ifeq ($(ARCH), x86)
ifndef BINARY
NO_BINARY_MODE = 1
endif
ifeq ($(CORE), generic)
NO_EXPRECISION = 1
endif
ifndef NO_EXPRECISION
ifeq ($(F_COMPILER), GFORTRAN)
# ifeq logical or. GCC or LSB
@@ -366,11 +358,6 @@ endif
endif
ifeq ($(ARCH), x86_64)
ifeq ($(CORE), generic)
NO_EXPRECISION = 1
endif
ifndef NO_EXPRECISION
ifeq ($(F_COMPILER), GFORTRAN)
# ifeq logical or. GCC or LSB
@@ -381,7 +368,7 @@ FCOMMON_OPT += -m128bit-long-double
endif
ifeq ($(C_COMPILER), CLANG)
EXPRECISION = 1
CCOMMON_OPT += -DEXPRECISION
CCOMMON_OPT += -DEXPRECISION
FCOMMON_OPT += -m128bit-long-double
endif
endif
@@ -395,7 +382,7 @@ endif
ifeq ($(USE_OPENMP), 1)
#check
#check
ifeq ($(USE_THREAD), 0)
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
endif
@@ -887,6 +874,12 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
endif
ifndef LIBNAMESUFFIX
LIBPREFIX = libopenblas
else
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
endif
ifndef SYMBOLPREFIX
SYMBOLPREFIX =
endif
@@ -895,12 +888,6 @@ ifndef SYMBOLSUFFIX
SYMBOLSUFFIX =
endif
ifndef LIBNAMESUFFIX
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
else
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
endif
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
include $(TOPDIR)/Makefile.$(ARCH)
@@ -959,18 +946,17 @@ ifeq ($(OSNAME), SunOS)
TAR = gtar
PATCH = gpatch
GREP = ggrep
AWK = nawk
else
TAR = tar
PATCH = patch
GREP = grep
AWK = awk
endif
ifndef MD5SUM
MD5SUM = md5sum
endif
AWK = awk
REVISION = -r$(VERSION)
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
@@ -979,25 +965,16 @@ ifeq ($(DEBUG), 1)
COMMON_OPT += -g
endif
ifeq ($(DEBUG), 1)
FCOMMON_OPT += -g
endif
ifndef COMMON_OPT
COMMON_OPT = -O2
endif
ifndef FCOMMON_OPT
FCOMMON_OPT = -O2 -frecursive
endif
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
override FFLAGS += $(FCOMMON_OPT)
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =
#For LAPACK Fortran codes.
@@ -1187,3 +1164,4 @@ SUNPATH = /opt/sunstudio12.1
else
SUNPATH = /opt/SUNWspro
endif

View File

@@ -4,6 +4,7 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
@@ -22,12 +23,18 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif
ifdef INTEGER_PRECISION
BLASOBJS += $(IBLASOBJS)
BLASOBJS_P += $(IBLASOBJS_P)
endif
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
@@ -35,6 +42,7 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
libs :: $(BLASOBJS) $(COMMONOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

View File

@@ -1,10 +1,7 @@
# OpenBLAS
[![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS)
Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS)
AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n8nkkx64/branch/develop?svg=true)](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
## Introduction
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
@@ -75,11 +72,10 @@ Please read GotoBLAS_01Readme.txt
#### ARM64:
- **ARMV8**: Experimental
- **ARM Cortex-A57**: Experimental
### Support OS:
- **GNU/Linux**
- **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
- **FreeBSD**: Supported by community. We didn't test the library on this OS.

View File

@@ -44,8 +44,6 @@ NANO
POWER4
POWER5
POWER6
POWER7
POWER8
PPCG4
PPC970
PPC970MP
@@ -74,5 +72,3 @@ ARMV5
7.ARM 64-bit CPU:
ARMV8
CORTEXA57

199
USAGE.md
View File

@@ -1,199 +0,0 @@
# Notes on OpenBLAS usage
## Usage
#### Program is Terminated. Because you tried to allocate too many memory regions
In OpenBLAS, we manage a pool of memory buffers and allocate the number of
buffers as the following.
```
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
```
This error indicates that the program exceeded the number of buffers.
Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
`MAX_CPU_NUMBER=NUM_THREADS`.
#### How can I use OpenBLAS in multi-threaded applications?
If your application is already multi-threaded, it will conflict with OpenBLAS
multi-threading. Thus, you must set OpenBLAS to use single thread in any of the
following ways:
* `export OPENBLAS_NUM_THREADS=1` in the environment variables.
* Call `openblas_set_num_threads(1)` in the application on runtime.
* Build OpenBLAS single thread version, e.g. `make USE_THREAD=0`
If the application is parallelized by OpenMP, please use OpenBLAS built with
`USE_OPENMP=1`
#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH
The environment variable which controls the kernel selection is
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`) e.g. `export
OPENBLAS_CORETYPE=Haswell` and the function `char* openblas_get_corename()`
returns the used target.
#### How could I disable OpenBLAS threading affinity on runtime?
You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment
variable to disable threading affinity on runtime. For example, before the
running,
```
export OPENBLAS_MAIN_FREE=1
```
Alternatively, you can disable the affinity feature by setting `NO_AFFINITY=1`
in `Makefile.rule`.
## Linking with the library
* Link with shared library
`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas`
If the library is multithreaded, please add `-lpthread`. If the library
contains LAPACK functions, please add `-lgfortran` or other Fortran libs.
* Link with static library
`gcc -o test test.c /your/path/libopenblas.a`
You can download `test.c` from https://gist.github.com/xianyi/5780018
On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by
default), custom programs statically linked against `libopenblas.a` should also
link with the pthread library e.g.:
```
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread
```
Failing to add the `-lpthread` flag will cause errors such as:
```
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory':
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock'
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock'
...
```
## Code examples
#### Call CBLAS interface
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656
```
#include <cblas.h>
#include <stdio.h>
/*
 * Minimal CBLAS example: computes C = 1 * A * B^T + 2 * C with cblas_dgemm
 * and prints the nine entries of C.
 * A and B each hold 6 doubles read as 3x2 column-major matrices (leading
 * dimension 3); C is 3x3 column-major and starts with every entry at 0.5.
 */
int main(void)
{
  int i;
  double A[6] = {1.0, 2.0, 1.0, -3.0, 4.0, -1.0};
  double B[6] = {1.0, 2.0, 1.0, -3.0, 4.0, -1.0};
  double C[9] = {.5, .5, .5, .5, .5, .5, .5, .5, .5};

  /* M = 3, N = 3, K = 2, alpha = 1, beta = 2; B is used transposed. */
  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, 3, 3, 2, 1, A, 3, B, 3, 2, C, 3);

  for (i = 0; i < 9; i++)
    printf("%lf ", C[i]);
  printf("\n");

  /* main must return int in standard C; 0 reports success to the caller. */
  return 0;
}
```
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran`
#### Call BLAS Fortran interface
This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018
```
#include "stdio.h"
#include "stdlib.h"
#include "sys/time.h"
#include "time.h"
extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);
/*
 * Times a single DGEMM call (C = alpha * A * B + beta * C) made through the
 * Fortran BLAS symbol dgemm_.
 *
 * Usage: ./time_dgemm <m> <n> <k>
 *   A is m x k, B is k x n and C is m x n, all column-major and not transposed.
 *
 * One line with the problem size, elapsed seconds and MFLOPS is appended to
 * timeDGEMM.txt in the current directory.
 *
 * Returns 0 on success, 1 when the arguments are missing or not positive,
 * when memory cannot be allocated, or when the result file cannot be opened.
 */
int main(int argc, char* argv[])
{
  int i;
  printf("test!\n");
  if (argc < 4) {
    printf("Input Error\n");
    return 1;
  }

  int m = atoi(argv[1]);
  int n = atoi(argv[2]);
  int k = atoi(argv[3]);
  /* atoi() returns 0 for non-numeric text; the sizes are also used as the
     leading dimensions passed to dgemm_, so they must be at least 1. */
  if (m <= 0 || n <= 0 || k <= 0) {
    printf("Input Error\n");
    return 1;
  }

  int sizeofa = m * k;
  int sizeofb = k * n;
  int sizeofc = m * n;
  char ta = 'N';
  char tb = 'N';
  double alpha = 1.2;
  double beta = 0.001;

  struct timeval start, finish;
  double duration;

  double* A = (double*)malloc(sizeof(double) * sizeofa);
  double* B = (double*)malloc(sizeof(double) * sizeofb);
  double* C = (double*)malloc(sizeof(double) * sizeofc);
  if (A == NULL || B == NULL || C == NULL) {
    printf("Out of memory\n");
    /* free(NULL) is a no-op, so releasing all three is safe. */
    free(A);
    free(B);
    free(C);
    return 1;
  }

  srand((unsigned)time(NULL));

  /* Deterministic fill with the repeating pattern 1, 2, 3. */
  for (i = 0; i < sizeofa; i++)
    A[i] = i % 3 + 1;
  for (i = 0; i < sizeofb; i++)
    B[i] = i % 3 + 1;
  for (i = 0; i < sizeofc; i++)
    C[i] = i % 3 + 1;

  printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n", m, n, k, alpha, beta, sizeofc);

  gettimeofday(&start, NULL);
  dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
  gettimeofday(&finish, NULL);

  /* Elapsed wall-clock time in seconds. */
  duration = ((double)(finish.tv_sec - start.tv_sec) * 1000000 + (double)(finish.tv_usec - start.tv_usec)) / 1000000;
  /* 2*m*n*k floating-point operations, scaled to millions per second. */
  double mflops = 2.0 * m * n * k;
  mflops = mflops / duration * 1.0e-6;

  FILE *fp = fopen("timeDGEMM.txt", "a");
  if (fp == NULL) {
    perror("timeDGEMM.txt");
    free(A);
    free(B);
    free(C);
    return 1;
  }
  fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, mflops);
  fclose(fp);

  free(A);
  free(B);
  free(C);
  return 0;
}
```
` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a`
` ./time_dgemm <m> <n> <k> `
## Troubleshooting
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
* The number of CPUs/Cores should be less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting out the line `NO_AFFINITY=1` in `Makefile.rule`. However, this may cause [a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
* On Loongson 3A, `make test` may fail because of a `pthread_create` error (error code `EAGAIN`). However, the same test case runs correctly when started directly from the shell.
## BLAS reference manual
If you want to understand every BLAS function and definition, please read
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm)
or [netlib.org](http://netlib.org/blas/)
Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions)
## How to reference OpenBLAS.
You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications).
Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly.

View File

@@ -1,44 +0,0 @@
# AppVeyor CI configuration: generates a Visual Studio solution with CMake,
# builds it, and runs the unit-test executable.
# NOTE(review): this version string says 0.2.15 while CMakeLists.txt sets
# OpenBLAS_PATCH_VERSION to 16 - confirm whether the two should match.
version: 0.2.15.{build}
#environment:
# Build for 64-bit only, in Release configuration.
platform:
- x64
configuration: Release
clone_folder: c:\projects\OpenBLAS
init:
# Leave line endings untouched on checkout (git converts CRLF to LF on commit only).
- git config --global core.autocrlf input
build:
# Presumably the solution file generated by the cmake call in before_build.
project: OpenBLAS.sln
clone_depth: 5
#branches to build
branches:
only:
- master
- develop
- cmake
skip_tags: true
matrix:
fast_finish: true
skip_commits:
# Add [av skip] to commit messages
message: /\[av skip\]/
before_build:
- echo Running cmake...
- cd c:\projects\OpenBLAS
# Generate 64-bit Visual Studio 12 project files in the source directory itself.
- cmake -G "Visual Studio 12 Win64" .
test_script:
- echo Running Test
- cd c:\projects\OpenBLAS\utest
- openblas_utest

View File

@@ -166,8 +166,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
smallscaling
ssymm.goto dsymm.goto csymm.goto zsymm.goto
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
@@ -2133,8 +2132,6 @@ cgemm3m.$(SUFFIX) : gemm3m.c
zgemm3m.$(SUFFIX) : gemm3m.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
smallscaling: smallscaling.c ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm
clean ::
@rm -f *.goto *.mkl *.acml *.atlas *.veclib

View File

@@ -172,7 +172,7 @@ int main(int argc, char *argv[]){
srandom(getpid());
#endif
for(j = 0; j < to; j++){
for(j = 0; j < m; j++){
for(i = 0; i < to * COMPSIZE; i++){
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;

View File

@@ -1,58 +0,0 @@
#!/usr/bin/env python
import os
import sys
import time
import numpy
from numpy import zeros
from numpy.random import randn
from scipy.linalg import blas
def run_dsyrk(N, l):
    """Time ``l`` calls of BLAS ``dsyrk`` on an N x N matrix and print the result.

    A random float64 matrix ``A`` and a zeroed float64 matrix ``C`` (both
    Fortran-ordered) are created, ``blas.dsyrk(1.0, A, c=C, overwrite_c=True)``
    is called ``l`` times, and one line with the size, the rate in MFlops
    (``N**3 * l / elapsed * 1e-6``) and the elapsed seconds is printed.

    Parameters:
        N: matrix dimension.
        l: number of dsyrk calls to time.

    Returns:
        None.
    """
    A = randn(N, N).astype('float64', order='F')
    C = zeros((N, N), dtype='float64', order='F')

    start = time.time()
    for i in range(0, l):
        blas.dsyrk(1.0, A, c=C, overwrite_c=True)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = (N * N * N) * l / timediff
        mflops *= 1e-6
    else:
        # The timer can report no elapsed time for small runs; report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults, each overridable by an optional positional argument:
    #   argv[1] = first size, argv[2] = last size, argv[3] = step, argv[4] = loops.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    for position, arg in enumerate(sys.argv):
        if position == 1:
            N = int(arg)
        elif position == 2:
            NMAX = int(arg)
        elif position == 3:
            NINC = int(arg)
        elif position == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    if 'OPENBLAS_LOOPS' in os.environ:
        p = os.environ['OPENBLAS_LOOPS']
        if p:
            LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # The stop value NMAX + NINC includes NMAX itself when it lies on the step grid.
    for i in range(N, NMAX + NINC, NINC):
        run_dsyrk(i, LOOPS)

View File

@@ -1,58 +0,0 @@
#!/usr/bin/env python
import os
import sys
import time
import numpy
from numpy import zeros
from numpy.random import randn
from scipy.linalg import blas
def run_ssyrk(N, l):
    """Time ``l`` calls of BLAS ``ssyrk`` on an N x N matrix and print the result.

    A random float32 matrix ``A`` and a zeroed float32 matrix ``C`` (both
    Fortran-ordered) are created, ``blas.ssyrk(1.0, A, c=C, overwrite_c=True)``
    is called ``l`` times, and one line with the size, the rate in MFlops
    (``N**3 * l / elapsed * 1e-6``) and the elapsed seconds is printed.

    Parameters:
        N: matrix dimension.
        l: number of ssyrk calls to time.

    Returns:
        None.
    """
    A = randn(N, N).astype('float32', order='F')
    C = zeros((N, N), dtype='float32', order='F')

    start = time.time()
    for i in range(0, l):
        blas.ssyrk(1.0, A, c=C, overwrite_c=True)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = (N * N * N) * l / timediff
        mflops *= 1e-6
    else:
        # The timer can report no elapsed time for small runs; report an
        # infinite rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults, each overridable by an optional positional argument:
    #   argv[1] = first size, argv[2] = last size, argv[3] = step, argv[4] = loops.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    for position, arg in enumerate(sys.argv):
        if position == 1:
            N = int(arg)
        elif position == 2:
            NMAX = int(arg)
        elif position == 3:
            NINC = int(arg)
        elif position == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    if 'OPENBLAS_LOOPS' in os.environ:
        p = os.environ['OPENBLAS_LOOPS']
        if p:
            LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # The stop value NMAX + NINC includes NMAX itself when it lies on the step grid.
    for i in range(N, NMAX + NINC, NINC):
        run_ssyrk(i, LOOPS)

View File

@@ -1,196 +0,0 @@
// run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#ifndef _WIN32
#include <pthread.h>
#endif
#include <cblas.h>
#include <omp.h>
#define MIN_SIZE 5
#define MAX_SIZE 60
#define NB_SIZE 10
// Number of loop iterations for a 1x1 matrix. Lower it if the test is
// too slow on your computer.
#define NLOOP 2e7
// Describes one benchmark run; main() fills it in and hands it to the
// *_bench functions (directly, or through the OpenMP/pthread drivers).
typedef struct {
// Dimension used by the benchmarks: size x size matrix, size-long vector.
int matrix_size;
// Loop budget; each benchmark performs n_loop / matrix_size BLAS calls.
int n_loop;
// Benchmark driver to run; called with a pointer to this structure.
void (* bench_func)();
// BLAS routine under test. Declared without a prototype so that routines
// with different argument lists can be stored in the same field.
void (* blas_func)();
// Allocates and initialises an array of `size` elements for the routine.
void * (* create_matrix)(int size);
} BenchParam;
// Allocates `size` single-precision values and sets element i to 1e3 * i / size.
// The buffer is sized from the element type (the previous code used
// sizeof(double) for a float array and allocated twice what was needed).
// The caller owns the returned buffer and releases it with free().
void * s_create_matrix(int size) {
  float * r = malloc(size * sizeof(*r));
  int i;
  for(i = 0; i < size; i++)
    r[i] = 1e3 * i / size;
  return r;
}
// Allocates `size` single-precision complex values, stored as 2 * size floats,
// and sets float i to 1e3 * i / size.
// The buffer is sized from the element type (the previous code used
// sizeof(double) for a float array and allocated twice what was needed).
// The caller owns the returned buffer and releases it with free().
void * c_create_matrix(int size) {
  float * r = malloc(size * 2 * sizeof(*r));
  int i;
  for(i = 0; i < 2 * size; i++)
    r[i] = 1e3 * i / size;
  return r;
}
// Builds a buffer of 2 * size doubles (two per complex element) where
// entry k holds 1e3 * k / size. The caller frees the result.
void * z_create_matrix(int size) {
  double * values = malloc(size * 2 * sizeof(double));
  int k = 0;
  while (k < 2 * size) {
    values[k] = 1e3 * k / size;
    k++;
  }
  return values;
}
// Builds a buffer of `size` doubles where entry k holds 1e3 * k / size.
// The caller frees the result.
void * d_create_matrix(int size) {
  double * values = malloc(size * sizeof(double));
  int k = 0;
  while (k < size) {
    values[k] = 1e3 * k / size;
    k++;
  }
  return values;
}
// Benchmarks a TRMV-style routine: calls param->blas_func
// n_loop / matrix_size times on a dim x dim matrix and a dim-long vector
// obtained from param->create_matrix, then frees both buffers.
void trmv_bench(BenchParam * param)
{
  int dim = param->matrix_size;
  int remaining = param->n_loop / dim;
  int unit_stride = 1;
  void * matrix = param->create_matrix(dim * dim);
  void * vector = param->create_matrix(dim);

  while (remaining > 0) {
    param->blas_func("U", "N", "N", &dim, matrix, &dim, vector, &unit_stride);
    remaining--;
  }

  free(matrix);
  free(vector);
}
// Benchmarks a GEMV-style routine: calls param->blas_func
// n_loop / matrix_size times on a size x size matrix and a size-long vector
// obtained from param->create_matrix, then frees both buffers.
// The same vector is passed as both the input and the output operand.
void gemv_bench(BenchParam * param)
{
  int i, n;
  int size = param->matrix_size;
  n = param->n_loop / size;
  // alpha/beta stored as a (real, imaginary) pair. The complex routine in the
  // test table (zgemv_) reads two doubles through this pointer, so a lone
  // double would be read past its end; the real routine (dgemv_) only uses
  // the first element.
  double v[2] = {1.01, 0.0};
  int one = 1;
  void * A = param->create_matrix(size * size);
  void * y = param->create_matrix(size);
  for(i = 0; i < n; i++) {
    param->blas_func("N", &size, &size, v, A, &size, y, &one, v, y, &one);
  }
  free(A);
  free(y);
}
// Benchmarks a GER-style rank-1 update: calls param->blas_func
// n_loop / matrix_size times on a size x size matrix and a size-long vector
// obtained from param->create_matrix, then frees both buffers.
// The same vector is passed as both vector operands.
void ger_bench(BenchParam * param) {
  int i, n;
  int size = param->matrix_size;
  n = param->n_loop / size;
  // alpha stored as a (real, imaginary) pair. The complex routine in the test
  // table (zgerc_) reads two doubles through this pointer, so a lone double
  // would be read past its end; the real routine (dger_) only uses the first
  // element.
  double v[2] = {1.01, 0.0};
  int one = 1;
  void * A = param->create_matrix(size * size);
  void * y = param->create_matrix(size);
  for(i = 0; i < n; i++) {
    param->blas_func(&size, &size, v, y, &one, y, &one, A, &size);
  }
  free(A);
  free(y);
}
#ifndef _WIN32
// Thread start routine handed to pthread_create() by pthread_bench():
// `param` is a BenchParam *, and the benchmark stored in it is run on that
// same structure.
void * pthread_func_wrapper(void * param) {
((BenchParam *)param)->bench_func(param);
// Terminates the calling thread with a NULL result, which is why the
// function has no return statement.
pthread_exit(NULL);
}
#endif
// Number of rows in the TESTS table below.
#define NB_TESTS 5
// Benchmark table with four entries per row: benchmark driver, BLAS routine,
// matrix allocator, display name. main() reads row `test_id` at indices
// test_id * 4 through test_id * 4 + 3.
// NOTE(review): function pointers are stored as void * here; ISO C does not
// guarantee that conversion, although common platforms accept it.
void * TESTS[4 * NB_TESTS] = {
trmv_bench, ztrmv_, z_create_matrix, "ztrmv",
gemv_bench, dgemv_, d_create_matrix, "dgemv",
gemv_bench, zgemv_, z_create_matrix, "zgemv",
ger_bench, dger_, d_create_matrix, "dger",
ger_bench, zgerc_, z_create_matrix, "zgerc",
};
// Seconds elapsed on CLOCK_MONOTONIC between `tick` and the moment of the call.
inline static double delta_time(struct timespec tick) {
  struct timespec now;
  double whole_seconds;
  double fractional_seconds;

  clock_gettime(CLOCK_MONOTONIC, &now);
  whole_seconds = now.tv_sec - tick.tv_sec;
  fractional_seconds = (now.tv_nsec - tick.tv_nsec) / 1e9;
  return whole_seconds + fractional_seconds;
}
// Runs the benchmark in `param` on `nb_threads` POSIX threads at once. Each
// thread receives a copy of the parameters whose loop budget is
// n_loop / nb_threads. Returns the seconds elapsed from just before the first
// pthread_create() until every thread has been joined.
// When _WIN32 is defined no threads are started and 0 is returned.
// If a thread cannot be created, an error is printed and the process exits.
double pthread_bench(BenchParam * param, int nb_threads)
{
#ifdef _WIN32
  return 0;
#else
  BenchParam threaded_param = *param;
  int t, rc;
  struct timespec tick;

  // A non-positive count would give the array below an invalid size and make
  // the division undefined, so run at least one thread.
  if (nb_threads < 1)
    nb_threads = 1;

  pthread_t threads[nb_threads];
  threaded_param.n_loop /= nb_threads;

  clock_gettime(CLOCK_MONOTONIC, &tick);
  for(t=0; t<nb_threads; t++){
    // All threads share the same parameter copy; the benchmarks only read it.
    rc = pthread_create(&threads[t], NULL, pthread_func_wrapper, &threaded_param);
    if (rc){
      printf("ERROR; return code from pthread_create() is %d\n", rc);
      exit(-1);
    }
  }
  for(t=0; t<nb_threads; t++){
    pthread_join(threads[t], NULL);
  }
  return delta_time(tick);
#endif
}
// Runs the benchmark in `param` once on the calling thread and returns the
// elapsed time in seconds.
double seq_bench(BenchParam * param) {
  struct timespec started_at;
  double elapsed;

  clock_gettime(CLOCK_MONOTONIC, &started_at);
  (*param->bench_func)(param);
  elapsed = delta_time(started_at);
  return elapsed;
}
// Runs the benchmark in `param` once per OpenMP thread inside a parallel
// loop and returns the elapsed time in seconds.
double omp_bench(BenchParam * param) {
// Private copy so the caller's loop budget is left untouched.
BenchParam threaded_param = *param;
struct timespec tick;
int t;
int nb_threads = omp_get_max_threads();
// Split the loop budget between the threads (integer division).
threaded_param.n_loop /= nb_threads;
clock_gettime(CLOCK_MONOTONIC, &tick);
// One iteration per thread; every iteration uses the same parameter copy.
#pragma omp parallel for
for(t = 0; t < nb_threads; t ++){
param->bench_func(&threaded_param);
}
return delta_time(tick);
}
// Runs every benchmark in TESTS over matrix sizes growing geometrically from
// MIN_SIZE up to at most MAX_SIZE. For each size it prints the sequential,
// OpenMP and pthread timings together with the speedups relative to the
// sequential run.
int main(int argc, char * argv[]) {
  // Growth factor: NB_SIZE steps of this factor span MIN_SIZE to MAX_SIZE.
  double growth = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE);
  BenchParam bench;
  int row;
  double dim;

  printf ("Running on %d threads\n", omp_get_max_threads());

  for (row = 0; row < NB_TESTS; row++) {
    // Each table row is: driver, BLAS routine, allocator, display name.
    void ** entry = &TESTS[row * 4];
    bench.bench_func = entry[0];
    bench.blas_func = entry[1];
    bench.create_matrix = entry[2];
    printf("\nBenchmark of %s\n", (char*)entry[3]);
    bench.n_loop = NLOOP;

    for (dim = MIN_SIZE; dim <= MAX_SIZE; dim *= growth) {
      double t_seq, t_omp, t_pthread;

      // Round the fractional size to the nearest integer.
      bench.matrix_size = (int)(dim + 0.5);
      t_seq = seq_bench(&bench);
      t_omp = omp_bench(&bench);
      t_pthread = pthread_bench(&bench, omp_get_max_threads());
      printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, "
             "pthread %gs, speedup %g\n",
             bench.matrix_size, t_seq,
             t_omp, t_seq / t_omp,
             t_pthread, t_seq / t_pthread);
    }
  }
  return 0;
}

View File

@@ -6,7 +6,6 @@ $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
$hostarch = "x86_64" if ($hostarch eq "amd64");
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64");
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
$binary = $ENV{"BINARY"};
@@ -31,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
$cross_suffix = $1;
}
} else {
if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) {
if ($ARGV[0] =~ /(.*-)(.*)/) {
$cross_suffix = $1;
}
}

350
cblas_noconst.h Normal file
View File

@@ -0,0 +1,350 @@
#ifndef CBLAS_H
#define CBLAS_H
#include <stddef.h>
#include "common.h"
#ifdef __cplusplus
extern "C" {
/* Assume C declarations for C++ */
#endif /* __cplusplus */
/* Set the number of threads at runtime. */
void openblas_set_num_threads(int num_threads);
/* Same signature under the goto_ prefix; presumably kept for GotoBLAS compatibility - TODO confirm. */
void goto_set_num_threads(int num_threads);
/* Get the number of threads at runtime. */
int openblas_get_num_threads(void);
/* Get the number of physical processors (cores). */
int openblas_get_num_procs(void);
/* Get the build configuration string at runtime. */
char* openblas_get_config(void);
/* Get the parallelization type used by OpenBLAS; the possible values are presumably the OPENBLAS_* constants below. */
int openblas_get_parallel(void);
/* OpenBLAS is compiled for sequential use */
#define OPENBLAS_SEQUENTIAL 0
/* OpenBLAS is compiled using normal threading model */
#define OPENBLAS_THREAD 1
/* OpenBLAS is compiled using OpenMP threading model */
#define OPENBLAS_OPENMP 2
/* Index type returned by the cblas_i?amax functions declared in this header. */
#define CBLAS_INDEX size_t
/* Matrix storage order argument. */
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
/* Transposition / conjugation argument for a matrix operand. */
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
/* Selects the upper or lower triangle of a matrix. */
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
/* States whether a triangular matrix has a unit diagonal. */
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
/* Side on which a matrix operand is applied. */
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
float cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
float cblas_sasum (blasint n, float *x, blasint incx);
double cblas_dasum (blasint n, double *x, blasint incx);
float cblas_scasum(blasint n, float *x, blasint incx);
double cblas_dzasum(blasint n, double *x, blasint incx);
float cblas_snrm2 (blasint N, float *X, blasint incX);
double cblas_dnrm2 (blasint N, double *X, blasint incX);
float cblas_scnrm2(blasint N, float *X, blasint incX);
double cblas_dznrm2(blasint N, double *X, blasint incX);
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy);
void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy);
void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy);
void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy);
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
void cblas_srotg(float *a, float *b, float *c, float *s);
void cblas_drotg(double *a, double *b, double *c, double *s);
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
blasint incX, float *Y, blasint incY, float *A, blasint lda);
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
blasint incX, double *Y, blasint incY, double *A, blasint lda);
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
float *Y, blasint incY, float *A, blasint lda);
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
double *Y, blasint incY, double *A, blasint lda);
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, float *Ap, float *X, blasint incX);
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, double *Ap, double *X, blasint incX);
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, float *Ap, float *X, blasint incX);
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, double *Ap, double *X, blasint incX);
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, float *Ap, float *X, blasint incX);
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, double *Ap, double *X, blasint incX);
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, float *Ap, float *X, blasint incX);
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
blasint N, double *Ap, double *X, blasint incX);
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
float *X, blasint incX, float beta, float *Y, blasint incY);
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
double *X, blasint incX, double beta, double *Y, blasint incY);
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
void cblas_xerbla(blasint p, char *rout, char *form, ...);
/*** BLAS extensions ***/
void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy);
void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy);
void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy);
void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy);
void cblas_somatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
blasint clda, float *b, blasint cldb);
void cblas_domatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
blasint clda, double *b, blasint cldb);
void cblas_comatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
blasint clda, void *b, blasint cldb);
void cblas_zomatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a,
blasint clda, void *b, blasint cldb);
void cblas_simatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a,
blasint clda, blasint cldb);
void cblas_dimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a,
blasint clda, blasint cldb);
void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float* calpha, float* a,
blasint clda, blasint cldb);
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
blasint clda, blasint cldb);
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
float *c, blasint cldc);
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
double *c, blasint cldc);
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
float *c, blasint cldc);
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
double *c, blasint cldc);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif

View File

@@ -1,115 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets various variables based on architecture.
# x86 / x86_64: binary-mode flag and extended-precision (EXPRECISION) options.
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")

  # Plain x86 with no BINARY value requested.
  if (${ARCH} STREQUAL "x86" AND NOT BINARY)
    set(NO_BINARY_MODE 1)
  endif ()

  # Extended precision is only considered when the Fortran compiler name
  # matches GFORTRAN and the user has not set NO_EXPRECISION.
  if (NOT NO_EXPRECISION)
    if (${F_COMPILER} MATCHES "GFORTRAN")
      # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
      if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
        set(EXPRECISION 1)
        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
      elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
        # Clang gets the define only; -m128bit-long-double goes to the
        # Fortran options alone.
        set(EXPRECISION 1)
        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
      endif ()
    endif ()
  endif ()

endif ()
# Intel C compiler: pass -wd981.
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
  set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
endif ()

# OpenMP: each C compiler ID gets its own switch appended to CCOMMON_OPT.
if (USE_OPENMP)
  if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
    # The flag is still added after the warning.
    message(WARNING "Clang doesn't support OpenMP yet.")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
    set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" OR ${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
    # OPEN64 additionally links libstdc++.
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
    set(CEXTRALIB "${CEXTRALIB} -lstdc++")
  endif ()
endif ()
# DYNAMIC_ARCH: build the space-separated list of cores for the target
# architecture. DYNAMIC_ARCH is dropped again when no core list results.
if (DYNAMIC_ARCH)

  if (${ARCH} STREQUAL "x86")
    set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
  elseif (${ARCH} STREQUAL "x86_64")
    set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")

    # Extra cores unless the user opted out with NO_AVX / NO_AVX2.
    if (NOT NO_AVX)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
    endif ()
    if (NOT NO_AVX2)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
    endif ()
  endif ()

  if (NOT DYNAMIC_CORE)
    unset(DYNAMIC_ARCH)
  endif ()

endif ()
# Architectures other than x86: set NO_BINARY_MODE and, for all but mips64,
# BINARY_DEFINED as well.
if (${ARCH} STREQUAL "ia64")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)

  # Placeholder kept from the Makefile port: extended precision is
  # intentionally left disabled here.
  if (${F_COMPILER} MATCHES "GFORTRAN")
    if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
      # EXPRECISION = 1
      # CCOMMON_OPT += -DEXPRECISION
    endif ()
  endif ()
endif ()

# mips64 sets NO_BINARY_MODE only.
if (${ARCH} STREQUAL "mips64")
  set(NO_BINARY_MODE 1)
endif ()

if (${ARCH} STREQUAL "alpha" OR ${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)
endif ()

View File

@@ -1,89 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from the OpenBLAS/c_check perl script.
## This is triggered by prebuild.cmake and runs before any of the code is built.
## Creates config.h and Makefile.conf.
# CMake vars set by this file:
# OSNAME (use CMAKE_SYSTEM_NAME)
# ARCH
# C_COMPILER (use CMAKE_C_COMPILER)
# BINARY32
# BINARY64
# FU
# CROSS_SUFFIX
# CROSS
# CEXTRALIB
# Defines set by this file:
# OS_
# ARCH_
# C_
# __32BIT__
# __64BIT__
# FUNDERSCORE
# PTHREAD_CREATE_FUNC
# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.
# FU is written to the config file as FUNDERSCORE: "_" on Apple and MSVC
# builds, empty everywhere else.
if (APPLE OR MSVC)
  set(FU "_")
else ()
  set(FU "")
endif ()
# Convert CMake vars into the format that OpenBLAS expects:
# upper-cased system name, with WINDOWS renamed to WINNT.
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
if (${HOST_OS} STREQUAL "WINDOWS")
  set(HOST_OS WINNT)
endif ()

# added by hpa - check size of void ptr to detect 64-bit compile
# A BINARY value supplied by the caller always wins.
if (NOT DEFINED BINARY)
  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(BINARY 64)
  else ()
    set(BINARY 32)
  endif ()
endif ()

# Exactly one of BINARY32 / BINARY64 is set; anything other than 64 counts
# as 32-bit.
if (NOT BINARY EQUAL 64)
  set(BINARY32 1)
else ()
  set(BINARY64 1)
endif ()
# CMake docs define these:
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
#
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
#
# Derive ARCH from CMAKE_SYSTEM_PROCESSOR and normalise the spellings that the
# rest of the build compares against ("x86", "x86_64").
set(ARCH "${CMAKE_SYSTEM_PROCESSOR}")

# An empty processor name would otherwise surface as a cryptic if() parse
# error further down; fail with a clear message instead.
if ("${ARCH}" STREQUAL "")
  message(FATAL_ERROR "CMAKE_SYSTEM_PROCESSOR is empty; cannot determine the target ARCH. Set CMAKE_SYSTEM_PROCESSOR (e.g. in the toolchain file).")
endif ()

if ("${ARCH}" STREQUAL "AMD64")
  set(ARCH "x86_64")
elseif ("${ARCH}" STREQUAL "X86" OR "${ARCH}" MATCHES "^i[3-6]86$")
  # "X86" plus the i386..i686 names used for 32-bit x86 processors.
  set(ARCH "x86")
endif ()

# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong
if ("${ARCH}" STREQUAL "x86_64" AND BINARY EQUAL 32)
  set(ARCH "x86")
endif ()
# Identify the C compiler for the C_<id> define. This must come from the C
# compiler (not the C++ one): the two can differ, and CMAKE_CXX_COMPILER_ID is
# empty when C++ is not enabled.
set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")
if ("${COMPILER_ID}" STREQUAL "GNU")
  # GNU is written out as GCC.
  set(COMPILER_ID "GCC")
endif ()

string(TOUPPER "${ARCH}" UC_ARCH)

# Start the config file (file(WRITE) truncates any previous content) with the
# OS, architecture, compiler, bitness and underscore defines.
file(WRITE ${TARGET_CONF}
  "#define OS_${HOST_OS}\t1\n"
  "#define ARCH_${UC_ARCH}\t1\n"
  "#define C_${COMPILER_ID}\t1\n"
  "#define __${BINARY}BIT__\t1\n"
  "#define FUNDERSCORE\t${FU}\n")

View File

@@ -1,103 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets C related variables.
# GCC-compatible front ends (GNU, LSB, Clang).
# The compiler is identified via CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER holds
# the path of the compiler executable and never equals an ID such as "GNU".
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")

  set(CCOMMON_OPT "${CCOMMON_OPT} -Wall")
  set(COMMON_PROF "${COMMON_PROF} -fno-inline")
  set(NO_UNINITIALIZED_WARN "-Wno-uninitialized")

  # QUIET_MAKE silences the uninitialized/unused warnings.
  if (QUIET_MAKE)
    set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused")
  endif ()

  if (NO_BINARY_MODE)

    # ARCH, CORE and OSNAME are quoted so that an unset variable compares as
    # an empty string instead of breaking the if() syntax.
    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
      else ()
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32")
      endif ()
      set(BINARY_DEFINED 1)
    endif ()

    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
      set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
    endif ()

    # NOTE(review): OSNAME may not be set by the CMake port (c_check.cmake
    # points at CMAKE_SYSTEM_NAME), so both are accepted here - confirm.
    if ("${OSNAME}" STREQUAL "AIX" OR "${CMAKE_SYSTEM_NAME}" STREQUAL "AIX")
      set(BINARY_DEFINED 1)
    endif ()
  endif ()

  # No architecture-specific ABI flag chosen above: fall back to -m64/-m32.
  if (NOT BINARY_DEFINED)
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()

endif ()
# PGI and PATHSCALE C compilers.
# Compared against CMAKE_C_COMPILER_ID: CMAKE_C_COMPILER is the path of the
# compiler executable and never equals an ID string.
# NOTE(review): the ID spellings are kept as the other compiler checks in this
# build system use them; confirm they match what CMake reports for these
# compilers.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7")
  endif ()
endif ()

if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  endif ()
endif ()
# OPEN64 C compiler.
# Compared against CMAKE_C_COMPILER_ID: CMAKE_C_COMPILER is the path of the
# compiler executable and never equals an ID string.
# NOTE(review): confirm "OPEN64" is the ID CMake reports for this compiler.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")

  if ("${ARCH}" STREQUAL "mips64")

    if (NOT BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -n64")
    endif ()

    # CORE is quoted so an unset CORE compares as an empty string.
    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
    endif ()

  else ()

    # 64-bit builds get -m64 and 32-bit builds get -m32 (the two flags were
    # swapped before).
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()

endif ()
# SUN C compiler.
# Compared against CMAKE_C_COMPILER_ID: CMAKE_C_COMPILER is the path of the
# compiler executable and never equals an ID string.
# NOTE(review): confirm "SUN" is the ID CMake reports for this compiler.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "SUN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -w")
  if ("${ARCH}" STREQUAL "x86")
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  else ()
    # NOTE(review): this appends -m64 to the Fortran options, not the C ones.
    # Left unchanged; confirm whether CCOMMON_OPT was intended.
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  endif ()
endif ()

View File

@@ -1,60 +0,0 @@
# Only generate .def for dll on MSVC.
# Runs exports/gensymbol (perl) before the library is linked and redirects
# its output to openblas.def in the project binary directory.
if(MSVC)

  set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)

  # Architecture argument: defaults to x86_64, forced to GENERIC for the
  # "generic" core.
  if (NOT DEFINED ARCH)
    set(ARCH_IN "x86_64")
  else()
    set(ARCH_IN ${ARCH})
  endif()

  # Quoted so that an unset CORE compares as an empty string instead of
  # breaking the if() syntax.
  if ("${CORE}" STREQUAL "generic")
    set(ARCH_IN "GENERIC")
  endif ()

  # Each of these options is passed to gensymbol as <name>_IN: the option's
  # value when it is defined, 0 otherwise.
  foreach (export_opt EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS)
    if (DEFINED ${export_opt})
      set(${export_opt}_IN ${${export_opt}})
    else ()
      set(${export_opt}_IN 0)
    endif ()
  endforeach ()

  # PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps the script path
  # correct when this project is built as part of a larger tree, and matches
  # the PROJECT_BINARY_DIR used for the output file.
  add_custom_command(
    TARGET ${OpenBLAS_LIBNAME} PRE_LINK
    COMMAND perl
    ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
    COMMENT "Create openblas.def file"
    VERBATIM)

endif()

View File

@@ -1,66 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
## Copyright: (c) Stat-Ease, Inc.
## Created: 12/29/14
## Last Modified: 12/29/14
## Description: Ported from the OpenBLAS/f_check perl script.
## This is triggered by prebuild.cmake and runs before any of the code is built.
## Appends Fortran information to config.h and Makefile.conf.
# CMake vars set by this file:
# F_COMPILER
# FC
# BU
# NOFORTRAN
# NEED2UNDERSCORES
# FEXTRALIB
# Defines set by this file:
# BUNDERSCORE
# NEEDBUNDERSCORE
# NEED2UNDERSCORES
# Fortran compiler selection.
if (MSVC)
  # had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa
  include(CMakeForceCompiler)
  cmake_force_fortran_compiler(gfortran GNU)
endif ()

# With NO_LAPACK the compiler is forced to gfortran instead of enabling the
# Fortran language.
if (NO_LAPACK)
  include(CMakeForceCompiler)
  cmake_force_fortran_compiler(gfortran GNU)
else ()
  enable_language(Fortran)
endif ()
# N.B. f_check is not cross-platform, so instead try to use CMake variables.
# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
# TODO: set FEXTRALIB flags a la f_check?
set(BU "_")

if (ONLY_CBLAS)
  # When we only build CBLAS, we set NOFORTRAN=2
  # (CMake handles the Fortran compiler, so F_COMPILER is not set here.)
  set(NOFORTRAN 2)
  set(NO_FBLAS 1)
endif ()

# Defines written in both modes.
file(APPEND ${TARGET_CONF}
  "#define BUNDERSCORE _\n"
  "#define NEEDBUNDERSCORE 1\n")

# NEED2UNDERSCORES is only written for the full (non CBLAS-only) build.
if (NOT ONLY_CBLAS)
  file(APPEND ${TARGET_CONF}
    "#define NEED2UNDERSCORES 0\n")
endif ()
# F_COMPILER is the upper-cased file name of the Fortran compiler executable,
# without directory or extension. Both arguments are quoted so that an empty
# CMAKE_Fortran_COMPILER yields an empty F_COMPILER instead of a
# wrong-number-of-arguments error.
get_filename_component(F_COMPILER "${CMAKE_Fortran_COMPILER}" NAME_WE)
string(TOUPPER "${F_COMPILER}" F_COMPILER)

View File

@@ -1,200 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets Fortran related variables.
# g77: interface define, warnings, and -m64/-m32 unless NO_BINARY_MODE is set.
if (${F_COMPILER} STREQUAL "G77")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  if (NOT NO_BINARY_MODE AND BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  elseif (NOT NO_BINARY_MODE)
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  endif ()
endif ()

# g95: same options as g77 with its own interface define.
if (${F_COMPILER} STREQUAL "G95")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  if (NOT NO_BINARY_MODE AND BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  elseif (NOT NO_BINARY_MODE)
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  endif ()
endif ()
# gfortran: interface define, warnings, runtime library, ABI and OpenMP flags.
if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")

  #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
  if (NOT NO_LAPACK)
    # Append to the existing EXTRALIB value (the "$" was missing, which
    # replaced EXTRALIB with the literal text "{EXTRALIB} -lgfortran").
    set(EXTRALIB "${EXTRALIB} -lgfortran")
  endif ()

  if (NO_BINARY_MODE)
    # Only mips64 gets an explicit ABI flag in this mode.
    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
      else ()
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
      endif ()
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
      # INTERFACE64 is only honoured for 64-bit builds.
      if (INTERFACE64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
      endif ()
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
  endif ()
endif ()
# Intel and Fujitsu Fortran compilers. Both use -openmp; only Intel gets -i8
# for INTERFACE64.
if (${F_COMPILER} STREQUAL "INTEL")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")
  if (INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
elseif (${F_COMPILER} STREQUAL "FUJITSU")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
# IBM Fortran compiler.
if (${F_COMPILER} STREQUAL "IBM")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM")
  # FCOMMON_OPT += -qarch=440

  # -q32 / -q64, plus -qintsize=8 for 64-bit builds with INTERFACE64.
  if (NOT BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -q32")
  elseif (INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -q64 -qintsize=8")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -q64")
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
# PGI Fortran compiler.
if (${F_COMPILER} STREQUAL "PGI")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI")
  set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER")

  # -i8 (64-bit builds with INTERFACE64 only) goes in front of the -tp option.
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  if (BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
# PathScale Fortran compiler.
if (${F_COMPILER} STREQUAL "PATHSCALE")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE")

  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  # mips64 takes -mabi=...; every other architecture takes -m32/-m64.
  if (${ARCH} STREQUAL "mips64")
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
# Open64 Fortran compiler.
if (${F_COMPILER} STREQUAL "OPEN64")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64")

  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  if (${ARCH} STREQUAL "mips64")
    # mips64: -n32/-n64, plus the Loongson options for the 3A/3B cores.
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -n64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
    endif ()
    if (${CORE} STREQUAL "LOONGSON3A" OR ${CORE} STREQUAL "LOONGSON3B")
      set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  # OpenMP also adds libstdc++ to the Fortran link libraries.
  if (USE_OPENMP)
    set(FEXTRALIB "${FEXTRALIB} -lstdc++")
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
# Sun and Compaq Fortran compilers.
if (${F_COMPILER} STREQUAL "SUN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")

  # -m32 for x86, -m64 for every other architecture.
  if (NOT ${ARCH} STREQUAL "x86")
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel")
  endif ()
elseif (${F_COMPILER} STREQUAL "COMPAQ")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
# from the root Makefile - this is for lapack-netlib to compile the correct secnd file.
# TIMER is INT_ETIME for gfortran and NONE for every other Fortran compiler.
set(TIMER "NONE")
if (${F_COMPILER} STREQUAL "GFORTRAN")
  set(TIMER "INT_ETIME")
endif ()

View File

@@ -1,165 +0,0 @@
# helper functions for the kernel CMakeLists.txt
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
# SetDefaultL1()
# Assigns the default source file to every <prefix><OP>KERNEL variable listed
# below. Takes no arguments; being a macro, it sets the variables directly in
# the caller's scope.
macro(SetDefaultL1)
  # amax
  set(SAMAXKERNEL amax.S)
  set(DAMAXKERNEL amax.S)
  set(QAMAXKERNEL amax.S)
  set(CAMAXKERNEL zamax.S)
  set(ZAMAXKERNEL zamax.S)
  set(XAMAXKERNEL zamax.S)

  # amin
  set(SAMINKERNEL amin.S)
  set(DAMINKERNEL amin.S)
  set(QAMINKERNEL amin.S)
  set(CAMINKERNEL zamin.S)
  set(ZAMINKERNEL zamin.S)
  set(XAMINKERNEL zamin.S)

  # max / min
  set(SMAXKERNEL max.S)
  set(DMAXKERNEL max.S)
  set(QMAXKERNEL max.S)
  set(SMINKERNEL min.S)
  set(DMINKERNEL min.S)
  set(QMINKERNEL min.S)

  # i?amax
  set(ISAMAXKERNEL iamax.S)
  set(IDAMAXKERNEL iamax.S)
  set(IQAMAXKERNEL iamax.S)
  set(ICAMAXKERNEL izamax.S)
  set(IZAMAXKERNEL izamax.S)
  set(IXAMAXKERNEL izamax.S)

  # i?amin
  set(ISAMINKERNEL iamin.S)
  set(IDAMINKERNEL iamin.S)
  set(IQAMINKERNEL iamin.S)
  set(ICAMINKERNEL izamin.S)
  set(IZAMINKERNEL izamin.S)
  set(IXAMINKERNEL izamin.S)

  # i?max / i?min default to the iamax / iamin sources
  set(ISMAXKERNEL iamax.S)
  set(IDMAXKERNEL iamax.S)
  set(IQMAXKERNEL iamax.S)
  set(ISMINKERNEL iamin.S)
  set(IDMINKERNEL iamin.S)
  set(IQMINKERNEL iamin.S)

  # asum
  set(SASUMKERNEL asum.S)
  set(DASUMKERNEL asum.S)
  set(QASUMKERNEL asum.S)
  set(CASUMKERNEL zasum.S)
  set(ZASUMKERNEL zasum.S)
  set(XASUMKERNEL zasum.S)

  # axpy
  set(SAXPYKERNEL axpy.S)
  set(DAXPYKERNEL axpy.S)
  set(QAXPYKERNEL axpy.S)
  set(CAXPYKERNEL zaxpy.S)
  set(ZAXPYKERNEL zaxpy.S)
  set(XAXPYKERNEL zaxpy.S)

  # copy
  set(SCOPYKERNEL copy.S)
  set(DCOPYKERNEL copy.S)
  set(QCOPYKERNEL copy.S)
  set(CCOPYKERNEL zcopy.S)
  set(ZCOPYKERNEL zcopy.S)
  set(XCOPYKERNEL zcopy.S)

  # dot
  set(SDOTKERNEL dot.S)
  set(DDOTKERNEL dot.S)
  set(QDOTKERNEL dot.S)
  set(CDOTKERNEL zdot.S)
  set(ZDOTKERNEL zdot.S)
  set(XDOTKERNEL zdot.S)

  # nrm2
  set(SNRM2KERNEL nrm2.S)
  set(DNRM2KERNEL nrm2.S)
  set(QNRM2KERNEL nrm2.S)
  set(CNRM2KERNEL znrm2.S)
  set(ZNRM2KERNEL znrm2.S)
  set(XNRM2KERNEL znrm2.S)

  # rot
  set(SROTKERNEL rot.S)
  set(DROTKERNEL rot.S)
  set(QROTKERNEL rot.S)
  set(CROTKERNEL zrot.S)
  set(ZROTKERNEL zrot.S)
  set(XROTKERNEL zrot.S)

  # scal
  set(SSCALKERNEL scal.S)
  set(DSCALKERNEL scal.S)
  set(QSCALKERNEL scal.S)
  set(CSCALKERNEL zscal.S)
  set(ZSCALKERNEL zscal.S)
  set(XSCALKERNEL zscal.S)

  # swap
  set(SSWAPKERNEL swap.S)
  set(DSWAPKERNEL swap.S)
  set(QSWAPKERNEL swap.S)
  set(CSWAPKERNEL zswap.S)
  set(ZSWAPKERNEL zswap.S)
  set(XSWAPKERNEL zswap.S)

  # gemv (n / t)
  set(SGEMVNKERNEL gemv_n.S)
  set(DGEMVNKERNEL gemv_n.S)
  set(QGEMVNKERNEL gemv_n.S)
  set(CGEMVNKERNEL zgemv_n.S)
  set(ZGEMVNKERNEL zgemv_n.S)
  set(XGEMVNKERNEL zgemv_n.S)
  set(SGEMVTKERNEL gemv_t.S)
  set(DGEMVTKERNEL gemv_t.S)
  set(QGEMVTKERNEL gemv_t.S)
  set(CGEMVTKERNEL zgemv_t.S)
  set(ZGEMVTKERNEL zgemv_t.S)
  set(XGEMVTKERNEL zgemv_t.S)

  # C sources taken from sibling kernel directories
  set(SCABS_KERNEL ../generic/cabs.c)
  set(DCABS_KERNEL ../generic/cabs.c)
  set(QCABS_KERNEL ../generic/cabs.c)
  set(LSAME_KERNEL ../generic/lsame.c)

  # axpby
  set(SAXPBYKERNEL ../arm/axpby.c)
  set(DAXPBYKERNEL ../arm/axpby.c)
  set(CAXPBYKERNEL ../arm/zaxpby.c)
  set(ZAXPBYKERNEL ../arm/zaxpby.c)
endmacro ()
# SetDefaultL2()
# Assigns the default source file to the gemv, ger, symv and hemv kernel
# variables. Takes no arguments; being a macro, it sets the variables
# directly in the caller's scope.
macro(SetDefaultL2)
  # gemv (n / t)
  set(SGEMVNKERNEL gemv_n.S)
  set(DGEMVNKERNEL gemv_n.S)
  set(QGEMVNKERNEL gemv_n.S)
  set(CGEMVNKERNEL zgemv_n.S)
  set(ZGEMVNKERNEL zgemv_n.S)
  set(XGEMVNKERNEL zgemv_n.S)
  set(SGEMVTKERNEL gemv_t.S)
  set(DGEMVTKERNEL gemv_t.S)
  set(QGEMVTKERNEL gemv_t.S)
  set(CGEMVTKERNEL zgemv_t.S)
  set(ZGEMVTKERNEL zgemv_t.S)
  set(XGEMVTKERNEL zgemv_t.S)

  # ger
  set(SGERKERNEL ../generic/ger.c)
  set(DGERKERNEL ../generic/ger.c)
  set(QGERKERNEL ../generic/ger.c)

  # geru / gerc
  set(CGERUKERNEL ../generic/zger.c)
  set(ZGERUKERNEL ../generic/zger.c)
  set(XGERUKERNEL ../generic/zger.c)
  set(CGERCKERNEL ../generic/zger.c)
  set(ZGERCKERNEL ../generic/zger.c)
  set(XGERCKERNEL ../generic/zger.c)

  # symv (U / L)
  set(SSYMV_U_KERNEL ../generic/symv_k.c)
  set(DSYMV_U_KERNEL ../generic/symv_k.c)
  set(QSYMV_U_KERNEL ../generic/symv_k.c)
  set(SSYMV_L_KERNEL ../generic/symv_k.c)
  set(DSYMV_L_KERNEL ../generic/symv_k.c)
  set(QSYMV_L_KERNEL ../generic/symv_k.c)
  set(CSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(ZSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(XSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(CSYMV_L_KERNEL ../generic/zsymv_k.c)
  set(ZSYMV_L_KERNEL ../generic/zsymv_k.c)
  set(XSYMV_L_KERNEL ../generic/zsymv_k.c)

  # hemv (U / L / V / M)
  set(CHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_M_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_M_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
endmacro ()
# SetDefaultL3()
# Assigns the default geadd kernel sources. Takes no arguments; being a
# macro, it sets the variables directly in the caller's scope.
macro(SetDefaultL3)
  # S and D share geadd.c
  set(SGEADD_KERNEL ../generic/geadd.c)
  set(DGEADD_KERNEL ../generic/geadd.c)

  # C and Z share zgeadd.c
  set(CGEADD_KERNEL ../generic/zgeadd.c)
  set(ZGEADD_KERNEL ../generic/zgeadd.c)
endmacro ()

View File

@@ -1,347 +0,0 @@
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
# Auxiliary LAPACK sources that are always built: LA_REL_SRC is initialised
# from this list regardless of which precisions are enabled.
# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC (prefixed further down in
# this file), which is why the INSTALL files are reached through ../INSTALL/.
set(ALLAUX
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
../INSTALL/ilaver.f ../INSTALL/slamch.f
)
# Auxiliary sources appended to the build when either BUILD_SINGLE or
# BUILD_COMPLEX is enabled (see the LA_REL_SRC assembly further down).
# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC.
# NOTE(review): the last entry expands ${TIMER}, which is not set in this
# file -- it must be defined before this file is included; confirm.
set(SCLAUX
sbdsdc.f
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
slagts.f slamrg.f slanst.f
slapy2.f slapy3.f slarnv.f
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
slarrk.f slarrr.f slaneg.f
slartg.f slaruv.f slas2.f slascl.f
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
ssteqr.f ssterf.f slaisnan.f sisnan.f
slartgp.f slartgs.f
../INSTALL/second_${TIMER}.f
)
# Auxiliary sources appended to the build when either BUILD_DOUBLE or
# BUILD_COMPLEX16 is enabled (see the LA_REL_SRC assembly further down).
# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC.
# NOTE(review): the last entry expands ${TIMER}, which is not set in this
# file -- it must be defined before this file is included; confirm.
set(DZLAUX
dbdsdc.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
dlagts.f dlamrg.f dlanst.f
dlapy2.f dlapy3.f dlarnv.f
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
dlarrk.f dlarrr.f dlaneg.f
dlartg.f dlaruv.f dlas2.f dlascl.f
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
)
# Sources appended to the build when BUILD_SINGLE is enabled.
# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC; some live in the
# DEPRECATED/ subdirectory of that folder.
set(SLASRC
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
sgetc2.f sgetri.f
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
sggglm.f sgghrd.f sgglse.f sggqrf.f
sggrqf.f DEPRECATED/sggsvd.f DEPRECATED/sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
slansy.f slantb.f slantp.f slantr.f slanv2.f
slapll.f slapmt.f
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
sorgrq.f sorgtr.f sorm2l.f sorm2r.f
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
spbstf.f spbsv.f spbsvx.f
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
sposvx.f spstrf.f spstf2.f
sppcon.f sppequ.f
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
sstevx.f
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
ssytri_rook.f ssycon_rook.f ssysv_rook.f
stbcon.f
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
stptrs.f
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
sgeequb.f ssyequb.f spoequb.f sgbequb.f
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
)
# Appended for both BUILD_SINGLE and BUILD_DOUBLE (see the LA_REL_SRC assembly
# further down).
set(DSLASRC spotrs.f)
# Sources appended to the build when BUILD_COMPLEX is enabled.
# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC; some live in the
# DEPRECATED/ subdirectory of that folder.
set(CLASRC
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
cgesvx.f cgetc2.f cgetri.f
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
cgghrd.f cgglse.f cggqrf.f cggrqf.f
DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
chetf2.f chetrd.f
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
chetrs.f chetrs2.f
chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
chgeqz.f chpcon.f chpev.f chpevd.f
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
chpsvx.f
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
claed0.f claed7.f claed8.f
claein.f claesy.f claev2.f clags2.f clagtm.f
clahef.f clahef_rook.f clahqr.f
DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
clanhb.f clanhe.f
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
cposv.f cposvx.f cpstrf.f cpstf2.f
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
crot.f cspcon.f csprfs.f cspsv.f
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
cstegr.f cstein.f csteqr.f
csycon.f
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
csyswapr.f csytrs.f csytrs2.f csyconv.f
csytf2_rook.f csytrf_rook.f csytrs_rook.f
csytri_rook.f csycon_rook.f csysv_rook.f
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
ctprfs.f ctptri.f
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
)
# Appended for both BUILD_COMPLEX and BUILD_COMPLEX16 (see the LA_REL_SRC
# assembly further down).
set(ZCLASRC cpotrs.f)
# Sources appended to the build when BUILD_DOUBLE is enabled.
# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC; some live in the
# DEPRECATED/ subdirectory of that folder.
set(DLASRC
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
dgetc2.f dgetri.f
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
dggglm.f dgghrd.f dgglse.f dggqrf.f
dggrqf.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
dlapll.f dlapmt.f
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
dlargv.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
dorgrq.f dorgtr.f dorm2l.f dorm2r.f
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
dpbstf.f dpbsv.f dpbsvx.f
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
dposvx.f dpotrs.f dpstrf.f dpstf2.f
dppcon.f dppequ.f
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
dstevx.f
dsycon.f dsyev.f dsyevd.f dsyevr.f
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
dsysv.f dsysvx.f
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
dsytri_rook.f dsycon_rook.f dsysv_rook.f
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
dtptrs.f
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
)
# Sources appended to the build when BUILD_COMPLEX16 is enabled.
# Entries are relative to ${NETLIB_LAPACK_DIR}/SRC; some live in the
# DEPRECATED/ subdirectory of that folder.
set(ZLASRC
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
zgetri.f
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
zgghrd.f zgglse.f zggqrf.f zggrqf.f
DEPRECATED/zggsvd.f DEPRECATED/zggsvp.f
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
zhetf2.f zhetrd.f
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
zhetrs.f zhetrs2.f
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
zhpsvx.f
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
zlaed0.f zlaed7.f zlaed8.f
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
zlahef.f zlahef_rook.f zlahqr.f
DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
zlangt.f zlanhb.f
zlanhe.f
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
zlarcm.f zlarf.f zlarfb.f
zlarfg.f zlarft.f zlarfgp.f
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
zlassq.f zlasyf.f zlasyf_rook.f
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
zrot.f zspcon.f zsprfs.f zspsv.f
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
zstegr.f zstein.f zsteqr.f
zsycon.f
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
zsytri_rook.f zsycon_rook.f zsysv_rook.f
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
ztprfs.f ztptri.f
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
zunmtr.f zupgtr.f
zupmtr.f izmax1.f dzsum1.f zstemr.f
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
)
# Assemble the LAPACK source list for the enabled precisions.
# ALLAUX is always included; each BUILD_* switch adds its own routine list plus
# the auxiliary lists it shares with a sibling precision.
set(LA_REL_SRC ${ALLAUX})
if (BUILD_SINGLE)
  list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
endif ()
if (BUILD_DOUBLE)
  list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
endif ()
if (BUILD_COMPLEX)
  list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
endif ()
if (BUILD_COMPLEX16)
  list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
endif ()

# DSLASRC, ZCLASRC, SCLAUX and DZLAUX are each appended by two of the branches
# above, so with several precisions enabled the same file would be listed
# twice. Keep only the first occurrence of every source.
list(REMOVE_DUPLICATES LA_REL_SRC)

# add lapack-netlib folder to the sources
set(LA_SOURCES "")
foreach (LA_FILE ${LA_REL_SRC})
  list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
endforeach ()

# Every LAPACK source is compiled with the LAPACK-specific Fortran flags.
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")

File diff suppressed because it is too large Load Diff

View File

@@ -1,104 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Detects the OS and sets appropriate variables.
# Per-OS defaults: the md5 command line on Darwin and the BSDs, and the math
# library (plus NO_EXPRECISION on Linux) for Linux and AIX.
# CMAKE_SYSTEM_NAME can only match one of these names, so a single chain is used.
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
  set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
  set(MD5SUM "md5 -r")
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
  set(MD5SUM "md5 -r")
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD")
  set(MD5SUM "md5 -n")
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
  set(EXTRALIB "${EXTRALIB} -lm")
  set(NO_EXPRECISION 1)
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "AIX")
  set(EXTRALIB "${EXTRALIB} -lm")
endif ()
# TODO: this is probably meant for mingw, not other windows compilers
# Windows: no PIC, no extended precision, link advapi32, and enable -DMS_ABI
# for Clang and for GCC >= 4.7.
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
  set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32")

  # probably not going to use these
  set(SUFFIX "obj")
  set(PSUFFIX "pobj")
  set(LIBSUFFIX "a")

  # Variable expansions below are quoted: an empty value would otherwise leave
  # if() without a left-hand operand and abort configuration.
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
  endif ()

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
    # Test for supporting MS_ABI
    # removed string parsing in favor of CMake's version comparison -hpa
    # The trailing newline printed by the compiler is stripped before comparing.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
      OUTPUT_VARIABLE GCC_VERSION
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    if ("${GCC_VERSION}" VERSION_GREATER 4.7 OR "${GCC_VERSION}" VERSION_EQUAL 4.7)
      # GCC Version >=4.7
      # It is compatible with MSVC ABI.
      set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
    endif ()
  endif ()

  # Ensure the correct stack alignment on Win32
  # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
  if ("${ARCH}" STREQUAL "x86")
    if (NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
      set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
    endif ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")
  endif ()
endif ()
# Interix: no PIC, no extended precision, and a fixed tool directory.
if (${CMAKE_SYSTEM_NAME} STREQUAL "Interix")
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
  # A stray STREQUAL keyword used to sit between the name and the value, which
  # made the variable the two-element list "STREQUAL;<path>".
  set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin")
endif ()
# Cygwin: no PIC and no extended precision.
if (CYGWIN)
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
endif ()

# Threaded builds link pthread everywhere except Windows and Interix.
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix" AND SMP)
  set(EXTRALIB "${EXTRALIB} -lpthread")
endif ()

# Quad precision and 32-bit x86 both turn off extended precision.
if (QUAD_PRECISION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION")
  set(NO_EXPRECISION 1)
endif ()
if (${ARCH} STREQUAL "x86")
  set(NO_EXPRECISION 1)
endif ()

# UTEST_CHECK implies SANITY_CHECK, so it must be handled first.
if (UTEST_CHECK)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
  set(SANITY_CHECK 1)
endif ()
if (SANITY_CHECK)
  # TODO: need some way to get $(*F) (target filename)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}")
endif ()

View File

@@ -1,113 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from OpenBLAS/Makefile.prebuild
## This is triggered by system.cmake and runs before any of the code is built.
## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
## Next it runs f_check and appends some fortran information to the files.
## Finally it runs getarch and getarch_2nd for even more environment information.
# CMake vars set by this file:
# CORE
# LIBCORE
# NUM_CORES
# HAVE_MMX
# HAVE_SSE
# HAVE_SSE2
# HAVE_SSE3
# MAKE
# SGEMM_UNROLL_M
# SGEMM_UNROLL_N
# DGEMM_UNROLL_M
# DGEMM_UNROLL_N
# QGEMM_UNROLL_M
# QGEMM_UNROLL_N
# CGEMM_UNROLL_M
# CGEMM_UNROLL_N
# ZGEMM_UNROLL_M
# ZGEMM_UNROLL_N
# XGEMM_UNROLL_M
# XGEMM_UNROLL_N
# CGEMM3M_UNROLL_M
# CGEMM3M_UNROLL_N
# ZGEMM3M_UNROLL_M
# ZGEMM3M_UNROLL_N
# XGEMM3M_UNROLL_M
# XGEMM3M_UNROLL_N
# CPUIDEMU = ../../cpuid/table.o
# When CPUIDEMU is provided, pass the matching defines to the getarch build.
if (DEFINED CPUIDEMU)
  set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
endif ()

# Choose the output file names: the regular pair by default, the *_kernel pair
# (plus -DBUILD_KERNEL for getarch_2nd) when TARGET_CORE is given.
if (NOT DEFINED TARGET_CORE)
  set(TARGET_MAKE "Makefile.conf")
  set(TARGET_CONF "config.h")
else ()
  # set the C flags for just this file
  set(GETARCH2_FLAGS "-DBUILD_KERNEL")
  set(TARGET_MAKE "Makefile_kernel.conf")
  set(TARGET_CONF "config_kernel.h")
endif ()

# The C check always runs; the Fortran check is skipped when NOFORTRAN is set.
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake")
if (NOT NOFORTRAN)
  include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake")
endif ()
# compile getarch
# Sources for the getarch probe: getarch.c plus the optional CPUID emulation
# input named by CPUIDEMU. This previously expanded the misspelled, never-set
# ${CPUIDEMO}, so the emulation input was silently dropped even though the
# -DCPUIDEMU define was passed.
set(GETARCH_SRC
  ${CMAKE_SOURCE_DIR}/getarch.c
  ${CPUIDEMU}
)
if (MSVC)
  #Use generic for MSVC now
  set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
else ()
  # cpuid.S is only added for non-MSVC compilers.
  list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
endif ()
# Build the getarch probe in its own scratch directory and copy the resulting
# executable into the project binary directory.
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
try_compile(GETARCH_RESULT ${GETARCH_DIR}
  SOURCES ${GETARCH_SRC}
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)
# Stop here with the compiler output if the probe could not be built; running
# a missing binary would only produce empty results further down.
if (NOT GETARCH_RESULT)
  message(FATAL_ERROR "Failed to build getarch:\n${GETARCH_LOG}")
endif ()

message(STATUS "Running getarch")
# Run the probe twice: argument 0 yields the text handed to ParseGetArchVars,
# argument 1 yields the text appended to ${TARGET_CONF}.
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")

# append config data from getarch to the TARGET file and read in CMake vars
# NOTE(review): TARGET_CONF is a bare file name, and file() resolves relative
# paths against the current source directory -- confirm that writing into the
# source tree is intended.
file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT})
ParseGetArchVars(${GETARCH_MAKE_OUT})
# Build the second-stage probe (getarch_2nd) in its own scratch directory and
# copy the resulting executable into the project binary directory.
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH2_DIR})
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
  SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH2_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)
# Stop here with the compiler output if the probe could not be built; running
# a missing binary would only produce empty results further down.
if (NOT GETARCH2_RESULT)
  message(FATAL_ERROR "Failed to build getarch_2nd:\n${GETARCH2_LOG}")
endif ()

# Run the probe twice: argument 0 yields the text handed to ParseGetArchVars,
# argument 1 yields the text appended to ${TARGET_CONF}.
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)

# append config data from getarch_2nd to the TARGET file and read in CMake vars
file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
ParseGetArchVars(${GETARCH2_MAKE_OUT})

View File

@@ -1,552 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from OpenBLAS/Makefile.system
##
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")

# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
# http://stackoverflow.com/questions/714100/os-detecting-makefile
# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa

# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
if (DEFINED TARGET_CORE)
  set(TARGET ${TARGET_CORE})
endif ()

# Force fallbacks for 32bit: AVX is turned off and the targets listed below
# are replaced by NEHALEM or BARCELONA.
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
  message(STATUS "Compiling a ${BINARY}-bit binary.")
  set(NO_AVX 1)
  if (${TARGET} MATCHES "^(HASWELL|SANDYBRIDGE)$")
    set(TARGET "NEHALEM")
  elseif (${TARGET} MATCHES "^(BULLDOZER|PILEDRIVER)$")
    set(TARGET "BARCELONA")
  endif ()
endif ()

# An explicit target is forced on getarch through -DFORCE_<TARGET>.
if (DEFINED TARGET)
  message(STATUS "Targetting the ${TARGET} architecture.")
  set(GETARCH_FLAGS "-DFORCE_${TARGET}")
endif ()
# The statements below extend GETARCH_FLAGS, the space-separated option string
# later passed to the getarch build. The order of the appends is kept because
# it determines the order of the options in that string.
if (INTERFACE64)
  message(STATUS "Using 64-bit integers.")
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT")
endif ()

# GEMM multithread threshold defaults to 4 unless the caller provides one.
if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD)
  set(GEMM_MULTITHREAD_THRESHOLD 4)
endif ()
message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.")
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}")

if (NO_AVX)
  message(STATUS "Disabling Advanced Vector Extensions (AVX).")
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX")
endif ()

if (NO_AVX2)
  message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).")
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
endif ()

if (CMAKE_BUILD_TYPE STREQUAL Debug)
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
endif ()

# TODO: let CMake handle this? -hpa
#if (${QUIET_MAKE})
# set(MAKE "${MAKE} -s")
#endif()

# NO_PARALLEL_MAKE defaults to 0 and is always forwarded to getarch.
if (NOT DEFINED NO_PARALLEL_MAKE)
  set(NO_PARALLEL_MAKE 0)
endif ()
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")

if (CMAKE_CXX_COMPILER STREQUAL loongcc)
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -static")
endif ()

#if don't use Fortran, it will only compile CBLAS.
if (NOT ONLY_CBLAS)
  set(ONLY_CBLAS 0)
else ()
  set(NO_LAPACK 1)
endif ()
# Run the prebuild step; NUM_CORES, used just below, is listed among the
# variables that step sets.
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")

# Default the thread count to the number of cores.
if (NOT DEFINED NUM_THREADS)
  set(NUM_THREADS ${NUM_CORES})
endif ()

# A single thread turns threading off.
if (${NUM_THREADS} EQUAL 1)
  set(USE_THREAD 0)
endif ()

# Derive SMP: an explicit USE_THREAD wins, otherwise the thread count decides.
if (DEFINED USE_THREAD)
  if (NOT ${USE_THREAD})
    unset(SMP)
  else ()
    set(SMP 1)
  endif ()
else ()
  # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
  if (NOT ${NUM_THREADS} EQUAL 1)
    set(SMP 1)
  else ()
    unset(SMP)
  endif ()
endif ()

if (${SMP})
  message(STATUS "SMP enabled.")
endif ()

# Position-independent code is requested unless something decided otherwise.
if (NOT DEFINED NEED_PIC)
  set(NEED_PIC 1)
endif ()
# TODO: I think CMake should be handling all this stuff -hpa
unset(ARFLAGS)
set(CPP "${COMPILER} -E")

# Each binutils-style tool variable (AR, AS, LD, RANLIB, NM, DLLWRAP, OBJCOPY,
# OBJCONV) is the lower-case tool name prefixed with CROSS_SUFFIX.
foreach (cross_tool ar as ld ranlib nm dllwrap objcopy objconv)
  string(TOUPPER "${cross_tool}" cross_tool_var)
  set(${cross_tool_var} "${CROSS_SUFFIX}${cross_tool}")
endforeach ()

# OS dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")

# Architecture dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")

# C Compiler dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")

# Fortran Compiler dependent settings
if (NOT NOFORTRAN)
  include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
endif ()

# Placeholder kept from the Makefile port; it currently has no effect.
if (BINARY64)
  if (INTERFACE64)
    # CCOMMON_OPT += -DUSE64BITINT
  endif ()
endif ()
# Add the position-independent-code option to the C and Fortran option strings.
if (NEED_PIC)
  # NOTE(review): CMAKE_C_COMPILER normally holds the path of the compiler
  # executable, so this comparison with "IBM" is unlikely to ever match --
  # confirm whether a compiler-id variable was intended.
  if (${CMAKE_C_COMPILER} STREQUAL "IBM")
    set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
  endif ()

  # F_COMPILER is quoted because it may be unset (the Fortran includes are
  # skipped under NOFORTRAN); unquoted, an empty value leaves if() without a
  # left-hand operand and aborts configuration.
  if ("${F_COMPILER}" STREQUAL "SUN")
    set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
  endif ()
endif ()
# Translate the build switches into preprocessor defines on CCOMMON_OPT.
# The append order is kept because it fixes the order of the resulting options.
if (DYNAMIC_ARCH)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
endif ()

if (NO_LAPACK)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK")
  #Disable LAPACK C interface
  set(NO_LAPACKE 1)
endif ()

if (NO_LAPACKE)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE")
endif ()

# AVX is disabled on request and unconditionally on 32-bit x86.
if (NO_AVX)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
endif ()
if (${ARCH} STREQUAL "x86")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
endif ()

if (NO_AVX2)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
endif ()

# Threading-related defines only apply to SMP builds.
if (SMP)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")

  if (${ARCH} STREQUAL "mips64")
    if (NOT ${CORE} STREQUAL "LOONGSON3B")
      set(USE_SIMPLE_THREADED_LEVEL3 1)
    endif ()
  endif ()

  if (USE_OPENMP)
    # USE_SIMPLE_THREADED_LEVEL3 = 1
    # NO_AFFINITY = 1
    set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP")
  endif ()

  if (BIGNUMA)
    set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA")
  endif ()
endif ()

if (NO_WARMUP)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_WARMUP")
endif ()

if (CONSISTENT_FPCSR)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR")
endif ()

# Only for development
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING")
# set(USE_PAPI 1)

if (USE_PAPI)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI")
  set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr")
endif ()

if (DYNAMIC_THREADS)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS")
endif ()

# MAX_CPU_NUMBER is always defined from the configured thread count.
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")

if (USE_SIMPLE_THREADED_LEVEL3)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
endif ()
# Library file-name prefix, optionally extended with LIBNAMESUFFIX.
if (NOT DEFINED LIBNAMESUFFIX)
  set(LIBPREFIX "libopenblas")
else ()
  set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
endif ()

# Symbol prefix and suffix default to empty strings.
if (NOT DEFINED SYMBOLPREFIX)
  set(SYMBOLPREFIX "")
endif ()
if (NOT DEFINED SYMBOLSUFFIX)
  set(SYMBOLSUFFIX "")
endif ()

# Kernel sources are looked up per architecture.
set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}")

# TODO: nead to convert these Makefiles
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake

# PPC440-specific allocation choices.
if (${CORE} STREQUAL "PPC440")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
endif ()
if (${CORE} STREQUAL "PPC440FP2")
  set(STATIC_ALLOCATION 1)
endif ()
# CPU affinity is disabled everywhere except Linux on x86, x86_64 or a
# LOONGSON3B core.
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
  set(NO_AFFINITY 1)
endif ()
if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B")
  set(NO_AFFINITY 1)
endif ()
if (NO_AFFINITY)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY")
endif ()

if (FUNCTION_PROFILE)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE")
endif ()

# Memory allocation strategy defines.
if (HUGETLB_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB")
endif ()
if (DEFINED HUGETLBFILE_ALLOCATION)
  # A stray ")" used to be part of this string, so it ended up inside the
  # HUGETLB_FILE_NAME definition handed to the compiler.
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
endif ()
if (STATIC_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC")
endif ()
if (DEVICEDRIVER_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"")
endif ()
if (MIXED_MEMORY_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
endif ()
# Command-line helper tools; SunOS uses the g-prefixed variants.
set(TAR tar)
set(PATCH patch)
set(GREP grep)
if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
  set(TAR gtar)
  set(PATCH gpatch)
  set(GREP ggrep)
endif ()

# MD5SUM keeps any value chosen earlier and otherwise falls back to md5sum.
if (NOT DEFINED MD5SUM)
  set(MD5SUM md5sum)
endif ()

set(AWK awk)

set(REVISION "-r${OpenBLAS_VERSION}")
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})

# DEBUG appends -g; because that defines COMMON_OPT, the -O2 default below is
# only applied when DEBUG is off and the caller supplied no COMMON_OPT.
if (DEBUG)
  set(COMMON_OPT "${COMMON_OPT} -g")
endif ()
if (NOT DEFINED COMMON_OPT)
  set(COMMON_OPT "-O2")
endif ()

#For x86 32-bit
if (DEFINED BINARY AND BINARY EQUAL 32 AND NOT MSVC)
  set(COMMON_OPT "${COMMON_OPT} -m32")
endif ()

# Fold the accumulated option strings into the flags CMake hands to the compilers.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
if (NOT MSVC)
  set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
endif ()

# TODO: not sure what PFLAGS is -hpa
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")

set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")

# TODO: not sure what FPFLAGS is -hpa
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")

#For LAPACK Fortran codes.
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")
#Disable -fopenmp for LAPACK Fortran codes on Windows.
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
  # The last entry was misspelled "-xopenmp=parralel" and could never match.
  set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
  foreach (FILTER_FLAG ${FILTER_FLAGS})
    # The input strings are quoted so they always reach string(REPLACE) as one
    # argument, even when empty or containing semicolons.
    # NOTE(review): this is a substring replacement, so "-mp" is also removed
    # from inside longer options that merely contain it -- confirm that no such
    # option is ever present in these flags.
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FFLAGS "${LAPACK_FFLAGS}")
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FPFLAGS "${LAPACK_FPFLAGS}")
  endforeach ()
endif ()

if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  # lapack-netlib is rife with uninitialized warnings -hpa
  set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized")
endif ()
# C flags for the LAPACK C sources: the project C flags plus the defines below.
# This previously expanded CMAKE_C_CFLAGS, which is not a CMake variable and is
# not set anywhere in this file, so the base flags were always empty.
set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H")
if (INTERFACE64)
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64")
endif ()
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS")
endif ()
# NOTE(review): CMAKE_C_COMPILER normally holds the compiler path, so the "LSB"
# comparison is unlikely to match; in practice only the Windows test applies --
# confirm whether a compiler-id variable was intended.
if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
endif ()
# Default file suffixes, used unless an earlier step already chose them.
if (NOT DEFINED SUFFIX)
  set(SUFFIX o)
endif ()
if (NOT DEFINED PSUFFIX)
  set(PSUFFIX po)
endif ()
if (NOT DEFINED LIBSUFFIX)
  set(LIBSUFFIX a)
endif ()

# Library file name: <prefix>[_<core>][p]<revision>[_p].<suffix>
#   _<core> is present only when DYNAMIC_ARCH is off,
#   p       is present only when SMP is defined,
#   _p      distinguishes LIBNAME_P from LIBNAME.
if (DYNAMIC_ARCH)
  set(libname_core_part "")
else ()
  set(libname_core_part "_${LIBCORE}")
endif ()
if (DEFINED SMP)
  set(libname_smp_part "p")
else ()
  set(libname_smp_part "")
endif ()
set(libname_stem "${LIBPREFIX}${libname_core_part}${libname_smp_part}${REVISION}")
set(LIBNAME "${libname_stem}.${LIBSUFFIX}")
set(LIBNAME_P "${libname_stem}_p.${LIBSUFFIX}")
set(LIBDLLNAME "${LIBPREFIX}.dll")

# The remaining names are LIBNAME with its ".${LIBSUFFIX}" ending swapped for
# another extension. The extension used to be appended after an extra
# ".${LIBSUFFIX}", giving names such as "<name>.a.a.so".
string(REGEX REPLACE "\\.${LIBSUFFIX}$" "" LIBNAME_BASE "${LIBNAME}")
set(LIBSONAME "${LIBNAME_BASE}.so")
set(LIBDYNNAME "${LIBNAME_BASE}.dylib")
set(LIBDEFNAME "${LIBNAME_BASE}.def")
set(LIBEXPNAME "${LIBNAME_BASE}.exp")
set(LIBZIPNAME "${LIBNAME_BASE}.zip")

# Library paths as laid out by the Makefile build, inside the source directory.
set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")
# Space-separated summary of the components that make up the library.
set(LIB_COMPONENTS BLAS)
if (NOT NO_CBLAS)
  set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS")
endif ()
if (NOT NO_LAPACK)
  set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK")
  # LAPACKE is only listed together with LAPACK.
  if (NOT NO_LAPACKE)
    set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
  endif ()
endif ()
# A CBLAS-only build replaces whatever was collected above.
if (ONLY_CBLAS)
  set(LIB_COMPONENTS CBLAS)
endif ()

# For GEMM3M
# Enabled for the architectures listed below, except when the core is "generic".
set(USE_GEMM3M 0)
if (DEFINED ARCH)
  if (${ARCH} MATCHES "^(x86|x86_64|ia64|MIPS)$")
    set(USE_GEMM3M 1)
  endif ()
  if (${CORE} STREQUAL "generic")
    set(USE_GEMM3M 0)
  endif ()
endif ()
#export OSNAME
#export ARCH
#export CORE
#export LIBCORE
#export PGCPATH
#export CONFIG
#export CC
#export FC
#export BU
#export FU
#export NEED2UNDERSCORES
#export USE_THREAD
#export NUM_THREADS
#export NUM_CORES
#export SMP
#export MAKEFILE_RULE
#export NEED_PIC
#export BINARY
#export BINARY32
#export BINARY64
#export F_COMPILER
#export C_COMPILER
#export USE_OPENMP
#export CROSS
#export CROSS_SUFFIX
#export NOFORTRAN
#export NO_FBLAS
#export EXTRALIB
#export CEXTRALIB
#export FEXTRALIB
#export HAVE_SSE
#export HAVE_SSE2
#export HAVE_SSE3
#export HAVE_SSSE3
#export HAVE_SSE4_1
#export HAVE_SSE4_2
#export HAVE_SSE4A
#export HAVE_SSE5
#export HAVE_AVX
#export HAVE_VFP
#export HAVE_VFPV3
#export HAVE_VFPV4
#export HAVE_NEON
#export KERNELDIR
#export FUNCTION_PROFILE
#export TARGET_CORE
#
#export SGEMM_UNROLL_M
#export SGEMM_UNROLL_N
#export DGEMM_UNROLL_M
#export DGEMM_UNROLL_N
#export QGEMM_UNROLL_M
#export QGEMM_UNROLL_N
#export CGEMM_UNROLL_M
#export CGEMM_UNROLL_N
#export ZGEMM_UNROLL_M
#export ZGEMM_UNROLL_N
#export XGEMM_UNROLL_M
#export XGEMM_UNROLL_N
#export CGEMM3M_UNROLL_M
#export CGEMM3M_UNROLL_N
#export ZGEMM3M_UNROLL_M
#export ZGEMM3M_UNROLL_N
#export XGEMM3M_UNROLL_M
#export XGEMM3M_UNROLL_N
#if (USE_CUDA)
# export CUDADIR
# export CUCC
# export CUFLAGS
# export CULIB
#endif
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
#
#.f.$(SUFFIX):
# $(FC) $(FFLAGS) -c $< -o $(@F)
#
#.f.$(PSUFFIX):
# $(FC) $(FPFLAGS) -pg -c $< -o $(@F)
# these are not cross-platform
#ifdef BINARY64
#PATHSCALEPATH = /opt/pathscale/lib/3.1
#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib
#else
#PATHSCALEPATH = /opt/pathscale/lib/3.1/32
#PGIPATH = /opt/pgi/linux86/7.1-5/lib
#endif
#ACMLPATH = /opt/acml/4.3.0
#ifneq ($(OSNAME), Darwin)
#MKLPATH = /opt/intel/mkl/10.2.2.025/lib
#else
#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
#endif
#ATLASPATH = /opt/atlas/3.9.17/opteron
#FLAMEPATH = $(HOME)/flame/lib
#ifneq ($(OSNAME), SunOS)
#SUNPATH = /opt/sunstudio12.1
#else
#SUNPATH = /opt/SUNWspro
#endif

View File

@@ -1,346 +0,0 @@
# Functions to help with the OpenBLAS build
# Turns the text produced by getarch into CMake variables in the caller's
# scope. Every "NAME=VALUE" token found in GETARCH_IN (names and values made
# of letters, digits and underscores) becomes a variable NAME holding VALUE.
# @param GETARCH_IN the getarch output to scan
function(ParseGetArchVars GETARCH_IN)
  string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" getarch_assignments "${GETARCH_IN}")
  foreach (getarch_assignment ${getarch_assignments})
    # everything before the '=' is the variable name ...
    string(REGEX REPLACE "=.*$" "" getarch_key "${getarch_assignment}")
    # ... and everything after it is the value
    string(REGEX REPLACE "^[^=]*=" "" getarch_value "${getarch_assignment}")
    set(${getarch_key} ${getarch_value} PARENT_SCOPE)
  endforeach ()
endfunction ()
# Reads a Makefile into CMake vars.
#
# Every line of the form "NAME = VALUE" becomes a CMake variable NAME. Any
# Makefile reference $(OTHER) inside VALUE is replaced by the current value of
# the CMake variable OTHER. A line of the form "include $(KERNELDIR)/<file>"
# makes the macro parse ${KERNELDIR}/<file> as well.
#
# This is a macro, so the parsed variables (and the helper variables used
# below) are set directly in the caller's scope.
#
# NOTE(review): the assignment regex is not anchored to the start of the line,
# so a commented-out line such as "#FOO = bar" also matches and sets FOO -
# confirm whether that is intended.
#
# @param MAKEFILE_IN path of the Makefile to read
macro(ParseMakefileVars MAKEFILE_IN)
  message(STATUS "Reading vars from ${MAKEFILE_IN}...")
  file(STRINGS "${MAKEFILE_IN}" makefile_contents)
  foreach (makefile_line ${makefile_contents})
    string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
    if (NOT "${line_match}" STREQUAL "")
      set(var_name ${CMAKE_MATCH_1})
      set(var_value "${CMAKE_MATCH_2}")
      # check for Makefile variables in the string, e.g. $(TSUFFIX)
      # (var_value is quoted in the string() calls so that it is always passed
      # as exactly one input argument, even if it is empty or contains ';')
      string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches "${var_value}")
      foreach (make_var ${make_var_matches})
        # strip out Makefile $() markup
        string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var "${make_var}")
        # now replace the instance of the Makefile variable with the value of the CMake variable (note the double quote)
        string(REPLACE "$(${make_var})" "${${make_var}}" var_value "${var_value}")
      endforeach ()
      # deliberately unquoted: a value that expanded to nothing leaves the
      # variable unset, exactly as before
      set(${var_name} ${var_value})
    else ()
      string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
      if (NOT "${line_match}" STREQUAL "")
        ParseMakefileVars("${KERNELDIR}/${CMAKE_MATCH_1}")
      endif ()
    endif ()
  endforeach ()
endmacro ()
# Returns all combinations (subsets) of the input list, as a list with colon-separated combinations.
# The combinations are produced in bit-mask order and include the empty set (returned as a single space)
# and the full set, e.g. an input of A B C returns " " A B A:B C A:C B:C A:B:C
# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")).
# @param list_in the list of items to combine
# @param absent_codes_in codes to use when an element is absent from a combination. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present.
# @returns LIST_OUT a list of combinations
#          CODES_OUT a list of codes corresponding to each combination: for every input item, the first letter of the item
#          when it is present in the combination, or the matching entry of absent_codes_in when it is not
function(AllCombinations list_in absent_codes_in)
  list(LENGTH list_in list_count)
  # there are 2^N combinations; foreach(RANGE) is inclusive, so the last index is 2^N - 1
  math(EXPR last_combo "(1 << ${list_count}) - 1")
  # same for the item index (this is a little ridiculous just to iterate through a list w/ indices)
  math(EXPR last_list_index "${list_count} - 1")
  set(LIST_OUT "")
  set(CODES_OUT "")
  foreach (c RANGE 0 ${last_combo})

    set(current_combo "")
    set(current_code "")

    # bit i of c decides whether item i is part of this combination
    foreach (list_index RANGE 0 ${last_list_index})
      math(EXPR bit "1 << ${list_index}")
      math(EXPR combo_has_bit "${c} & ${bit}")
      if (combo_has_bit)
        list(GET list_in ${list_index} list_elem)
        # Test for emptiness by length rather than with if(<variable>): the
        # latter treats values such as N, NO or OFF as false, which would make
        # an item with such a name overwrite the combination built so far.
        string(LENGTH "${current_combo}" current_combo_length)
        if (current_combo_length GREATER 0)
          set(current_combo "${current_combo}:${list_elem}")
        else ()
          set(current_combo "${list_elem}")
        endif ()
        string(SUBSTRING "${list_elem}" 0 1 code_char)
      else ()
        list(GET absent_codes_in ${list_index} code_char)
      endif ()
      set(current_code "${current_code}${code_char}")
    endforeach ()

    string(LENGTH "${current_combo}" current_combo_length)
    if (current_combo_length EQUAL 0)
      list(APPEND LIST_OUT " ") # Empty set is a valid combination, but list(APPEND) skips empty strings, so use a space
    else ()
      list(APPEND LIST_OUT ${current_combo})
    endif ()
    list(APPEND CODES_OUT ${current_code})

  endforeach ()

  set(LIST_OUT ${LIST_OUT} PARENT_SCOPE)
  set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
endfunction ()
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
#
# For every float type and every source this writes a small wrapper source into
# ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY} that #defines the name macros (ASMNAME, NAME, CNAME, ...)
# plus the requested definitions and then #includes the original source. The wrapper files are appended to
# OPENBLAS_SRC in the caller's scope.
#
# @param sources_in the source files to build from
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
#                e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
# @param use_cblas (optional) if true, the object name is prefixed with "cblas_" and CBLAS is added to the definitions
# @param replace_last_with (optional) replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
# @param append_with (optional) appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional) some routines have separate source files for complex and non-complex float types.
#                               0 - compiles for all types
#                               1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
#                               2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
#                               3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
#                               4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
#                               STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in)

  # Optional arguments are detected through ARGC. "DEFINED ARGV<n>" is not
  # reliable inside a function: when fewer arguments are passed, ARGV<n> may
  # still be visible from the calling function. For the same reason every
  # optional value gets an explicit default first.
  set(defines_in "")
  if (ARGC GREATER 1)
    set(defines_in "${ARGV1}")
  endif ()

  set(name_in "")
  if (ARGC GREATER 2 AND NOT "${ARGV2}" STREQUAL "")
    # strip off extension for kernel files that pass in the object name.
    get_filename_component(name_in "${ARGV2}" NAME_WE)
  endif ()

  set(use_cblas false)
  if (ARGC GREATER 3)
    set(use_cblas "${ARGV3}")
  endif ()

  set(replace_last_with "")
  if (ARGC GREATER 4)
    set(replace_last_with "${ARGV4}")
  endif ()

  set(append_with "")
  if (ARGC GREATER 5)
    set(append_with "${ARGV5}")
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  if (no_float_type)
    set(float_list "DUMMY") # still need to loop once
  else ()
    set(float_list "${FLOAT_TYPES}")
  endif ()

  set(real_only false)
  set(complex_only false)
  set(mangle_complex_sources false)
  if (ARGC GREATER 7 AND NOT "${ARGV7}" STREQUAL "")
    if (${ARGV7} EQUAL 1)
      set(real_only true)
    elseif (${ARGV7} EQUAL 2)
      set(complex_only true)
    elseif (${ARGV7} EQUAL 3)
      set(mangle_complex_sources true)
    elseif (${ARGV7} EQUAL 4)
      set(mangle_complex_sources true)
      set(complex_only true)
    elseif (NOT ${ARGV7} EQUAL 0)
      # not a number: treat it as the single float type to build
      set(float_list ${ARGV7})
    endif ()
  endif ()

  if (complex_only)
    list(REMOVE_ITEM float_list "SINGLE")
    list(REMOVE_ITEM float_list "DOUBLE")
  elseif (real_only)
    list(REMOVE_ITEM float_list "COMPLEX")
    list(REMOVE_ITEM float_list "ZCOMPLEX")
  endif ()

  # Emptiness is tested by length: if(<variable>) would treat names/codes that
  # happen to be CMake false constants (e.g. "N") as "not given".
  string(LENGTH "${name_in}" name_in_length)
  string(LENGTH "${replace_last_with}" replace_last_length)

  set(float_char "")
  # start from an empty list so nothing is inherited from the caller's scope
  set(SRC_LIST_OUT "")
  foreach (float_type ${float_list})
    foreach (source_file ${sources_in})

      if (NOT no_float_type)
        # first letter of the float type, lower-cased (s, d, c, z)
        string(SUBSTRING ${float_type} 0 1 float_char)
        string(TOLOWER ${float_char} float_char)
      endif ()

      if (name_in_length EQUAL 0)
        get_filename_component(source_name ${source_file} NAME_WE)
        set(obj_name "${float_char}${source_name}")
      else ()
        # replace * with float_char
        if ("${name_in}" MATCHES "\\*")
          # float_char is quoted because it is empty when no_float_type is set
          string(REPLACE "*" "${float_char}" obj_name "${name_in}")
        else ()
          set(obj_name "${float_char}${name_in}")
        endif ()
      endif ()

      if (replace_last_length GREATER 0)
        string(REGEX REPLACE ".$" "${replace_last_with}" obj_name "${obj_name}")
      else ()
        set(obj_name "${obj_name}${append_with}")
      endif ()

      # now add the object and set the defines
      set(obj_defines ${defines_in})

      if (use_cblas)
        set(obj_name "cblas_${obj_name}")
        list(APPEND obj_defines "CBLAS")
      endif ()

      list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")

      if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "DOUBLE")
      endif ()

      if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "COMPLEX")
        if (mangle_complex_sources)
          # add a z to the filename
          get_filename_component(source_name ${source_file} NAME)
          string(REPLACE ${source_name} "z${source_name}" source_file ${source_file})
        endif ()
      endif ()

      if (VERBOSE_GEN)
        message(STATUS "${obj_name}:${source_file}")
        message(STATUS "${obj_defines}")
      endif ()

      # create a copy of the source to avoid duplicate obj filename problem with ar.exe
      get_filename_component(source_extension ${source_file} EXT)
      set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}")
      if (IS_ABSOLUTE ${source_file})
        set(old_source_file ${source_file})
      else ()
        set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}")
      endif ()

      # turn "A=B;C" into "#define A B\n#define C" and include the real source after it
      string(REPLACE ";" "\n#define " define_source "${obj_defines}")
      string(REPLACE "=" " " define_source "${define_source}")
      file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"")
      list(APPEND SRC_LIST_OUT ${new_source_file})

    endforeach ()
  endforeach ()

  list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT})
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in
# (the combinations come from AllCombinations, each object is then created through GenerateNamedObjects)
# @param sources_in the source files to build from
# @param defines_in the preprocessor definitions that will be combined to create the object files
# @param absent_codes_in the code letters to use in the object name when the corresponding entry of defines_in is absent from a combination
# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects
# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU.
#                  If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
#                  If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU.
#                  If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
#                  If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
# @param alternate_name (optional) replaces the source name as the object name (define codes are still appended)
# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional) see GenerateNamedObjects
function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)

  # Optional arguments are detected through ARGC; "DEFINED ARGV<n>" may see
  # stale values from the calling function when fewer arguments are passed.
  set(alternate_name_in "")
  if (ARGC GREATER 5)
    set(alternate_name_in "${ARGV5}")
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  set(complex_filename_scheme "")
  if (ARGC GREATER 7)
    set(complex_filename_scheme "${ARGV7}")
  endif ()

  AllCombinations("${defines_in}" "${absent_codes_in}")
  set(define_combos ${LIST_OUT})
  set(define_codes ${CODES_OUT})

  list(LENGTH define_combos num_combos)
  math(EXPR num_combos "${num_combos} - 1")

  foreach (c RANGE 0 ${num_combos})

    list(GET define_combos ${c} define_combo)
    list(GET define_codes ${c} define_code)

    # Build the definition list for this combination once, outside the source
    # loop. Rewriting define_combo in place for every source would hand an
    # already-converted list back to string(REPLACE), which concatenates its
    # inputs and would merge e.g. TRANS;UNIT into TRANSUNIT from the second
    # source file on.
    if ("${define_combo}" STREQUAL " ")
      # the empty combination (AllCombinations marks it with a single space)
      set(combo_defines "${all_defines_in}")
    else ()
      # replace colon separated list with semicolons, this turns it into a CMake list
      string(REPLACE ":" ";" combo_defines "${define_combo}")
      list(APPEND combo_defines ${all_defines_in})
    endif ()

    foreach (source_file ${sources_in})

      set(alternate_name ${alternate_name_in})

      # now add the object and set the defines
      set(cur_defines "${combo_defines}")

      set(replace_code "")
      set(append_code "")
      if (replace_scheme EQUAL 1)
        set(replace_code ${define_code})
      else ()
        if (replace_scheme EQUAL 2)
          set(append_code "_${define_code}")
        elseif (replace_scheme EQUAL 3)
          if ("${alternate_name}" STREQUAL "")
            string(REGEX MATCH "[a-zA-Z]\\." last_letter ${source_file})
          else ()
            string(REGEX MATCH "[a-zA-Z]$" last_letter ${alternate_name})
          endif ()
          # first extract the last letter
          string(SUBSTRING ${last_letter} 0 1 last_letter) # remove period from match
          # break the code up into the first letter and the remaining (should only be 2 anyway)
          string(SUBSTRING ${define_code} 0 1 define_code_first)
          string(SUBSTRING ${define_code} 1 -1 define_code_second)
          set(replace_code "${define_code_first}${last_letter}${define_code_second}")
        elseif (replace_scheme EQUAL 4)
          # insert code before the last underscore and pass that in as the alternate_name
          if ("${alternate_name}" STREQUAL "")
            get_filename_component(alternate_name ${source_file} NAME_WE)
          endif ()
          set(extra_underscore "")
          # check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel)
          string(REGEX MATCH "_[a-zA-Z]+_" underscores ${alternate_name})
          string(LENGTH "${underscores}" underscores)
          if (underscores EQUAL 0)
            set(extra_underscore "_")
          endif ()
          string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name ${alternate_name})
        else()
          set(append_code ${define_code}) # replace_scheme should be 0
        endif ()
      endif ()

      GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}")
    endforeach ()
  endforeach ()

  # GenerateNamedObjects updated OPENBLAS_SRC in this scope; pass it on to the caller
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()

119
common.h
View File

@@ -82,27 +82,19 @@ extern "C" {
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if !defined(_MSC_VER)
#include <unistd.h>
#endif
#include <time.h>
#ifdef OS_LINUX
#include <malloc.h>
#include <sched.h>
#endif
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID)
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
#include <sched.h>
#endif
#ifdef OS_ANDROID
#define NO_SYSV_IPC
//Android NDK only supports complex.h since Android 5.0
#if __ANDROID_API__ < 21
#define FORCE_OPENBLAS_COMPLEX_STRUCT
#endif
#endif
#ifdef OS_WINDOWS
@@ -122,7 +114,6 @@ extern "C" {
#include <sys/shm.h>
#endif
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <math.h>
#ifdef SMP
@@ -285,6 +276,11 @@ typedef int blasint;
#define SIZE 8
#define BASE_SHIFT 3
#define ZBASE_SHIFT 4
#elif defined(INTEGER) //extend for integer matrix
#define FLOAT int
#define SIZE 4
#define BASE_SHIFT 2
#define ZBASE_SHIFT 3
#else
#define FLOAT float
#define SIZE 4
@@ -302,6 +298,13 @@ typedef int blasint;
#define COMPSIZE 2
#endif
#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#define CREAL __real__
#define CIMAG __imag__
#endif
#define Address_H(x) (((x)+(1<<15))>>16)
#define Address_L(x) ((x)-((Address_H(x))<<16))
@@ -315,12 +318,8 @@ typedef int blasint;
#endif
#if defined(OS_WINDOWS)
#if defined(_MSC_VER) && !defined(__clang__)
#define YIELDING YieldProcessor()
#else
#define YIELDING SwitchToThread()
#endif
#endif
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
@@ -332,13 +331,12 @@ typedef int blasint;
#endif
#endif
/*
#ifdef PILEDRIVER
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif
*/
/*
#ifdef STEAMROLLER
@@ -412,56 +410,12 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#ifndef ASSEMBLER
#ifdef OS_WINDOWS
typedef char env_var_t[MAX_PATH];
#define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
#else
typedef char* env_var_t;
#define readenv(p, n) ((p)=getenv(n))
#endif
#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
#ifdef _POSIX_MONOTONIC_CLOCK
#if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17)
#if __GLIBC_PREREQ(2, 17) // don't require -lrt
#define USE_MONOTONIC
#endif
#elif defined(OS_ANDROID)
#define USE_MONOTONIC
#endif
#endif
/* use similar scale as x86 rdtsc for timeouts to work correctly */
static inline unsigned long long rpcc(void){
#ifdef USE_MONOTONIC
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
#else
struct timeval tv;
gettimeofday(&tv,NULL);
return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000;
#endif
}
#define RPCC_DEFINED
#define RPCC64BIT
#endif // !RPCC_DEFINED
#if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
static void __inline blas_lock(volatile BLASULONG *address){
do {
while (*address) {YIELDING;};
} while (!__sync_bool_compare_and_swap(address, 0, 1));
}
#define BLAS_LOCK_DEFINED
#endif
#ifndef RPCC_DEFINED
#error "rpcc() implementation is missing for your platform"
#endif
#ifndef BLAS_LOCK_DEFINED
#error "blas_lock() implementation is missing for your platform"
#endif
#endif // !ASSEMBLER
#ifdef OS_LINUX
#include "common_linux.h"
@@ -507,52 +461,18 @@ static void __inline blas_lock(volatile BLASULONG *address){
/* C99 supports complex floating numbers natively, which GCC also offers as an
extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus)))
#define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus
#include <complex.h>
#endif
typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else
#define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif
#ifdef XDOUBLE
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
#elif defined(DOUBLE)
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
#else
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
#endif
#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#ifdef OPENBLAS_COMPLEX_STRUCT
#define CREAL(Z) ((Z).real)
#define CIMAG(Z) ((Z).imag)
#else
#define CREAL __real__
#define CIMAG __imag__
#endif
#endif
#endif // ASSEMBLER
#ifndef IFLUSH
@@ -569,10 +489,6 @@ static void __inline blas_lock(volatile BLASULONG *address){
#endif
#endif
#if defined(C_MSVC)
#define inline __inline
#endif
#ifndef ASSEMBLER
#ifndef MIN
@@ -728,7 +644,6 @@ typedef struct {
#endif
#ifndef ASSEMBLER
#include "common_stackalloc.h"
#if 0
#include "symcopy.h"
#endif

View File

@@ -76,7 +76,6 @@ static void __inline blas_lock(unsigned long *address){
"30:", address);
#endif
}
#define BLAS_LOCK_DEFINED
static __inline unsigned int rpcc(void){
@@ -90,7 +89,6 @@ static __inline unsigned int rpcc(void){
return r0;
}
#define RPCC_DEFINED
#define HALT ldq $0, 0($0)

View File

@@ -51,8 +51,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ASSEMBLER
#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8)
static void __inline blas_lock(volatile BLASULONG *address){
int register ret;
@@ -61,29 +59,40 @@ static void __inline blas_lock(volatile BLASULONG *address){
while (*address) {YIELDING;};
__asm__ __volatile__(
"ldrex r2, [%1] \n\t"
"strex %0, %2, [%1] \n\t"
"orr %0, r2 \n\t"
: "=&r"(ret)
: "r"(address), "r"(1)
: "memory", "r2"
"1: \n\t"
"ldrex r2, [%1] \n\t"
"mov r2, #0 \n\t"
"strex r3, r2, [%1] \n\t"
"cmp r3, #0 \n\t"
"bne 1b \n\t"
"mov %0 , r3 \n\t"
: "=r"(ret), "=r"(address)
: "1"(address)
: "memory", "r2" , "r3"
);
} while (ret);
MB;
}
#define BLAS_LOCK_DEFINED
#endif
static inline unsigned long long rpcc(void){
unsigned long long ret=0;
double v;
struct timeval tv;
gettimeofday(&tv,NULL);
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
ret = (unsigned long long) ( v * 1000.0d );
return ret;
}
static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
}
#if !defined(HAVE_VFP)
/* no FPU, soft float */
#define GET_IMAGE(res)
#elif defined(DOUBLE)
#if defined(DOUBLE)
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
#else
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
@@ -131,8 +140,4 @@ REALNAME:
#define MAP_ANONYMOUS MAP_ANON
#endif
#if !defined(ARMV5) && !defined(ARMV6) && !defined(ARMV7) && !defined(ARMV8)
#error "you must define ARMV5, ARMV6, ARMV7 or ARMV8"
#endif
#endif

View File

@@ -43,38 +43,43 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ASSEMBLER
static void __inline blas_lock(volatile BLASULONG *address){
BLASULONG ret;
int register ret;
int register tmp;
do {
while (*address) {YIELDING;};
__asm__ __volatile__(
"mov x4, #1 \n\t"
"1: \n\t"
"ldaxr x2, [%1] \n\t"
"cbnz x2, 1b \n\t"
"2: \n\t"
"stxr w3, x4, [%1] \n\t"
"cbnz w3, 1b \n\t"
"mov %0, #0 \n\t"
: "=r"(ret), "=r"(address)
"ldaxr %2, [%1] \n\t"
"mov %2, #0 \n\t"
"stlxr %w0, %2, [%1] \n\t"
"cbnz %w0, 1b \n\t"
"mov %0 , #0 \n\t"
: "=r"(ret), "=r"(address), "=r"(tmp)
: "1"(address)
: "memory", "x2" , "x3", "x4"
: "memory", "%w0"
//, "%r2" , "%r3"
);
} while (ret);
}
#define BLAS_LOCK_DEFINED
static inline unsigned long long rpcc(void){
unsigned long long ret=0;
double v;
struct timeval tv;
gettimeofday(&tv,NULL);
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
ret = (unsigned long long) ( v * 1000.0d );
return ret;
}
static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
@@ -100,10 +105,8 @@ static inline int blas_quickdivide(blasint x, blasint y){
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
#define PROLOGUE \
.text ;\
.align 4 ;\
.global REALNAME ;\
.type REALNAME, %function ;\
.func REALNAME ;\
REALNAME:
#define EPILOGUE
@@ -120,11 +123,7 @@ REALNAME:
#endif
#define HUGE_PAGESIZE ( 4 << 20)
#if defined(CORTEXA57)
#define BUFFER_SIZE (20 << 20)
#else
#define BUFFER_SIZE (16 << 20)
#endif
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)

View File

@@ -220,15 +220,6 @@
#define COMATCOPY_K_CTC comatcopy_k_ctc
#define COMATCOPY_K_RTC comatcopy_k_rtc
#define CIMATCOPY_K_CN cimatcopy_k_cn
#define CIMATCOPY_K_RN cimatcopy_k_rn
#define CIMATCOPY_K_CT cimatcopy_k_ct
#define CIMATCOPY_K_RT cimatcopy_k_rt
#define CIMATCOPY_K_CNC cimatcopy_k_cnc
#define CIMATCOPY_K_RNC cimatcopy_k_rnc
#define CIMATCOPY_K_CTC cimatcopy_k_ctc
#define CIMATCOPY_K_RTC cimatcopy_k_rtc
#define CGEADD_K cgeadd_k
#else
@@ -412,16 +403,6 @@
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
#define CIMATCOPY_K_CN gotoblas -> cimatcopy_k_cn
#define CIMATCOPY_K_RN gotoblas -> cimatcopy_k_rn
#define CIMATCOPY_K_CT gotoblas -> cimatcopy_k_ct
#define CIMATCOPY_K_RT gotoblas -> cimatcopy_k_rt
#define CIMATCOPY_K_CNC gotoblas -> cimatcopy_k_cnc
#define CIMATCOPY_K_RNC gotoblas -> cimatcopy_k_rnc
#define CIMATCOPY_K_CTC gotoblas -> cimatcopy_k_ctc
#define CIMATCOPY_K_RTC gotoblas -> cimatcopy_k_rtc
#define CGEADD_K gotoblas -> cgeadd_k
#endif

View File

@@ -149,11 +149,6 @@
#define DOMATCOPY_K_RN domatcopy_k_rn
#define DOMATCOPY_K_CT domatcopy_k_ct
#define DOMATCOPY_K_RT domatcopy_k_rt
#define DIMATCOPY_K_CN dimatcopy_k_cn
#define DIMATCOPY_K_RN dimatcopy_k_rn
#define DIMATCOPY_K_CT dimatcopy_k_ct
#define DIMATCOPY_K_RT dimatcopy_k_rt
#define DGEADD_K dgeadd_k
#else
@@ -272,10 +267,6 @@
#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
#define DIMATCOPY_K_CN gotoblas -> dimatcopy_k_cn
#define DIMATCOPY_K_RN gotoblas -> dimatcopy_k_rn
#define DIMATCOPY_K_CT gotoblas -> dimatcopy_k_ct
#define DIMATCOPY_K_RT gotoblas -> dimatcopy_k_rt
#define DGEADD_K gotoblas -> dgeadd_k

9
common_i.h Normal file
View File

@@ -0,0 +1,9 @@
/* Kernel-name mapping for the integer (I) routines.
 * Maps the IAXPYU_K kernel macro to the iaxpy_k symbol for builds without
 * DYNAMIC_ARCH; no mapping is provided for DYNAMIC_ARCH builds. */
#ifndef COMMON_I_H
#define COMMON_I_H
#ifndef DYNAMIC_ARCH
/* direct mapping: the macro resolves straight to the kernel symbol */
#define IAXPYU_K iaxpy_k
#else
/* DYNAMIC_ARCH is not handled here: stop the compilation */
#error
#endif
#endif

View File

@@ -68,7 +68,6 @@ static __inline void blas_lock(volatile unsigned long *address){
: "ar.ccv", "memory");
} while (ret);
}
#define BLAS_LOCK_DEFINED
static __inline unsigned long rpcc(void) {
unsigned long clocks;
@@ -76,7 +75,6 @@ static __inline unsigned long rpcc(void) {
__asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks));
return clocks;
}
#define RPCC_DEFINED
static __inline unsigned long stmxcsr(void){
@@ -101,12 +99,10 @@ static __inline void blas_lock(volatile unsigned long *address){
while (*address || _InterlockedCompareExchange((volatile int *) address,1,0))
;
}
#define BLAS_LOCK_DEFINED
static __inline unsigned int rpcc(void) {
return __getReg(_IA64_REG_AR_ITC);
}
#define RPCC_DEFINED
static __inline unsigned int stmxcsr(void) {
return __getReg(_IA64_REG_AR_FPSR);

View File

@@ -93,6 +93,7 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *);
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);

View File

@@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
float _Complex cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float _Complex cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
double _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
@@ -60,6 +60,8 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int,
int *, BLASLONG, int *, BLASLONG, int *, BLASLONG);
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,

View File

@@ -1736,55 +1736,31 @@ int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLAS
int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
int simatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG);
int simatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG);
int simatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG);
int simatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG);
int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
int dimatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG);
int dimatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG);
int dimatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG);
int dimatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG);
int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
int cimatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
int cimatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
int cimatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
int cimatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
int cimatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
int cimatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
int cimatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
int cimatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zimatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
int zimatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
int zimatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
int zimatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zimatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
int zimatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
int zimatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
int zimatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);

View File

@@ -47,6 +47,10 @@
#include "common_z.h"
#include "common_x.h"
#ifdef INTEGER_PRECISION
#include "common_i.h"
#endif
#ifndef COMPLEX
#ifdef XDOUBLE
@@ -634,12 +638,10 @@
#define OMATCOPY_K_RN DOMATCOPY_K_RN
#define OMATCOPY_K_CT DOMATCOPY_K_CT
#define OMATCOPY_K_RT DOMATCOPY_K_RT
#define IMATCOPY_K_CN DIMATCOPY_K_CN
#define IMATCOPY_K_RN DIMATCOPY_K_RN
#define IMATCOPY_K_CT DIMATCOPY_K_CT
#define IMATCOPY_K_RT DIMATCOPY_K_RT
#define GEADD_K DGEADD_K
#elif defined(INTEGER)
#define AXPYU_K IAXPYU_K
#else
#define AMAX_K SAMAX_K
@@ -936,10 +938,6 @@
#define OMATCOPY_K_RN SOMATCOPY_K_RN
#define OMATCOPY_K_CT SOMATCOPY_K_CT
#define OMATCOPY_K_RT SOMATCOPY_K_RT
#define IMATCOPY_K_CN SIMATCOPY_K_CN
#define IMATCOPY_K_RN SIMATCOPY_K_RN
#define IMATCOPY_K_CT SIMATCOPY_K_CT
#define IMATCOPY_K_RT SIMATCOPY_K_RT
#define GEADD_K SGEADD_K
#endif
@@ -1756,15 +1754,6 @@
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
#define IMATCOPY_K_CN ZIMATCOPY_K_CN
#define IMATCOPY_K_RN ZIMATCOPY_K_RN
#define IMATCOPY_K_CT ZIMATCOPY_K_CT
#define IMATCOPY_K_RT ZIMATCOPY_K_RT
#define IMATCOPY_K_CNC ZIMATCOPY_K_CNC
#define IMATCOPY_K_RNC ZIMATCOPY_K_RNC
#define IMATCOPY_K_CTC ZIMATCOPY_K_CTC
#define IMATCOPY_K_RTC ZIMATCOPY_K_RTC
#define GEADD_K ZGEADD_K
#else
@@ -2178,14 +2167,6 @@
#define OMATCOPY_K_RNC COMATCOPY_K_RNC
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
#define IMATCOPY_K_CN CIMATCOPY_K_CN
#define IMATCOPY_K_RN CIMATCOPY_K_RN
#define IMATCOPY_K_CT CIMATCOPY_K_CT
#define IMATCOPY_K_RT CIMATCOPY_K_RT
#define IMATCOPY_K_CNC CIMATCOPY_K_CNC
#define IMATCOPY_K_RNC CIMATCOPY_K_RNC
#define IMATCOPY_K_CTC CIMATCOPY_K_CTC
#define IMATCOPY_K_RTC CIMATCOPY_K_RTC
#define GEADD_K CGEADD_K

View File

@@ -98,7 +98,6 @@ static void INLINE blas_lock(volatile unsigned long *address){
} while (ret);
}
#define BLAS_LOCK_DEFINED
static inline unsigned int rpcc(void){
unsigned long ret;
@@ -119,7 +118,6 @@ static inline unsigned int rpcc(void){
#endif
return ret;
}
#define RPCC_DEFINED
#if defined(LOONGSON3A) || defined(LOONGSON3B)
#ifndef NO_AFFINITY

View File

@@ -855,36 +855,6 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);

View File

@@ -87,7 +87,6 @@ static void INLINE blas_lock(volatile unsigned long *address){
#endif
} while (ret);
}
#define BLAS_LOCK_DEFINED
static inline unsigned long rpcc(void){
unsigned long ret;
@@ -104,7 +103,6 @@ static inline unsigned long rpcc(void){
#endif
}
#define RPCC_DEFINED
#ifdef __64BIT__
#define RPCC64BIT
@@ -236,7 +234,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define HAVE_PREFETCH
#endif
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8)
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL)
#define DCBT_ARG 0
#else
#define DCBT_ARG 8
@@ -258,13 +256,6 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define L1_PREFETCH dcbtst
#endif
#if defined(POWER8)
#define L1_DUALFETCH
#define L1_PREFETCHSIZE (16 + 128 * 100)
#define L1_PREFETCH dcbtst
#endif
#
#ifndef L1_PREFETCH
#define L1_PREFETCH dcbt
#endif
@@ -504,15 +495,6 @@ static inline int blas_quickdivide(blasint x, blasint y){
REALNAME:
#define EPILOGUE .size REALNAME, .-REALNAME
#else
#if _CALL_ELF == 2
#define PROLOGUE \
.section .text;\
.align 6;\
.globl REALNAME;\
.type REALNAME, @function;\
REALNAME:
#define EPILOGUE .size REALNAME, .-REALNAME
#else
#define PROLOGUE \
.section .text;\
.align 5;\
@@ -532,7 +514,6 @@ REALNAME:;\
.size .REALNAME, .-.REALNAME; \
.section .note.GNU-stack,"",@progbits
#endif
#endif
#ifdef PROFILE
#ifndef __64BIT__
@@ -797,8 +778,6 @@ Lmcount$lazy_ptr:
#define BUFFER_SIZE ( 2 << 20)
#elif defined(PPC440FP2)
#define BUFFER_SIZE ( 16 << 20)
#elif defined(POWER8)
#define BUFFER_SIZE ( 64 << 20)
#else
#define BUFFER_SIZE ( 16 << 20)
#endif
@@ -813,25 +792,4 @@ Lmcount$lazy_ptr:
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#ifdef OS_LINUX
#ifndef __64BIT__
#define FRAMESLOT(X) (((X) * 4) + 8)
#else
#if _CALL_ELF == 2
#define FRAMESLOT(X) (((X) * 8) + 96)
#else
#define FRAMESLOT(X) (((X) * 8) + 112)
#endif
#endif
#endif
#if defined(OS_AIX) || defined(OS_DARWIN)
#ifndef __64BIT__
#define FRAMESLOT(X) (((X) * 4) + 56)
#else
#define FRAMESLOT(X) (((X) * 8) + 112)
#endif
#endif
#endif

View File

@@ -152,10 +152,6 @@
#define SOMATCOPY_K_RN somatcopy_k_rn
#define SOMATCOPY_K_CT somatcopy_k_ct
#define SOMATCOPY_K_RT somatcopy_k_rt
#define SIMATCOPY_K_CN simatcopy_k_cn
#define SIMATCOPY_K_RN simatcopy_k_rn
#define SIMATCOPY_K_CT simatcopy_k_ct
#define SIMATCOPY_K_RT simatcopy_k_rt
#define SGEADD_K sgeadd_k
@@ -278,10 +274,6 @@
#define SOMATCOPY_K_RN gotoblas -> somatcopy_k_rn
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
#define SIMATCOPY_K_CN gotoblas -> simatcopy_k_cn
#define SIMATCOPY_K_RN gotoblas -> simatcopy_k_rn
#define SIMATCOPY_K_CT gotoblas -> simatcopy_k_ct
#define SIMATCOPY_K_RT gotoblas -> simatcopy_k_rt
#define SGEADD_K gotoblas -> sgeadd_k

View File

@@ -58,7 +58,6 @@ static void __inline blas_lock(volatile unsigned long *address){
: "memory");
} while (ret);
}
#define BLAS_LOCK_DEFINED
static __inline unsigned long rpcc(void){
unsigned long clocks;
@@ -67,7 +66,6 @@ static __inline unsigned long rpcc(void){
return clocks;
};
#define RPCC_DEFINED
#ifdef __64BIT__
#define RPCC64BIT

View File

@@ -1,73 +0,0 @@
/*******************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
/*
 * Stack canary used by the STACK_ALLOC / STACK_FREE macros.
 * STACK_ALLOC_PROTECT is defined unconditionally on the next line, so as
 * written the #ifdef branch is always the active one; remove the #define to
 * turn the two helper macros into no-ops.
 */
#define STACK_ALLOC_PROTECT
#ifdef STACK_ALLOC_PROTECT
// Try to detect stack smashing
#include <assert.h>
/* Declares a local canary variable named stack_check holding a fixed pattern. */
#define STACK_ALLOC_PROTECT_SET volatile int stack_check = 0x7fc01234;
/* Asserts the canary still holds the pattern; assert() is compiled out under NDEBUG. */
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
#else
/* Protection disabled: both helpers expand to nothing. */
#define STACK_ALLOC_PROTECT_SET
#define STACK_ALLOC_PROTECT_CHECK
#endif
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
/*
 * STACK_ALLOC(SIZE, TYPE, BUFFER)
 *
 * Allocate a buffer on the stack if the size is smaller than MAX_STACK_ALLOC.
 * Stack allocation is much faster than blas_memory_alloc or malloc, particularly
 * when OpenBLAS is used from a multi-threaded application.
 *
 *   SIZE   - number of TYPE elements requested; it is compared against
 *            MAX_STACK_ALLOC / sizeof(TYPE), so MAX_STACK_ALLOC is a byte limit.
 *   TYPE   - element type of the buffer.
 *   BUFFER - lvalue that receives either the stack array or, when SIZE is
 *            zero or above the limit, the result of blas_memory_alloc(1).
 *
 * The macro expands to several statements and declares the locals
 * stack_alloc_size and stack_buffer (plus whatever STACK_ALLOC_PROTECT_SET
 * declares), so it can be used once per scope. STACK_FREE reads
 * stack_alloc_size and must therefore be used in the same scope.
 *
 * SIZE must be carefully chosen to be:
 * - as small as possible to maximize the number of stack allocations
 * - large enough to support all architectures and kernels
 * Choosing a SIZE that is too small will lead to stack smashing.
 */
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
  /* make it volatile because some function (ex: dgemv_n.S) */ \
  /* do not restore all register */ \
  volatile int stack_alloc_size = SIZE; \
  if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \
    stack_alloc_size = 0; \
  STACK_ALLOC_PROTECT_SET \
  /* A variable-length array must have a length greater than zero, */ \
  /* so reserve one element when the heap fallback is taken. */ \
  TYPE stack_buffer[stack_alloc_size ? stack_alloc_size : 1] __attribute__((aligned(0x20))); \
  BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
#else
//Original OpenBLAS/GotoBLAS codes.
/* Stack allocation disabled: always take the buffer from blas_memory_alloc. */
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
#endif
/*
 * STACK_FREE(BUFFER)
 *
 * Counterpart of STACK_ALLOC. With stack allocation enabled it first runs
 * STACK_ALLOC_PROTECT_CHECK and then calls blas_memory_free(BUFFER) only when
 * stack_alloc_size is zero, i.e. when STACK_ALLOC did not hand out the stack
 * array. It reads the stack_alloc_size local declared by STACK_ALLOC, so both
 * macros must be used in the same scope.
 * Note: this variant expands to more than one statement, so it is not safe as
 * the unbraced body of an if/else.
 */
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
#define STACK_FREE(BUFFER) \
STACK_ALLOC_PROTECT_CHECK \
if(!stack_alloc_size) \
blas_memory_free(BUFFER);
#else
/* Stack allocation disabled: the buffer always came from blas_memory_alloc. */
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
#endif

View File

@@ -65,6 +65,7 @@ extern int blas_omp_linked;
#define BLAS_XDOUBLE 0x0002U
#define BLAS_REAL 0x0000U
#define BLAS_COMPLEX 0x0004U
#define BLAS_INTEGER 0x0008U
#define BLAS_TRANSA 0x0030U /* 2bit */
#define BLAS_TRANSA_N 0x0000U

View File

@@ -41,10 +41,6 @@
#ifndef ASSEMBLER
#ifdef C_MSVC
#include <intrin.h>
#endif
#define MB
#define WMB
@@ -60,67 +56,41 @@ static void __inline blas_lock(volatile BLASULONG *address){
do {
while (*address) {YIELDING;};
#if defined(_MSC_VER) && !defined(__clang__)
// use intrinsic instead of inline assembly
ret = _InterlockedExchange(address, 1);
// inline assembly
/*__asm {
mov eax, address
mov ebx, 1
xchg [eax], ebx
mov ret, ebx
}*/
#else
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");
#endif
} while (ret);
}
#define BLAS_LOCK_DEFINED
static __inline unsigned long long rpcc(void){
#if defined(_MSC_VER) && !defined(__clang__)
return __rdtsc(); // use MSVC intrinsic
#else
unsigned int a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
return ((unsigned long long)a + ((unsigned long long)d << 32));
#endif
};
#define RPCC_DEFINED
static __inline unsigned long getstackaddr(void){
#if defined(_MSC_VER) && !defined(__clang__)
return (unsigned long)_ReturnAddress(); // use MSVC intrinsic
#else
unsigned long addr;
__asm__ __volatile__ ("mov %%esp, %0"
: "=r"(addr) : : "memory");
return addr;
#endif
};
static __inline long double sqrt_long(long double val) {
#if defined(_MSC_VER) && !defined(__clang__)
return sqrt(val); // not sure if this will use fsqrt
#else
long double result;
__asm__ __volatile__ ("fldt %1\n"
"fsqrt\n"
"fstpt %0\n" : "=m" (result) : "m"(val));
return result;
#endif
}
#define SQRT(a) sqrt_long(a)
@@ -130,7 +100,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
#define WHEREAMI
static __inline int WhereAmI(void){
static inline int WhereAmI(void){
int eax, ebx, ecx, edx;
int apicid;
@@ -174,17 +144,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
if (y <= 1) return x;
#if defined(_MSC_VER) && !defined(__clang__)
result = x/y;
return result;
#else
y = blas_quick_divide_table[y];
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
return result;
#endif
}
#endif
@@ -320,12 +284,8 @@ REALNAME:
#define PROFCODE
#ifdef __clang__
#define EPILOGUE .end
#else
#define EPILOGUE .end REALNAME
#endif
#endif
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
#define PROLOGUE \

View File

@@ -41,10 +41,6 @@
#ifndef ASSEMBLER
#ifdef C_MSVC
#include <intrin.h>
#endif
#ifdef C_SUN
#define __asm__ __asm
#define __volatile__
@@ -65,45 +61,30 @@
static void __inline blas_lock(volatile BLASULONG *address){
#ifndef C_MSVC
int ret;
#else
BLASULONG ret;
#endif
do {
while (*address) {YIELDING;};
#ifndef C_MSVC
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");
#else
ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
#endif
} while (ret);
} while (ret);
}
#define BLAS_LOCK_DEFINED
static __inline BLASULONG rpcc(void){
#ifdef C_MSVC
return __rdtsc();
#else
BLASULONG a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
return ((BLASULONG)a + ((BLASULONG)d << 32));
#endif
}
#define RPCC_DEFINED
#define RPCC64BIT
#ifndef C_MSVC
static __inline BLASULONG getstackaddr(void){
BLASULONG addr;
@@ -112,32 +93,22 @@ static __inline BLASULONG getstackaddr(void){
return addr;
}
#endif
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#ifdef C_MSVC
int cpuinfo[4];
__cpuid(cpuinfo, op);
*eax=cpuinfo[0];
*ebx=cpuinfo[1];
*ecx=cpuinfo[2];
*edx=cpuinfo[3];
#else
__asm__ __volatile__("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (op));
#endif
}
/*
#define WHEREAMI
*/
static __inline int WhereAmI(void){
static inline int WhereAmI(void){
int eax, ebx, ecx, edx;
int apicid;
@@ -179,14 +150,10 @@ static __inline int WhereAmI(void){
#define GET_IMAGE_CANCEL
#ifdef SMP
#if defined(USE64BITINT)
#ifdef USE64BITINT
static __inline blasint blas_quickdivide(blasint x, blasint y){
return x / y;
}
#elif defined (C_MSVC)
static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
return x / y;
}
#else
extern unsigned int blas_quick_divide_table[];
@@ -396,7 +363,7 @@ REALNAME:
#define PROFCODE
#define EPILOGUE .end
#define EPILOGUE .end REALNAME
#endif
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)

View File

@@ -220,15 +220,6 @@
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
#define ZIMATCOPY_K_CN zimatcopy_k_cn
#define ZIMATCOPY_K_RN zimatcopy_k_rn
#define ZIMATCOPY_K_CT zimatcopy_k_ct
#define ZIMATCOPY_K_RT zimatcopy_k_rt
#define ZIMATCOPY_K_CNC zimatcopy_k_cnc
#define ZIMATCOPY_K_RNC zimatcopy_k_rnc
#define ZIMATCOPY_K_CTC zimatcopy_k_ctc
#define ZIMATCOPY_K_RTC zimatcopy_k_rtc
#define ZGEADD_K zgeadd_k
#else
@@ -413,15 +404,6 @@
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
#define ZIMATCOPY_K_CN gotoblas -> zimatcopy_k_cn
#define ZIMATCOPY_K_RN gotoblas -> zimatcopy_k_rn
#define ZIMATCOPY_K_CT gotoblas -> zimatcopy_k_ct
#define ZIMATCOPY_K_RT gotoblas -> zimatcopy_k_rt
#define ZIMATCOPY_K_CNC gotoblas -> zimatcopy_k_cnc
#define ZIMATCOPY_K_RNC gotoblas -> zimatcopy_k_rnc
#define ZIMATCOPY_K_CTC gotoblas -> zimatcopy_k_ctc
#define ZIMATCOPY_K_RTC gotoblas -> zimatcopy_k_rtc
#define ZGEADD_K gotoblas -> zgeadd_k
#endif

View File

@@ -39,10 +39,6 @@
#ifndef CPUID_H
#define CPUID_H
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define INTEL_AMD
#endif
#define VENDOR_INTEL 1
#define VENDOR_UMC 2
#define VENDOR_AMD 3
@@ -63,7 +59,7 @@
#define FAMILY_PM 7
#define FAMILY_IA64 8
#ifdef INTEL_AMD
#if defined(__i386__) || defined(__x86_64__)
#define GET_EXFAMILY 1
#define GET_EXMODEL 2
#define GET_TYPE 3

View File

@@ -115,9 +115,6 @@ int detect(void)
if (strstr(p, "0xc0f")) {
return CPU_CORTEXA15;
}
if (strstr(p, "0xd07")) {
return CPU_ARMV7; //ARMV8 on 32-bit
}
}
@@ -161,27 +158,6 @@ int detect(void)
}
p = (char *) NULL ;
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile))
{
if ((!strncmp("CPU architecture", buffer, 16)))
{
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if(p != NULL) {
if (strstr(p, "8")) {
return CPU_ARMV7; //ARMV8 on 32-bit
}
}
#endif
return CPU_UNKNOWN;
@@ -216,7 +192,6 @@ void get_cpuconfig(void)
{
case CPU_CORTEXA9:
printf("#define CORTEXA9\n");
printf("#define ARMV7\n");
printf("#define HAVE_VFP\n");
printf("#define HAVE_VFPV3\n");
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
@@ -232,7 +207,6 @@ void get_cpuconfig(void)
case CPU_CORTEXA15:
printf("#define CORTEXA15\n");
printf("#define ARMV7\n");
printf("#define HAVE_VFP\n");
printf("#define HAVE_VFPV3\n");
if ( get_feature("neon")) printf("#define HAVE_NEON\n");

View File

@@ -29,19 +29,12 @@
#define CPU_UNKNOWN 0
#define CPU_ARMV8 1
#define CPU_CORTEXA57 2
static char *cpuname[] = {
"UNKNOWN",
"ARMV8" ,
"CORTEXA57"
"UNKOWN",
"ARMV8"
};
static char *cpuname_lower[] = {
"unknown",
"armv8" ,
"cortexa57"
};
int get_feature(char *search)
{
@@ -60,13 +53,13 @@ int get_feature(char *search)
{
p = strchr(buffer, ':') + 2;
break;
}
}
}
}
fclose(infile);
fclose(infile);
if( p == NULL ) return 0;
if( p == NULL ) return;
t = strtok(p," ");
while( t = strtok(NULL," "))
@@ -89,30 +82,11 @@ int detect(void)
p = (char *) NULL ;
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile))
{
if (!strncmp("CPU part", buffer, 8))
{
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if(p != NULL) {
if (strstr(p, "0xd07")) {
return CPU_CORTEXA57;
}
}
p = (char *) NULL ;
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile))
{
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)) ||
(!strncmp("CPU architecture", buffer, 16)))
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)))
{
p = strchr(buffer, ':') + 2;
break;
@@ -126,7 +100,7 @@ int detect(void)
if (strstr(p, "AArch64"))
{
return CPU_ARMV8;
return CPU_ARMV8;
}
@@ -144,13 +118,23 @@ char *get_corename(void)
void get_architecture(void)
{
printf("ARM64");
printf("ARM");
}
void get_subarchitecture(void)
{
int d = detect();
printf("%s", cpuname[d]);
switch (d)
{
case CPU_ARMV8:
printf("ARMV8");
break;
default:
printf("UNKNOWN");
break;
}
}
void get_subdirname(void)
@@ -176,34 +160,26 @@ void get_cpuconfig(void)
printf("#define L2_ASSOCIATIVE 4\n");
break;
case CPU_CORTEXA57:
printf("#define CORTEXA57\n");
printf("#define HAVE_VFP\n");
printf("#define HAVE_VFPV3\n");
printf("#define HAVE_NEON\n");
printf("#define HAVE_VFPV4\n");
printf("#define L1_CODE_SIZE 49152\n");
printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 3\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 2\n");
printf("#define L2_SIZE 2097152\n");
printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 16\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
}
}
void get_libname(void)
{
int d = detect();
printf("%s", cpuname_lower[d]);
switch (d)
{
case CPU_ARMV8:
printf("armv8\n");
break;
}
}
void get_features(void)
{

View File

@@ -55,7 +55,6 @@
#define CPUTYPE_POWER6 5
#define CPUTYPE_CELL 6
#define CPUTYPE_PPCG4 7
#define CPUTYPE_POWER8 8
char *cpuname[] = {
"UNKNOWN",
@@ -66,7 +65,6 @@ char *cpuname[] = {
"POWER6",
"CELL",
"PPCG4",
"POWER8"
};
char *lowercpuname[] = {
@@ -78,7 +76,6 @@ char *lowercpuname[] = {
"power6",
"cell",
"ppcg4",
"power8"
};
char *corename[] = {
@@ -90,7 +87,6 @@ char *corename[] = {
"POWER6",
"CELL",
"PPCG4",
"POWER8"
};
int detect(void){
@@ -119,7 +115,6 @@ int detect(void){
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;

View File

@@ -40,12 +40,6 @@
#include <string.h>
#include "cpuid.h"
#if defined(_MSC_VER) && !defined(__clang__)
#define C_INLINE __inline
#else
#define C_INLINE inline
#endif
/*
#ifdef NO_AVX
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
@@ -59,26 +53,12 @@
#endif
*/
#if defined(_MSC_VER) && !defined(__clang__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
int cpuInfo[4] = {-1};
__cpuid(cpuInfo, op);
*eax = cpuInfo[0];
*ebx = cpuInfo[1];
*ecx = cpuInfo[2];
*edx = cpuInfo[3];
}
#else
#ifndef CPUIDEMU
#if defined(__APPLE__) && defined(__i386__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
#else
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
__asm__ __volatile__
("mov %%ebx, %%edi;"
@@ -135,16 +115,14 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
#endif
#endif // _MSC_VER
static C_INLINE int have_cpuid(void){
static inline int have_cpuid(void){
int eax, ebx, ecx, edx;
cpuid(0, &eax, &ebx, &ecx, &edx);
return eax;
}
static C_INLINE int have_excpuid(void){
static inline int have_excpuid(void){
int eax, ebx, ecx, edx;
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
@@ -152,14 +130,10 @@ static C_INLINE int have_excpuid(void){
}
#ifndef NO_AVX
static C_INLINE void xgetbv(int op, int * eax, int * edx){
static inline void xgetbv(int op, int * eax, int * edx){
//Use binary code for xgetbv
#if defined(_MSC_VER) && !defined(__clang__)
*eax = __xgetbv(op);
#else
__asm__ __volatile__
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
#endif
}
#endif
@@ -1162,19 +1136,6 @@ int get_cpuname(void){
#endif
else
return CPUTYPE_NEHALEM;
case 14:
//Skylake
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 13:
// Avoton
return CPUTYPE_NEHALEM;
}
break;
case 5:
@@ -1186,17 +1147,6 @@ int get_cpuname(void){
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
case 5:
case 14:
// Skylake
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
@@ -1232,7 +1182,6 @@ int get_cpuname(void){
case 2:
return CPUTYPE_OPTERON;
case 1:
case 3:
case 10:
return CPUTYPE_BARCELONA;
case 6:
@@ -1243,19 +1192,13 @@ int get_cpuname(void){
return CPUTYPE_BULLDOZER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 2: //AMD Piledriver
case 3: //AMD Richland
case 2:
if(support_avx())
return CPUTYPE_PILEDRIVER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 0:
switch(exmodel){
case 1: //AMD Trinity
if(support_avx())
return CPUTYPE_PILEDRIVER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 3:
if(support_avx())
return CPUTYPE_STEAMROLLER;
@@ -1668,19 +1611,6 @@ int get_coretype(void){
#endif
else
return CORE_NEHALEM;
case 14:
//Skylake
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
case 13:
// Avoton
return CORE_NEHALEM;
}
break;
case 5:
@@ -1692,17 +1622,6 @@ int get_coretype(void){
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
case 5:
case 14:
// Skylake
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
@@ -1731,8 +1650,7 @@ int get_coretype(void){
return CORE_BULLDOZER;
else
return CORE_BARCELONA; //OS don't support AVX.
case 2: //AMD Piledriver
case 3: //AMD Richland
case 2:
if(support_avx())
return CORE_PILEDRIVER;
else
@@ -1740,12 +1658,6 @@ int get_coretype(void){
case 0:
switch(exmodel){
case 1: //AMD Trinity
if(support_avx())
return CORE_PILEDRIVER;
else
return CORE_BARCELONA; //OS don't support AVX.
case 3:
if(support_avx())
return CORE_STEAMROLLER;

View File

@@ -1,46 +0,0 @@
# CBLAS interface tests.
#
# For every entry of FLOAT_TYPES this builds three Fortran/C test drivers
# (x<c>cblat1, x<c>cblat2, x<c>cblat3, where <c> is the lower-cased first
# letter of the entry) and registers each one with CTest. The level 2 and
# level 3 drivers read their parameters from stdin, so they are run through a
# small generated shell helper that redirects the matching ctest/<c>inN file.
#
# FLOAT_TYPES, OpenBLAS_LIBNAME and BU are expected to be set by the including
# project before this directory is added.

# Use the OpenBLAS project root rather than the top of the whole build so the
# headers are still found when OpenBLAS is added as a subproject.
include_directories("${PROJECT_SOURCE_DIR}")
enable_language(Fortran)

# Directory-wide flags: ADD<BU> selects the Fortran symbol decoration and
# CBLAS enables the C interface in the test sources.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS")

# Helper script: run program $1 with file $2 on stdin. The positional
# parameters are quoted so that paths containing spaces survive.
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh"
  "\"$1\" < \"$2\"\n"
)

foreach(float_type ${FLOAT_TYPES})
  # Derive the one-letter precision prefix used in the source file names.
  string(SUBSTRING "${float_type}" 0 1 float_char_upper)
  string(TOLOWER "${float_char_upper}" float_char)

  # level 1: needs no input file, so the target is run directly. Naming the
  # target in COMMAND lets CMake substitute the real executable location,
  # which also works with multi-config generators.
  add_executable(x${float_char}cblat1
    c_${float_char}blat1.f
    c_${float_char}blas1.c)
  target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static)
  add_test(NAME "x${float_char}cblat1"
    COMMAND x${float_char}cblat1)

  # level 2
  add_executable(x${float_char}cblat2
    c_${float_char}blat2.f
    c_${float_char}blas2.c
    c_${float_char}2chke.c
    auxiliary.c
    c_xerbla.c
    constant.c)
  target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static)
  add_test(NAME "x${float_char}cblat2"
    COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh"
            "$<TARGET_FILE:x${float_char}cblat2>"
            "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2")

  # level 3
  add_executable(x${float_char}cblat3
    c_${float_char}blat3.f
    c_${float_char}blas3.c
    c_${float_char}3chke.c
    auxiliary.c
    c_xerbla.c
    constant.c)
  target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static)
  add_test(NAME "x${float_char}cblat3"
    COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh"
            "$<TARGET_FILE:x${float_char}cblat3>"
            "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3")
endforeach()

View File

@@ -1365,9 +1365,8 @@
*
150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME
IF( TRACE )
$ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
*
160 CONTINUE
RETURN

View File

@@ -1365,9 +1365,8 @@
*
150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME
IF( TRACE )
$ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
*
160 CONTINUE
RETURN

View File

@@ -1335,9 +1335,8 @@
*
150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME
IF( TRACE )
$ CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
*
160 CONTINUE
RETURN

View File

@@ -1339,9 +1339,8 @@
*
150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME
IF( TRACE )
$ CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
*
160 CONTINUE
RETURN

View File

@@ -1350,7 +1350,7 @@
*
* Call the subroutine.
*
IF( SNAME( 10: 11 ).EQ.'mv' )THEN
IF( SNAME( 4: 5 ).EQ.'mv' )THEN
IF( FULL )THEN
IF( TRACE )
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
@@ -1376,7 +1376,7 @@
CALL CZTPMV( IORDER, UPLO, TRANS, DIAG,
$ N, AA, XX, INCX )
END IF
ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN
IF( FULL )THEN
IF( TRACE )
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
@@ -1465,7 +1465,7 @@
END IF
*
IF( .NOT.NULL )THEN
IF( SNAME( 10: 11 ).EQ.'mv' )THEN
IF( SNAME( 4: 5 ).EQ.'mv' )THEN
*
* Check the result.
*
@@ -1473,7 +1473,7 @@
$ INCX, ZERO, Z, INCX, XT, G,
$ XX, EPS, ERR, FATAL, NOUT,
$ .TRUE. )
ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN
*
* Compute approximation to original vector.
*
@@ -1611,7 +1611,7 @@
* .. Common blocks ..
COMMON /INFOC/INFOT, NOUTC, OK
* .. Executable Statements ..
CONJ = SNAME( 11: 11 ).EQ.'c'
CONJ = SNAME( 5: 5 ).EQ.'c'
* Define the number of arguments.
NARGS = 9
*

View File

@@ -1366,9 +1366,8 @@
*
150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME
IF( TRACE )
$ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
*
160 CONTINUE
RETURN

View File

@@ -1366,9 +1366,8 @@
*
150 CONTINUE
WRITE( NOUT, FMT = 9996 )SNAME
IF( TRACE )
$ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
$ M, N, ALPHA, LDA, LDB)
*
160 CONTINUE
RETURN

View File

@@ -1,7 +1,7 @@
'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,7 +1,7 @@
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,7 +1,7 @@
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,7 +1,7 @@
'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,7 +1,7 @@
'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,7 +1,7 @@
'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,7 +1,7 @@
'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,7 +1,7 @@
'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,7 +1,7 @@
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,7 +1,7 @@
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
T LOGICAL FLAG, T TO STOP ON FAILURES.
F LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO

View File

@@ -1,203 +0,0 @@
# Put the root of this project's source tree on the include path for everything
# compiled in this directory. PROJECT_SOURCE_DIR (the directory of the nearest
# project() call) is used rather than CMAKE_SOURCE_DIR (the top of the whole
# build) so the path stays correct when this project is consumed through
# add_subdirectory() by a parent build.
include_directories("${PROJECT_SOURCE_DIR}")
# Built twice each: once with no extra flag and once with LOWER
# (see the GenerateCombinationObjects call that consumes this list).
set(UL_SOURCES
  sbmv_k.c
  spmv_k.c
  spr_k.c
  spr2_k.c
  syr_k.c
  syr2_k.c
)

# Built several times each, varying UNIT and TRANSA.
# The *_U.c files feed the _NU/_TL (and, for complex types, _RU/_CL) object names.
set(U_SOURCES
  trmv_U.c
  tbmv_U.c
  tbsv_U.c
  tpmv_U.c
  tpsv_U.c
  trsv_U.c
)

# Same scheme as U_SOURCES; the *_L.c files feed the _NL/_TU
# (and, for complex types, _RL/_CU) object names.
set(L_SOURCES
  trmv_L.c
  tbmv_L.c
  tbsv_L.c
  tpmv_L.c
  tpsv_L.c
  trsv_L.c
)

# Threaded drivers that are only generated when SMP is enabled,
# each with and without LOWER.
set(UL_SMP_SOURCES
  symv_thread.c
  syr_thread.c
  syr2_thread.c
  spr_thread.c
  spr2_thread.c
  spmv_thread.c
  sbmv_thread.c
)

# Threaded drivers (SMP only) generated across the LOWER/UNIT combinations
# for every transpose mode.
set(NU_SMP_SOURCES
  trmv_thread.c
  tpmv_thread.c
  tbmv_thread.c
)

# Used for the complex float types only: each entry is built from its
# z-prefixed file in four variants named _U, _L, _V and _M.
set(ULVM_COMPLEX_SOURCES
  hbmv_k.c
  hpmv_k.c
  hpr_k.c
  hpr2_k.c
  her_k.c
  her2_k.c
)
# objects that need LOWER set
# NOTE(review): GenerateCombinationObjects / GenerateNamedObjects are defined
# outside this file; the meaning of the trailing positional arguments
# (e.g. 1 "" "" 3) has to be confirmed there before changing any of them.
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)
# gbmv uses a lowercase n and t
# (object names gbmv_n / gbmv_t; the "t" variant is built with TRANS)
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
# c/zgbmv
# Six further variants of zgbmv_k.c, one per combination of the
# TRANS / CONJ / XCONJ flags listed below, each under its own gbmv_<letter> name.
# Presumably the final argument (2 instead of 3) restricts these to the complex
# precisions - TODO confirm against the helper's definition.
GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)
# Per-float-type objects.
# For every entry of FLOAT_TYPES this loop registers:
#   * the threaded gemv/gbmv "n" and "t" variants (SMP builds only);
#   * for COMPLEX / ZCOMPLEX: the z-prefixed triangular drivers in four TRANSA
#     modes, the Hermitian drivers, and (SMP only) the conjugated threaded
#     variants;
#   * for all other types: the plain triangular drivers in two transpose modes
#     and (SMP only) their threaded counterparts.
# The order of the calls is kept exactly as before.
foreach (float_type ${FLOAT_TYPES})

  if (SMP)
    GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type})
    GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type})

    GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n" false "" "" false ${float_type})
    GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t" false "" "" false ${float_type})
  endif ()

  # special defines for complex
  # The operands are quoted so the test is always a well-formed string
  # comparison, even if float_type were to expand to nothing.
  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")

    # Triangular drivers from the *_U.c sources; op_name is the leading
    # lowercase run of the file name (e.g. "trmv" from "trmv_U.c").
    foreach (u_source ${U_SOURCES})
      string(REGEX MATCH "[a-z]+" op_name ${u_source})
      GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NU" false ${float_type})
      GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TL" false ${float_type})
      GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RU" false ${float_type})
      GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CL" false ${float_type})
    endforeach ()

    # Triangular drivers from the *_L.c sources.
    foreach (l_source ${L_SOURCES})
      string(REGEX MATCH "[a-z]+" op_name ${l_source})
      GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NL" false ${float_type})
      GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TU" false ${float_type})
      GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RL" false ${float_type})
      GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
    endforeach ()

    # Hermitian drivers: four variants (_U, _L, _V, _M) per source. The pattern
    # allows digits so that names such as "hpr2" are captured in full.
    foreach (ulvm_source ${ULVM_COMPLEX_SOURCES})
      string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source})
      GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type})
      GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type})
      GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type})
      GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
    endforeach ()

    if (SMP)

      # Conjugated threaded gemv variants.
      GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "XCONJ" "gemv_thread_o" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "XCONJ;TRANSA" "gemv_thread_u" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ" "gemv_thread_s" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ;TRANSA" "gemv_thread_d" false "" "" false ${float_type})

      # Conjugated threaded gbmv variants.
      GenerateNamedObjects("gbmv_thread.c" "CONJ" "gbmv_thread_r" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "CONJ;TRANSA" "gbmv_thread_c" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "XCONJ" "gbmv_thread_o" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "XCONJ;TRANSA" "gbmv_thread_u" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ" "gbmv_thread_s" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ;TRANSA" "gbmv_thread_d" false "" "" false ${float_type})

      # Threaded ger variants.
      GenerateNamedObjects("ger_thread.c" "" "ger_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("ger_thread.c" "CONJ" "ger_thread_C" false "" "" false ${float_type})
      GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})

      # Threaded Hermitian drivers, built from the symmetric thread sources
      # with the HEMV / HEMVREV flags.
      GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type})

      GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type})

      GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type})

      GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type})

      GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type})

      # The rank-update thread sources use HER / HERREV rather than HEMV / HEMVREV.
      GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("syr_thread.c" "HERREV" "her_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("syr_thread.c" "LOWER;HERREV" "her_thread_M" false "" "" false ${float_type})

      GenerateNamedObjects("syr2_thread.c" "HER" "her2_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("syr2_thread.c" "HER;LOWER" "her2_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("syr2_thread.c" "HERREV" "her2_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("syr2_thread.c" "LOWER;HERREV" "her2_thread_M" false "" "" false ${float_type})

      # Threaded triangular drivers; op_name keeps the "_thread" part
      # (e.g. "trmv_thread" from "trmv_thread.c").
      foreach (nu_smp_src ${NU_SMP_SOURCES})
        string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
        GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})
        GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=2" 0 "${op_name}_T" false ${float_type})
        GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "${op_name}_R" false ${float_type})
        GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "${op_name}_C" false ${float_type})
      endforeach ()

    endif ()

  else ()

    # For real number functions
    foreach (u_source ${U_SOURCES})
      string(REGEX MATCH "[a-z]+" op_name ${u_source})
      GenerateCombinationObjects("${u_source}" "UNIT" "N" "" 0 "${op_name}_NU" false ${float_type})
      GenerateCombinationObjects("${u_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TL" false ${float_type})
    endforeach ()

    foreach (l_source ${L_SOURCES})
      string(REGEX MATCH "[a-z]+" op_name ${l_source})
      GenerateCombinationObjects("${l_source}" "UNIT" "N" "" 0 "${op_name}_NL" false ${float_type})
      GenerateCombinationObjects("${l_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TU" false ${float_type})
    endforeach ()

    if (SMP)
      GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type})
      foreach (nu_smp_source ${NU_SMP_SOURCES})
        string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_source})
        GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "" 0 "${op_name}_N" false ${float_type})
        GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "TRANSA" 0 "${op_name}_T" false ${float_type})
      endforeach ()
    endif ()

  endif ()

endforeach ()
# The threaded sources listed in UL_SMP_SOURCES are only generated for SMP
# builds, once without and once with LOWER.
if (SMP)
  GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
endif ()

# Compile the sources held in OPENBLAS_SRC into the driver_level2 object
# library. NOTE(review): OPENBLAS_SRC is never set in this file; presumably the
# Generate* helpers above populate it - confirm in their definition.
add_library(driver_level2 OBJECT ${OPENBLAS_SRC})

View File

@@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
OPENBLAS_COMPLEX_FLOAT result;
FLOAT _Complex result;
#endif
#endif

View File

@@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
OPENBLAS_COMPLEX_FLOAT result;
FLOAT _Complex result;
#endif
a = (FLOAT *)args -> a;

View File

@@ -55,12 +55,12 @@
static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
FLOAT *a, *x, *y;
BLASLONG incx;
BLASLONG incx, incy;
BLASLONG m_from, m_to, i;
#ifndef COMPLEX
FLOAT result;
#else
OPENBLAS_COMPLEX_FLOAT result;
FLOAT _Complex result;
#endif
a = (FLOAT *)args -> a;
@@ -68,6 +68,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
y = (FLOAT *)args -> c;
incx = args -> ldb;
incy = args -> ldc;
m_from = 0;
m_to = args -> m;

View File

@@ -43,7 +43,7 @@
static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
FLOAT *a, *x, *y;
BLASLONG incx, incy;
BLASLONG lda, incx, incy;
BLASLONG i, m_from, m_to;
FLOAT alpha_r;
#ifdef COMPLEX
@@ -56,6 +56,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
incx = args -> lda;
incy = args -> ldb;
lda = args -> ldc;
alpha_r = *((FLOAT *)args -> alpha + 0);
#ifdef COMPLEX

View File

@@ -46,7 +46,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
BLASLONG incx;
BLASLONG i, m_from, m_to;
FLOAT alpha_r;
#if defined(COMPLEX) && !defined(HEMV) && !defined(HEMVREV)
#if defined(COMPLEX) && !defined(HER) && !defined(HERREV)
FLOAT alpha_i;
#endif
@@ -56,7 +56,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
incx = args -> lda;
alpha_r = *((FLOAT *)args -> alpha + 0);
#if defined(COMPLEX) && !defined(HEMV) && !defined(HEMVREV)
#if defined(COMPLEX) && !defined(HER) && !defined(HERREV)
alpha_i = *((FLOAT *)args -> alpha + 1);
#endif

View File

@@ -55,7 +55,7 @@
static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
FLOAT *a, *x, *y;
BLASLONG lda, incx;
BLASLONG lda, incx, incy;
BLASLONG m_from, m_to;
a = (FLOAT *)args -> a;
@@ -64,6 +64,7 @@ static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
lda = args -> lda;
incx = args -> ldb;
incy = args -> ldc;
m_from = 0;
m_to = args -> m;

View File

@@ -45,11 +45,13 @@ const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i;
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *B = b;
BLASLONG length;
if (incb != 1) {
B = buffer;
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(n, b, incb, buffer, 1);
}

View File

@@ -45,11 +45,13 @@ const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i;
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *B = b;
BLASLONG length;
if (incb != 1) {
B = buffer;
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(n, b, incb, buffer, 1);
}

View File

@@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
OPENBLAS_COMPLEX_FLOAT result;
FLOAT _Complex result;
#endif
#endif

View File

@@ -45,11 +45,13 @@ const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i;
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *B = b;
BLASLONG length;
if (incb != 1) {
B = buffer;
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(n, b, incb, buffer, 1);
}

View File

@@ -45,11 +45,13 @@ const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i;
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *B = b;
BLASLONG length;
if (incb != 1) {
B = buffer;
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(n, b, incb, buffer, 1);
}

View File

@@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
OPENBLAS_COMPLEX_FLOAT result;
FLOAT _Complex result;
#endif
#endif

View File

@@ -43,10 +43,12 @@
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i;
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *B = b;
if (incb != 1) {
B = buffer;
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(m, b, incb, buffer, 1);
}

View File

@@ -43,10 +43,12 @@
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i;
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *B = b;
if (incb != 1) {
B = buffer;
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(m, b, incb, buffer, 1);
}

View File

@@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX
FLOAT result;
#else
OPENBLAS_COMPLEX_FLOAT result;
FLOAT _Complex result;
#endif
#endif
@@ -119,7 +119,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#endif
x = buffer;
buffer += ((COMPSIZE * args -> m + 3) & ~3);
buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
}
#ifndef TRANS
@@ -403,7 +403,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer + num_cpu * (((m + 3) & ~3) + 16) * COMPSIZE;
queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE;
queue[num_cpu - 1].next = NULL;

View File

@@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer;
#ifdef TRANS
OPENBLAS_COMPLEX_FLOAT temp;
FLOAT _Complex temp;
#endif
if (incy != 1) {

View File

@@ -56,8 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = sbmvbuffer;
FLOAT temp[2];
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
@@ -95,7 +93,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) {
result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -120,7 +118,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) {
result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -145,7 +143,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) {
result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -170,7 +168,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) {
result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);

View File

@@ -51,8 +51,6 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = gemvbuffer;
FLOAT temp[2];
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
@@ -71,7 +69,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#ifndef HEMVREV
#ifndef LOWER
if (i > 0) {
result = DOTC_K(i, a, 1, X, 1);
FLOAT _Complex result = DOTC_K(i, a, 1, X, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -95,7 +93,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else
if (m - i > 1) {
result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -120,7 +118,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else
#ifndef LOWER
if (i > 0) {
result = DOTU_K(i, a, 1, X, 1);
FLOAT _Complex result = DOTU_K(i, a, 1, X, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@@ -144,7 +142,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else
if (m - i > 1) {
result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);

Some files were not shown because too many files have changed in this diff Show More