Merge branch 'develop' into arm_soft_fp_abi

This commit is contained in:
Zhang Xianyi 2015-10-28 12:12:31 +00:00
commit ccf41ebf78
95 changed files with 6318 additions and 218 deletions

2
.gitignore vendored
View File

@ -66,3 +66,5 @@ test/sblat3
test/zblat1 test/zblat1
test/zblat2 test/zblat2
test/zblat3 test/zblat3
build
build.*

190
CMakeLists.txt Normal file
View File

@ -0,0 +1,190 @@
##
## Author: Hank Anderson <hank@statease.com>
##
# Object libraries and the $<TARGET_OBJECTS:...> generator expression (used
# further down to assemble the final library) first shipped in CMake 2.8.8,
# so that is the lowest version this file can actually work with.
cmake_minimum_required(VERSION 2.8.8)
project(OpenBLAS)

# Library version. Keep in sync with VERSION in Makefile.rule (0.2.15).
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 2)
set(OpenBLAS_PATCH_VERSION 15)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")

enable_language(ASM)
enable_language(C)

# Name of the library target; MSVC builds carry an explicit "lib" prefix.
if(MSVC)
  set(OpenBLAS_LIBNAME libopenblas)
else()
  set(OpenBLAS_LIBNAME openblas)
endif()
#######
# User-facing switches. BUILD_WITHOUT_LAPACK is only offered (default ON)
# for MSVC builds.
if(MSVC)
  option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
endif()
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
option(BUILD_DEBUG "Build Debug Version" OFF)
#######
# Translate the switches into the NO_* variables used by the rest of the build.
if(BUILD_WITHOUT_LAPACK)
  set(NO_LAPACK 1)
  set(NO_LAPACKE 1)
endif()
# BUILD_DEBUG forces a Debug build. Otherwise default to Release, but only
# when the user has not already chosen a build type (e.g. via
# -DCMAKE_BUILD_TYPE=RelWithDebInfo), so an explicit choice is not overwritten.
if(BUILD_DEBUG)
  set(CMAKE_BUILD_TYPE Debug)
elseif(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()
if(BUILD_WITHOUT_CBLAS)
  set(NO_CBLAS 1)
endif()
#######
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")

# Use PROJECT_SOURCE_DIR rather than CMAKE_SOURCE_DIR so the helper scripts are
# still found when OpenBLAS is pulled into another project via add_subdirectory().
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")

# Directories that make up the BLAS part of the library.
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
if (NOT DYNAMIC_ARCH)
  list(APPEND BLASDIRS kernel)
endif ()

# UTEST_CHECK implies SANITY_CHECK, which in turn pulls in the reference directory.
if (DEFINED UTEST_CHECK)
  set(SANITY_CHECK 1)
endif ()
if (DEFINED SANITY_CHECK)
  list(APPEND BLASDIRS reference)
endif ()

# SUBDIRS is the list that is actually built: BLAS plus (optionally) lapack.
set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
  list(APPEND SUBDIRS lapack)
endif ()
# Select the precisions to build. If the caller defined none of the BUILD_*
# switches, every precision is enabled.
if (NOT (DEFINED BUILD_SINGLE OR DEFINED BUILD_DOUBLE OR DEFINED BUILD_COMPLEX OR DEFINED BUILD_COMPLEX16))
  foreach (precision_flag BUILD_SINGLE BUILD_DOUBLE BUILD_COMPLEX BUILD_COMPLEX16)
    set(${precision_flag} true)
  endforeach ()
endif ()

# Parallel tables: switch name, FLOAT_TYPES entry, and human-readable label.
#   SINGLE   defines nothing
#   DOUBLE   defines DOUBLE
#   COMPLEX  defines COMPLEX
#   ZCOMPLEX defines COMPLEX and DOUBLE
set(precision_flags BUILD_SINGLE BUILD_DOUBLE BUILD_COMPLEX BUILD_COMPLEX16)
set(precision_types SINGLE DOUBLE COMPLEX ZCOMPLEX)
set(precision_labels "Single" "Double" "Complex" "Double Complex")

set(FLOAT_TYPES "")
foreach (precision_index RANGE 3)
  list(GET precision_flags ${precision_index} precision_flag)
  list(GET precision_types ${precision_index} precision_type)
  list(GET precision_labels ${precision_index} precision_label)
  if (${precision_flag})
    message(STATUS "Building ${precision_label} Precision")
    list(APPEND FLOAT_TYPES "${precision_type}")
  endif ()
endforeach ()

# Drop the scratch variables so they do not leak into the subdirectories.
unset(precision_index)
unset(precision_flag)
unset(precision_type)
unset(precision_label)
unset(precision_flags)
unset(precision_types)
unset(precision_labels)
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
# all :: libs netlib tests shared
# libs :
# Abort early when CPU detection did not produce a usable CORE value.
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
  message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()
# Test the variables by name: expanding them with ${...} yields a malformed
# condition ("if( AND )" / "if(1 AND )") whenever one of them is unset.
if (NO_STATIC AND NO_SHARED)
  message(FATAL_ERROR "Neither static nor shared are enabled.")
endif ()
# Get the object files into the form add_library() accepts:
# $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
# Each subdirectory is expected to define a target named after its path with
# "/" replaced by "_" (e.g. driver/level2 -> driver_level2) -- presumably an
# OBJECT library; confirm against the subdirectory CMakeLists.txt files.
# NOTE: SUBDIR and subdir_obj are ordinary variables and therefore visible to
# the subdirectory being added.
set(TARGET_OBJS "")
foreach (SUBDIR ${SUBDIRS})
add_subdirectory(${SUBDIR})
string(REPLACE "/" "_" subdir_obj ${SUBDIR})
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
endforeach ()
# netlib:
# lapack-netlib ships its own CMakeLists.txt, which searches for BLAS and
# builds/installs a binary of its own. That is not what we want here, so
# add_subdirectory() is avoided; instead a cmake script listing just the
# LAPACK (and optionally LAPACKE) sources is included.
if (NOT (NOFORTRAN OR NO_LAPACK))
  include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
  if (NOT NO_LAPACKE)
    include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
  endif ()
endif ()
# The module-definition file is only used for MSVC DLL builds. Elsewhere the
# variable stays unset and expands to nothing in add_library() below.
if (MSVC)
  set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif ()

# Shared library: LA_SOURCES / LAPACKE_SOURCES (presumably filled in by the
# lapack scripts included earlier) plus the per-directory object files.
add_library(${OpenBLAS_LIBNAME} SHARED
  ${LA_SOURCES}
  ${LAPACKE_SOURCES}
  ${TARGET_OBJS}
  ${OpenBLAS_DEF_FILE})
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")

if (NOT MSVC)
  # MSVC builds only the shared library; everything else also gets a static
  # archive that shares the shared library's output name.
  add_library(${OpenBLAS_LIBNAME}_static STATIC
    ${LA_SOURCES}
    ${LAPACKE_SOURCES}
    ${TARGET_OBJS})
  set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES
    OUTPUT_NAME ${OpenBLAS_LIBNAME}
    CLEAN_DIRECT_OUTPUT 1)

  if (SMP)
    target_link_libraries(${OpenBLAS_LIBNAME} pthread)
    target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
  endif ()

  # build test and ctest
  enable_testing()
  add_subdirectory(test)
  if (NOT NO_CBLAS)
    add_subdirectory(ctest)
  endif ()
endif ()

set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
  VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
  SOVERSION ${OpenBLAS_MAJOR_VERSION})
# TODO: Why is the config saved here? Is this necessary with CMake?
#Save the config files for installation
# @cp Makefile.conf Makefile.conf_last
# @cp config.h config_last.h
#ifdef QUAD_PRECISION
# @echo "#define QUAD_PRECISION">> config_last.h
#endif
#ifeq ($(EXPRECISION), 1)
# @echo "#define EXPRECISION">> config_last.h
#endif
###
#ifeq ($(DYNAMIC_ARCH), 1)
# @$(MAKE) -C kernel commonlibs || exit 1
# @for d in $(DYNAMIC_CORE) ; \
# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
# done
# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last
#endif
#ifdef USE_THREAD
# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
#endif
# @touch lib.grd

View File

@ -1,4 +1,57 @@
OpenBLAS ChangeLog OpenBLAS ChangeLog
====================================================================
Version 0.2.15
27-Oct-2015
common:
* Support cmake on x86/x86-64. Natively compiling on MS Visual Studio.
(Experimental. Thanks to Hank Anderson for the initial cmake porting work.)
On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels.
e.g. cmake .
make
make test (Optional)
On Windows MS Visual Studio, OpenBLAS cmake only supports C kernels.
(OpenBLAS uses AT&T style assembly, which is not supported by MSVC.)
e.g. cmake -G "Visual Studio 12 Win64" .
Open OpenBLAS.sln and build.
* Enable MAX_STACK_ALLOC flags by default.
Improve ger and gemv for small matrices.
* Improve gemv parallel with small m and large n case.
* Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler)
* Add vecLib benchmarks (#565. Thanks, Andreas Noack.)
* Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak)
* Fix LAPACKE lansy (#640. Thanks, Dan Kortschak)
* Import bug fixes for LAPACKE s/dormlq, c/zunmlq
* Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden)
* Remove g77 from compiler list.
* Enable AppVeyor Windows CI.
x86/x86-64:
* Support pure C generic kernels for x86/x86-64.
* Support Intel Broadwell and Skylake by Haswell kernels.
* Support AMD Excavator by Steamroller kernels.
* Optimize s/d/c/zdot for Intel SandyBridge and Haswell.
* Optimize s/d/c/zdot for AMD Piledriver and Steamroller.
* Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell.
* Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller.
* Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge.
* Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller.
* Optimize s/dger for Intel SandyBridge.
* Optimize s/dsymv for Intel SandyBridge.
* Optimize ssymv for Intel Haswell.
* Optimize dgemv for Intel Nehalem and Haswell.
* Optimize dtrmm for Intel Haswell.
ARM:
* Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard)
e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7
* Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas)
POWER:
* Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.)
* Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.)
==================================================================== ====================================================================
Version 0.2.14 Version 0.2.14
24-Mar-2015 24-Mar-2015

View File

@ -3,7 +3,7 @@
# #
# This library's version # This library's version
VERSION = 0.2.14 VERSION = 0.2.15
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@ -169,6 +169,9 @@ COMMON_PROF = -pg
# 64 bit integer interfaces in OpenBLAS. # 64 bit integer interfaces in OpenBLAS.
# For details, https://github.com/xianyi/OpenBLAS/pull/459 # For details, https://github.com/xianyi/OpenBLAS/pull/459
# #
# The same prefix and suffix are also added to the library name,
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas
#
# SYMBOLPREFIX= # SYMBOLPREFIX=
# SYMBOLSUFFIX= # SYMBOLSUFFIX=

View File

@ -891,12 +891,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3 CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
endif endif
ifndef LIBNAMESUFFIX
LIBPREFIX = libopenblas
else
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
endif
ifndef SYMBOLPREFIX ifndef SYMBOLPREFIX
SYMBOLPREFIX = SYMBOLPREFIX =
endif endif
@ -905,6 +899,12 @@ ifndef SYMBOLSUFFIX
SYMBOLSUFFIX = SYMBOLSUFFIX =
endif endif
ifndef LIBNAMESUFFIX
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
else
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
endif
KERNELDIR = $(TOPDIR)/kernel/$(ARCH) KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
include $(TOPDIR)/Makefile.$(ARCH) include $(TOPDIR)/Makefile.$(ARCH)

View File

@ -2,8 +2,9 @@
[![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS) Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS)
AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n8nkkx64/branch/develop?svg=true)](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
## Introduction ## Introduction
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.

42
appveyor.yml Normal file
View File

@ -0,0 +1,42 @@
# AppVeyor CI configuration: generates a Visual Studio solution with CMake
# and builds OpenBLAS.sln in the x64 / Release configuration.
version: 0.2.15.{build}
#environment:
platform:
- x64
configuration: Release
clone_folder: c:\projects\OpenBLAS
init:
- git config --global core.autocrlf input
build:
project: OpenBLAS.sln
clone_depth: 5
#branches to build
branches:
only:
- master
- develop
- cmake
skip_tags: true
matrix:
fast_finish: true
skip_commits:
# Add [av skip] to commit messages
message: /\[av skip\]/
# OpenBLAS.sln does not exist in the repository; it is produced here by the
# in-source CMake run before the build step.
before_build:
- echo Running cmake...
- cd c:\projects\OpenBLAS
- cmake -G "Visual Studio 12 Win64" .
# No tests are executed; this step only reports that the build succeeded.
test_script:
- echo Build OK!

View File

@ -30,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
$cross_suffix = $1; $cross_suffix = $1;
} }
} else { } else {
if ($ARGV[0] =~ /(.*-)(.*)/) { if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) {
$cross_suffix = $1; $cross_suffix = $1;
} }
} }

115
cmake/arch.cmake Normal file
View File

@ -0,0 +1,115 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets various variables based on architecture.
# x86 / x86_64: binary-mode handling and extended-precision (EXPRECISION) flags.
# All expansions are quoted so an empty ARCH / F_COMPILER does not produce a
# malformed if(). The compiler is identified through CMAKE_C_COMPILER_ID;
# CMAKE_C_COMPILER holds the compiler's path and can never equal "GNU" etc.
if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64")
  if ("${ARCH}" STREQUAL "x86")
    if (NOT BINARY)
      set(NO_BINARY_MODE 1)
    endif ()
  endif ()

  if (NOT NO_EXPRECISION)
    if ("${F_COMPILER}" MATCHES "GFORTRAN")
      # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
      if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
        set(EXPRECISION 1)
        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
      elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
        # For Clang only the Fortran side gets -m128bit-long-double.
        set(EXPRECISION 1)
        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
      endif ()
    endif ()
  endif ()
endif ()
# Compiler-specific options, keyed on CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is
# the compiler's path, so comparing it against an ID never matched).
# NOTE(review): "LSB", "OPEN64" and "PATHSCALE" are kept from the Makefile
# port; CMake spells the PathScale ID "PathScale" -- confirm whether these
# branches can ever be taken.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
  set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
endif ()

if (USE_OPENMP)
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    message(WARNING "Clang doesn't support OpenMP yet.")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
    set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
    set(CEXTRALIB "${CEXTRALIB} -lstdc++")
  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  endif ()
endif ()
# DYNAMIC_ARCH: list the cores to build kernels for. DYNAMIC_CORE is a
# space-separated string. ARCH is quoted so an empty value cannot break if().
if (DYNAMIC_ARCH)
  if ("${ARCH}" STREQUAL "x86")
    set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
  elseif ("${ARCH}" STREQUAL "x86_64")
    set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
    if (NOT NO_AVX)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
    endif ()
    if (NOT NO_AVX2)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
    endif ()
  endif ()

  # No core list for this architecture: fall back to a non-dynamic build.
  if (NOT DYNAMIC_CORE)
    unset(DYNAMIC_ARCH)
  endif ()
endif ()
# Architectures without a selectable 32/64-bit binary mode.
#   mips64                 : NO_BINARY_MODE only
#   ia64, alpha, arm, arm64: NO_BINARY_MODE and BINARY_DEFINED
# ARCH is quoted so that an empty value cannot produce a malformed if().
# (The Makefile's ia64 + GFORTRAN + GNU case carried a disabled EXPRECISION
# setting; it stays disabled here.)
if ("${ARCH}" STREQUAL "mips64")
  set(NO_BINARY_MODE 1)
elseif ("${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "alpha" OR "${ARCH}" STREQUAL "arm" OR "${ARCH}" STREQUAL "arm64")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)
endif ()

89
cmake/c_check.cmake Normal file
View File

@ -0,0 +1,89 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from the OpenBLAS/c_check perl script.
## This is triggered by prebuild.cmake and runs before any of the code is built.
## Creates config.h and Makefile.conf.
# CMake vars set by this file:
# OSNAME (use CMAKE_SYSTEM_NAME)
# ARCH
# C_COMPILER (use CMAKE_C_COMPILER)
# BINARY32
# BINARY64
# FU
# CROSS_SUFFIX
# CROSS
# CEXTRALIB
# Defines set by this file:
# OS_
# ARCH_
# C_
# __32BIT__
# __64BIT__
# FUNDERSCORE
# PTHREAD_CREATE_FUNC
# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.
# FU is the value later written as the FUNDERSCORE define: "_" on Apple and
# MSVC, empty everywhere else.
if (APPLE OR MSVC)
  set(FU "_")
else ()
  set(FU "")
endif ()
# Convert CMake vars into the format that OpenBLAS expects.
# Expansions are quoted so that an empty CMAKE_SYSTEM_NAME cannot turn
# string()/if() into malformed calls.
string(TOUPPER "${CMAKE_SYSTEM_NAME}" HOST_OS)
if ("${HOST_OS}" STREQUAL "WINDOWS")
  set(HOST_OS WINNT)
endif ()

# added by hpa - check size of void ptr to detect 64-bit compile.
# A BINARY value supplied by the caller takes precedence.
if (NOT DEFINED BINARY)
  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(BINARY 64)
  else ()
    set(BINARY 32)
  endif ()
endif ()

# Exactly one of BINARY64 / BINARY32 is set.
if (BINARY EQUAL 64)
  set(BINARY64 1)
else ()
  set(BINARY32 1)
endif ()
# CMake docs define these:
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
#
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
#
# Normalise the processor name to the ARCH spelling used by the other scripts.
# Expansions are quoted so an empty processor name cannot break if().
set(ARCH "${CMAKE_SYSTEM_PROCESSOR}")
if ("${ARCH}" STREQUAL "AMD64")
  set(ARCH "x86_64")
endif ()
# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong
if ("${ARCH}" STREQUAL "x86_64" AND BINARY EQUAL 32)
  set(ARCH x86)
endif ()
# 32-bit x86 is reported as "X86" or as i386..i686 depending on the platform.
if ("${ARCH}" STREQUAL "X86" OR "${ARCH}" MATCHES "^i[3-6]86$")
  set(ARCH x86)
endif ()
# The C_<id> define describes the C compiler, so take the ID from the C
# toolchain (the C++ compiler may be a different product). OpenBLAS calls
# GNU "GCC".
set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")
if ("${COMPILER_ID}" STREQUAL "GNU")
  set(COMPILER_ID "GCC")
endif ()

string(TOUPPER "${ARCH}" UC_ARCH)

# Start a fresh TARGET_CONF file containing the OS / architecture / compiler /
# bitness / underscore defines.
file(WRITE ${TARGET_CONF}
  "#define OS_${HOST_OS}\t1\n"
  "#define ARCH_${UC_ARCH}\t1\n"
  "#define C_${COMPILER_ID}\t1\n"
  "#define __${BINARY}BIT__\t1\n"
  "#define FUNDERSCORE\t${FU}\n")

103
cmake/cc.cmake Normal file
View File

@ -0,0 +1,103 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets C related variables.
# GNU-style C compilers (GNU, LSB, Clang).
# The compiler is identified through CMAKE_C_COMPILER_ID -- CMAKE_C_COMPILER is
# the compiler's path and never equals an ID -- and every expansion is quoted
# so that an unset ARCH / CORE / OSNAME cannot produce a malformed if().
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
  set(CCOMMON_OPT "${CCOMMON_OPT} -Wall")
  set(COMMON_PROF "${COMMON_PROF} -fno-inline")
  set(NO_UNINITIALIZED_WARN "-Wno-uninitialized")

  if (QUIET_MAKE)
    set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused")
  endif ()

  if (NO_BINARY_MODE)
    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
      else ()
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32")
      endif ()
      set(BINARY_DEFINED 1)
    endif ()

    # Both Loongson cores take the same -march flag for C and Fortran.
    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
      set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
    endif ()

    if ("${OSNAME}" STREQUAL "AIX")
      set(BINARY_DEFINED 1)
    endif ()
  endif ()

  # Nothing above fixed the ABI: select it with -m64 / -m32.
  if (NOT BINARY_DEFINED)
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
# PGI and PathScale: choose the 32/64-bit target flags.
# Keyed on CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is a path and never matched).
# NOTE(review): CMake spells the PathScale ID "PathScale"; confirm whether the
# upper-case spelling kept from the Makefile port can match.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7")
  endif ()
endif ()

if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  endif ()
endif ()
# Open64: ABI selection.
# Fixes relative to the first port:
#  * the non-mips64 branch had -m32 and -m64 swapped (a 64-bit build got -m32);
#  * the compiler is identified via CMAKE_C_COMPILER_ID, not the compiler path;
#  * ARCH / CORE are quoted so empty values cannot break if().
# NOTE(review): "OPEN64" is the Makefile's name for this compiler; confirm
# which CMAKE_C_COMPILER_ID value it actually reports.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")
  if ("${ARCH}" STREQUAL "mips64")
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -n64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
    endif ()
    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
    endif ()
  else ()
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
# Sun C compiler: silence warnings and pick the ABI.
# Keyed on CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is a path and never matched).
# NOTE(review): CMake reports this compiler as "SunPro"; confirm whether "SUN"
# can match. The non-x86 branch appends -m64 to FCOMMON_OPT rather than
# CCOMMON_OPT, exactly as before -- verify that this is intended.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "SUN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -w")
  if ("${ARCH}" STREQUAL "x86")
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  endif ()
endif ()

60
cmake/export.cmake Normal file
View File

@ -0,0 +1,60 @@
# Only generate .def for dll on MSVC.
# The gensymbol perl script receives a fixed positional argument list, so every
# optional setting is first normalised into a <NAME>_IN variable.
if(MSVC)
  set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)

  # Architecture argument: defaults to x86_64, overridden for the generic core.
  if (DEFINED ARCH)
    set(ARCH_IN ${ARCH})
  else ()
    set(ARCH_IN "x86_64")
  endif ()
  if (${CORE} STREQUAL "generic")
    set(ARCH_IN "GENERIC")
  endif ()

  # Optional switches: pass the variable's value when defined, otherwise 0.
  foreach (gensym_flag EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS)
    if (DEFINED ${gensym_flag})
      set(${gensym_flag}_IN ${${gensym_flag}})
    else ()
      set(${gensym_flag}_IN 0)
    endif ()
  endforeach ()
  unset(gensym_flag)

  # Run gensymbol just before linking and redirect its output into the .def file.
  add_custom_command(
    TARGET ${OpenBLAS_LIBNAME} PRE_LINK
    COMMAND perl
    ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
    COMMENT "Create openblas.def file"
    VERBATIM)
endif()

66
cmake/f_check.cmake Normal file
View File

@ -0,0 +1,66 @@
##
## Author: Hank Anderson <hank@statease.com>
## Copyright: (c) Stat-Ease, Inc.
## Created: 12/29/14
## Last Modified: 12/29/14
## Description: Ported from the OpenBLAS/f_check perl script.
## This is triggered by prebuild.cmake and runs before any of the code is built.
## Appends Fortran information to config.h and Makefile.conf.
# CMake vars set by this file:
# F_COMPILER
# FC
# BU
# NOFORTRAN
# NEED2UNDERSCORES
# FEXTRALIB
# Defines set by this file:
# BUNDERSCORE
# NEEDBUNDERSCORE
# NEED2UNDERSCORES
# Fortran compiler setup. On MSVC the compiler is forced to "gfortran" with
# ID GNU before anything else happens.
if (MSVC)
# had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa
include(CMakeForceCompiler)
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif ()
# Fortran is only enabled as a language when LAPACK is built; without LAPACK
# the compiler is forced to gfortran/GNU instead, so CMAKE_Fortran_COMPILER
# still has a value for the F_COMPILER derivation at the end of this file.
if (NOT NO_LAPACK)
enable_language(Fortran)
else()
include(CMakeForceCompiler)
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif()
# Both configurations use a single trailing underscore (BU) and append the
# matching defines to TARGET_CONF.
set(BU "_")
if (ONLY_CBLAS)
  # When we only build CBLAS, we set NOFORTRAN=2
  set(NOFORTRAN 2)
  set(NO_FBLAS 1)
  # F_COMPILER is not set here: CMake handles the fortran compiler.
  file(APPEND ${TARGET_CONF}
    "#define BUNDERSCORE _\n"
    "#define NEEDBUNDERSCORE 1\n")
else ()
  # N.B. the f_check perl script is not cross-platform, so it is not run;
  # fixed values are written instead.
  # TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
  # TODO: set FEXTRALIB flags a la f_check?
  file(APPEND ${TARGET_CONF}
    "#define BUNDERSCORE _\n"
    "#define NEEDBUNDERSCORE 1\n"
    "#define NEED2UNDERSCORES 0\n")
endif ()

# F_COMPILER is the upper-cased base name of the Fortran compiler executable
# (e.g. gfortran -> GFORTRAN).
get_filename_component(F_COMPILER ${CMAKE_Fortran_COMPILER} NAME_WE)
string(TOUPPER ${F_COMPILER} F_COMPILER)

200
cmake/fc.cmake Normal file
View File

@ -0,0 +1,200 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets Fortran related variables.
# g77 and g95 share the same options apart from the interface define.
# F_COMPILER is quoted so that an empty value cannot produce a malformed if().
if ("${F_COMPILER}" STREQUAL "G77")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  if (NOT NO_BINARY_MODE)
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()
elseif ("${F_COMPILER}" STREQUAL "G95")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  if (NOT NO_BINARY_MODE)
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
# gfortran options.
# Fixes relative to the first port:
#  * EXTRALIB was set to the literal text "{EXTRALIB} -lgfortran" (missing "$"),
#    which both dropped the previous value and injected a bogus "{EXTRALIB}"
#    token;
#  * F_COMPILER / ARCH are quoted so empty values cannot break if().
if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")

  # Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
  if (NOT NO_LAPACK)
    set(EXTRALIB "${EXTRALIB} -lgfortran")
  endif ()

  if (NO_BINARY_MODE)
    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
      else ()
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
      endif ()
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
      # 64-bit integer interface is only applied to 64-bit builds.
      if (INTERFACE64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
      endif ()
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
  endif ()
endif ()
# Intel, Fujitsu and IBM Fortran compilers.
# F_COMPILER is quoted so that an empty value cannot produce a malformed if().
if ("${F_COMPILER}" STREQUAL "INTEL")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")
  if (INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "FUJITSU")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "IBM")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM")
  # FCOMMON_OPT += -qarch=440
  if (BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -q64")
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8")
    endif ()
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -q32")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
# PGI and PathScale Fortran compilers.
# F_COMPILER / ARCH are quoted so empty values cannot produce a malformed if().
if ("${F_COMPILER}" STREQUAL "PGI")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI")
  set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER")
  if (BINARY64)
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
    endif ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "PATHSCALE")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE")
  # 64-bit integers are only requested for 64-bit builds.
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  # mips64 selects the ABI with -mabi=, every other architecture with -m32/-m64.
  if ("${ARCH}" STREQUAL "mips64")
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
# Open64 Fortran compiler.
# F_COMPILER / ARCH / CORE are quoted so empty values cannot break if().
if ("${F_COMPILER}" STREQUAL "OPEN64")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64")
  # 64-bit integers are only requested for 64-bit builds.
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()

  if ("${ARCH}" STREQUAL "mips64")
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -n64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
    endif ()
    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FEXTRALIB "${FEXTRALIB} -lstdc++")
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
# Sun and Compaq Fortran compilers.
# F_COMPILER / ARCH are quoted so empty values cannot produce a malformed if().
if ("${F_COMPILER}" STREQUAL "SUN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")
  if ("${ARCH}" STREQUAL "x86")
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "COMPAQ")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
# from the root Makefile - this is for lapack-netlib to compile the correct secnd file.
# TIMER becomes part of a source file name (second_<TIMER>.f / dsecnd_<TIMER>.f).
# F_COMPILER is quoted so that an empty value falls through to "NONE" instead
# of producing a malformed if().
if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  set(TIMER "INT_ETIME")
else ()
  set(TIMER "NONE")
endif ()

165
cmake/kernel.cmake Normal file
View File

@ -0,0 +1,165 @@
# helper functions for the kernel CMakeLists.txt
# Set the default filenames for L1 objects. Most of these will be overriden by the appropriate KERNEL file.
# SetDefaultL1()
# Sets the default source file for every level-1 kernel variable in the
# caller's scope. Real precisions are S/D/Q, complex ones C/Z/X; the complex
# source is the real one with a "z" prefix. Takes no arguments.
macro(SetDefaultL1)
  # Operations that exist for all six precisions: <P><OP>KERNEL -> [z]<op>.S
  foreach (kern_op AMAX AMIN ASUM AXPY COPY DOT NRM2 ROT SCAL SWAP)
    string(TOLOWER "${kern_op}" kern_src)
    foreach (kern_prec S D Q)
      set(${kern_prec}${kern_op}KERNEL ${kern_src}.S)
    endforeach ()
    foreach (kern_prec C Z X)
      set(${kern_prec}${kern_op}KERNEL z${kern_src}.S)
    endforeach ()
  endforeach ()

  # Real-only entries.
  foreach (kern_prec S D Q)
    set(${kern_prec}MAXKERNEL max.S)
    set(${kern_prec}MINKERNEL min.S)
    set(I${kern_prec}AMAXKERNEL iamax.S)
    set(I${kern_prec}AMINKERNEL iamin.S)
    # The index max/min kernels default to the iamax/iamin sources as well.
    set(I${kern_prec}MAXKERNEL iamax.S)
    set(I${kern_prec}MINKERNEL iamin.S)
    set(${kern_prec}GEMVNKERNEL gemv_n.S)
    set(${kern_prec}GEMVTKERNEL gemv_t.S)
    set(${kern_prec}CABS_KERNEL ../generic/cabs.c)
  endforeach ()

  # Complex-only entries.
  foreach (kern_prec C Z X)
    set(I${kern_prec}AMAXKERNEL izamax.S)
    set(I${kern_prec}AMINKERNEL izamin.S)
    set(${kern_prec}GEMVNKERNEL zgemv_n.S)
    set(${kern_prec}GEMVTKERNEL zgemv_t.S)
  endforeach ()

  set(LSAME_KERNEL ../generic/lsame.c)

  # axpby is only defined for S/D/C/Z and uses the C sources under ../arm.
  set(SAXPBYKERNEL ../arm/axpby.c)
  set(DAXPBYKERNEL ../arm/axpby.c)
  set(CAXPBYKERNEL ../arm/zaxpby.c)
  set(ZAXPBYKERNEL ../arm/zaxpby.c)

  # A macro runs in the caller's scope: drop the loop scratch variables.
  unset(kern_op)
  unset(kern_src)
  unset(kern_prec)
endmacro ()
# SetDefaultL2()
# Sets the default source file for every level-2 kernel variable in the
# caller's scope (gemv, ger, symv, hemv). Real precisions are S/D/Q, complex
# ones C/Z/X. Takes no arguments.
macro(SetDefaultL2)
  foreach (kern_prec S D Q)
    set(${kern_prec}GEMVNKERNEL gemv_n.S)
    set(${kern_prec}GEMVTKERNEL gemv_t.S)
    set(${kern_prec}GERKERNEL ../generic/ger.c)
    set(${kern_prec}SYMV_U_KERNEL ../generic/symv_k.c)
    set(${kern_prec}SYMV_L_KERNEL ../generic/symv_k.c)
  endforeach ()

  foreach (kern_prec C Z X)
    set(${kern_prec}GEMVNKERNEL zgemv_n.S)
    set(${kern_prec}GEMVTKERNEL zgemv_t.S)
    # Both ger variants (U and C) share one generic source.
    set(${kern_prec}GERUKERNEL ../generic/zger.c)
    set(${kern_prec}GERCKERNEL ../generic/zger.c)
    set(${kern_prec}SYMV_U_KERNEL ../generic/zsymv_k.c)
    set(${kern_prec}SYMV_L_KERNEL ../generic/zsymv_k.c)
    # hemv exists only for complex precisions, in four variants.
    foreach (kern_variant U L V M)
      set(${kern_prec}HEMV_${kern_variant}_KERNEL ../generic/zhemv_k.c)
    endforeach ()
  endforeach ()

  # A macro runs in the caller's scope: drop the loop scratch variables.
  unset(kern_prec)
  unset(kern_variant)
endmacro ()
# SetDefaultL3()
# Sets the default geadd kernel sources in the caller's scope: S/D use the
# real generic source, C/Z the complex one. Takes no arguments.
macro(SetDefaultL3)
  foreach (kern_prec S D)
    set(${kern_prec}GEADD_KERNEL ../generic/geadd.c)
  endforeach ()
  foreach (kern_prec C Z)
    set(${kern_prec}GEADD_KERNEL ../generic/zgeadd.c)
  endforeach ()
  # A macro runs in the caller's scope: drop the loop scratch variable.
  unset(kern_prec)
endmacro ()

347
cmake/lapack.cmake Normal file
View File

@ -0,0 +1,347 @@
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
# All file names in these lists are relative to the lapack-netlib SRC
# directory; entries starting with ../INSTALL/ reach into its sibling
# INSTALL directory.

# Auxiliary routines that are always part of the LAPACK source list,
# regardless of which precisions are built.
set(ALLAUX
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
../INSTALL/ilaver.f ../INSTALL/slamch.f
)
# Auxiliary routines added for the single and single-complex builds.
# NOTE(review): TIMER is not set in this file; if it is empty the last entry
# becomes "second_.f" - confirm where TIMER is defined.
set(SCLAUX
sbdsdc.f
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
slagts.f slamrg.f slanst.f
slapy2.f slapy3.f slarnv.f
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
slarrk.f slarrr.f slaneg.f
slartg.f slaruv.f slas2.f slascl.f
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
ssteqr.f ssterf.f slaisnan.f sisnan.f
slartgp.f slartgs.f
../INSTALL/second_${TIMER}.f
)
# Auxiliary routines added for the double and double-complex builds.
# NOTE(review): the same TIMER caveat applies to dsecnd_${TIMER}.f.
set(DZLAUX
dbdsdc.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
dlagts.f dlamrg.f dlanst.f
dlapy2.f dlapy3.f dlarnv.f
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
dlarrk.f dlarrr.f dlaneg.f
dlartg.f dlaruv.f dlas2.f dlascl.f
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
)
# Single-precision real LAPACK routines.
set(SLASRC
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
sgetc2.f sgetri.f
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
sggglm.f sgghrd.f sgglse.f sggqrf.f
sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
slansy.f slantb.f slantp.f slantr.f slanv2.f
slapll.f slapmt.f
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
sorgrq.f sorgtr.f sorm2l.f sorm2r.f
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
spbstf.f spbsv.f spbsvx.f
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
sposvx.f spstrf.f spstf2.f
sppcon.f sppequ.f
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
sstevx.f
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
ssytri_rook.f ssycon_rook.f ssysv_rook.f
stbcon.f
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
stptrs.f
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
strtrs.f stzrqf.f stzrzf.f sstemr.f
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
sgeequb.f ssyequb.f spoequb.f sgbequb.f
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
)
# Single-precision routine that is added for both the single and the double
# build (see the BUILD_SINGLE / BUILD_DOUBLE branches further down).
set(DSLASRC spotrs.f)
# Single-precision complex LAPACK routines.
set(CLASRC
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
cgesvx.f cgetc2.f cgetri.f
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
cgghrd.f cgglse.f cggqrf.f cggrqf.f
cggsvd.f cggsvp.f
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
chetf2.f chetrd.f
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
chetrs.f chetrs2.f
chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
chgeqz.f chpcon.f chpev.f chpevd.f
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
chpsvx.f
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
claed0.f claed7.f claed8.f
claein.f claesy.f claev2.f clags2.f clagtm.f
clahef.f clahef_rook.f clahqr.f
clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
clanhb.f clanhe.f
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
cposv.f cposvx.f cpstrf.f cpstf2.f
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
crot.f cspcon.f csprfs.f cspsv.f
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
cstegr.f cstein.f csteqr.f
csycon.f
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
csyswapr.f csytrs.f csytrs2.f csyconv.f
csytf2_rook.f csytrf_rook.f csytrs_rook.f
csytri_rook.f csycon_rook.f csysv_rook.f
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
ctprfs.f ctptri.f
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
)
# Single-complex routine that is added for both the complex and the
# double-complex build (see the BUILD_COMPLEX / BUILD_COMPLEX16 branches
# further down).
set(ZCLASRC cpotrs.f)
# Double-precision real LAPACK routines.  The list also carries the
# mixed-precision helpers dsgesv.f, dsposv.f, dlag2s.f, slag2d.f and dlat2s.f.
set(DLASRC
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
dgetc2.f dgetri.f
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
dggglm.f dgghrd.f dgglse.f dggqrf.f
dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
dlapll.f dlapmt.f
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
dlargv.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
dorgrq.f dorgtr.f dorm2l.f dorm2r.f
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
dpbstf.f dpbsv.f dpbsvx.f
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
dposvx.f dpotrs.f dpstrf.f dpstf2.f
dppcon.f dppequ.f
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
dstevx.f
dsycon.f dsyev.f dsyevd.f dsyevr.f
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
dsysv.f dsysvx.f
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
dsytri_rook.f dsycon_rook.f dsysv_rook.f
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
dtptrs.f
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
)
# Double-precision complex LAPACK routines.  The list also carries the
# mixed-precision helpers zcgesv.f, zcposv.f, zlag2c.f, clag2z.f and zlat2c.f.
set(ZLASRC
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
zgetri.f
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
zgghrd.f zgglse.f zggqrf.f zggrqf.f
zggsvd.f zggsvp.f
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
zhetf2.f zhetrd.f
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
zhetrs.f zhetrs2.f
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
zhpsvx.f
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
zlaed0.f zlaed7.f zlaed8.f
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
zlahef.f zlahef_rook.f zlahqr.f
zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
zlangt.f zlanhb.f
zlanhe.f
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
zlarcm.f zlarf.f zlarfb.f
zlarfg.f zlarft.f zlarfgp.f
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
zlassq.f zlasyf.f zlasyf_rook.f
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
zrot.f zspcon.f zsprfs.f zspsv.f
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
zstegr.f zstein.f zsteqr.f
zsycon.f
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
zsytri_rook.f zsycon_rook.f zsysv_rook.f
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
ztprfs.f ztptri.f
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
zunmtr.f zupgtr.f
zupmtr.f izmax1.f dzsum1.f zstemr.f
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
)
# Assemble the list of LAPACK sources (relative to lapack-netlib/SRC) for the
# precisions being built.  ALLAUX is always included; each BUILD_* switch adds
# its own routine list plus the shared mixed-precision and auxiliary lists.
set(LA_REL_SRC ${ALLAUX})
if (BUILD_SINGLE)
  list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
endif ()
if (BUILD_DOUBLE)
  list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
endif ()
if (BUILD_COMPLEX)
  list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
endif ()
if (BUILD_COMPLEX16)
  list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
endif ()

# DSLASRC, ZCLASRC, SCLAUX and DZLAUX are each appended by two of the branches
# above, so enabling both precisions of a pair lists the same files twice.
# Keep only the first occurrence of every file.  The guard avoids calling
# list(REMOVE_DUPLICATES) on an empty list, which old CMake releases reject.
if (LA_REL_SRC)
  list(REMOVE_DUPLICATES LA_REL_SRC)
endif ()

# add lapack-netlib folder to the sources
set(LA_SOURCES "")
foreach (LA_FILE ${LA_REL_SRC})
  list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
endforeach ()

# Compile every LAPACK source with the LAPACK-specific Fortran flags.
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")

2067
cmake/lapacke.cmake Normal file

File diff suppressed because it is too large Load Diff

104
cmake/os.cmake Normal file
View File

@ -0,0 +1,104 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Detects the OS and sets appropriate variables.
# Per-OS settings for the Unix-like systems.  CMAKE_SYSTEM_NAME holds a single
# value, so at most one branch of the chain applies.
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
  # TODO: should be exported as an env var
  set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2")
  set(MD5SUM "md5 -r")
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
  set(MD5SUM "md5 -r")
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD")
  set(MD5SUM "md5 -n")
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
  set(EXTRALIB "${EXTRALIB} -lm")
  set(NO_EXPRECISION 1)
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "AIX")
  set(EXTRALIB "${EXTRALIB} -lm")
endif ()
# Windows settings: no PIC, no extended precision, link advapi32, and decide
# whether the compiler supports the MS ABI attribute (-DMS_ABI).
# TODO: this is probably meant for mingw, not other windows compilers
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
  set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32")

  # probably not going to use these
  set(SUFFIX "obj")
  set(PSUFFIX "pobj")
  set(LIBSUFFIX "a")

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
  endif ()

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
    # Test for supporting MS_ABI
    # removed string parsing in favor of CMake's version comparison -hpa
    # The trailing newline printed by the compiler is stripped, and the
    # result is quoted so an empty answer (failed invocation) simply counts
    # as "too old" instead of producing a malformed if() expression.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
      OUTPUT_VARIABLE GCC_VERSION
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    if (NOT "${GCC_VERSION}" VERSION_LESS 4.7)
      # GCC Version >=4.7
      # It is compatible with MSVC ABI.
      set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
    endif ()
  endif ()

  # Ensure the correct stack alignment on Win32
  # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
  # ARCH is quoted so the test is simply false while ARCH is still empty.
  if ("${ARCH}" STREQUAL "x86")
    if (NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
      set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
    endif ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")
  endif ()
endif ()
# Interix settings: no PIC, no extended precision, and the location of the
# Interix GCC tools.
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Interix")
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
  # The original passed a stray STREQUAL keyword to set(), which stored the
  # two-element list "STREQUAL;<path>" instead of the path.
  set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin")
endif ()
# Cygwin: no PIC and no extended precision.
if (CYGWIN)
  set(NO_EXPRECISION 1)
  set(NEED_PIC 0)
endif ()

# Threaded builds link pthread everywhere except on Windows and Interix.
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix")
  if (SMP)
    set(EXTRALIB "${EXTRALIB} -lpthread")
  endif ()
endif ()

# Quad precision and 32-bit x86 both switch off extended precision.
if (QUAD_PRECISION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION")
  set(NO_EXPRECISION 1)
endif ()

if (${ARCH} STREQUAL "x86")
  set(NO_EXPRECISION 1)
endif ()

# UTEST_CHECK implies SANITY_CHECK, so it must be handled first.
if (UTEST_CHECK)
  set(SANITY_CHECK 1)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
endif ()

if (SANITY_CHECK)
  # TODO: need some way to get $(*F) (target filename)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}")
endif ()

113
cmake/prebuild.cmake Normal file
View File

@ -0,0 +1,113 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from OpenBLAS/Makefile.prebuild
## This is triggered by system.cmake and runs before any of the code is built.
## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
## Next it runs f_check and appends some fortran information to the files.
## Finally it runs getarch and getarch_2nd for even more environment information.
# CMake vars set by this file:
# CORE
# LIBCORE
# NUM_CORES
# HAVE_MMX
# HAVE_SSE
# HAVE_SSE2
# HAVE_SSE3
# MAKE
# SGEMM_UNROLL_M
# SGEMM_UNROLL_N
# DGEMM_UNROLL_M
# DGEMM_UNROLL_N
# QGEMM_UNROLL_M
# QGEMM_UNROLL_N
# CGEMM_UNROLL_M
# CGEMM_UNROLL_N
# ZGEMM_UNROLL_M
# ZGEMM_UNROLL_N
# XGEMM_UNROLL_M
# XGEMM_UNROLL_N
# CGEMM3M_UNROLL_M
# CGEMM3M_UNROLL_N
# ZGEMM3M_UNROLL_M
# ZGEMM3M_UNROLL_N
# XGEMM3M_UNROLL_M
# XGEMM3M_UNROLL_N
# CPUIDEMU = ../../cpuid/table.o
# When CPUIDEMU is defined, getarch is built with the CPUID-emulation defines.
if (DEFINED CPUIDEMU)
  set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
endif ()

# Names of the generated configuration files.  The plain names are the
# default; a TARGET_CORE build writes to the *_kernel variants and adds
# -DBUILD_KERNEL to the flags passed to getarch_2nd.
set(TARGET_MAKE "Makefile.conf")
set(TARGET_CONF "config.h")
if (DEFINED TARGET_CORE)
  # set the C flags for just this file
  set(GETARCH2_FLAGS "-DBUILD_KERNEL")
  set(TARGET_MAKE "Makefile_kernel.conf")
  set(TARGET_CONF "config_kernel.h")
endif ()
# Run the C compiler checks and, unless NOFORTRAN is set, the Fortran checks.
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake")
if (NOT NOFORTRAN)
  include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake")
endif ()

# compile getarch
# NOTE(review): CPUIDEMO is not set anywhere in this file and looks like a
# typo for CPUIDEMU, so it currently expands to nothing.  It is left as is:
# CPUIDEMU is documented above as an object file (table.o), which
# try_compile(SOURCES ...) would presumably reject - confirm before changing.
set(GETARCH_SRC
  ${CMAKE_SOURCE_DIR}/getarch.c
  ${CPUIDEMO}
)
if (NOT MSVC)
  list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
endif ()
if (MSVC)
  #Use generic for MSVC now
  set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
endif()

set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
try_compile(GETARCH_RESULT ${GETARCH_DIR}
  SOURCES ${GETARCH_SRC}
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)
# Everything below depends on the getarch binary; stop with the compiler
# output instead of continuing with empty detection results.
if (NOT GETARCH_RESULT)
  message(FATAL_ERROR "Compiling getarch failed:\n${GETARCH_LOG}")
endif ()

message(STATUS "Running getarch")
# Run the freshly built binary twice: argument 0 requests the Makefile-style
# output, argument 1 the config-header output.
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")

# append config data from getarch to the TARGET file and read in CMake vars
file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT})
ParseGetArchVars(${GETARCH_MAKE_OUT})
# Build and run getarch_2nd, the second detection helper.
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH2_DIR})
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
  SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH2_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)
# Without the binary the two runs below would silently yield empty output, so
# stop here and show the compiler log.
if (NOT GETARCH2_RESULT)
  message(FATAL_ERROR "Compiling getarch_2nd failed:\n${GETARCH2_LOG}")
endif ()

# Argument 0 requests the Makefile-style output, argument 1 the config-header
# output.
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)

# append config data from getarch_2nd to the TARGET file and read in CMake vars
file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
ParseGetArchVars(${GETARCH2_MAKE_OUT})

552
cmake/system.cmake Normal file
View File

@ -0,0 +1,552 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from OpenBLAS/Makefile.system
##
# Location of the bundled reference LAPACK sources.
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")

# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
# http://stackoverflow.com/questions/714100/os-detecting-makefile

# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa

# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
if (DEFINED TARGET_CORE)
  set(TARGET ${TARGET_CORE})
endif ()

# Force fallbacks for 32bit
# AVX is switched off and the AVX-era targets are mapped to an older core of
# the same vendor.
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
  set(NO_AVX 1)
  message(STATUS "Compiling a ${BINARY}-bit binary.")
  if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
    set(TARGET "NEHALEM")
  elseif (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER")
    set(TARGET "BARCELONA")
  endif ()
endif ()

# An explicit target forces getarch to report that core.
if (DEFINED TARGET)
  message(STATUS "Targetting the ${TARGET} architecture.")
  set(GETARCH_FLAGS "-DFORCE_${TARGET}")
endif ()
# Collect the options handed to getarch in GETARCH_FLAGS (a space-separated
# string).  The order of the appended options is kept as it was.
if (INTERFACE64)
  message(STATUS "Using 64-bit integers.")
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT")
endif ()

# Default the GEMM threading threshold when the user did not choose one.
if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD)
  set(GEMM_MULTITHREAD_THRESHOLD 4)
endif ()
message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.")
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}")

if (NO_AVX)
  message(STATUS "Disabling Advanced Vector Extensions (AVX).")
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX")
endif ()

if (NO_AVX2)
  message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).")
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
endif ()

if (CMAKE_BUILD_TYPE STREQUAL Debug)
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
endif ()

# TODO: let CMake handle this? -hpa
#if (${QUIET_MAKE})
#  set(MAKE "${MAKE} -s")
#endif()

if (NOT DEFINED NO_PARALLEL_MAKE)
  set(NO_PARALLEL_MAKE 0)
endif ()
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")

# NOTE(review): this compares the C++ compiler variable with the bare name
# "loongcc"; confirm that this is the intended check.
if (CMAKE_CXX_COMPILER STREQUAL loongcc)
  set(GETARCH_FLAGS "${GETARCH_FLAGS} -static")
endif ()

#if don't use Fortran, it will only compile CBLAS.
if (ONLY_CBLAS)
  set(NO_LAPACK 1)
else ()
  set(ONLY_CBLAS 0)
endif ()
# Run the pre-build detection (compiler checks and getarch).
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")

# Default the thread count to the detected number of cores.
if (NOT DEFINED NUM_THREADS)
  set(NUM_THREADS ${NUM_CORES})
endif ()

# A single thread means a non-threaded build.  NUM_THREADS is quoted so that
# an empty value (detection produced nothing) yields a false comparison
# rather than a malformed if() expression.
if ("${NUM_THREADS}" EQUAL 1)
  set(USE_THREAD 0)
endif ()

# SMP follows USE_THREAD when that is given and the thread count otherwise.
# The variables are named directly in if() instead of being expanded first,
# so empty values are handled and the value is not dereferenced a second time.
if (DEFINED USE_THREAD)
  if (USE_THREAD)
    set(SMP 1)
  else ()
    unset(SMP)
  endif ()
else ()
  # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
  if ("${NUM_THREADS}" EQUAL 1)
    unset(SMP)
  else ()
    set(SMP 1)
  endif ()
endif ()

if (SMP)
  message(STATUS "SMP enabled.")
endif ()

# Position-independent code is on unless something above switched it off.
if (NOT DEFINED NEED_PIC)
  set(NEED_PIC 1)
endif ()
# TODO: I think CMake should be handling all this stuff -hpa
unset(ARFLAGS)
set(CPP "${COMPILER} -E")

# Every tool variable is the upper-cased tool name and holds the tool name
# prefixed with CROSS_SUFFIX (AR -> "${CROSS_SUFFIX}ar", and so on).
foreach (_binutil ar as ld ranlib nm dllwrap objcopy objconv)
  string(TOUPPER "${_binutil}" _binutil_var)
  set(${_binutil_var} "${CROSS_SUFFIX}${_binutil}")
endforeach ()
# Pull in the per-OS, per-architecture and per-compiler settings, in that
# order.  The Fortran settings are skipped when NOFORTRAN is set.
# OS dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")
# Architecture dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")
# C Compiler dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")
if (NOT NOFORTRAN)
# Fortran Compiler dependent settings
include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
endif ()
# Placeholder kept from Makefile.system: the body is only a comment, so this
# block currently has no effect.
if (BINARY64)
if (INTERFACE64)
# CCOMMON_OPT += -DUSE64BITINT
endif ()
endif ()
# Add the position-independent-code option to the C and Fortran flag strings.
if (NEED_PIC)
  # The original compared the compiler *path* (CMAKE_C_COMPILER) with "IBM",
  # which cannot match; CMake identifies the IBM XL compiler through
  # CMAKE_C_COMPILER_ID "XL".
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "XL")
    set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
  endif ()

  # F_COMPILER is quoted because it is empty when the Fortran checks are
  # skipped; unquoted, the comparison would then be a malformed expression.
  if ("${F_COMPILER}" STREQUAL "SUN")
    set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
  endif ()
endif ()
# Translate the feature switches into -D options on CCOMMON_OPT.  Switches
# whose define carries the same name as the variable are handled in small
# loops; the order of the resulting options is unchanged.
if (DYNAMIC_ARCH)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
endif ()

if (NO_LAPACK)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK")
  #Disable LAPACK C interface
  set(NO_LAPACKE 1)
endif ()

foreach (_feature NO_LAPACKE NO_AVX)
  if (${_feature})
    set(CCOMMON_OPT "${CCOMMON_OPT} -D${_feature}")
  endif ()
endforeach ()

# 32-bit x86 always gets -DNO_AVX.
if (${ARCH} STREQUAL "x86")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
endif ()

if (NO_AVX2)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
endif ()

if (SMP)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")

  # mips64 cores other than LOONGSON3B use the simple threaded level-3 code.
  if (${ARCH} STREQUAL "mips64")
    if (NOT ${CORE} STREQUAL "LOONGSON3B")
      set(USE_SIMPLE_THREADED_LEVEL3 1)
    endif ()
  endif ()

  # USE_OPENMP (Makefile.system also has, commented out):
  # USE_SIMPLE_THREADED_LEVEL3 = 1
  # NO_AFFINITY = 1
  foreach (_feature USE_OPENMP BIGNUMA)
    if (${_feature})
      set(CCOMMON_OPT "${CCOMMON_OPT} -D${_feature}")
    endif ()
  endforeach ()
endif ()

foreach (_feature NO_WARMUP CONSISTENT_FPCSR)
  if (${_feature})
    set(CCOMMON_OPT "${CCOMMON_OPT} -D${_feature}")
  endif ()
endforeach ()
# Only for development
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING")
# set(USE_PAPI 1)

# PAPI support needs its define and two extra libraries.
if (USE_PAPI)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI")
  set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr")
endif ()

if (DYNAMIC_THREADS)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS")
endif ()

# The thread count is always passed on as MAX_CPU_NUMBER.
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")

if (USE_SIMPLE_THREADED_LEVEL3)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
endif ()

# Library name prefix, extended by LIBNAMESUFFIX when one is given.
set(LIBPREFIX "libopenblas")
if (DEFINED LIBNAMESUFFIX)
  set(LIBPREFIX "${LIBPREFIX}_${LIBNAMESUFFIX}")
endif ()

# Symbol prefix and suffix default to the empty string.
foreach (_symbol_part SYMBOLPREFIX SYMBOLSUFFIX)
  if (NOT DEFINED ${_symbol_part})
    set(${_symbol_part} "")
  endif ()
endforeach ()
# Directory holding the kernels of the detected architecture.
set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}")

# TODO: need to convert these Makefiles
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake

# Allocation choices for the PPC440 cores.
if (${CORE} STREQUAL "PPC440")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
endif ()
if (${CORE} STREQUAL "PPC440FP2")
  set(STATIC_ALLOCATION 1)
endif ()

# Affinity is only kept on Linux, and there only for x86, x86_64 or the
# LOONGSON3B core.
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
  set(NO_AFFINITY 1)
endif ()
if (NOT (${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "x86" OR ${CORE} STREQUAL "LOONGSON3B"))
  set(NO_AFFINITY 1)
endif ()

if (NO_AFFINITY)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY")
endif ()
# Profiling and memory-allocation switches, each mapped to its -D option.
if (FUNCTION_PROFILE)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE")
endif ()

if (HUGETLB_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB")
endif ()

if (DEFINED HUGETLBFILE_ALLOCATION)
  # The original had a stray ")" inside the string, which ended up as part of
  # the HUGETLB_FILE_NAME value handed to the compiler.
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
endif ()

if (STATIC_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC")
endif ()

if (DEVICEDRIVER_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"")
endif ()

if (MIXED_MEMORY_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
endif ()
# Names of the helper tools; SunOS uses the g-prefixed variants.
set(TAR tar)
set(PATCH patch)
set(GREP grep)
if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
  set(TAR gtar)
  set(PATCH gpatch)
  set(GREP ggrep)
endif ()

# md5sum unless an earlier step already chose a checksum tool.
if (NOT DEFINED MD5SUM)
  set(MD5SUM md5sum)
endif ()

set(AWK awk)

# Version information derived from the project version.
set(REVISION "-r${OpenBLAS_VERSION}")
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
# Compose the final compiler flag strings from the option strings collected
# so far.  The statements depend on each other's results, so order matters.

# With DEBUG set, -g is appended to COMMON_OPT.  That also defines
# COMMON_OPT, so the -O2 default below is then not applied.
if (DEBUG)
set(COMMON_OPT "${COMMON_OPT} -g")
endif ()
# Default optimisation level when nothing has defined COMMON_OPT yet.
if (NOT DEFINED COMMON_OPT)
set(COMMON_OPT "-O2")
endif ()
#For x86 32-bit
if (DEFINED BINARY AND BINARY EQUAL 32)
if (NOT MSVC)
set(COMMON_OPT "${COMMON_OPT} -m32")
endif()
endif()
# C flags: existing flags plus the common and C-specific options.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
# The assembler gets the same options, except under MSVC.
if(NOT MSVC)
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
endif()
# TODO: not sure what PFLAGS is -hpa
# NOTE(review): TOPDIR and COMMON_PROF are not set in the visible part of
# this file - confirm they are defined elsewhere.
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
# Fortran flags: existing flags plus the common and Fortran-specific options.
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")
# TODO: not sure what FPFLAGS is -hpa
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")
#For LAPACK Fortran codes.
# These start from the Fortran flags composed just above.
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")
#Disable -fopenmp for LAPACK Fortran codes on Windows.
# The OpenMP options are removed as whole, space-delimited words.  The
# original replaced bare substrings, which also cut pieces out of unrelated
# options (for example "-mp" inside a longer -mp... option, or "-openmp"
# inside "-xopenmp=..."), and misspelt the Sun option as "-xopenmp=parralel",
# so that one could never match.
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
  foreach (FLAGS_VAR LAPACK_FFLAGS LAPACK_FPFLAGS)
    # Pad with spaces so the first and last option are delimited on both
    # sides like every other one.
    set(PADDED_FLAGS " ${${FLAGS_VAR}} ")
    foreach (FILTER_FLAG ${FILTER_FLAGS})
      string(REPLACE " ${FILTER_FLAG} " " " PADDED_FLAGS "${PADDED_FLAGS}")
    endforeach ()
    string(STRIP "${PADDED_FLAGS}" ${FLAGS_VAR})
  endforeach ()
endif ()

if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  # lapack-netlib is rife with uninitialized warnings -hpa
  set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized")
endif ()
# C flags for the LAPACK C sources: the project C flags plus the LAPACK
# defines.  The original expanded CMAKE_C_CFLAGS, a variable that CMake never
# sets (typo for CMAKE_C_FLAGS), so all project C flags were silently dropped.
set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H")

if (INTERFACE64)
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64")
endif ()

if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS")
endif ()

# NOTE(review): CMAKE_C_COMPILER holds the compiler path, so the "LSB"
# comparison looks like it can never match; in practice only the Windows half
# of the condition applies.  Confirm which variable identifies the LSB
# compiler before changing it.
if ("${CMAKE_C_COMPILER}" STREQUAL "LSB" OR "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
endif ()
# Default file suffixes, applied only where no earlier step chose one.  Each
# entry is "<variable>=<default>".
foreach (_suffix_default "SUFFIX=o" "PSUFFIX=po" "LIBSUFFIX=a")
  string(REGEX REPLACE "=.*$" "" _suffix_var "${_suffix_default}")
  string(REGEX REPLACE "^[^=]*=" "" _suffix_value "${_suffix_default}")
  if (NOT DEFINED ${_suffix_var})
    set(${_suffix_var} ${_suffix_value})
  endif ()
endforeach ()
# Build the library file names.  The base name is
#   <LIBPREFIX>[_<LIBCORE>][p]<REVISION>
# where the core part is left out for DYNAMIC_ARCH builds and the "p" marks a
# threaded (SMP) build.
if (DYNAMIC_ARCH)
  set(_libname_core "")
else ()
  set(_libname_core "_${LIBCORE}")
endif ()
if (DEFINED SMP)
  set(_libname_smp "p")
else ()
  set(_libname_smp "")
endif ()
set(_libname_base "${LIBPREFIX}${_libname_core}${_libname_smp}${REVISION}")

# Static library and its profiled variant.
set(LIBNAME "${_libname_base}.${LIBSUFFIX}")
set(LIBNAME_P "${_libname_base}_p.${LIBSUFFIX}")

set(LIBDLLNAME "${LIBPREFIX}.dll")

# The other artifacts replace the static-library suffix with their own.  The
# original appended ".${LIBSUFFIX}.<ext>" to LIBNAME, which already ends in
# ".${LIBSUFFIX}", and so produced names such as "<base>.a.a.so".
set(LIBSONAME "${_libname_base}.so")
set(LIBDYNNAME "${_libname_base}.dylib")
set(LIBDEFNAME "${_libname_base}.def")
set(LIBEXPNAME "${_libname_base}.exp")
set(LIBZIPNAME "${_libname_base}.zip")

# Full paths of the static libraries in the source tree.
set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")
# Space-separated description of what the library contains.  ONLY_CBLAS
# overrides everything else; otherwise BLAS is always present and the other
# parts are added unless they were switched off.
if (ONLY_CBLAS)
  set(LIB_COMPONENTS CBLAS)
else ()
  set(LIB_COMPONENTS BLAS)
  if (NOT NO_CBLAS)
    set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS")
  endif ()
  if (NOT NO_LAPACK)
    set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK")
    if (NOT NO_LAPACKE)
      set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
    endif ()
  endif ()
endif ()

# For GEMM3M
# Enabled for the listed architectures, but never for the "generic" core.
set(USE_GEMM3M 0)
if (DEFINED ARCH)
  if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
    set(USE_GEMM3M 1)
  endif ()
  if (${CORE} STREQUAL "generic")
    set(USE_GEMM3M 0)
  endif ()
endif ()
#export OSNAME
#export ARCH
#export CORE
#export LIBCORE
#export PGCPATH
#export CONFIG
#export CC
#export FC
#export BU
#export FU
#export NEED2UNDERSCORES
#export USE_THREAD
#export NUM_THREADS
#export NUM_CORES
#export SMP
#export MAKEFILE_RULE
#export NEED_PIC
#export BINARY
#export BINARY32
#export BINARY64
#export F_COMPILER
#export C_COMPILER
#export USE_OPENMP
#export CROSS
#export CROSS_SUFFIX
#export NOFORTRAN
#export NO_FBLAS
#export EXTRALIB
#export CEXTRALIB
#export FEXTRALIB
#export HAVE_SSE
#export HAVE_SSE2
#export HAVE_SSE3
#export HAVE_SSSE3
#export HAVE_SSE4_1
#export HAVE_SSE4_2
#export HAVE_SSE4A
#export HAVE_SSE5
#export HAVE_AVX
#export HAVE_VFP
#export HAVE_VFPV3
#export HAVE_VFPV4
#export HAVE_NEON
#export KERNELDIR
#export FUNCTION_PROFILE
#export TARGET_CORE
#
#export SGEMM_UNROLL_M
#export SGEMM_UNROLL_N
#export DGEMM_UNROLL_M
#export DGEMM_UNROLL_N
#export QGEMM_UNROLL_M
#export QGEMM_UNROLL_N
#export CGEMM_UNROLL_M
#export CGEMM_UNROLL_N
#export ZGEMM_UNROLL_M
#export ZGEMM_UNROLL_N
#export XGEMM_UNROLL_M
#export XGEMM_UNROLL_N
#export CGEMM3M_UNROLL_M
#export CGEMM3M_UNROLL_N
#export ZGEMM3M_UNROLL_M
#export ZGEMM3M_UNROLL_N
#export XGEMM3M_UNROLL_M
#export XGEMM3M_UNROLL_N
#if (USE_CUDA)
# export CUDADIR
# export CUCC
# export CUFLAGS
# export CULIB
#endif
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
#
#.f.$(SUFFIX):
# $(FC) $(FFLAGS) -c $< -o $(@F)
#
#.f.$(PSUFFIX):
# $(FC) $(FPFLAGS) -pg -c $< -o $(@F)
# these are not cross-platform
#ifdef BINARY64
#PATHSCALEPATH = /opt/pathscale/lib/3.1
#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib
#else
#PATHSCALEPATH = /opt/pathscale/lib/3.1/32
#PGIPATH = /opt/pgi/linux86/7.1-5/lib
#endif
#ACMLPATH = /opt/acml/4.3.0
#ifneq ($(OSNAME), Darwin)
#MKLPATH = /opt/intel/mkl/10.2.2.025/lib
#else
#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
#endif
#ATLASPATH = /opt/atlas/3.9.17/opteron
#FLAMEPATH = $(HOME)/flame/lib
#ifneq ($(OSNAME), SunOS)
#SUNPATH = /opt/sunstudio12.1
#else
#SUNPATH = /opt/SUNWspro
#endif

346
cmake/utils.cmake Normal file
View File

@ -0,0 +1,346 @@
# Functions to help with the OpenBLAS build
# Turns the VARNAME=VALUE pairs found in getarch output into CMake variables in the caller's scope.
# Only pairs whose name and value consist of letters, digits and underscores are picked up.
# @param GETARCH_IN the text produced by getarch
function(ParseGetArchVars GETARCH_IN)
  string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" getarch_pairs "${GETARCH_IN}")
  foreach (getarch_pair ${getarch_pairs})
    # everything in front of the "=" is the variable name, everything behind it the value
    string(REGEX REPLACE "=.*$" "" pair_name "${getarch_pair}")
    string(REGEX REPLACE "^[^=]*=" "" pair_value "${getarch_pair}")
    set(${pair_name} ${pair_value} PARENT_SCOPE)
  endforeach ()
endfunction ()
# Reads a Makefile into CMake vars.
# Every "NAME = value" line becomes a CMake variable NAME; $(OTHER) references inside the value are
# replaced with the current value of the CMake variable OTHER. Lines of the form
# "include $(KERNELDIR)/<file>" are followed recursively. Lines whose first non-blank character is
# "#" are Makefile comments and are skipped, so commented-out assignments are not applied.
# This is a macro on purpose: the variables it sets must land in the caller's scope.
# @param MAKEFILE_IN path of the Makefile to read
macro(ParseMakefileVars MAKEFILE_IN)
  message(STATUS "Reading vars from ${MAKEFILE_IN}...")
  file(STRINGS "${MAKEFILE_IN}" makefile_contents)
  foreach (makefile_line ${makefile_contents})
    string(REGEX MATCH "^[ \t]*#" comment_match "${makefile_line}")
    if ("${comment_match}" STREQUAL "")
      string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
      if (NOT "${line_match}" STREQUAL "")
        set(var_name ${CMAKE_MATCH_1})
        set(var_value "${CMAKE_MATCH_2}")
        # check for Makefile variables in the string, e.g. $(TSUFFIX)
        string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches "${var_value}")
        foreach (make_var ${make_var_matches})
          # strip out Makefile $() markup
          string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var "${make_var}")
          # now replace the instance of the Makefile variable with the value of the CMake variable.
          # var_value is quoted because it can become empty after a substitution, and an empty
          # unquoted argument would leave string(REPLACE) without an input.
          string(REPLACE "$(${make_var})" "${${make_var}}" var_value "${var_value}")
        endforeach ()
        # deliberately unquoted: a value that expanded to nothing leaves the variable unset
        set(${var_name} ${var_value})
      else ()
        string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
        if (NOT "${line_match}" STREQUAL "")
          ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1})
        endif ()
      endif ()
    endif ()
  endforeach ()
endmacro ()
# Returns all combinations of the input list, as a list with colon-separated combinations
# E.g. input of A B C returns " " A B A:B C A:C B:C A:B:C (the empty combination is represented by a single space)
# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")).
# @param list_in the items to combine
# @param absent_codes_in codes to use when an element is absent from a combination, one per item of list_in. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present.
# @returns LIST_OUT a list of combinations
#          CODES_OUT a list of codes corresponding to each combination; each code holds one character per item: the first letter of the item when it is present, the matching absent code otherwise
function(AllCombinations list_in absent_codes_in)
  list(LENGTH list_in list_count)
  # every combination is a bit pattern over the items; foreach(RANGE) is inclusive, hence the -1
  math(EXPR num_combos "(1 << ${list_count}) - 1")
  # index of the last item, computed once instead of once per combination
  math(EXPR last_list_index "${list_count} - 1")
  set(LIST_OUT "")
  set(CODES_OUT "")
  foreach (c RANGE 0 ${num_combos})
    set(current_combo "")
    set(current_code "")
    foreach (list_index RANGE 0 ${last_list_index})
      math(EXPR bit "1 << ${list_index}")
      math(EXPR combo_has_bit "${c} & ${bit}")
      if (combo_has_bit)
        list(GET list_in ${list_index} list_elem)
        # compare against the empty string rather than testing truthiness: an item spelled like a
        # CMake false constant (N, NO, OFF, 0, ...) must still be joined, not overwritten
        if (current_combo STREQUAL "")
          set(current_combo "${list_elem}")
        else ()
          set(current_combo "${current_combo}:${list_elem}")
        endif ()
        string(SUBSTRING "${list_elem}" 0 1 code_char)
      else ()
        list(GET absent_codes_in ${list_index} code_char)
      endif ()
      set(current_code "${current_code}${code_char}")
    endforeach ()
    if (current_combo STREQUAL "")
      list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space
    else ()
      list(APPEND LIST_OUT ${current_combo})
    endif ()
    list(APPEND CODES_OUT ${current_code})
  endforeach ()
  set(LIST_OUT ${LIST_OUT} PARENT_SCOPE)
  set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
endfunction ()
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
# For every float type / source pair a small wrapper source is written below the current binary directory; it consists of
# "#define" lines followed by an "#include" of the real source. The wrapper paths are appended to OPENBLAS_SRC in the caller's scope.
# All arguments after sources_in are optional and positional.
# @param sources_in the source files to build from
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
#                e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
# @param use_cblas (optional) if true, the object name is prefixed with "cblas_" and CBLAS is added to the definitions
# @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
# @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters); only used when replace_last_with is empty
# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme some routines have separate source files for complex and non-complex float types.
#                                0 - compiles for all types
#                                1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
#                                2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
#                                3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
#                                4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
#                                STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in)

  # Optional arguments are detected through ARGC: ARGV<n> beyond ARGC may still hold a value left
  # over from an enclosing function call. Every optional local also gets an explicit default so
  # that it cannot pick up a variable of the same name from the calling scope.
  set(defines_in "")
  if (ARGC GREATER 1)
    set(defines_in "${ARGV1}")
  endif ()

  set(name_in "")
  if (ARGC GREATER 2 AND NOT "${ARGV2}" STREQUAL "")
    # strip off extension for kernel files that pass in the object name.
    get_filename_component(name_in "${ARGV2}" NAME_WE)
  endif ()

  set(use_cblas false)
  if (ARGC GREATER 3)
    set(use_cblas "${ARGV3}")
  endif ()

  set(replace_last_with "")
  if (ARGC GREATER 4)
    set(replace_last_with "${ARGV4}")
  endif ()

  set(append_with "")
  if (ARGC GREATER 5)
    set(append_with "${ARGV5}")
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  set(complex_filename_scheme "")
  if (ARGC GREATER 7)
    set(complex_filename_scheme "${ARGV7}")
  endif ()

  if (no_float_type)
    set(float_list "DUMMY") # still need to loop once
  else ()
    set(float_list "${FLOAT_TYPES}")
  endif ()

  set(real_only false)
  set(complex_only false)
  set(mangle_complex_sources false)
  if (NOT complex_filename_scheme STREQUAL "")
    if (complex_filename_scheme EQUAL 1)
      set(real_only true)
    elseif (complex_filename_scheme EQUAL 2)
      set(complex_only true)
    elseif (complex_filename_scheme EQUAL 3)
      set(mangle_complex_sources true)
    elseif (complex_filename_scheme EQUAL 4)
      set(mangle_complex_sources true)
      set(complex_only true)
    elseif (NOT complex_filename_scheme EQUAL 0)
      # anything that is not one of the numeric schemes is taken as the float type(s) to build
      set(float_list "${complex_filename_scheme}")
    endif ()
  endif ()

  if (complex_only)
    list(REMOVE_ITEM float_list "SINGLE")
    list(REMOVE_ITEM float_list "DOUBLE")
  elseif (real_only)
    list(REMOVE_ITEM float_list "COMPLEX")
    list(REMOVE_ITEM float_list "ZCOMPLEX")
  endif ()

  set(SRC_LIST_OUT "")
  foreach (float_type ${float_list})

    # lower-case first letter of the float type (s/d/c/z), empty when float types are turned off
    set(float_char "")
    if (NOT no_float_type)
      string(SUBSTRING "${float_type}" 0 1 float_char)
      string(TOLOWER "${float_char}" float_char)
    endif ()

    foreach (source_file ${sources_in})

      if (name_in STREQUAL "")
        get_filename_component(source_name "${source_file}" NAME_WE)
        set(obj_name "${float_char}${source_name}")
      elseif (name_in MATCHES "\\*")
        # replace * with float_char (quoted, because float_char is empty without float types)
        string(REPLACE "*" "${float_char}" obj_name "${name_in}")
      else ()
        set(obj_name "${float_char}${name_in}")
      endif ()

      # compared against the empty string so that a code spelled like a false constant (e.g. "N") is still applied
      if (NOT replace_last_with STREQUAL "")
        string(REGEX REPLACE ".$" "${replace_last_with}" obj_name "${obj_name}")
      else ()
        set(obj_name "${obj_name}${append_with}")
      endif ()

      # now add the object and set the defines
      set(obj_defines "${defines_in}")

      if (use_cblas)
        set(obj_name "cblas_${obj_name}")
        list(APPEND obj_defines "CBLAS")
      endif ()

      list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")

      if ("${float_type}" STREQUAL "DOUBLE" OR "${float_type}" STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "DOUBLE")
      endif ()

      if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "COMPLEX")
        if (mangle_complex_sources)
          # add a z to the filename
          get_filename_component(source_name "${source_file}" NAME)
          string(REPLACE "${source_name}" "z${source_name}" source_file "${source_file}")
        endif ()
      endif ()

      if (VERBOSE_GEN)
        message(STATUS "${obj_name}:${source_file}")
        message(STATUS "${obj_defines}")
      endif ()

      # create a copy of the source to avoid duplicate obj filename problem with ar.exe
      get_filename_component(source_extension "${source_file}" EXT)
      set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}")
      if (IS_ABSOLUTE "${source_file}")
        set(old_source_file "${source_file}")
      else ()
        set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}")
      endif ()

      # each list entry becomes one "#define NAME VALUE" line of the wrapper
      string(REPLACE ";" "\n#define " define_source "${obj_defines}")
      string(REPLACE "=" " " define_source "${define_source}")
      file(WRITE "${new_source_file}" "#define ${define_source}\n#include \"${old_source_file}\"")
      list(APPEND SRC_LIST_OUT "${new_source_file}")

    endforeach ()
  endforeach ()

  list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT})
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in
# The combinations come from AllCombinations; every source/combination pair is handed to GenerateNamedObjects, and the
# resulting OPENBLAS_SRC is passed on to the caller's scope.
# @param sources_in the source files to build from
# @param defines_in the preprocessor definitions that will be combined to create the object files
# @param absent_codes_in the code characters used in the object name when a definition is absent from a combination (see AllCombinations)
# @param all_defines_in preprocessor definitions that will be applied to all objects (may be empty)
# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU.
#                  If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
#                  If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU.
#                  If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
#                  If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
# @param alternate_name (optional) replaces the source name as the object name (define codes are still appended)
# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional) see GenerateNamedObjects
function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)

  # Optional arguments are detected through ARGC: ARGV<n> beyond ARGC may still hold a value left
  # over from an enclosing function call.
  set(alternate_name_in "")
  if (ARGC GREATER 5)
    set(alternate_name_in "${ARGV5}")
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  set(complex_filename_scheme "")
  if (ARGC GREATER 7)
    set(complex_filename_scheme "${ARGV7}")
  endif ()

  AllCombinations("${defines_in}" "${absent_codes_in}")
  set(define_combos ${LIST_OUT})
  set(define_codes ${CODES_OUT})

  list(LENGTH define_combos num_combos)
  math(EXPR num_combos "${num_combos} - 1")

  foreach (c RANGE 0 ${num_combos})

    list(GET define_combos ${c} define_combo)
    list(GET define_codes ${c} define_code)

    # The defines of a combination do not depend on the source file, so they are built once per combination.
    # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with
    string(REPLACE ":" ";" define_combo "${define_combo}")
    if ("${define_combo}" STREQUAL " ")
      # a single space stands for the empty combination: only the common defines apply
      set(cur_defines "${all_defines_in}")
    else ()
      set(cur_defines ${define_combo})
      list(APPEND cur_defines ${all_defines_in})
    endif ()

    foreach (source_file ${sources_in})

      set(alternate_name ${alternate_name_in})
      set(replace_code "")
      set(append_code "")

      if (replace_scheme EQUAL 1)
        set(replace_code ${define_code})
      elseif (replace_scheme EQUAL 2)
        set(append_code "_${define_code}")
      elseif (replace_scheme EQUAL 3)
        if ("${alternate_name}" STREQUAL "")
          string(REGEX MATCH "[a-zA-Z]\\." last_letter "${source_file}")
        else ()
          string(REGEX MATCH "[a-zA-Z]$" last_letter "${alternate_name}")
        endif ()
        # first extract the last letter
        string(SUBSTRING "${last_letter}" 0 1 last_letter) # remove period from match
        # break the code up into the first letter and the remaining (should only be 2 anyway)
        string(SUBSTRING "${define_code}" 0 1 define_code_first)
        string(SUBSTRING "${define_code}" 1 -1 define_code_second)
        set(replace_code "${define_code_first}${last_letter}${define_code_second}")
      elseif (replace_scheme EQUAL 4)
        # insert code before the last underscore and pass that in as the alternate_name
        if ("${alternate_name}" STREQUAL "")
          get_filename_component(alternate_name "${source_file}" NAME_WE)
        endif ()
        set(extra_underscore "")
        # check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel)
        string(REGEX MATCH "_[a-zA-Z]+_" underscores "${alternate_name}")
        string(LENGTH "${underscores}" underscores)
        if (underscores EQUAL 0)
          set(extra_underscore "_")
        endif ()
        string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name "${alternate_name}")
      else ()
        set(append_code ${define_code}) # replace_scheme should be 0
      endif ()

      GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}")
    endforeach ()
  endforeach ()

  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()

View File

@ -82,7 +82,10 @@ extern "C" {
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#if !defined(_MSC_VER)
#include <unistd.h> #include <unistd.h>
#endif
#ifdef OS_LINUX #ifdef OS_LINUX
#include <malloc.h> #include <malloc.h>
@ -95,6 +98,10 @@ extern "C" {
#ifdef OS_ANDROID #ifdef OS_ANDROID
#define NO_SYSV_IPC #define NO_SYSV_IPC
//Android NDK only supports complex.h since Android 5.0
#if __ANDROID_API__ < 21
#define FORCE_OPENBLAS_COMPLEX_STRUCT
#endif
#endif #endif
#ifdef OS_WINDOWS #ifdef OS_WINDOWS
@ -114,6 +121,7 @@ extern "C" {
#include <sys/shm.h> #include <sys/shm.h>
#endif #endif
#include <sys/time.h> #include <sys/time.h>
#include <time.h>
#include <unistd.h> #include <unistd.h>
#include <math.h> #include <math.h>
#ifdef SMP #ifdef SMP
@ -293,13 +301,6 @@ typedef int blasint;
#define COMPSIZE 2 #define COMPSIZE 2
#endif #endif
#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#define CREAL __real__
#define CIMAG __imag__
#endif
#define Address_H(x) (((x)+(1<<15))>>16) #define Address_H(x) (((x)+(1<<15))>>16)
#define Address_L(x) ((x)-((Address_H(x))<<16)) #define Address_L(x) ((x)-((Address_H(x))<<16))
@ -313,8 +314,12 @@ typedef int blasint;
#endif #endif
#if defined(OS_WINDOWS) #if defined(OS_WINDOWS)
#if defined(_MSC_VER) && !defined(__clang__)
#define YIELDING YieldProcessor()
#else
#define YIELDING SwitchToThread() #define YIELDING SwitchToThread()
#endif #endif
#endif
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5) #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
@ -500,18 +505,52 @@ static void __inline blas_lock(volatile BLASULONG *address){
/* C99 supports complex floating numbers natively, which GCC also offers as an /* C99 supports complex floating numbers natively, which GCC also offers as an
extension since version 3.0. If neither are available, use a compatible extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ #if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
#define OPENBLAS_COMPLEX_C99 #define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus
#include <complex.h>
#endif
typedef float _Complex openblas_complex_float; typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double; typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble; typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else #else
#define OPENBLAS_COMPLEX_STRUCT #define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float; typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double; typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble; typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif #endif
#ifdef XDOUBLE
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
#elif defined(DOUBLE)
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
#else
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
#endif
#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#ifdef OPENBLAS_COMPLEX_STRUCT
#define CREAL(Z) ((Z).real)
#define CIMAG(Z) ((Z).imag)
#else
#define CREAL __real__
#define CIMAG __imag__
#endif
#endif
#endif // ASSEMBLER #endif // ASSEMBLER
#ifndef IFLUSH #ifndef IFLUSH
@ -528,6 +567,10 @@ static void __inline blas_lock(volatile BLASULONG *address){
#endif #endif
#endif #endif
#if defined(C_MSVC)
#define inline __inline
#endif
#ifndef ASSEMBLER #ifndef ASSEMBLER
#ifndef MIN #ifndef MIN

View File

@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG); double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
float _Complex cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float _Complex cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
double _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float, int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);

View File

@ -830,56 +830,56 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);

View File

@ -56,11 +56,23 @@ static void __inline blas_lock(volatile BLASULONG *address){
do { do {
while (*address) {YIELDING;}; while (*address) {YIELDING;};
#if defined(_MSC_VER) && !defined(__clang__)
// use intrinsic instead of inline assembly
ret = _InterlockedExchange(address, 1);
// inline assembly
/*__asm {
mov eax, address
mov ebx, 1
xchg [eax], ebx
mov ret, ebx
}*/
#else
__asm__ __volatile__( __asm__ __volatile__(
"xchgl %0, %1\n" "xchgl %0, %1\n"
: "=r"(ret), "=m"(*address) : "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address) : "0"(1), "m"(*address)
: "memory"); : "memory");
#endif
} while (ret); } while (ret);
@ -68,31 +80,43 @@ static void __inline blas_lock(volatile BLASULONG *address){
#define BLAS_LOCK_DEFINED #define BLAS_LOCK_DEFINED
static __inline unsigned long long rpcc(void){ static __inline unsigned long long rpcc(void){
#if defined(_MSC_VER) && !defined(__clang__)
return __rdtsc(); // use MSVC intrinsic
#else
unsigned int a, d; unsigned int a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
return ((unsigned long long)a + ((unsigned long long)d << 32)); return ((unsigned long long)a + ((unsigned long long)d << 32));
#endif
}; };
#define RPCC_DEFINED #define RPCC_DEFINED
static __inline unsigned long getstackaddr(void){ static __inline unsigned long getstackaddr(void){
#if defined(_MSC_VER) && !defined(__clang__)
return (unsigned long)_ReturnAddress(); // use MSVC intrinsic
#else
unsigned long addr; unsigned long addr;
__asm__ __volatile__ ("mov %%esp, %0" __asm__ __volatile__ ("mov %%esp, %0"
: "=r"(addr) : : "memory"); : "=r"(addr) : : "memory");
return addr; return addr;
#endif
}; };
static __inline long double sqrt_long(long double val) { static __inline long double sqrt_long(long double val) {
#if defined(_MSC_VER) && !defined(__clang__)
return sqrt(val); // not sure if this will use fsqrt
#else
long double result; long double result;
__asm__ __volatile__ ("fldt %1\n" __asm__ __volatile__ ("fldt %1\n"
"fsqrt\n" "fsqrt\n"
"fstpt %0\n" : "=m" (result) : "m"(val)); "fstpt %0\n" : "=m" (result) : "m"(val));
return result; return result;
#endif
} }
#define SQRT(a) sqrt_long(a) #define SQRT(a) sqrt_long(a)
@ -102,7 +126,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
#define WHEREAMI #define WHEREAMI
static inline int WhereAmI(void){ static __inline int WhereAmI(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
int apicid; int apicid;
@ -148,9 +172,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
y = blas_quick_divide_table[y]; y = blas_quick_divide_table[y];
#if defined(_MSC_VER) && !defined(__clang__)
(void*)result;
return x*y;
#else
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
return result; return result;
#endif
} }
#endif #endif
@ -286,8 +315,12 @@ REALNAME:
#define PROFCODE #define PROFCODE
#ifdef __clang__
#define EPILOGUE .end
#else
#define EPILOGUE .end REALNAME #define EPILOGUE .end REALNAME
#endif #endif
#endif
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
#define PROLOGUE \ #define PROLOGUE \

View File

@ -41,6 +41,10 @@
#ifndef ASSEMBLER #ifndef ASSEMBLER
#ifdef C_MSVC
#include <intrin.h>
#endif
#ifdef C_SUN #ifdef C_SUN
#define __asm__ __asm #define __asm__ __asm
#define __volatile__ #define __volatile__
@ -61,32 +65,45 @@
static void __inline blas_lock(volatile BLASULONG *address){ static void __inline blas_lock(volatile BLASULONG *address){
#ifndef C_MSVC
int ret; int ret;
#else
BLASULONG ret;
#endif
do { do {
while (*address) {YIELDING;}; while (*address) {YIELDING;};
#ifndef C_MSVC
__asm__ __volatile__( __asm__ __volatile__(
"xchgl %0, %1\n" "xchgl %0, %1\n"
: "=r"(ret), "=m"(*address) : "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address) : "0"(1), "m"(*address)
: "memory"); : "memory");
#else
ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
#endif
} while (ret); } while (ret);
} }
#define BLAS_LOCK_DEFINED #define BLAS_LOCK_DEFINED
static __inline BLASULONG rpcc(void){ static __inline BLASULONG rpcc(void){
#ifdef C_MSVC
return __rdtsc();
#else
BLASULONG a, d; BLASULONG a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
return ((BLASULONG)a + ((BLASULONG)d << 32)); return ((BLASULONG)a + ((BLASULONG)d << 32));
#endif
} }
#define RPCC_DEFINED #define RPCC_DEFINED
#define RPCC64BIT #define RPCC64BIT
#ifndef C_MSVC
static __inline BLASULONG getstackaddr(void){ static __inline BLASULONG getstackaddr(void){
BLASULONG addr; BLASULONG addr;
@ -95,22 +112,32 @@ static __inline BLASULONG getstackaddr(void){
return addr; return addr;
} }
#endif
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#ifdef C_MSVC
int cpuinfo[4];
__cpuid(cpuinfo, op);
*eax=cpuinfo[0];
*ebx=cpuinfo[1];
*ecx=cpuinfo[2];
*edx=cpuinfo[3];
#else
__asm__ __volatile__("cpuid" __asm__ __volatile__("cpuid"
: "=a" (*eax), : "=a" (*eax),
"=b" (*ebx), "=b" (*ebx),
"=c" (*ecx), "=c" (*ecx),
"=d" (*edx) "=d" (*edx)
: "0" (op)); : "0" (op));
#endif
} }
/* /*
#define WHEREAMI #define WHEREAMI
*/ */
static inline int WhereAmI(void){ static __inline int WhereAmI(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
int apicid; int apicid;
@ -152,10 +179,14 @@ static inline int WhereAmI(void){
#define GET_IMAGE_CANCEL #define GET_IMAGE_CANCEL
#ifdef SMP #ifdef SMP
#ifdef USE64BITINT #if defined(USE64BITINT)
static __inline blasint blas_quickdivide(blasint x, blasint y){ static __inline blasint blas_quickdivide(blasint x, blasint y){
return x / y; return x / y;
} }
#elif defined (C_MSVC)
static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
return x / y;
}
#else #else
extern unsigned int blas_quick_divide_table[]; extern unsigned int blas_quick_divide_table[];

View File

@ -39,6 +39,10 @@
#ifndef CPUID_H #ifndef CPUID_H
#define CPUID_H #define CPUID_H
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define INTEL_AMD
#endif
#define VENDOR_INTEL 1 #define VENDOR_INTEL 1
#define VENDOR_UMC 2 #define VENDOR_UMC 2
#define VENDOR_AMD 3 #define VENDOR_AMD 3
@ -59,7 +63,7 @@
#define FAMILY_PM 7 #define FAMILY_PM 7
#define FAMILY_IA64 8 #define FAMILY_IA64 8
#if defined(__i386__) || defined(__x86_64__) #ifdef INTEL_AMD
#define GET_EXFAMILY 1 #define GET_EXFAMILY 1
#define GET_EXMODEL 2 #define GET_EXMODEL 2
#define GET_TYPE 3 #define GET_TYPE 3

View File

@ -40,6 +40,12 @@
#include <string.h> #include <string.h>
#include "cpuid.h" #include "cpuid.h"
#if defined(_MSC_VER) && !defined(__clang__)
#define C_INLINE __inline
#else
#define C_INLINE inline
#endif
/* /*
#ifdef NO_AVX #ifdef NO_AVX
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM #define CPUTYPE_HASWELL CPUTYPE_NEHALEM
@ -53,12 +59,26 @@
#endif #endif
*/ */
#if defined(_MSC_VER) && !defined(__clang__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
int cpuInfo[4] = {-1};
__cpuid(cpuInfo, op);
*eax = cpuInfo[0];
*ebx = cpuInfo[1];
*ecx = cpuInfo[2];
*edx = cpuInfo[3];
}
#else
#ifndef CPUIDEMU #ifndef CPUIDEMU
#if defined(__APPLE__) && defined(__i386__) #if defined(__APPLE__) && defined(__i386__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
#else #else
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__) #if defined(__i386__) && defined(__PIC__)
__asm__ __volatile__ __asm__ __volatile__
("mov %%ebx, %%edi;" ("mov %%ebx, %%edi;"
@ -115,14 +135,16 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
#endif #endif
static inline int have_cpuid(void){ #endif // _MSC_VER
static C_INLINE int have_cpuid(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
cpuid(0, &eax, &ebx, &ecx, &edx); cpuid(0, &eax, &ebx, &ecx, &edx);
return eax; return eax;
} }
static inline int have_excpuid(void){ static C_INLINE int have_excpuid(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
cpuid(0x80000000, &eax, &ebx, &ecx, &edx); cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
@ -130,10 +152,14 @@ static inline int have_excpuid(void){
} }
#ifndef NO_AVX #ifndef NO_AVX
static inline void xgetbv(int op, int * eax, int * edx){ static C_INLINE void xgetbv(int op, int * eax, int * edx){
//Use binary code for xgetbv //Use binary code for xgetbv
#if defined(_MSC_VER) && !defined(__clang__)
*eax = __xgetbv(op);
#else
__asm__ __volatile__ __asm__ __volatile__
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
#endif
} }
#endif #endif

46
ctest/CMakeLists.txt Normal file
View File

@ -0,0 +1,46 @@
# Let the CBLAS test sources include headers from the top of the source tree.
include_directories("${CMAKE_SOURCE_DIR}")

# The test drivers listed below include Fortran (.f) sources.
enable_language(Fortran)

# Extra preprocessor defines for the C test sources.
# NOTE(review): BU is set elsewhere in the build scripts - presumably the
# Fortran name-mangling underscore suffix; confirm.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS")

# Tiny shell wrapper used by the level 2/3 tests: runs the program given as
# its first argument with stdin redirected from the file given as its second.
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "$1 < $2\n")
# Build and register the CBLAS test programs for every entry of FLOAT_TYPES.
# The first character of the type name, lower-cased, is used as the prefix in
# the source, target and input-file names.
#
# Test commands refer to the executables through $<TARGET_FILE:...> rather than
# a hand-built path under CMAKE_CURRENT_BINARY_DIR, so they resolve correctly
# with multi-config generators (per-configuration output directories) and on
# platforms that add an executable suffix.
foreach(float_type ${FLOAT_TYPES})
  string(SUBSTRING ${float_type} 0 1 float_char_upper)
  string(TOLOWER ${float_char_upper} float_char)

  # level1: registered without an input file, so the executable is run directly
  add_executable(x${float_char}cblat1
    c_${float_char}blat1.f
    c_${float_char}blas1.c)
  target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static)
  add_test(NAME "x${float_char}cblat1"
    COMMAND "$<TARGET_FILE:x${float_char}cblat1>")

  # level2: the helper script feeds ctest/<prefix>in2 to the program's stdin
  add_executable(x${float_char}cblat2
    c_${float_char}blat2.f
    c_${float_char}blas2.c
    c_${float_char}2chke.c
    auxiliary.c
    c_xerbla.c
    constant.c)
  target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static)
  add_test(NAME "x${float_char}cblat2"
    COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "$<TARGET_FILE:x${float_char}cblat2>" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2")

  # level3: the helper script feeds ctest/<prefix>in3 to the program's stdin
  add_executable(x${float_char}cblat3
    c_${float_char}blat3.f
    c_${float_char}blas3.c
    c_${float_char}3chke.c
    auxiliary.c
    c_xerbla.c
    constant.c)
  target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static)
  add_test(NAME "x${float_char}cblat3"
    COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "$<TARGET_FILE:x${float_char}cblat3>" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3")
endforeach()

View File

@ -0,0 +1,203 @@
# Headers are included relative to the top of the source tree.
include_directories("${CMAKE_SOURCE_DIR}")

# Sources that need to be compiled twice: once with no flags, once with LOWER.
set(UL_SOURCES
  sbmv_k.c
  spmv_k.c
  spr_k.c
  spr2_k.c
  syr_k.c
  syr2_k.c
)

# Sources that need to be compiled several times, for UNIT and TRANSA
# (the *_U.c files).
set(U_SOURCES
  trmv_U.c
  tbmv_U.c
  tbsv_U.c
  tpmv_U.c
  tpsv_U.c
  trsv_U.c
)

# The *_L.c counterparts of U_SOURCES.
set(L_SOURCES
  trmv_L.c
  tbmv_L.c
  tbsv_L.c
  tpmv_L.c
  tpsv_L.c
  trsv_L.c
)

# Threaded sources; only used further down when SMP is enabled.
set(UL_SMP_SOURCES
  symv_thread.c
  syr_thread.c
  syr2_thread.c
  spr_thread.c
  spr2_thread.c
  spmv_thread.c
  sbmv_thread.c
)

# Threaded trmv/tpmv/tbmv sources; combined with LOWER, UNIT and TRANSA below.
set(NU_SMP_SOURCES
  trmv_thread.c
  tpmv_thread.c
  tbmv_thread.c
)

# Complex-only sources; each is used with a "z" file-name prefix and built in
# four variants (_U, _L, _V, _M) further down.
set(ULVM_COMPLEX_SOURCES
  hbmv_k.c
  hpmv_k.c
  hpr_k.c
  hpr2_k.c
  her_k.c
  her2_k.c
)
# objects that need LOWER set
# NOTE(review): GenerateCombinationObjects/GenerateNamedObjects are project
# helpers (cmake/utils.cmake is included by the top-level CMakeLists.txt); the
# meaning of the positional arguments should be checked against their
# definitions there.
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)
# gbmv uses a lowercase n and t
# (gbmv_k.c is listed once with no define and once with TRANS)
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
# c/zgbmv
# zgbmv_k.c is listed for six combinations of TRANS, CONJ and XCONJ
# (suffixes r, c, o, u, s, d)
GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)
# Per-float-type objects. Every type gets the threaded gemv/gbmv n/t variants
# (SMP only); after that the complex types (COMPLEX, ZCOMPLEX) and the real
# types take different branches.
foreach (float_type ${FLOAT_TYPES})

  if (SMP)
    GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type})
    GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type})

    GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n" false "" "" false ${float_type})
    GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t" false "" "" false ${float_type})
  endif ()

  if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")

    # Complex triangular routines: the z-prefixed sources are used, with
    # TRANSA given as a number 1..4 (name suffixes N, T, R, C).
    foreach (u_source ${U_SOURCES})
      string(REGEX MATCH "[a-z]+" op_name ${u_source})
      GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NU" false ${float_type})
      GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TL" false ${float_type})
      GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RU" false ${float_type})
      GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CL" false ${float_type})
    endforeach ()

    foreach (l_source ${L_SOURCES})
      string(REGEX MATCH "[a-z]+" op_name ${l_source})
      GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NL" false ${float_type})
      GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TU" false ${float_type})
      GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RL" false ${float_type})
      GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
    endforeach ()

    # Hermitian kernels: four variants each (_U, _L, _V, _M) from the
    # LOWER / HEMVREV combinations.
    foreach (ulvm_source ${ULVM_COMPLEX_SOURCES})
      string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source})
      GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type})
      GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type})
      GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type})
      GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
    endforeach()

    if (SMP)

      # threaded gemv: conjugated variants
      GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "XCONJ" "gemv_thread_o" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "XCONJ;TRANSA" "gemv_thread_u" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ" "gemv_thread_s" false "" "" false ${float_type})
      GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ;TRANSA" "gemv_thread_d" false "" "" false ${float_type})

      # threaded gbmv: conjugated variants
      GenerateNamedObjects("gbmv_thread.c" "CONJ" "gbmv_thread_r" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "CONJ;TRANSA" "gbmv_thread_c" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "XCONJ" "gbmv_thread_o" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "XCONJ;TRANSA" "gbmv_thread_u" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ" "gbmv_thread_s" false "" "" false ${float_type})
      GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ;TRANSA" "gbmv_thread_d" false "" "" false ${float_type})

      # threaded ger
      GenerateNamedObjects("ger_thread.c" "" "ger_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("ger_thread.c" "CONJ" "ger_thread_C" false "" "" false ${float_type})
      GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})

      # threaded hbmv, built from sbmv_thread.c
      GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type})

      # threaded hpmv, built from spmv_thread.c
      GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type})

      # threaded hpr, built from spr_thread.c
      GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type})

      # threaded hpr2, built from spr2_thread.c
      GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type})

      # threaded hemv, built from symv_thread.c
      GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type})

      # threaded her, built from syr_thread.c
      GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("syr_thread.c" "HERREV" "her_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("syr_thread.c" "LOWER;HERREV" "her_thread_M" false "" "" false ${float_type})

      # threaded her2, built from syr2_thread.c
      GenerateNamedObjects("syr2_thread.c" "HER" "her2_thread_U" false "" "" false ${float_type})
      GenerateNamedObjects("syr2_thread.c" "HER;LOWER" "her2_thread_L" false "" "" false ${float_type})
      GenerateNamedObjects("syr2_thread.c" "HERREV" "her2_thread_V" false "" "" false ${float_type})
      GenerateNamedObjects("syr2_thread.c" "LOWER;HERREV" "her2_thread_M" false "" "" false ${float_type})

      # threaded trmv/tpmv/tbmv, with TRANSA given as a number 1..4
      foreach (nu_smp_src ${NU_SMP_SOURCES})
        string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
        GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})
        GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=2" 0 "${op_name}_T" false ${float_type})
        GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "${op_name}_R" false ${float_type})
        GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "${op_name}_C" false ${float_type})
      endforeach ()
    endif ()

  else ()
    # For real number functions: TRANSA is a plain flag here, not a number.
    foreach (u_source ${U_SOURCES})
      string(REGEX MATCH "[a-z]+" op_name ${u_source})
      GenerateCombinationObjects("${u_source}" "UNIT" "N" "" 0 "${op_name}_NU" false ${float_type})
      GenerateCombinationObjects("${u_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TL" false ${float_type})
    endforeach ()

    foreach (l_source ${L_SOURCES})
      string(REGEX MATCH "[a-z]+" op_name ${l_source})
      GenerateCombinationObjects("${l_source}" "UNIT" "N" "" 0 "${op_name}_NL" false ${float_type})
      GenerateCombinationObjects("${l_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TU" false ${float_type})
    endforeach ()

    if (SMP)
      GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type})
      foreach(nu_smp_source ${NU_SMP_SOURCES})
        string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_source})
        GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "" 0 "${op_name}_N" false ${float_type})
        GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "TRANSA" 0 "${op_name}_T" false ${float_type})
      endforeach()
    endif ()
  endif ()
endforeach ()
# The threaded sources in UL_SMP_SOURCES are only built for SMP builds; each
# is combined with the LOWER define.
if (SMP)
  GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
endif ()

# Bundle everything listed in OPENBLAS_SRC into an object library.
# NOTE(review): OPENBLAS_SRC is presumably filled in by the Generate* helper
# calls; confirm in cmake/utils.cmake.
add_library(driver_level2 OBJECT ${OPENBLAS_SRC})

View File

@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif

View File

@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
a = (FLOAT *)args -> a; a = (FLOAT *)args -> a;

View File

@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
a = (FLOAT *)args -> a; a = (FLOAT *)args -> a;

View File

@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif

View File

@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif

View File

@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif

View File

@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
FLOAT *bufferY = gemvbuffer; FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer; FLOAT *bufferX = gemvbuffer;
#ifdef TRANS #ifdef TRANS
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
if (incy != 1) { if (incy != 1) {

View File

@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = sbmvbuffer; FLOAT *bufferX = sbmvbuffer;
FLOAT temp[2]; FLOAT temp[2];
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);

View File

@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = gemvbuffer; FLOAT *bufferX = gemvbuffer;
FLOAT temp[2]; FLOAT temp[2];
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#ifndef HEMVREV #ifndef HEMVREV
#ifndef LOWER #ifndef LOWER
if (i > 0) { if (i > 0) {
FLOAT _Complex result = DOTC_K(i, a, 1, X, 1); result = DOTC_K(i, a, 1, X, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else #else
if (m - i > 1) { if (m - i > 1) {
FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else #else
#ifndef LOWER #ifndef LOWER
if (i > 0) { if (i > 0) {
FLOAT _Complex result = DOTU_K(i, a, 1, X, 1); result = DOTU_K(i, a, 1, X, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else #else
if (m - i > 1) { if (m - i > 1) {
FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);

View File

@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferY = sbmvbuffer; FLOAT *bufferY = sbmvbuffer;
FLOAT *bufferX = sbmvbuffer; FLOAT *bufferX = sbmvbuffer;
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
a, 1, Y + i * COMPSIZE, 1, NULL, 0); a, 1, Y + i * COMPSIZE, 1, NULL, 0);
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);

View File

@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *gemvbuffer = (FLOAT *)buffer; FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *bufferY = gemvbuffer; FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer; FLOAT *bufferX = gemvbuffer;
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;

View File

@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -0,0 +1,115 @@
include_directories(${CMAKE_SOURCE_DIR})

# N.B. In the original makefile there was a BLOCKS define used in the
# compilation of these files but I don't see any evidence of it being set
# anywhere. -hpa

# Transpose-flag combinations handed to gemm.c as a compile define.
# GEMM_COMPLEX_DEFINES is consumed by the complex-only loop further down.
set(GEMM_DEFINES NN NT TN TT)
set(GEMM_COMPLEX_DEFINES RN CN RT CT NR TR RR CR NC TC RC CC)

# One gemm object per combination (gemm_nn, gemm_nt, ...), plus a
# THREADED_LEVEL3 variant when threaded level-3 is enabled.
foreach (GEMM_DEFINE ${GEMM_DEFINES})
  string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
  GenerateNamedObjects(
    "gemm.c"
    "${GEMM_DEFINE}"
    "gemm_${GEMM_DEFINE_LC}"
    0)
  if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
    GenerateNamedObjects(
      "gemm.c"
      "${GEMM_DEFINE};THREADED_LEVEL3"
      "gemm_thread_${GEMM_DEFINE_LC}"
      0)
  endif ()
endforeach ()

# trmm/trsm, left and right side. The regex keeps the "<op>_<SIDE>" part of
# the file name (e.g. "trmm_L" from "trmm_L.c"); an N or T suffix is appended
# for the variant built without / with the TRANSA define.
set(TRMM_TRSM_SOURCES
  trmm_L.c
  trmm_R.c
  trsm_L.c
  trsm_R.c)

foreach(trmm_trsm_source ${TRMM_TRSM_SOURCES})
  string(REGEX MATCH "[a-z]+_[A-Z]+" op_name ${trmm_trsm_source})
  GenerateCombinationObjects(
    "${trmm_trsm_source}" "UPPER;UNIT" "L;N" "" 0 "${op_name}N")
  GenerateCombinationObjects(
    "${trmm_trsm_source}" "UPPER;UNIT" "L;N" "TRANSA" 0 "${op_name}T")
endforeach()

# symm / syrk / syr2k drivers and their kernels.
GenerateCombinationObjects("symm_k.c"       "RSIDE;LOWER" "L;U" "NN" 1)
GenerateCombinationObjects("syrk_k.c"       "LOWER;TRANS" "U;N" ""   1)
GenerateCombinationObjects("syr2k_k.c"      "LOWER;TRANS" "U;N" ""   1)
GenerateCombinationObjects("syrk_kernel.c"  "LOWER"       "U"   ""   2)
GenerateCombinationObjects("syr2k_kernel.c" "LOWER"       "U"   ""   2)

# Threading support is only generated for SMP builds.
if (SMP)
  # N.B. these do NOT have a float type (e.g. DOUBLE) defined!
  GenerateNamedObjects(
    "gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c"
    "" "" 0 "" "" 1)

  if (NOT USE_SIMPLE_THREADED_LEVEL3)
    GenerateCombinationObjects(
      "syrk_k.c" "LOWER;TRANS" "U;N" "THREADED_LEVEL3" 2 "syrk_thread")
    GenerateCombinationObjects(
      "symm_k.c" "RSIDE;LOWER" "L;U" "THREADED_LEVEL3;NN" 2 "symm_thread")
  endif ()
endif ()
# Complex-only level-3 drivers: HERK, HEMM, HER2K, the conjugated TRMM/TRSM
# variants and the extra conjugate GEMM combinations. Real float types are
# skipped by the string check below.
foreach (float_type ${FLOAT_TYPES})
  # Quoted so an empty entry cannot turn the condition into a syntax error.
  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
    GenerateCombinationObjects("zherk_kernel.c" "LOWER;CONJ" "U;N" "HERK" 2 "herk_kernel" false ${float_type})

    # TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination
    GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type})
    GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type})

    # Need to set CONJ for trmm and trsm
    GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type})
    GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type})
    GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_RR" false ${float_type})
    GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_RC" false ${float_type})
    GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_LR" false ${float_type})
    GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_LC" false ${float_type})
    GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type})
    GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_RC" false ${float_type})

    #hemm
    GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type})
    GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type})

    #her2k
    GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type})
    GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
    GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
    GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
    GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})

    # THREADED_LEVEL3 variants are only generated under the same condition
    # that guards the real syrk/symm/gemm threaded objects in this file.
    if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
      #herk
      GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type})
      GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type})

      #hemm
      GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
      GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})

      # NOTE(review): no her2k entries here. The four her2k_* objects are
      # already generated unconditionally above; repeating the identical
      # calls in this branch only registered the same objects twice.
    endif()

    # special gemm defines for complex
    foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
      string(TOLOWER ${gemm_define} gemm_define_LC)
      GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type})
      if(USE_GEMM3M)
        GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type})
      endif()
      if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
        GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type})
        if(USE_GEMM3M)
          GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type})
        endif()
      endif ()
    endforeach ()
  endif ()
endforeach ()
# NOTE(review): the commented-out lines below use Makefile syntax and look like
# an HPL object list that was never ported to CMake — confirm whether they are
# still needed before removing them.
#HPLOBJS =
# dgemm_nn.c dgemm_nt.c dgemm_tn.c dgemm_tt.c
# dtrsm_LNUU.c dtrsm_LNUN.c dtrsm_LNLU.c dtrsm_LNLN.c
# dtrsm_LTUU.c dtrsm_LTUN.c dtrsm_LTLU.c dtrsm_LTLN.c
# dtrsm_RNUU.c dtrsm_RNUN.c dtrsm_RNLU.c dtrsm_RNLN.c
# dtrsm_RTUU.c dtrsm_RTUN.c dtrsm_RTLU.c dtrsm_RTLN.c
#
#if (USE_SIMPLE_THREADED_LEVEL3)
# HPLOBJS += dgemm_thread_nn.c dgemm_thread_nt.c
# dgemm_thread_tn.c dgemm_thread_tt.c
#endif
#
# Object library for this directory, built from the files listed in
# OPENBLAS_SRC (presumably filled in by the Generate* helper calls above —
# TODO confirm against the helper definitions).
add_library(driver_level3 OBJECT ${OPENBLAS_SRC})

View File

@ -47,7 +47,7 @@
#endif #endif
#endif #endif
static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
BLASLONG i; BLASLONG i;

View File

@ -49,7 +49,7 @@
#endif #endif
#endif #endif
static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
BLASLONG i; BLASLONG i;

View File

@ -70,6 +70,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG ls, is, js; BLASLONG ls, is, js;
BLASLONG min_l, min_i, min_j; BLASLONG min_l, min_i, min_j;
BLASLONG jjs, min_jj; BLASLONG jjs, min_jj;
#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA)))
BLASLONG start_ls;
#endif
m = args -> m; m = args -> m;
n = args -> n; n = args -> n;
@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
} }
#else #else
BLASLONG start_ls;
for(js = n; js > 0; js -= GEMM_R){ for(js = n; js > 0; js -= GEMM_R){
min_j = js; min_j = js;
if (min_j > GEMM_R) min_j = GEMM_R; if (min_j > GEMM_R) min_j = GEMM_R;

View File

@ -76,6 +76,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG ls, is, js; BLASLONG ls, is, js;
BLASLONG min_l, min_i, min_j; BLASLONG min_l, min_i, min_j;
BLASLONG jjs, min_jj; BLASLONG jjs, min_jj;
#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA)))
BLASLONG start_is;
#endif
m = args -> m; m = args -> m;
n = args -> n; n = args -> n;
@ -178,8 +181,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
} }
} }
#else #else
BLASLONG start_is;
for(ls = m; ls > 0; ls -= GEMM_Q){ for(ls = m; ls > 0; ls -= GEMM_Q){
min_l = ls; min_l = ls;
if (min_l > GEMM_Q) min_l = GEMM_Q; if (min_l > GEMM_Q) min_l = GEMM_Q;

View File

@ -75,6 +75,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG ls, is, js; BLASLONG ls, is, js;
BLASLONG min_l, min_i, min_j; BLASLONG min_l, min_i, min_j;
BLASLONG jjs, min_jj; BLASLONG jjs, min_jj;
#if !((defined(UPPER) && !defined(TRANSA)) || (!defined(UPPER) && defined(TRANSA)))
BLASLONG start_ls;
#endif
m = args -> m; m = args -> m;
n = args -> n; n = args -> n;
@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
} }
#else #else
BLASLONG start_ls;
for(js = n; js > 0; js -= GEMM_R){ for(js = n; js > 0; js -= GEMM_R){
min_j = js; min_j = js;
if (min_j > GEMM_R) min_j = GEMM_R; if (min_j > GEMM_R) min_j = GEMM_R;

View File

@ -0,0 +1,75 @@
include_directories(${CMAKE_SOURCE_DIR})

# Memory allocator source: PPC440 cores use memory_qalloc.c, everything else
# uses memory.c. CORE is quoted so that an empty or undefined value compares
# as "" instead of producing a malformed if() expression.
if ("${CORE}" STREQUAL "PPC440")
  set(MEMORY memory_qalloc.c)
else ()
  set(MEMORY memory.c)
endif ()

# Threading sources are only needed for SMP builds.
if (SMP)
  # Thread server implementation: OpenMP takes precedence, then the Win32
  # server on Windows. blas_server.c is the fallback, used only when
  # BLAS_SERVER has not been defined by this point.
  if (USE_OPENMP)
    set(BLAS_SERVER blas_server_omp.c)
  elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
    set(BLAS_SERVER blas_server_win32.c)
  endif ()

  if (NOT DEFINED BLAS_SERVER)
    set(BLAS_SERVER blas_server.c)
  endif ()

  set(SMP_SOURCES
    ${BLAS_SERVER}
    divtable.c # TODO: Makefile has -UDOUBLE
    blas_l1_thread.c
  )

  # init.c is added unless NO_AFFINITY is set to a true value.
  if (NOT NO_AFFINITY)
    list(APPEND SMP_SOURCES init.c)
  endif ()
endif ()
# Sources compiled as-is for every configuration.
set(COMMON_SOURCES
  xerbla.c
  openblas_set_num_threads.c
  openblas_error_handle.c
  openblas_get_num_procs.c
  openblas_get_num_threads.c
)

# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling
GenerateNamedObjects("abs.c" ""       "c_abs" 0 "" "" 1)
GenerateNamedObjects("abs.c" "DOUBLE" "z_abs" 0 "" "" 1)
GenerateNamedObjects(
  "openblas_get_config.c;openblas_get_parallel.c"
  "" "" 0 "" "" 1)

# parameter.c for a fixed-architecture build, dynamic.c when DYNAMIC_ARCH is on.
if (NOT DYNAMIC_ARCH)
  list(APPEND COMMON_SOURCES parameter.c)
else ()
  list(APPEND COMMON_SOURCES dynamic.c)
endif ()

# Commented-out Makefile fragments kept from the original file:
#ifdef EXPRECISION
#COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX)
#endif
#
#ifdef QUAD_PRECISION
#COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX)
#endif
#
#ifdef USE_CUDA
#COMMONOBJS += cuda_init.$(SUFFIX)
#endif
#
#ifdef FUNCTION_PROFILE
#COMMONOBJS += profile.$(SUFFIX)
#endif
#LIBOTHERS = libothers.$(LIBSUFFIX)
#ifeq ($(DYNAMIC_ARCH), 1)
#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
#else
#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
#endif

# Object library for this directory: generated objects plus the allocator,
# threading and common source lists.
add_library(driver_others OBJECT
  ${OPENBLAS_SRC}
  ${MEMORY}
  ${SMP_SOURCES}
  ${COMMON_SOURCES})

View File

@ -70,9 +70,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*********************************************************************/ /*********************************************************************/
#include "common.h" #include "common.h"
#ifdef OS_LINUX #if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID)
#include <dlfcn.h> #include <dlfcn.h>
#include <signal.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/time.h>
#endif #endif
#ifndef likely #ifndef likely
@ -265,7 +267,7 @@ int get_node(void);
static int increased_threads = 0; static int increased_threads = 0;
static int blas_thread_server(void *arg){ static void* blas_thread_server(void *arg){
/* Thread identifier */ /* Thread identifier */
BLASLONG cpu = (BLASLONG)arg; BLASLONG cpu = (BLASLONG)arg;
@ -458,7 +460,7 @@ static int blas_thread_server(void *arg){
//pthread_exit(NULL); //pthread_exit(NULL);
return 0; return NULL;
} }
#ifdef MONITOR #ifdef MONITOR
@ -565,14 +567,23 @@ int blas_thread_init(void){
#ifdef NEED_STACKATTR #ifdef NEED_STACKATTR
ret=pthread_create(&blas_threads[i], &attr, ret=pthread_create(&blas_threads[i], &attr,
(void *)&blas_thread_server, (void *)i); &blas_thread_server, (void *)i);
#else #else
ret=pthread_create(&blas_threads[i], NULL, ret=pthread_create(&blas_threads[i], NULL,
(void *)&blas_thread_server, (void *)i); &blas_thread_server, (void *)i);
#endif #endif
if(ret!=0){ if(ret!=0){
fprintf(STDERR,"OpenBLAS: pthread_creat error in blas_thread_init function. Error code:%d\n",ret); struct rlimit rlim;
exit(1); const char *msg = strerror(ret);
fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg);
if(0 == getrlimit(RLIMIT_NPROC, &rlim)) {
fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC "
"%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max));
}
if(0 != raise(SIGINT)) {
fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n");
exit(EXIT_FAILURE);
}
} }
} }
@ -832,10 +843,10 @@ void goto_set_num_threads(int num_threads) {
#ifdef NEED_STACKATTR #ifdef NEED_STACKATTR
pthread_create(&blas_threads[i], &attr, pthread_create(&blas_threads[i], &attr,
(void *)&blas_thread_server, (void *)i); &blas_thread_server, (void *)i);
#else #else
pthread_create(&blas_threads[i], NULL, pthread_create(&blas_threads[i], NULL,
(void *)&blas_thread_server, (void *)i); &blas_thread_server, (void *)i);
#endif #endif
} }

View File

@ -139,8 +139,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define BITMASK(a, b, c) ((((a) >> (b)) & (c))) #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
#if defined(_MSC_VER) && !defined(__clang__)
#define CONSTRUCTOR __cdecl
#define DESTRUCTOR __cdecl
#elif defined(OS_DARWIN) && defined(C_GCC)
#define CONSTRUCTOR __attribute__ ((constructor)) #define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor)) #define DESTRUCTOR __attribute__ ((destructor))
#else
#define CONSTRUCTOR __attribute__ ((constructor(101)))
#define DESTRUCTOR __attribute__ ((destructor(101)))
#endif
#ifdef DYNAMIC_ARCH #ifdef DYNAMIC_ARCH
gotoblas_t *gotoblas = NULL; gotoblas_t *gotoblas = NULL;
@ -795,12 +803,12 @@ static void *alloc_hugetlb(void *address){
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) {
CloseHandle(hToken); CloseHandle(hToken);
return -1; return (void*)-1;
} }
if (AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) != TRUE) { if (AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) != TRUE) {
CloseHandle(hToken); CloseHandle(hToken);
return -1; return (void*)-1;
} }
map_address = (void *)VirtualAlloc(address, map_address = (void *)VirtualAlloc(address,
@ -1402,6 +1410,28 @@ void DESTRUCTOR gotoblas_quit(void) {
#endif #endif
} }
#if defined(_MSC_VER) && !defined(__clang__)
BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
switch (ul_reason_for_call)
{
case DLL_PROCESS_ATTACH:
gotoblas_init();
break;
case DLL_THREAD_ATTACH:
break;
case DLL_THREAD_DETACH:
break;
case DLL_PROCESS_DETACH:
gotoblas_quit();
break;
default:
break;
}
return TRUE;
}
#endif
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
/* Don't call me; this is just work around for PGI / Sun bug */ /* Don't call me; this is just work around for PGI / Sun bug */
void gotoblas_dummy_for_PGI(void) { void gotoblas_dummy_for_PGI(void) {

View File

@ -69,10 +69,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) #if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)
#define OS_WINDOWS #define OS_WINDOWS
#endif #endif
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define INTEL_AMD
#endif
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#ifdef OS_WINDOWS #ifdef OS_WINDOWS
@ -750,7 +754,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHITECTURE "ARM" #define ARCHITECTURE "ARM"
#define SUBARCHITECTURE "CORTEXA9" #define SUBARCHITECTURE "CORTEXA9"
#define SUBDIRNAME "arm" #define SUBDIRNAME "arm"
#define ARCHCONFIG "-DCORTEXA9 " \ #define ARCHCONFIG "-DCORTEXA9 -DARMV7 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
@ -765,7 +769,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHITECTURE "ARM" #define ARCHITECTURE "ARM"
#define SUBARCHITECTURE "CORTEXA15" #define SUBARCHITECTURE "CORTEXA15"
#define SUBDIRNAME "arm" #define SUBDIRNAME "arm"
#define ARCHCONFIG "-DCORTEXA15 " \ #define ARCHCONFIG "-DCORTEXA15 -DARMV7 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
@ -830,7 +834,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define OPENBLAS_SUPPORTED #define OPENBLAS_SUPPORTED
#endif #endif
#if defined(__i386__) || (__x86_64__) #ifdef INTEL_AMD
#include "cpuid_x86.c" #include "cpuid_x86.c"
#define OPENBLAS_SUPPORTED #define OPENBLAS_SUPPORTED
#endif #endif
@ -925,7 +929,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE #ifdef FORCE
printf("CORE=%s\n", CORENAME); printf("CORE=%s\n", CORENAME);
#else #else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) #if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
printf("CORE=%s\n", get_corename()); printf("CORE=%s\n", get_corename());
#endif #endif
#endif #endif
@ -945,7 +949,7 @@ int main(int argc, char *argv[]){
#endif #endif
#if defined(__i386__) || defined(__x86_64__) #ifdef INTEL_AMD
#ifndef FORCE #ifndef FORCE
get_sse(); get_sse();
#else #else
@ -1025,7 +1029,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE #ifdef FORCE
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
#else #else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) #if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
#endif #endif
#endif #endif

166
interface/CMakeLists.txt Normal file
View File

@ -0,0 +1,166 @@
include_directories(${CMAKE_SOURCE_DIR})

# Level-1 sources.
set(BLAS1_SOURCES
  copy.c
  nrm2.c
)

set(BLAS1_REAL_ONLY_SOURCES
  rotm.c rotmg.c # N.B. these do not have complex counterparts
  rot.c
  asum.c
)

# these will have 'z' prepended for the complex version
set(BLAS1_MANGLED_SOURCES
  axpy.c
  swap.c
  scal.c
  dot.c
  rotg.c
  axpby.c
)

# TODO: USE_NETLIB_GEMV should switch gemv.c to netlib/*gemv.f
# these all have 'z' sources for complex versions
set(BLAS2_SOURCES
  gemv.c
  ger.c
  trsv.c
  trmv.c
  symv.c
  syr.c
  syr2.c
  gbmv.c
  sbmv.c
  spmv.c
  spr.c
  spr2.c
  tbsv.c
  tbmv.c
  tpsv.c
  tpmv.c
)

# Hermitian level-2 routines.
set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES
  hemv.c
  hbmv.c
  her.c
  her2.c
  hpmv.c
  hpr.c
  hpr2.c
)

# these do not have separate 'z' sources
set(BLAS3_SOURCES
  gemm.c
  symm.c
  trsm.c
  syrk.c
  syr2k.c
)

set(BLAS3_MANGLED_SOURCES
  omatcopy.c
  imatcopy.c
  geadd.c
)
# generate the BLAS objs once with and once without cblas
# CBLAS_FLAGS gets a 0 unless NO_FBLAS is defined and a 1 unless NO_CBLAS is
# defined; the loop below runs once per entry.
set (CBLAS_FLAGS "")
if (NOT DEFINED NO_FBLAS)
  list(APPEND CBLAS_FLAGS 0)
endif ()
if (NOT DEFINED NO_CBLAS)
  list(APPEND CBLAS_FLAGS 1)
endif ()

foreach (CBLAS_FLAG ${CBLAS_FLAGS})

  # TODO: don't compile complex sources with cblas for now, the naming schemes are all different and they will have to be handled separately from SINGLE/DOUBLE
  set(DISABLE_COMPLEX 0)
  set(MANGLE_COMPLEX 3)
  if (CBLAS_FLAG EQUAL 1)
    # set(DISABLE_COMPLEX 1)
    # set(MANGLE_COMPLEX 1)
  endif ()

  # Generic level 1/2/3 source lists.
  GenerateNamedObjects("${BLAS1_SOURCES}"                      "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
  GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}"            "" "" ${CBLAS_FLAG} "" "" false 1)
  GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}"              "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
  GenerateNamedObjects("${BLAS2_SOURCES}"                      "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
  GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4)
  GenerateNamedObjects("${BLAS3_SOURCES}"                      "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
  GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}"              "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})

  #sdsdot, dsdot
  GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
  GenerateNamedObjects("dsdot.c"  "" "dsdot"  ${CBLAS_FLAG} "" "" true "SINGLE")

  # trmm is trsm with a compiler flag set
  GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})

  # max and imax are compiled 4 times
  GenerateNamedObjects("max.c" ""                ""     ${CBLAS_FLAG})
  GenerateNamedObjects("max.c" "USE_ABS"         "amax" ${CBLAS_FLAG})
  GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG})
  GenerateNamedObjects("max.c" "USE_MIN"         "min"  ${CBLAS_FLAG})

  GenerateNamedObjects("imax.c" ""                "i*max"  ${CBLAS_FLAG})
  GenerateNamedObjects("imax.c" "USE_ABS"         "i*amax" ${CBLAS_FLAG})
  GenerateNamedObjects("imax.c" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG})
  GenerateNamedObjects("imax.c" "USE_MIN"         "i*min"  ${CBLAS_FLAG})

  # complex-specific sources
  foreach (float_type ${FLOAT_TYPES})

    # Shared by both complex precisions.
    if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
      GenerateNamedObjects("zger.c"  ""     "geru"  ${CBLAS_FLAG} "" "" false ${float_type})
      GenerateNamedObjects("zger.c"  "CONJ" "gerc"  ${CBLAS_FLAG} "" "" false ${float_type})
      GenerateNamedObjects("zdot.c"  "CONJ" "dotc"  ${CBLAS_FLAG} "" "" false ${float_type})
      GenerateNamedObjects("zdot.c"  ""     "dotu"  ${CBLAS_FLAG} "" "" false ${float_type})

      GenerateNamedObjects("symm.c"  "HEMM" "hemm"  ${CBLAS_FLAG} "" "" false ${float_type})
      GenerateNamedObjects("syrk.c"  "HEMM" "herk"  ${CBLAS_FLAG} "" "" false ${float_type})
      GenerateNamedObjects("syr2k.c" "HEMM" "her2k" ${CBLAS_FLAG} "" "" false ${float_type})

      # NOTE(review): this call passes a literal false where the neighbouring
      # calls pass ${CBLAS_FLAG} — confirm that is intended.
      if (USE_GEMM3M)
        GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type})
      endif()
    endif ()

    # Single-precision complex routines with explicitly spelled names.
    if (${float_type} STREQUAL "COMPLEX")
      GenerateNamedObjects("zscal.c" "SSCAL"           "sscal"  ${CBLAS_FLAG} "" "" false "COMPLEX")
      GenerateNamedObjects("nrm2.c"  ""                "scnrm2" ${CBLAS_FLAG} "" "" true  "COMPLEX")
      GenerateNamedObjects("zrot.c"  ""                "csrot"  ${CBLAS_FLAG} "" "" true  "COMPLEX")
      GenerateNamedObjects("max.c"   "USE_ABS;USE_MIN" "scamin" ${CBLAS_FLAG} "" "" true  "COMPLEX")
      GenerateNamedObjects("max.c"   "USE_ABS"         "scamax" ${CBLAS_FLAG} "" "" true  "COMPLEX")
      GenerateNamedObjects("asum.c"  ""                "scasum" ${CBLAS_FLAG} "" "" true  "COMPLEX")
    endif ()

    # Double-precision complex counterparts.
    if (${float_type} STREQUAL "ZCOMPLEX")
      GenerateNamedObjects("zscal.c" "SSCAL"           "dscal"  ${CBLAS_FLAG} "" "" false "ZCOMPLEX")
      GenerateNamedObjects("nrm2.c"  ""                "dznrm2" ${CBLAS_FLAG} "" "" true  "ZCOMPLEX")
      GenerateNamedObjects("zrot.c"  ""                "zdrot"  ${CBLAS_FLAG} "" "" true  "ZCOMPLEX")
      GenerateNamedObjects("max.c"   "USE_ABS;USE_MIN" "dzamin" ${CBLAS_FLAG} "" "" true  "ZCOMPLEX")
      GenerateNamedObjects("max.c"   "USE_ABS"         "dzamax" ${CBLAS_FLAG} "" "" true  "ZCOMPLEX")
      GenerateNamedObjects("asum.c"  ""                "dzasum" ${CBLAS_FLAG} "" "" true  "ZCOMPLEX")
    endif ()

  endforeach ()

endforeach ()
#Special functions for CBLAS
# Only generated when NO_CBLAS is not defined, and only for the two complex
# float types.
if (NOT DEFINED NO_CBLAS)
  foreach (float_type ${FLOAT_TYPES})
    if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
      #cblas_dotc_sub cblas_dotu_sub
      GenerateNamedObjects(
        "zdot.c" "FORCE_USE_STACK" "dotu_sub" 1 "" "" false ${float_type})
      GenerateNamedObjects(
        "zdot.c" "FORCE_USE_STACK;CONJ" "dotc_sub" 1 "" "" false ${float_type})
    endif()
  endforeach ()
endif()
# LAPACK interface wrappers, generated only when NO_LAPACK is not defined.
# NOTE(review): this tests DEFINED, so NO_LAPACK set to a false value still
# disables the block — confirm that matches how NO_LAPACK is tested elsewhere.
if (NOT DEFINED NO_LAPACK)
  set(LAPACK_SOURCES
    lapack/gesv.c
  )

  # prepend z for complex versions
  set(LAPACK_MANGLED_SOURCES
    lapack/getrf.c
    lapack/getrs.c
    lapack/potrf.c
    lapack/getf2.c
    lapack/potf2.c
    lapack/laswp.c
    lapack/lauu2.c
    lapack/lauum.c
    lapack/trti2.c
    lapack/trtri.c
  )

  GenerateNamedObjects("${LAPACK_SOURCES}")
  GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3)
endif ()

# Object library for this directory, built from the files listed in OPENBLAS_SRC.
add_library(interface OBJECT ${OPENBLAS_SRC})

View File

@ -121,6 +121,9 @@ void NAME(char *TRANSA, char *TRANSB,
FLOAT *sa, *sb; FLOAT *sa, *sb;
#ifdef SMP #ifdef SMP
int nthreads_max;
int nthreads_avail;
double MNK;
#ifndef COMPLEX #ifndef COMPLEX
#ifdef XDOUBLE #ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_REAL; int mode = BLAS_XDOUBLE | BLAS_REAL;
@ -237,6 +240,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
XFLOAT *sa, *sb; XFLOAT *sa, *sb;
#ifdef SMP #ifdef SMP
int nthreads_max;
int nthreads_avail;
double MNK;
#ifndef COMPLEX #ifndef COMPLEX
#ifdef XDOUBLE #ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_REAL; int mode = BLAS_XDOUBLE | BLAS_REAL;
@ -400,15 +406,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
mode |= (transa << BLAS_TRANSA_SHIFT); mode |= (transa << BLAS_TRANSA_SHIFT);
mode |= (transb << BLAS_TRANSB_SHIFT); mode |= (transb << BLAS_TRANSB_SHIFT);
int nthreads_max = num_cpu_avail(3); nthreads_max = num_cpu_avail(3);
int nthreads_avail = nthreads_max; nthreads_avail = nthreads_max;
#ifndef COMPLEX #ifndef COMPLEX
double MNK = (double) args.m * (double) args.n * (double) args.k; MNK = (double) args.m * (double) args.n * (double) args.k;
if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
nthreads_max = 1; nthreads_max = 1;
#else #else
double MNK = (double) args.m * (double) args.n * (double) args.k; MNK = (double) args.m * (double) args.n * (double) args.k;
if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
nthreads_max = 1; nthreads_max = 1;
#endif #endif

View File

@ -81,6 +81,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
FLOAT *buffer; FLOAT *buffer;
#ifdef SMP #ifdef SMP
int nthreads; int nthreads;
int nthreads_max;
int nthreads_avail;
double MNK;
#endif #endif
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
@ -135,6 +138,9 @@ void CNAME(enum CBLAS_ORDER order,
blasint info, t; blasint info, t;
#ifdef SMP #ifdef SMP
int nthreads; int nthreads;
int nthreads_max;
int nthreads_avail;
double MNK;
#endif #endif
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
@ -235,10 +241,10 @@ void CNAME(enum CBLAS_ORDER order,
#ifdef SMP #ifdef SMP
int nthreads_max = num_cpu_avail(2); nthreads_max = num_cpu_avail(2);
int nthreads_avail = nthreads_max; nthreads_avail = nthreads_max;
double MNK = (double) m * (double) n; MNK = (double) m * (double) n;
if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) ) if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) )
nthreads_max = 1; nthreads_max = 1;

View File

@ -136,6 +136,8 @@ blasint NAME(blasint *N, FLOAT *x, blasint *INCX){
ret = (blasint)MAX_K(n, x, incx); ret = (blasint)MAX_K(n, x, incx);
if(ret > n) ret=n;
FUNCTION_PROFILE_END(COMPSIZE, n, 0); FUNCTION_PROFILE_END(COMPSIZE, n, 0);
IDEBUG_END; IDEBUG_END;
@ -159,6 +161,8 @@ CBLAS_INDEX CNAME(blasint n, FLOAT *x, blasint incx){
ret = MAX_K(n, x, incx); ret = MAX_K(n, x, incx);
if (ret > n) ret=n;
if (ret) ret --; if (ret) ret --;
FUNCTION_PROFILE_END(COMPSIZE, n, 0); FUNCTION_PROFILE_END(COMPSIZE, n, 0);

View File

@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
#endif #endif
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
long double da = *DA; long double da = *DA;
long double db = *DB; long double db = *DB;

View File

@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *
#endif #endif
if (n <= 0) return;
FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1); FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0); FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1); FLOAT beta_i = *(BETA + 1);
if (n <= 0) return;
FUNCTION_PROFILE_START(); FUNCTION_PROFILE_START();
if (incx < 0) x -= (n - 1) * incx * 2; if (incx < 0) x -= (n - 1) * incx * 2;

View File

@ -57,21 +57,25 @@
#ifdef RETURN_BY_STRUCT #ifdef RETURN_BY_STRUCT
MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#elif defined RETURN_BY_STACK #elif defined RETURN_BY_STACK
void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#else #else
FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#endif #endif
BLASLONG n = *N; BLASLONG n = *N;
BLASLONG incx = *INCX; BLASLONG incx = *INCX;
BLASLONG incy = *INCY; BLASLONG incy = *INCY;
#ifndef RETURN_BY_STACK #ifndef RETURN_BY_STACK
FLOAT _Complex ret; OPENBLAS_COMPLEX_FLOAT ret;
#endif #endif
#ifdef RETURN_BY_STRUCT #ifdef RETURN_BY_STRUCT
MYTYPE myret; MYTYPE myret;
#endif #endif
#ifndef RETURN_BY_STRUCT
OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
#endif
PRINT_DEBUG_NAME; PRINT_DEBUG_NAME;
if (n <= 0) { if (n <= 0) {
@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
myret.i = 0.; myret.i = 0.;
return myret; return myret;
#elif defined RETURN_BY_STACK #elif defined RETURN_BY_STACK
*result = ZERO; *result = zero;
return; return;
#else #else
return ZERO; return zero;
#endif #endif
} }
@ -144,21 +148,24 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
#else #else
#ifdef FORCE_USE_STACK #ifdef FORCE_USE_STACK
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){
#else #else
FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
FLOAT _Complex ret; OPENBLAS_COMPLEX_FLOAT ret;
OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
#endif #endif
PRINT_DEBUG_CNAME; PRINT_DEBUG_CNAME;
if (n <= 0) { if (n <= 0) {
#ifdef FORCE_USE_STACK #ifdef FORCE_USE_STACK
*result = ZERO; //*result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
CREAL(*result) = 0.0;
CIMAG(*result) = 0.0;
return; return;
#else #else
return ZERO; return zero;
#endif #endif
} }

View File

@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
FLOAT *buffer; FLOAT *buffer;
#ifdef SMP #ifdef SMP
int nthreads; int nthreads;
int nthreads_max;
int nthreads_avail;
double MNK;
#endif #endif
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N,
blasint lenx, leny; blasint lenx, leny;
blasint i; blasint i;
PRINT_DEBUG_NAME;
FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1); FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0); FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1); FLOAT beta_i = *(BETA + 1);
PRINT_DEBUG_NAME;
TOUPPER(trans); TOUPPER(trans);
info = 0; info = 0;
@ -145,6 +148,9 @@ void CNAME(enum CBLAS_ORDER order,
blasint info, t; blasint info, t;
#ifdef SMP #ifdef SMP
int nthreads; int nthreads;
int nthreads_max;
int nthreads_avail;
double MNK;
#endif #endif
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
@ -153,14 +159,14 @@ void CNAME(enum CBLAS_ORDER order,
GEMV_O, GEMV_U, GEMV_S, GEMV_D, GEMV_O, GEMV_U, GEMV_S, GEMV_D,
}; };
PRINT_DEBUG_CNAME;
FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1); FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0); FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1); FLOAT beta_i = *(BETA + 1);
PRINT_DEBUG_CNAME;
trans = -1; trans = -1;
info = 0; info = 0;
@ -234,10 +240,10 @@ void CNAME(enum CBLAS_ORDER order,
#ifdef SMP #ifdef SMP
int nthreads_max = num_cpu_avail(2); nthreads_max = num_cpu_avail(2);
int nthreads_avail = nthreads_max; nthreads_avail = nthreads_max;
double MNK = (double) m * (double) n; MNK = (double) m * (double) n;
if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) ))
nthreads_max = 1; nthreads_max = 1;

View File

@ -6,13 +6,7 @@
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
PRINT_DEBUG_NAME; #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
IDEBUG_START;
FUNCTION_PROFILE_START();
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
long double da_r = *(DA + 0); long double da_r = *(DA + 0);
long double da_i = *(DA + 1); long double da_i = *(DA + 1);
@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
long double ada = fabs(da_r) + fabs(da_i); long double ada = fabs(da_r) + fabs(da_i);
PRINT_DEBUG_NAME;
IDEBUG_START;
FUNCTION_PROFILE_START();
if (ada == ZERO) { if (ada == ZERO) {
*C = ZERO; *C = ZERO;
*(S + 0) = ONE; *(S + 0) = ONE;
@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
FLOAT ada = fabs(da_r) + fabs(da_i); FLOAT ada = fabs(da_r) + fabs(da_i);
FLOAT adb; FLOAT adb;
PRINT_DEBUG_NAME;
IDEBUG_START;
FUNCTION_PROFILE_START();
if (ada == ZERO) { if (ada == ZERO) {
*C = ZERO; *C = ZERO;
*(S + 0) = ONE; *(S + 0) = ONE;

View File

@ -121,6 +121,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO
FLOAT *buffer; FLOAT *buffer;
int trans, uplo; int trans, uplo;
blasint info; blasint info;
FLOAT * ALPHA = &alpha;
FLOAT alpha_r = ALPHA[0];
FLOAT alpha_i = ALPHA[1];
#ifdef SMP #ifdef SMP
int nthreads; int nthreads;
#endif #endif

428
kernel/CMakeLists.txt Normal file
View File

@ -0,0 +1,428 @@
include_directories(${CMAKE_SOURCE_DIR})
include("${CMAKE_SOURCE_DIR}/cmake/kernel.cmake")

# Makefile
# KDIR is the empty string in both configurations, so it is set once up front.
set(KDIR "")
if (NOT DEFINED TARGET_CORE)
  # No explicit core was requested: build for CORE and use no symbol suffix.
  set(TARGET_CORE ${CORE})
  set(TSUFFIX "")
else ()
  #override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
  set(BUILD_KERNEL 1)
  set(TSUFFIX "_${TARGET_CORE}")
endif ()

# The defaults are installed before the KERNEL files are parsed
# (presumably so that the parsed files can override them - confirm in cmake/kernel.cmake).
SetDefaultL1()
SetDefaultL2()
SetDefaultL3()
# Generic KERNEL file first, then the one specific to the selected core.
ParseMakefileVars("${KERNELDIR}/KERNEL")
ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}")
# cpuid helper, built only when ARCH is "x86".
# MSVC gets the C implementation (cpuid_win.c); every other compiler gets
# the assembly source (cpuid.S).
# ARCH is quoted so that an empty or undefined ARCH evaluates to a plain
# "false" instead of producing a malformed if() expression.
if ("${ARCH}" STREQUAL "x86")
  if (NOT MSVC)
    GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true)
  else()
    GenerateNamedObjects("${KERNELDIR}/cpuid_win.c" "" "" false "" "" true)
  endif()
endif ()
# don't use float type name mangling here
# These three objects carry fixed names (lsame, scabs1, dcabs1) and pass their
# precision/complex defines explicitly instead of a float type argument.
# NOTE(review): the trailing `true` presumably switches name mangling off in
# GenerateNamedObjects - confirm against cmake/utils.cmake.
GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" false "" "" true)
GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" false "" "" true)
GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" false "" "" true)
# Makefile.L1
# Level-1 kernels: one set of objects per entry in FLOAT_TYPES.
foreach (float_type ${FLOAT_TYPES})
# a bit of metaprogramming here to pull out the appropriate KERNEL var
# float_char is the first character of float_type; ${${float_char}FOOKERNEL}
# then expands the variable whose name starts with that character.
string(SUBSTRING ${float_type} 0 1 float_char)
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type})
# max_k / min_k are only generated when a kernel variable exists for this type.
if (DEFINED ${float_char}MAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type})
endif ()
if (DEFINED ${float_char}MINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type})
endif ()
# Index-returning variants.
# NOTE(review): the `*` in the object name is presumably the position where
# GenerateNamedObjects inserts the type character - confirm in cmake/utils.cmake.
GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type})
if (DEFINED I${float_char}MAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type})
endif ()
if (DEFINED I${float_char}MINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type})
endif ()
# Kernels generated unconditionally for every float type.
GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type})
# Complex types get a conjugated axpy plus dotu/dotc (the latter built with CONJ);
# the other types get a single dot_k.
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dotu_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "CONJ" "dotc_k" false "" "" false ${float_type})
else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type})
endif ()
# The complex ROT kernel source is built a second time under the names
# srot_k (COMPLEX) and drot_k (ZCOMPLEX).
if (${float_type} STREQUAL "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type})
endif()
if (${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type})
endif()
endforeach ()
#dsdot,sdsdot
# Both objects are built from DSDOTKERNEL with the DSDOT define and the SINGLE float type.
# NOTE(review): the two calls differ only in the name pattern ("d*dot_k" vs "dsdot_k");
# presumably the first resolves to dsdot_k and the second is prefixed to sdsdot_k - confirm
# against the name-mangling rules in cmake/utils.cmake.
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE")
# Makefile.L2
# Generic C implementations of symv and ger.
# NOTE(review): the trailing `3` is passed where the other calls pass a float type;
# its meaning is defined in cmake/utils.cmake and is not visible here.
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
foreach (float_type ${FLOAT_TYPES})
# float_char is the first character of float_type and selects the per-type kernel variable.
string(SUBSTRING ${float_type} 0 1 float_char)
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
# Complex GER: four variants from the GERU/GERC kernels, selected by CONJ / XCONJ.
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "XCONJ" "gerv_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ;XCONJ" "gerd_k" false "" "" false ${float_type})
# Complex GEMV: eight variants from the N and T kernels, selected by CONJ / XCONJ / TRANSA.
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type})
# HEMV: each of the four variants has its own kernel variable and define set.
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type})
else ()
# Non-complex GEMV: only the N and T variants.
# NOTE(review): this branch defines TRANS while the complex branch defines TRANSA -
# confirm against the kernel sources that the difference is intentional.
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
endif ()
endforeach ()
# Makefile.L3
# USE_TRMM selects which kernel source the trmm_kernel_* objects are built from:
# when true the dedicated <X>TRMMKERNEL is used, otherwise <X>GEMMKERNEL is reused.
# It is enabled for the arm/arm64 architectures and for the LOONGSON3B, GENERIC
# and HASWELL targets/cores.
# Variable expansions are quoted so that an empty ARCH cannot produce a
# malformed if() expression.
set(USE_TRMM false)
if ("${ARCH}" STREQUAL "arm" OR "${ARCH}" STREQUAL "arm64" OR "${TARGET}" STREQUAL "LOONGSON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell")
  set(USE_TRMM true)
endif ()
foreach (float_type ${FLOAT_TYPES})
string(SUBSTRING ${float_type} 0 1 float_char)
# Main GEMM kernel for this float type.
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
# GEMM copy routines: each is generated only when its <X>GEMM??COPY variable
# evaluates to true. The object name comes from the matching <X>GEMM??COPYOBJ
# variable and the float type itself is passed as the define.
# NOTE(review): the trailing `true` presumably disables name mangling because the
# *OBJ variable already holds the final name - confirm in cmake/utils.cmake.
if (${float_char}GEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type})
endif ()
if (${float_char}GEMMITCOPY)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMITCOPY}" "${float_type}" "${${float_char}GEMMITCOPYOBJ}" false "" "" true ${float_type})
endif ()
if (${float_char}GEMMONCOPY)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMONCOPY}" "${float_type}" "${${float_char}GEMMONCOPYOBJ}" false "" "" true ${float_type})
endif ()
if (${float_char}GEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "${float_type}" "${${float_char}GEMMOTCOPYOBJ}" false "" "" true ${float_type})
endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "gemm_beta" false "" "" false ${float_type})
# TRMM_KERNEL is the source used for the trmm_kernel_* objects generated after this point:
# the dedicated TRMM kernel when USE_TRMM is set, otherwise the GEMM kernel.
if (USE_TRMM)
set(TRMM_KERNEL "${${float_char}TRMMKERNEL}")
else ()
set(TRMM_KERNEL "${${float_char}GEMMKERNEL}")
endif ()
# Level-3 objects that differ between complex and non-complex float types.
# The complex branch uses the z-prefixed generic copy sources, the other branch
# the unprefixed ones; the file name suffix is the GEMM unroll factor
# (<X>GEMM_UNROLL_M for the inner "i" copies, <X>GEMM_UNROLL_N for the "o" copies built with OUTER).
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
# just enumerate all these. there is an extra define for these indicating which side is a conjugate (e.g. CN NC NN) that I don't really want to work into GenerateCombinationObjects
# GEMM kernel variants, one per conjugation define (NN, CN, NC, CC).
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false ${float_type})
# TRMM kernel variants: LEFT for the L* names, TRANSA for *T/*C, CONJ for *R/*C.
GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;CONJ;CN" "trmm_kernel_LC" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;NN" "trmm_kernel_RN" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type})
# Conjugated TRSM kernels: the *R names reuse the LN/RN kernel sources and the
# *C names reuse the LT/RT kernel sources, each with CONJ added.
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type})
#hemm
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type})
# symm for c and z
GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_M}.c" "" "symm_iutcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type})
# trmm copy routines for c and z (UNIT selects the *ucopy name, its absence the *ncopy name)
GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type})
# trsm copy routines for c and z (same define/name scheme as the trmm copies)
GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type})
else () #For real
# The TRMM kernel combinations are produced by GenerateCombinationObjects from the LEFT/TRANSA defines.
GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type})
# symm for s and d
GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_M}.c" "" "symm_iutcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type})
# These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define.
# Could simplify it a bit by pairing up by -UUNIT/-DUNIT.
GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type})
# trsm copy routines for s and d (same define/name scheme as the trmm copies)
GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type})
endif ()
# Non-conjugated TRSM kernels, generated for every float type.
# The LN and RN variants are additionally built with UPPER.
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type})
# omatcopy
# Complex types (Z and C) default to the z-prefixed sources in ../arm,
# every other type to the unprefixed ones.
set(omatcopy_is_complex false)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
  set(omatcopy_is_complex true)
endif ()

# Fill in a default source for each plain variant that the KERNEL files left undefined.
foreach (omat_variant CN RN CT RT)
  if (NOT DEFINED ${float_char}OMATCOPY_${omat_variant})
    string(TOLOWER "${omat_variant}" omat_suffix)
    if (omatcopy_is_complex)
      set(${float_char}OMATCOPY_${omat_variant} ../arm/zomatcopy_${omat_suffix}.c)
    else ()
      set(${float_char}OMATCOPY_${omat_variant} ../arm/omatcopy_${omat_suffix}.c)
    endif ()
  endif ()
endforeach ()

# The row-major variants (rn, rt) are built with ROWM.
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type})

# Conjugating variants exist for complex types only: defaults first, then the objects.
if (omatcopy_is_complex)
  foreach (omat_variant CNC RNC CTC RTC)
    if (NOT DEFINED ${float_char}OMATCOPY_${omat_variant})
      string(TOLOWER "${omat_variant}" omat_suffix)
      set(${float_char}OMATCOPY_${omat_variant} ../arm/zomatcopy_${omat_suffix}.c)
    endif ()
  endforeach ()
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type})
endif ()
#imatcopy
# Complex types (Z and C) default to the z-prefixed sources in ../generic,
# every other type to the unprefixed ones.
set(imatcopy_is_complex false)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
  set(imatcopy_is_complex true)
endif ()

# Fill in a default source for each plain variant that the KERNEL files left undefined.
foreach (imat_variant CN RN CT RT)
  if (NOT DEFINED ${float_char}IMATCOPY_${imat_variant})
    string(TOLOWER "${imat_variant}" imat_suffix)
    if (imatcopy_is_complex)
      set(${float_char}IMATCOPY_${imat_variant} ../generic/zimatcopy_${imat_suffix}.c)
    else ()
      set(${float_char}IMATCOPY_${imat_variant} ../generic/imatcopy_${imat_suffix}.c)
    endif ()
  endif ()
endforeach ()

# The row-major variants (rn, rt) are built with ROWM.
GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CN}" "" "imatcopy_k_cn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RN}" "ROWM" "imatcopy_k_rn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CT}" "" "imatcopy_k_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RT}" "ROWM" "imatcopy_k_rt" false "" "" false ${float_type})

# Conjugating variants exist for complex types only: defaults first, then the objects.
if (imatcopy_is_complex)
  foreach (imat_variant CNC RNC CTC RTC)
    if (NOT DEFINED ${float_char}IMATCOPY_${imat_variant})
      string(TOLOWER "${imat_variant}" imat_suffix)
      set(${float_char}IMATCOPY_${imat_variant} ../generic/zimatcopy_${imat_suffix}.c)
    endif ()
  endforeach ()
  GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CNC}" "CONJ" "imatcopy_k_cnc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RNC}" "CONJ;ROWM" "imatcopy_k_rnc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CTC}" "CONJ" "imatcopy_k_ctc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RTC}" "CONJ;ROWM" "imatcopy_k_rtc" false "" "" false ${float_type})
endif ()
#geadd
# One geadd_k object per float type, built from the <X>GEADD_KERNEL variable.
# NOTE(review): no default for <X>GEADD_KERNEL is set in this file; presumably
# SetDefaultL3() or the parsed KERNEL files provide it - confirm in cmake/kernel.cmake.
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach ()
# Makefile.LA
# NOTE(review): the commented-out Makefile line below has no CMake counterpart in this file.
#DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX)
# Build the sources listed in OPENBLAS_SRC as an OBJECT library named "kernel".
# OPENBLAS_SRC is not assigned directly in this file; presumably the Generate*Objects
# calls above append to it - confirm in cmake/utils.cmake.
add_library(kernel OBJECT ${OPENBLAS_SRC})

View File

@ -3640,7 +3640,7 @@ ifndef DGEADD_K
DGEADD_K = ../generic/geadd.c DGEADD_K = ../generic/geadd.c
endif endif
$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) $(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K)
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@
ifndef CGEADD_K ifndef CGEADD_K

View File

@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL
BLASLONG ix,iy; BLASLONG ix,iy;
FLOAT temp; FLOAT temp;
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n < 0 ) return(0);
ix = 0; ix = 0;
iy = 0; iy = 0;
BLASLONG inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;
if ( beta_r == 0.0 && beta_i == 0.0) if ( beta_r == 0.0 && beta_i == 0.0)
{ {

View File

@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix,iy; BLASLONG ix,iy;
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n < 0 ) return(0);
if ( da_r == 0.0 && da_i == 0.0 ) return(0); if ( da_r == 0.0 && da_i == 0.0 ) return(0);
@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
ix = 0; ix = 0;
iy = 0; iy = 0;
BLASLONG inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;
while(i < n) while(i < n)
{ {

View File

@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n < 0 ) return(0);
BLASLONG inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;
while(i < n) while(i < n)
{ {

View File

@ -35,25 +35,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/ **************************************************************************************/
#include "common.h" #include "common.h"
#include <complex.h>
#ifndef _MSC_VER
#include <complex.h>
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT dot[2]; FLOAT dot[2];
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
BLASLONG inc_x2;
BLASLONG inc_y2;
dot[0]=0.0; dot[0]=0.0;
dot[1]=0.0; dot[1]=0.0;
__real__ result = 0.0 ; CREAL(result) = 0.0 ;
__imag__ result = 0.0 ; CIMAG(result) = 0.0 ;
if ( n < 1 ) return(result); if ( n < 1 ) return(result);
BLASLONG inc_x2 = 2 * inc_x ; inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ; inc_y2 = 2 * inc_y ;
while(i < n) while(i < n)
{ {
@ -69,8 +75,8 @@ FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG in
i++ ; i++ ;
} }
__real__ result = dot[0]; CREAL(result) = dot[0];
__imag__ result = dot[1]; CIMAG(result) = dot[1];
return(result); return(result);
} }

View File

@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT temp[2]; FLOAT temp[2];
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n <= 0 ) return(0); if ( n <= 0 ) return(0);
BLASLONG inc_x2 = 2 * inc_x ; inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ; inc_y2 = 2 * inc_y ;
while(i < n) while(i < n)
{ {

View File

@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT temp[2]; FLOAT temp[2];
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n < 0 ) return(0);
BLASLONG inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;
while(i < n) while(i < n)
{ {

View File

@ -550,6 +550,13 @@ gotoblas_t TABLE_NAME = {
zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
}; };

41
kernel/x86/cpuid_win.c Normal file
View File

@ -0,0 +1,41 @@
/***************************************************************************
Copyright (c) 2015, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#if defined(_MSC_VER) && !defined(__clang__)

#include <intrin.h>

/*
 * Run the CPUID instruction for leaf `op` through the MSVC __cpuid
 * intrinsic and hand the four result words back through the output
 * pointers.  All four pointers are written unconditionally, so none of
 * them may be NULL.
 *
 * Only compiled for MSVC proper (not clang in MSVC mode).
 */
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
    /* Scratch buffer the intrinsic fills; first slot preset to -1, the
       remaining slots are zero-initialised. */
    int regs[4] = { -1 };

    __cpuid(regs, op);

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}

#endif /* _MSC_VER && !__clang__ */

View File

@ -119,11 +119,11 @@ XCOPYKERNEL = zcopy.S
endif endif
ifndef SDOTKERNEL ifndef SDOTKERNEL
SDOTKERNEL = ../generic/dot.c SDOTKERNEL = ../generic/dot.c
endif endif
ifndef DSDOTKERNEL ifndef DSDOTKERNEL
DSDOTKERNEL = ../generic/dot.c DSDOTKERNEL = ../generic/dot.c
endif endif
ifndef DDOTKERNEL ifndef DDOTKERNEL

View File

@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c
ZHEMV_U_KERNEL = ../generic/zhemv_k.c ZHEMV_U_KERNEL = ../generic/zhemv_k.c
ZHEMV_L_KERNEL = ../generic/zhemv_k.c ZHEMV_L_KERNEL = ../generic/zhemv_k.c
LSAME_KERNEL = ../generic/lsame.c
SCABS_KERNEL = ../generic/cabs.c
DCABS_KERNEL = ../generic/cabs.c
QCABS_KERNEL = ../generic/cabs.c
#Dump kernel
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c

View File

@ -7,7 +7,7 @@ static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOA
static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOAT *C0, FLOAT *C1, FLOAT *C2,FLOAT *C3, FLOAT *C4, FLOAT *C5,FLOAT *C6, FLOAT *C7) static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOAT *C0, FLOAT *C1, FLOAT *C2,FLOAT *C3, FLOAT *C4, FLOAT *C5,FLOAT *C6, FLOAT *C7)
{ {
BLASLONG I = 0; BLASLONG i = 0;
BLASLONG temp1 = n * 8; BLASLONG temp1 = n * 8;
__asm__ __volatile__ __asm__ __volatile__
@ -110,7 +110,7 @@ static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOA
: :
: :
"a" (I), // 0 "a" (i), // 0
"r" (temp1), // 1 "r" (temp1), // 1
"S" (a), // 2 "S" (a), // 2
"D" (b), // 3 "D" (b), // 3

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Intel Corp. Copyright (c) 2014, Intel Corp.
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -33,7 +33,7 @@
#include "lapacke_utils.h" #include "lapacke_utils.h"
lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, lapack_int LAPACKE_cunmlq_work( int matrix_layout, char side, char trans,
lapack_int m, lapack_int n, lapack_int k, lapack_int m, lapack_int n, lapack_int k,
const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* tau, const lapack_complex_float* tau,
@ -41,20 +41,22 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans,
lapack_complex_float* work, lapack_int lwork ) lapack_complex_float* work, lapack_int lwork )
{ {
lapack_int info = 0; lapack_int info = 0;
if( matrix_order == LAPACK_COL_MAJOR ) { lapack_int r;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */ /* Call LAPACK function and adjust info */
LAPACK_cunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, LAPACK_cunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work,
&lwork, &info ); &lwork, &info );
if( info < 0 ) { if( info < 0 ) {
info = info - 1; info = info - 1;
} }
} else if( matrix_order == LAPACK_ROW_MAJOR ) { } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
r = LAPACKE_lsame( side, 'l' ) ? m : n;
lapack_int lda_t = MAX(1,k); lapack_int lda_t = MAX(1,k);
lapack_int ldc_t = MAX(1,m); lapack_int ldc_t = MAX(1,m);
lapack_complex_float* a_t = NULL; lapack_complex_float* a_t = NULL;
lapack_complex_float* c_t = NULL; lapack_complex_float* c_t = NULL;
/* Check leading dimension(s) */ /* Check leading dimension(s) */
if( lda < m ) { if( lda < r ) {
info = -8; info = -8;
LAPACKE_xerbla( "LAPACKE_cunmlq_work", info ); LAPACKE_xerbla( "LAPACKE_cunmlq_work", info );
return info; return info;
@ -84,8 +86,8 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans,
goto exit_level_1; goto exit_level_1;
} }
/* Transpose input matrices */ /* Transpose input matrices */
LAPACKE_cge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); LAPACKE_cge_trans( matrix_layout, k, m, a, lda, a_t, lda_t );
LAPACKE_cge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */ /* Call LAPACK function and adjust info */
LAPACK_cunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, LAPACK_cunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t,
work, &lwork, &info ); work, &lwork, &info );

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Intel Corp. Copyright (c) 2014, Intel Corp.
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -33,27 +33,29 @@
#include "lapacke_utils.h" #include "lapacke_utils.h"
lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, lapack_int LAPACKE_dormlq_work( int matrix_layout, char side, char trans,
lapack_int m, lapack_int n, lapack_int k, lapack_int m, lapack_int n, lapack_int k,
const double* a, lapack_int lda, const double* a, lapack_int lda,
const double* tau, double* c, lapack_int ldc, const double* tau, double* c, lapack_int ldc,
double* work, lapack_int lwork ) double* work, lapack_int lwork )
{ {
lapack_int info = 0; lapack_int info = 0;
lapack_int r;
lapack_int lda_t, ldc_t; lapack_int lda_t, ldc_t;
double *a_t = NULL, *c_t = NULL; double *a_t = NULL, *c_t = NULL;
if( matrix_order == LAPACK_COL_MAJOR ) { if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */ /* Call LAPACK function and adjust info */
LAPACK_dormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, LAPACK_dormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work,
&lwork, &info ); &lwork, &info );
if( info < 0 ) { if( info < 0 ) {
info = info - 1; info = info - 1;
} }
} else if( matrix_order == LAPACK_ROW_MAJOR ) { } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
r = LAPACKE_lsame( side, 'l' ) ? m : n;
lda_t = MAX(1,k); lda_t = MAX(1,k);
ldc_t = MAX(1,m); ldc_t = MAX(1,m);
/* Check leading dimension(s) */ /* Check leading dimension(s) */
if( lda < m ) { if( lda < r ) {
info = -8; info = -8;
LAPACKE_xerbla( "LAPACKE_dormlq_work", info ); LAPACKE_xerbla( "LAPACKE_dormlq_work", info );
return info; return info;
@ -81,8 +83,8 @@ lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans,
goto exit_level_1; goto exit_level_1;
} }
/* Transpose input matrices */ /* Transpose input matrices */
LAPACKE_dge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t );
LAPACKE_dge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */ /* Call LAPACK function and adjust info */
LAPACK_dormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, LAPACK_dormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t,
work, &lwork, &info ); work, &lwork, &info );

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Intel Corp. Copyright (c) 2014, Intel Corp.
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -33,27 +33,29 @@
#include "lapacke_utils.h" #include "lapacke_utils.h"
lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, lapack_int LAPACKE_sormlq_work( int matrix_layout, char side, char trans,
lapack_int m, lapack_int n, lapack_int k, lapack_int m, lapack_int n, lapack_int k,
const float* a, lapack_int lda, const float* a, lapack_int lda,
const float* tau, float* c, lapack_int ldc, const float* tau, float* c, lapack_int ldc,
float* work, lapack_int lwork ) float* work, lapack_int lwork )
{ {
lapack_int info = 0; lapack_int info = 0;
lapack_int r;
lapack_int lda_t, ldc_t; lapack_int lda_t, ldc_t;
float *a_t = NULL, *c_t = NULL; float *a_t = NULL, *c_t = NULL;
if( matrix_order == LAPACK_COL_MAJOR ) { if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */ /* Call LAPACK function and adjust info */
LAPACK_sormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, LAPACK_sormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work,
&lwork, &info ); &lwork, &info );
if( info < 0 ) { if( info < 0 ) {
info = info - 1; info = info - 1;
} }
} else if( matrix_order == LAPACK_ROW_MAJOR ) { } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
r = LAPACKE_lsame( side, 'l' ) ? m : n;
lda_t = MAX(1,k); lda_t = MAX(1,k);
ldc_t = MAX(1,m); ldc_t = MAX(1,m);
/* Check leading dimension(s) */ /* Check leading dimension(s) */
if( lda < m ) { if( lda < r ) {
info = -8; info = -8;
LAPACKE_xerbla( "LAPACKE_sormlq_work", info ); LAPACKE_xerbla( "LAPACKE_sormlq_work", info );
return info; return info;
@ -81,8 +83,8 @@ lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans,
goto exit_level_1; goto exit_level_1;
} }
/* Transpose input matrices */ /* Transpose input matrices */
LAPACKE_sge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); LAPACKE_sge_trans( matrix_layout, k, m, a, lda, a_t, lda_t );
LAPACKE_sge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */ /* Call LAPACK function and adjust info */
LAPACK_sormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, LAPACK_sormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t,
work, &lwork, &info ); work, &lwork, &info );

View File

@ -1,5 +1,5 @@
/***************************************************************************** /*****************************************************************************
Copyright (c) 2011, Intel Corp. Copyright (c) 2014, Intel Corp.
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -33,7 +33,7 @@
#include "lapacke_utils.h" #include "lapacke_utils.h"
lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, lapack_int LAPACKE_zunmlq_work( int matrix_layout, char side, char trans,
lapack_int m, lapack_int n, lapack_int k, lapack_int m, lapack_int n, lapack_int k,
const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* tau, const lapack_complex_double* tau,
@ -41,20 +41,22 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans,
lapack_complex_double* work, lapack_int lwork ) lapack_complex_double* work, lapack_int lwork )
{ {
lapack_int info = 0; lapack_int info = 0;
if( matrix_order == LAPACK_COL_MAJOR ) { lapack_int r;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */ /* Call LAPACK function and adjust info */
LAPACK_zunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, LAPACK_zunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work,
&lwork, &info ); &lwork, &info );
if( info < 0 ) { if( info < 0 ) {
info = info - 1; info = info - 1;
} }
} else if( matrix_order == LAPACK_ROW_MAJOR ) { } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
r = LAPACKE_lsame( side, 'l' ) ? m : n;
lapack_int lda_t = MAX(1,k); lapack_int lda_t = MAX(1,k);
lapack_int ldc_t = MAX(1,m); lapack_int ldc_t = MAX(1,m);
lapack_complex_double* a_t = NULL; lapack_complex_double* a_t = NULL;
lapack_complex_double* c_t = NULL; lapack_complex_double* c_t = NULL;
/* Check leading dimension(s) */ /* Check leading dimension(s) */
if( lda < m ) { if( lda < r ) {
info = -8; info = -8;
LAPACKE_xerbla( "LAPACKE_zunmlq_work", info ); LAPACKE_xerbla( "LAPACKE_zunmlq_work", info );
return info; return info;
@ -84,8 +86,8 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans,
goto exit_level_1; goto exit_level_1;
} }
/* Transpose input matrices */ /* Transpose input matrices */
LAPACKE_zge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); LAPACKE_zge_trans( matrix_layout, k, m, a, lda, a_t, lda_t );
LAPACKE_zge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */ /* Call LAPACK function and adjust info */
LAPACK_zunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, LAPACK_zunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t,
work, &lwork, &info ); work, &lwork, &info );

98
lapack/CMakeLists.txt Normal file
View File

@ -0,0 +1,98 @@
# Put the OpenBLAS source root on the include path for the sources compiled
# in this directory.  PROJECT_SOURCE_DIR (the directory of the enclosing
# project() call) is used instead of CMAKE_SOURCE_DIR so the path stays
# correct when OpenBLAS is pulled into a larger build via add_subdirectory().
include_directories("${PROJECT_SOURCE_DIR}")
# Single-threaded drivers that are passed to GenerateNamedObjects as-is.
set(LAPACK_SOURCES
  getrf/getrf_single.c
  potrf/potrf_U_single.c
  potrf/potrf_L_single.c
  lauum/lauum_U_single.c
  lauum/lauum_L_single.c
)

# Unblocked kernels.  For the complex build the file name gets a 'z' added.
set(LAPACK_MANGLED_SOURCES
  getf2/getf2_k.c
  lauu2/lauu2_U.c
  lauu2/lauu2_L.c
  potf2/potf2_U.c
  potf2/potf2_L.c
)

# Sources that must be compiled with TRANS defined.
# A 'z' variant of this file exists for the complex types.
set(TRANS_SOURCES
  getrs/getrs_single.c
)

# Sources that must be compiled with UNIT defined.
# These have no 'z' variant.
set(UNIT_SOURCES
  trtri/trtri_U_single.c
  trtri/trtri_L_single.c
)

# UNIT-dependent sources that do have a 'z' variant.
set(UNIT_SOURCES2
  trti2/trti2_U.c
  trti2/trti2_L.c
)
# Plain drivers: helper defaults only.
GenerateNamedObjects("${LAPACK_SOURCES}")

# Unblocked kernels.
# NOTE(review): the trailing 3 presumably selects the "add a 'z' to the file
# name for complex" scheme - confirm against cmake/utils.cmake.
GenerateNamedObjects(
  "${LAPACK_MANGLED_SOURCES}"
  "" "" false "" "" false 3
)

# TODO: laswp needs arch specific code
# The same generic source is built twice: once with no extra define as
# "laswp_plus" and once with MINUS defined as "laswp_minus".
GenerateNamedObjects(
  "laswp/generic/laswp_k.c"
  "" "laswp_plus"
  false "" "" false 3
)
GenerateNamedObjects(
  "laswp/generic/laswp_k.c"
  "MINUS" "laswp_minus"
  false "" "" false 3
)
# Extra sources that are only built for multi-threaded (SMP) configurations.
if (SMP)

  # getrf has two threaded implementations; default to the generic one and
  # switch to the OpenMP flavour when USE_OPENMP is on.
  set(GETRF_SRC getrf/getrf_parallel.c)
  if (USE_OPENMP)
    set(GETRF_SRC getrf/getrf_parallel_omp.c)
  endif ()

  # Threaded drivers; none of these has a 'z' version.
  set(PARALLEL_SOURCES
    ${GETRF_SRC}
    lauum/lauum_U_parallel.c
    lauum/lauum_L_parallel.c
    potrf/potrf_U_parallel.c
    potrf/potrf_L_parallel.c
  )

  # Threaded getrs joins the TRANS-dependent list (it has a 'z' version).
  list(APPEND TRANS_SOURCES getrs/getrs_parallel.c)

  # Threaded trtri joins the UNIT-dependent list (no 'z' version).
  list(APPEND UNIT_SOURCES
    trtri/trtri_U_parallel.c
    trtri/trtri_L_parallel.c
  )

  GenerateNamedObjects("${PARALLEL_SOURCES}")

endif ()
# Build every TRANS variant of the TRANS-dependent sources, once per enabled
# float type.
#
# Complex types (COMPLEX / ZCOMPLEX) use the 'z'-prefixed source file
# (dir/name.c -> dir/zname.c) and four variants, TRANS=1..4, whose object
# names carry the letters N, T, R and C.  All other types are delegated to
# GenerateCombinationObjects.
foreach (float_type ${FLOAT_TYPES})
  # Quote the expansion so if() compares the value as a string instead of
  # treating it as the name of another variable.
  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
    foreach (trans_src ${TRANS_SOURCES})
      # Split "dir/op_kind.c" into the "op_" prefix and the "kind" suffix.
      string(REGEX MATCH "[a-z]/([a-z]+_)([a-z]+)" op_name "${trans_src}")
      if ("${op_name}" STREQUAL "")
        message(FATAL_ERROR
          "lapack: cannot derive object names from TRANS source '${trans_src}'")
      endif ()
      # Copy the captures right away: CMAKE_MATCH_<n> is overwritten by the
      # next regex operation, wherever it happens.
      set(op_prefix "${CMAKE_MATCH_1}")
      set(op_suffix "${CMAKE_MATCH_2}")

      string(REPLACE "/" "/z" ztrans_src "${trans_src}")

      GenerateNamedObjects("${ztrans_src}" "TRANS=1" "${op_prefix}N_${op_suffix}" false "" "" false ${float_type})
      GenerateNamedObjects("${ztrans_src}" "TRANS=2" "${op_prefix}T_${op_suffix}" false "" "" false ${float_type})
      GenerateNamedObjects("${ztrans_src}" "TRANS=3" "${op_prefix}R_${op_suffix}" false "" "" false ${float_type})
      GenerateNamedObjects("${ztrans_src}" "TRANS=4" "${op_prefix}C_${op_suffix}" false "" "" false ${float_type})
    endforeach ()
  else ()
    GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" false ${float_type})
  endif ()
endforeach ()
# UNIT-dependent sources without a 'z' version.
GenerateCombinationObjects(
  "${UNIT_SOURCES}"
  "UNIT" "N" "" 4
)

# UNIT-dependent sources that have a 'z' version.
# NOTE(review): the trailing 3 presumably selects the complex file-name
# scheme - confirm against cmake/utils.cmake.
GenerateCombinationObjects(
  "${UNIT_SOURCES2}"
  "UNIT" "N" "" 0
  "" "" 3
)

# Collect everything listed in OPENBLAS_SRC into the "lapack" OBJECT library.
add_library(lapack OBJECT ${OPENBLAS_SRC})

View File

@ -67,7 +67,7 @@ double sqrt(double);
#undef GETRF_FACTOR #undef GETRF_FACTOR
#define GETRF_FACTOR 1.00 #define GETRF_FACTOR 1.00
static inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) { static __inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) {
double m = (double)(M - IS - BK); double m = (double)(M - IS - BK);
double n = (double)(N - IS - BK); double n = (double)(N - IS - BK);
@ -373,7 +373,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
BLASLONG num_cpu; BLASLONG num_cpu;
#ifdef _MSC_VER
BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE];
#else
volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128))); volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
#endif
#ifndef COMPLEX #ifndef COMPLEX
#ifdef XDOUBLE #ifdef XDOUBLE

View File

@ -59,7 +59,8 @@ typedef int blasint;
extension since version 3.0. If neither are available, use a compatible extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) (__GNUC__ >= 3 && !defined(__cplusplus)) || \
_MSC_VER >= 1800) // Visual Studio 2013 supports complex
#define OPENBLAS_COMPLEX_C99 #define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus #ifndef __cplusplus
#include <complex.h> #include <complex.h>

View File

@ -43,7 +43,7 @@
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION) #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -141,7 +141,7 @@ static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -232,7 +232,7 @@ static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -362,7 +362,7 @@ static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -486,7 +486,7 @@ static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -613,7 +613,7 @@ static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -735,7 +735,7 @@ static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -862,7 +862,7 @@ static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -984,7 +984,7 @@ static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -1082,7 +1082,7 @@ static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -1180,7 +1180,7 @@ static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -1270,7 +1270,7 @@ static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -1360,7 +1360,7 @@ static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -1490,7 +1490,7 @@ static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -1620,7 +1620,7 @@ static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;
@ -1744,7 +1744,7 @@ static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
} }
} }
static inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ static __inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js; BLASLONG is, js;
FLOAT *aa1, *aa2; FLOAT *aa1, *aa2;

38
test/CMakeLists.txt Normal file
View File

@ -0,0 +1,38 @@
# Put the OpenBLAS source root on the include path.  PROJECT_SOURCE_DIR is
# used (as for the .dat inputs further down) so the path stays correct when
# OpenBLAS is not the top-level project of the build.
include_directories("${PROJECT_SOURCE_DIR}")

# The BLAS test drivers are Fortran programs.
enable_language(Fortran)
# Test drivers: one <name>.f per precision prefix (s, d, c, z) and per
# suffix (1, 2, 3).
set(OpenBLAS_Tests
  sblat1 sblat2 sblat3
  dblat1 dblat2 dblat3
  cblat1 cblat2 cblat3
  zblat1 zblat2 zblat3
)

# Each driver is its own executable, linked against the static OpenBLAS
# library target.
foreach (test_bin ${OpenBLAS_Tests})
  add_executable(${test_bin} ${test_bin}.f)
  target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}_static)
endforeach ()
# Generate the shell wrapper used to run the level 2/3 test drivers.
#
# Script arguments:
#   $1  test executable
#   $2  input file fed to the executable on stdin
#   $3  summary file the executable is expected to write
#
# The script exits 1 when the executable itself fails, when the summary
# file was not produced, or when the summary contains "FATAL"; otherwise it
# exits 0.  All arguments are quoted so paths containing spaces work.
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh"
  "rm -f \"$3\"\n"
  "\"$1\" < \"$2\" || exit 1\n"
  # Without this check a missing summary made grep fail with status 2, which
  # the old "$? -eq 0" test treated as success.
  "if [ ! -f \"$3\" ]; then\n"
  "echo \"Error: summary file $3 was not produced\"\n"
  "exit 1\n"
  "fi\n"
  "if grep -q FATAL \"$3\"; then\n"
  "echo Error\n"
  "exit 1\n"
  "fi\n"
  "exit 0\n"
)
# Register three CTest tests per precision prefix:
#   <p>blas1  runs <p>blat1 directly;
#   <p>blas2  and <p>blas3 run <p>blat2 / <p>blat3 through test_helper.sh,
#             passing the matching .dat input and the expected summary file
#             name (<P>BLAT2.SUMM / <P>BLAT3.SUMM).
#
# Executables are referenced with $<TARGET_FILE:...> rather than a
# hand-built path, so the tests find the binaries regardless of executable
# suffix or per-configuration output directory (e.g. Visual Studio).
set(float_types s d c z)
foreach(float_type ${float_types})
  string(TOUPPER "${float_type}" float_type_upper)

  add_test(NAME "${float_type}blas1"
           COMMAND "$<TARGET_FILE:${float_type}blat1>")

  add_test(NAME "${float_type}blas2"
           COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh"
                   "$<TARGET_FILE:${float_type}blat2>"
                   "${PROJECT_SOURCE_DIR}/test/${float_type}blat2.dat"
                   ${float_type_upper}BLAT2.SUMM)

  add_test(NAME "${float_type}blas3"
           COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh"
                   "$<TARGET_FILE:${float_type}blat3>"
                   "${PROJECT_SOURCE_DIR}/test/${float_type}blat3.dat"
                   ${float_type_upper}BLAT3.SUMM)
endforeach()