Use cmake to build OpenBLAS GENERIC Target on MSVC x86 64-bit.
Disable CBLAS and LAPACK.
This commit is contained in:
parent
ab0a0a75fc
commit
f874465bb8
|
@ -15,11 +15,13 @@ enable_language(C)
|
||||||
set(OpenBLAS_LIBNAME openblas)
|
set(OpenBLAS_LIBNAME openblas)
|
||||||
|
|
||||||
#######
|
#######
|
||||||
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS and CBLAS)" ON)
|
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||||
|
option(BUILD_WITHOUT_CBLAS "Without CBLAS" ON)
|
||||||
option(BUILD_DEBUG "Build Debug Version" OFF)
|
option(BUILD_DEBUG "Build Debug Version" OFF)
|
||||||
#######
|
#######
|
||||||
if(BUILD_WITHOUT_LAPACK)
|
if(BUILD_WITHOUT_LAPACK)
|
||||||
set(NO_LAPACK 1)
|
set(NO_LAPACK 1)
|
||||||
|
set(NO_LAPACKE 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(BUILD_DEBUG)
|
if(BUILD_DEBUG)
|
||||||
|
@ -27,6 +29,11 @@ set(CMAKE_BUILD_TYPE Debug)
|
||||||
else()
|
else()
|
||||||
set(CMAKE_BUILD_TYPE Release)
|
set(CMAKE_BUILD_TYPE Release)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(BUILD_WITHOUT_CBLAS)
|
||||||
|
set(NO_CBLAS 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
#######
|
#######
|
||||||
|
|
||||||
|
|
||||||
|
@ -51,7 +58,6 @@ endif ()
|
||||||
|
|
||||||
set(SUBDIRS ${BLASDIRS})
|
set(SUBDIRS ${BLASDIRS})
|
||||||
if (NOT NO_LAPACK)
|
if (NOT NO_LAPACK)
|
||||||
message ("error 1")
|
|
||||||
list(APPEND SUBDIRS lapack)
|
list(APPEND SUBDIRS lapack)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
@ -111,15 +117,21 @@ endforeach ()
|
||||||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
|
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
|
||||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
||||||
if (NOT NOFORTRAN AND NOT NO_LAPACK)
|
if (NOT NOFORTRAN AND NOT NO_LAPACK)
|
||||||
message ("error 2")
|
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
|
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
|
||||||
if (NOT NO_LAPACKE)
|
if (NOT NO_LAPACKE)
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
|
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
#Only generate .def for dll on MSVC
|
||||||
|
if(MSVC)
|
||||||
|
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
|
||||||
|
endif()
|
||||||
|
|
||||||
# add objects to the openblas lib
|
# add objects to the openblas lib
|
||||||
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
|
# OpenBLAS_DEF_FILE is only set when building with MSVC; on other toolchains it expands to nothing, so no .def source is requested there.
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||||
|
|
||||||
|
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
|
||||||
|
|
||||||
#only build shared library for MSVC
|
#only build shared library for MSVC
|
||||||
if(NOT MSVC)
|
if(NOT MSVC)
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
|
||||||
|
#Only generate .def for dll on MSVC
|
||||||
|
if(MSVC)
|
||||||
|
|
||||||
|
set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)
|
||||||
|
|
||||||
|
if (NOT DEFINED ARCH)
|
||||||
|
set(ARCH_IN "x86_64")
|
||||||
|
else()
|
||||||
|
set(ARCH_IN ${ARCH})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# A generic core overrides the architecture passed to gensymbol. ${CORE} is quoted so that an unset or empty CORE compares as an empty string instead of leaving STREQUAL without a left operand.
if ("${CORE}" STREQUAL "generic")
|
||||||
|
set(ARCH_IN "GENERIC")
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
if (NOT DEFINED EXPRECISION)
|
||||||
|
set(EXPRECISION_IN 0)
|
||||||
|
else()
|
||||||
|
set(EXPRECISION_IN ${EXPRECISION})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (NOT DEFINED NO_CBLAS)
|
||||||
|
set(NO_CBLAS_IN 0)
|
||||||
|
else()
|
||||||
|
set(NO_CBLAS_IN ${NO_CBLAS})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (NOT DEFINED NO_LAPACK)
|
||||||
|
set(NO_LAPACK_IN 0)
|
||||||
|
else()
|
||||||
|
set(NO_LAPACK_IN ${NO_LAPACK})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (NOT DEFINED NO_LAPACKE)
|
||||||
|
set(NO_LAPACKE_IN 0)
|
||||||
|
else()
|
||||||
|
set(NO_LAPACKE_IN ${NO_LAPACKE})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (NOT DEFINED NEED2UNDERSCORES)
|
||||||
|
set(NEED2UNDERSCORES_IN 0)
|
||||||
|
else()
|
||||||
|
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (NOT DEFINED ONLY_CBLAS)
|
||||||
|
set(ONLY_CBLAS_IN 0)
|
||||||
|
else()
|
||||||
|
set(ONLY_CBLAS_IN ${ONLY_CBLAS})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_custom_command(
|
||||||
|
TARGET ${OpenBLAS_LIBNAME} PRE_LINK
|
||||||
|
COMMAND perl
|
||||||
|
ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
|
||||||
|
COMMENT "Create openblas.def file"
|
||||||
|
VERBATIM)
|
||||||
|
|
||||||
|
endif()
|
|
@ -25,7 +25,10 @@ if (MSVC)
|
||||||
include(CMakeForceCompiler)
|
include(CMakeForceCompiler)
|
||||||
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
|
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (NOT NO_LAPACK)
|
||||||
enable_language(Fortran)
|
enable_language(Fortran)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (NOT ONLY_CBLAS)
|
if (NOT ONLY_CBLAS)
|
||||||
# N.B. f_check is not cross-platform, so instead try to use CMake variables
|
# N.B. f_check is not cross-platform, so instead try to use CMake variables
|
||||||
|
|
|
@ -99,10 +99,10 @@ macro(SetDefaultL1)
|
||||||
set(QGEMVTKERNEL gemv_t.S)
|
set(QGEMVTKERNEL gemv_t.S)
|
||||||
set(XGEMVNKERNEL zgemv_n.S)
|
set(XGEMVNKERNEL zgemv_n.S)
|
||||||
set(XGEMVTKERNEL zgemv_t.S)
|
set(XGEMVTKERNEL zgemv_t.S)
|
||||||
set(SCABS_KERNEL cabs.S)
|
set(SCABS_KERNEL ../generic/cabs.c)
|
||||||
set(DCABS_KERNEL cabs.S)
|
# Double and extended precision use the same C cabs kernel as SCABS_KERNEL; the ../generic path is paired with the .c source, not the assembly file name.
set(DCABS_KERNEL ../generic/cabs.c)
|
||||||
set(QCABS_KERNEL cabs.S)
|
set(QCABS_KERNEL ../generic/cabs.c)
|
||||||
set(LSAME_KERNEL lsame.S)
|
set(LSAME_KERNEL ../generic/lsame.c)
|
||||||
set(SAXPBYKERNEL ../arm/axpby.c)
|
set(SAXPBYKERNEL ../arm/axpby.c)
|
||||||
set(DAXPBYKERNEL ../arm/axpby.c)
|
set(DAXPBYKERNEL ../arm/axpby.c)
|
||||||
set(CAXPBYKERNEL ../arm/zaxpby.c)
|
set(CAXPBYKERNEL ../arm/zaxpby.c)
|
||||||
|
@ -156,3 +156,10 @@ macro(SetDefaultL2)
|
||||||
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
||||||
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
||||||
endmacro ()
|
endmacro ()
|
||||||
|
|
||||||
|
# SetDefaultL3: assigns the default GEADD kernel sources.
# SGEADD_KERNEL and DGEADD_KERNEL use ../generic/geadd.c; CGEADD_KERNEL and ZGEADD_KERNEL use ../generic/zgeadd.c.
# Takes no arguments. As a macro it sets these variables directly in the caller's scope.
macro(SetDefaultL3)
|
||||||
|
set(SGEADD_KERNEL ../generic/geadd.c)
|
||||||
|
set(DGEADD_KERNEL ../generic/geadd.c)
|
||||||
|
set(CGEADD_KERNEL ../generic/zgeadd.c)
|
||||||
|
set(ZGEADD_KERNEL ../generic/zgeadd.c)
|
||||||
|
endmacro ()
|
|
@ -66,6 +66,11 @@ if (NOT MSVC)
|
||||||
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
|
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (MSVC)
|
||||||
|
#Use generic for MSVC now
|
||||||
|
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
|
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
|
||||||
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
||||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
||||||
|
@ -73,7 +78,7 @@ try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||||
SOURCES ${GETARCH_SRC}
|
SOURCES ${GETARCH_SRC}
|
||||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
|
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
|
||||||
OUTPUT_VARIABLE GETARCH_LOG
|
OUTPUT_VARIABLE GETARCH_LOG
|
||||||
COPY_FILE ${GETARCH_BIN}
|
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||||
)
|
)
|
||||||
|
|
||||||
message(STATUS "Running getarch")
|
message(STATUS "Running getarch")
|
||||||
|
@ -95,7 +100,7 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||||
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
|
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
|
||||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
|
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
|
||||||
OUTPUT_VARIABLE GETARCH2_LOG
|
OUTPUT_VARIABLE GETARCH2_LOG
|
||||||
COPY_FILE ${GETARCH2_BIN}
|
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||||
)
|
)
|
||||||
|
|
||||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||||
|
|
|
@ -420,6 +420,21 @@ if (ONLY_CBLAS)
|
||||||
set(LIB_COMPONENTS CBLAS)
|
set(LIB_COMPONENTS CBLAS)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
||||||
|
# For GEMM3M
|
||||||
|
set(USE_GEMM3M 0)
|
||||||
|
|
||||||
|
# Enable GEMM3M for the architectures listed below, then switch it off again for a generic core.
# ${ARCH} and ${CORE} are quoted so that an empty ARCH or an unset CORE compares as an empty string instead of producing a malformed if() expression.
if (DEFINED ARCH)
|
||||||
|
if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS")
|
||||||
|
set(USE_GEMM3M 1)
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
if ("${CORE}" STREQUAL "generic")
|
||||||
|
set(USE_GEMM3M 0)
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
|
||||||
#export OSNAME
|
#export OSNAME
|
||||||
#export ARCH
|
#export ARCH
|
||||||
#export CORE
|
#export CORE
|
||||||
|
|
|
@ -102,6 +102,7 @@ endfunction ()
|
||||||
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
|
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
|
||||||
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
|
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
|
||||||
# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
|
# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
|
||||||
|
# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
|
||||||
# STRING - compiles only the given type (e.g. DOUBLE)
|
# STRING - compiles only the given type (e.g. DOUBLE)
|
||||||
function(GenerateNamedObjects sources_in)
|
function(GenerateNamedObjects sources_in)
|
||||||
|
|
||||||
|
@ -151,6 +152,9 @@ function(GenerateNamedObjects sources_in)
|
||||||
set(complex_only true)
|
set(complex_only true)
|
||||||
elseif (${ARGV7} EQUAL 3)
|
elseif (${ARGV7} EQUAL 3)
|
||||||
set(mangle_complex_sources true)
|
set(mangle_complex_sources true)
|
||||||
|
elseif (${ARGV7} EQUAL 4)
|
||||||
|
set(mangle_complex_sources true)
|
||||||
|
set(complex_only true)
|
||||||
elseif (NOT ${ARGV7} EQUAL 0)
|
elseif (NOT ${ARGV7} EQUAL 0)
|
||||||
set(float_list ${ARGV7})
|
set(float_list ${ARGV7})
|
||||||
endif ()
|
endif ()
|
||||||
|
|
45
common.h
45
common.h
|
@ -296,13 +296,6 @@ typedef int blasint;
|
||||||
#define COMPSIZE 2
|
#define COMPSIZE 2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(C_PGI) || defined(C_SUN)
|
|
||||||
#define CREAL(X) (*((FLOAT *)&X + 0))
|
|
||||||
#define CIMAG(X) (*((FLOAT *)&X + 1))
|
|
||||||
#else
|
|
||||||
#define CREAL __real__
|
|
||||||
#define CIMAG __imag__
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define Address_H(x) (((x)+(1<<15))>>16)
|
#define Address_H(x) (((x)+(1<<15))>>16)
|
||||||
#define Address_L(x) ((x)-((Address_H(x))<<16))
|
#define Address_L(x) ((x)-((Address_H(x))<<16))
|
||||||
|
@ -464,17 +457,49 @@ typedef char* env_var_t;
|
||||||
extension since version 3.0. If neither are available, use a compatible
|
extension since version 3.0. If neither are available, use a compatible
|
||||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||||
(__GNUC__ >= 3 && !defined(__cplusplus)))
|
(__GNUC__ >= 3 && !defined(__cplusplus)) || \
|
||||||
|
_MSC_VER >= 1800) // Visual Studio 2013 supports complex
|
||||||
#define OPENBLAS_COMPLEX_C99
|
#define OPENBLAS_COMPLEX_C99
|
||||||
typedef float _Complex openblas_complex_float;
|
typedef float _Complex openblas_complex_float;
|
||||||
typedef double _Complex openblas_complex_double;
|
typedef double _Complex openblas_complex_double;
|
||||||
typedef xdouble _Complex openblas_complex_xdouble;
|
typedef xdouble _Complex openblas_complex_xdouble;
|
||||||
|
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
|
||||||
|
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
|
||||||
|
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
|
||||||
#else
|
#else
|
||||||
#define OPENBLAS_COMPLEX_STRUCT
|
#define OPENBLAS_COMPLEX_STRUCT
|
||||||
typedef struct { float real, imag; } openblas_complex_float;
|
typedef struct { float real, imag; } openblas_complex_float;
|
||||||
typedef struct { double real, imag; } openblas_complex_double;
|
typedef struct { double real, imag; } openblas_complex_double;
|
||||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||||
|
#define openblas_make_complex_float(real, imag) {(real), (imag)}
|
||||||
|
#define openblas_make_complex_double(real, imag) {(real), (imag)}
|
||||||
|
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef XDOUBLE
|
||||||
|
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
|
||||||
|
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
|
||||||
|
#elif defined(DOUBLE)
|
||||||
|
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
|
||||||
|
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
|
||||||
|
#else
|
||||||
|
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
|
||||||
|
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(C_PGI) || defined(C_SUN)
|
||||||
|
#define CREAL(X) (*((FLOAT *)&X + 0))
|
||||||
|
#define CIMAG(X) (*((FLOAT *)&X + 1))
|
||||||
|
#else
|
||||||
|
#ifdef OPENBLAS_COMPLEX_STRUCT
|
||||||
|
#define CREAL(Z) ((Z).real)
|
||||||
|
#define CIMAG(Z) ((Z).imag)
|
||||||
|
#else
|
||||||
|
#define CREAL __real__
|
||||||
|
#define CIMAG __imag__
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // ASSEMBLER
|
#endif // ASSEMBLER
|
||||||
|
|
||||||
#ifndef IFLUSH
|
#ifndef IFLUSH
|
||||||
|
@ -491,6 +516,10 @@ typedef char* env_var_t;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(C_MSVC)
|
||||||
|
#define inline __inline
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
#ifndef MIN
|
#ifndef MIN
|
||||||
|
|
|
@ -41,6 +41,10 @@
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
#ifdef C_MSVC
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef C_SUN
|
#ifdef C_SUN
|
||||||
#define __asm__ __asm
|
#define __asm__ __asm
|
||||||
#define __volatile__
|
#define __volatile__
|
||||||
|
@ -61,30 +65,39 @@
|
||||||
|
|
||||||
static void __inline blas_lock(volatile BLASULONG *address){
|
static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
|
||||||
int ret;
|
BLASULONG ret;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
while (*address) {YIELDING;};
|
while (*address) {YIELDING;};
|
||||||
|
|
||||||
|
#ifndef C_MSVC
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
"xchgl %0, %1\n"
|
"xchgl %0, %1\n"
|
||||||
: "=r"(ret), "=m"(*address)
|
: "=r"(ret), "=m"(*address)
|
||||||
: "0"(1), "m"(*address)
|
: "0"(1), "m"(*address)
|
||||||
: "memory");
|
: "memory");
|
||||||
|
#else
|
||||||
|
ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
|
||||||
|
#endif
|
||||||
} while (ret);
|
} while (ret);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline BLASULONG rpcc(void){
|
static __inline BLASULONG rpcc(void){
|
||||||
|
#ifdef C_MSVC
|
||||||
|
return __rdtsc();
|
||||||
|
#else
|
||||||
BLASULONG a, d;
|
BLASULONG a, d;
|
||||||
|
|
||||||
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
|
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
|
||||||
|
|
||||||
return ((BLASULONG)a + ((BLASULONG)d << 32));
|
return ((BLASULONG)a + ((BLASULONG)d << 32));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#define RPCC64BIT
|
#define RPCC64BIT
|
||||||
|
|
||||||
|
#ifndef C_MSVC
|
||||||
static __inline BLASULONG getstackaddr(void){
|
static __inline BLASULONG getstackaddr(void){
|
||||||
BLASULONG addr;
|
BLASULONG addr;
|
||||||
|
|
||||||
|
@ -93,22 +106,32 @@ static __inline BLASULONG getstackaddr(void){
|
||||||
|
|
||||||
return addr;
|
return addr;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||||
|
|
||||||
|
#ifdef C_MSVC
|
||||||
|
int cpuinfo[4];
|
||||||
|
__cpuid(cpuinfo, op);
|
||||||
|
*eax=cpuinfo[0];
|
||||||
|
*ebx=cpuinfo[1];
|
||||||
|
*ecx=cpuinfo[2];
|
||||||
|
*edx=cpuinfo[3];
|
||||||
|
#else
|
||||||
__asm__ __volatile__("cpuid"
|
__asm__ __volatile__("cpuid"
|
||||||
: "=a" (*eax),
|
: "=a" (*eax),
|
||||||
"=b" (*ebx),
|
"=b" (*ebx),
|
||||||
"=c" (*ecx),
|
"=c" (*ecx),
|
||||||
"=d" (*edx)
|
"=d" (*edx)
|
||||||
: "0" (op));
|
: "0" (op));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#define WHEREAMI
|
#define WHEREAMI
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static inline int WhereAmI(void){
|
static __inline int WhereAmI(void){
|
||||||
int eax, ebx, ecx, edx;
|
int eax, ebx, ecx, edx;
|
||||||
int apicid;
|
int apicid;
|
||||||
|
|
||||||
|
@ -150,10 +173,14 @@ static inline int WhereAmI(void){
|
||||||
#define GET_IMAGE_CANCEL
|
#define GET_IMAGE_CANCEL
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
#ifdef USE64BITINT
|
#if defined(USE64BITINT)
|
||||||
static __inline blasint blas_quickdivide(blasint x, blasint y){
|
static __inline blasint blas_quickdivide(blasint x, blasint y){
|
||||||
return x / y;
|
return x / y;
|
||||||
}
|
}
|
||||||
|
#elif defined (C_MSVC)
|
||||||
|
static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
|
||||||
|
return x / y;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
extern unsigned int blas_quick_divide_table[];
|
extern unsigned int blas_quick_divide_table[];
|
||||||
|
|
||||||
|
|
|
@ -46,12 +46,28 @@ set(NU_SMP_SOURCES
|
||||||
tbmv_thread.c
|
tbmv_thread.c
|
||||||
)
|
)
|
||||||
|
|
||||||
|
set(ULVM_COMPLEX_SOURCES
|
||||||
|
hbmv_k.c
|
||||||
|
hpmv_k.c
|
||||||
|
hpr_k.c
|
||||||
|
hpr2_k.c
|
||||||
|
her_k.c
|
||||||
|
her2_k.c
|
||||||
|
)
|
||||||
|
|
||||||
# objects that need LOWER set
|
# objects that need LOWER set
|
||||||
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)
|
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)
|
||||||
|
|
||||||
# gbmv uses a lowercase n and t
|
# gbmv uses a lowercase n and t
|
||||||
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
|
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
|
||||||
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
|
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
|
||||||
|
# c/zgbmv
|
||||||
|
GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2)
|
||||||
|
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2)
|
||||||
|
GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2)
|
||||||
|
GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2)
|
||||||
|
GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2)
|
||||||
|
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)
|
||||||
|
|
||||||
# special defines for complex
|
# special defines for complex
|
||||||
foreach (float_type ${FLOAT_TYPES})
|
foreach (float_type ${FLOAT_TYPES})
|
||||||
|
@ -82,6 +98,14 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
|
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
|
foreach (ulvm_source ${ULVM_COMPLEX_SOURCES})
|
||||||
|
string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source})
|
||||||
|
GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
|
||||||
|
endforeach()
|
||||||
|
|
||||||
if (SMP)
|
if (SMP)
|
||||||
|
|
||||||
GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
|
GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
|
||||||
|
@ -103,6 +127,41 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
|
GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})
|
GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("syr_thread.c" "HEMVREV" "her_thread_V" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("syr_thread.c" "LOWER;HEMVREV" "her_thread_M" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
GenerateNamedObjects("syr2_thread.c" "HER2" "her2_thread_U" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("syr2_thread.c" "HER2;LOWER" "her2_thread_L" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("syr2_thread.c" "HEMVREV" "her2_thread_V" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("syr2_thread.c" "LOWER;HEMVREV" "her2_thread_M" false "" "" false ${float_type})
|
||||||
|
|
||||||
foreach (nu_smp_src ${NU_SMP_SOURCES})
|
foreach (nu_smp_src ${NU_SMP_SOURCES})
|
||||||
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
|
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
|
||||||
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})
|
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})
|
||||||
|
|
|
@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
FLOAT result;
|
FLOAT result;
|
||||||
#else
|
#else
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
FLOAT result;
|
FLOAT result;
|
||||||
#else
|
#else
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
a = (FLOAT *)args -> a;
|
a = (FLOAT *)args -> a;
|
||||||
|
|
|
@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
FLOAT result;
|
FLOAT result;
|
||||||
#else
|
#else
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
a = (FLOAT *)args -> a;
|
a = (FLOAT *)args -> a;
|
||||||
|
|
|
@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
FLOAT result;
|
FLOAT result;
|
||||||
#else
|
#else
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
FLOAT result;
|
FLOAT result;
|
||||||
#else
|
#else
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
FLOAT result;
|
FLOAT result;
|
||||||
#else
|
#else
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
|
||||||
FLOAT *bufferY = gemvbuffer;
|
FLOAT *bufferY = gemvbuffer;
|
||||||
FLOAT *bufferX = gemvbuffer;
|
FLOAT *bufferX = gemvbuffer;
|
||||||
#ifdef TRANS
|
#ifdef TRANS
|
||||||
FLOAT _Complex temp;
|
OPENBLAS_COMPLEX_FLOAT temp;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
|
|
|
@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
FLOAT *bufferX = sbmvbuffer;
|
FLOAT *bufferX = sbmvbuffer;
|
||||||
FLOAT temp[2];
|
FLOAT temp[2];
|
||||||
|
|
||||||
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
|
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||||
|
@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
||||||
|
|
||||||
if (length > 0) {
|
if (length > 0) {
|
||||||
FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
||||||
|
|
||||||
if (length > 0) {
|
if (length > 0) {
|
||||||
FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
||||||
|
|
||||||
if (length > 0) {
|
if (length > 0) {
|
||||||
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
|
||||||
|
|
||||||
if (length > 0) {
|
if (length > 0) {
|
||||||
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
|
|
@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
FLOAT *bufferX = gemvbuffer;
|
FLOAT *bufferX = gemvbuffer;
|
||||||
FLOAT temp[2];
|
FLOAT temp[2];
|
||||||
|
|
||||||
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
|
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||||
|
@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
#ifndef HEMVREV
|
#ifndef HEMVREV
|
||||||
#ifndef LOWER
|
#ifndef LOWER
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
FLOAT _Complex result = DOTC_K(i, a, 1, X, 1);
|
result = DOTC_K(i, a, 1, X, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
#else
|
#else
|
||||||
|
|
||||||
if (m - i > 1) {
|
if (m - i > 1) {
|
||||||
FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
|
result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
#else
|
#else
|
||||||
#ifndef LOWER
|
#ifndef LOWER
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
FLOAT _Complex result = DOTU_K(i, a, 1, X, 1);
|
result = DOTU_K(i, a, 1, X, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
#else
|
#else
|
||||||
|
|
||||||
if (m - i > 1) {
|
if (m - i > 1) {
|
||||||
FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
|
result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
|
|
@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
FLOAT *bufferY = sbmvbuffer;
|
FLOAT *bufferY = sbmvbuffer;
|
||||||
FLOAT *bufferX = sbmvbuffer;
|
FLOAT *bufferX = sbmvbuffer;
|
||||||
|
|
||||||
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
|
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||||
|
@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);
|
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);
|
||||||
|
|
||||||
if (length > 0) {
|
if (length > 0) {
|
||||||
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
a, 1, Y + i * COMPSIZE, 1, NULL, 0);
|
a, 1, Y + i * COMPSIZE, 1, NULL, 0);
|
||||||
|
|
||||||
if (length > 0) {
|
if (length > 0) {
|
||||||
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
|
||||||
|
|
||||||
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
|
||||||
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
|
||||||
|
|
|
@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
||||||
FLOAT *bufferY = gemvbuffer;
|
FLOAT *bufferY = gemvbuffer;
|
||||||
FLOAT *bufferX = gemvbuffer;
|
FLOAT *bufferX = gemvbuffer;
|
||||||
FLOAT _Complex result;
|
|
||||||
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
|
|
||||||
if (incy != 1) {
|
if (incy != 1) {
|
||||||
Y = bufferY;
|
Y = bufferY;
|
||||||
|
|
|
@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex temp;
|
OPENBLAS_COMPLEX_FLOAT temp;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||||
|
|
|
@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex temp;
|
OPENBLAS_COMPLEX_FLOAT temp;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||||
|
|
|
@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex temp;
|
OPENBLAS_COMPLEX_FLOAT temp;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT ar, ai, br, bi, ratio, den;
|
FLOAT ar, ai, br, bi, ratio, den;
|
||||||
|
|
|
@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex temp;
|
OPENBLAS_COMPLEX_FLOAT temp;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT ar, ai, br, bi, ratio, den;
|
FLOAT ar, ai, br, bi, ratio, den;
|
||||||
|
|
|
@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex temp;
|
OPENBLAS_COMPLEX_FLOAT temp;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||||
|
|
|
@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex temp;
|
OPENBLAS_COMPLEX_FLOAT temp;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||||
|
|
|
@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT ar, ai, br, bi, ratio, den;
|
FLOAT ar, ai, br, bi, ratio, den;
|
||||||
|
|
|
@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT ar, ai, br, bi, ratio, den;
|
FLOAT ar, ai, br, bi, ratio, den;
|
||||||
|
|
|
@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
|
||||||
|
|
||||||
BLASLONG i, is, min_i;
|
BLASLONG i, is, min_i;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex temp;
|
OPENBLAS_COMPLEX_FLOAT temp;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||||
|
|
|
@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
|
||||||
|
|
||||||
BLASLONG i, is, min_i;
|
BLASLONG i, is, min_i;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex temp;
|
OPENBLAS_COMPLEX_FLOAT temp;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||||
|
|
|
@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
|
||||||
|
|
||||||
BLASLONG i, is, min_i;
|
BLASLONG i, is, min_i;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT ar, ai, br, bi, ratio, den;
|
FLOAT ar, ai, br, bi, ratio, den;
|
||||||
|
|
|
@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
|
||||||
|
|
||||||
BLASLONG i, is, min_i;
|
BLASLONG i, is, min_i;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT ar, ai, br, bi, ratio, den;
|
FLOAT ar, ai, br, bi, ratio, den;
|
||||||
|
|
|
@ -1,13 +1,5 @@
|
||||||
include_directories(${CMAKE_SOURCE_DIR})
|
include_directories(${CMAKE_SOURCE_DIR})
|
||||||
|
|
||||||
set(USE_GEMM3M 0)
|
|
||||||
|
|
||||||
if (DEFINED ARCH)
|
|
||||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
|
|
||||||
set(USE_GEMM3M 1)
|
|
||||||
endif ()
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa
|
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa
|
||||||
|
|
||||||
# loop through gemm.c defines
|
# loop through gemm.c defines
|
||||||
|
@ -54,12 +46,41 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type})
|
GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type})
|
||||||
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type})
|
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type})
|
||||||
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type})
|
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type})
|
||||||
|
|
||||||
|
#hemm
|
||||||
|
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type})
|
||||||
|
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type})
|
||||||
|
|
||||||
|
#her2k
|
||||||
|
GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type})
|
||||||
|
GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||||
|
#hemm
|
||||||
|
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
|
||||||
|
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
|
||||||
|
#her2k
|
||||||
|
GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
|
||||||
|
endif()
|
||||||
|
|
||||||
# special gemm defines for complex
|
# special gemm defines for complex
|
||||||
foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
|
foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
|
||||||
string(TOLOWER ${gemm_define} gemm_define_LC)
|
string(TOLOWER ${gemm_define} gemm_define_LC)
|
||||||
GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type})
|
GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type})
|
||||||
|
if(USE_GEMM3M)
|
||||||
|
GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type})
|
||||||
|
endif()
|
||||||
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||||
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type})
|
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type})
|
||||||
|
if(USE_GEMM3M)
|
||||||
|
GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type})
|
||||||
|
endif()
|
||||||
endif ()
|
endif ()
|
||||||
endforeach ()
|
endforeach ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -33,6 +33,8 @@ set(COMMON_SOURCES
|
||||||
xerbla.c
|
xerbla.c
|
||||||
openblas_set_num_threads.c
|
openblas_set_num_threads.c
|
||||||
openblas_error_handle.c
|
openblas_error_handle.c
|
||||||
|
openblas_get_num_procs.c
|
||||||
|
openblas_get_num_threads.c
|
||||||
)
|
)
|
||||||
|
|
||||||
# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling
|
# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling
|
||||||
|
|
|
@ -1,13 +1,16 @@
|
||||||
|
|
||||||
include_directories(${CMAKE_SOURCE_DIR})
|
include_directories(${CMAKE_SOURCE_DIR})
|
||||||
|
|
||||||
|
|
||||||
set(BLAS1_SOURCES
|
set(BLAS1_SOURCES
|
||||||
copy.c
|
copy.c
|
||||||
asum.c nrm2.c
|
nrm2.c
|
||||||
)
|
)
|
||||||
|
|
||||||
set(BLAS1_REAL_ONLY_SOURCES
|
set(BLAS1_REAL_ONLY_SOURCES
|
||||||
rotm.c rotmg.c # N.B. these do not have complex counterparts
|
rotm.c rotmg.c # N.B. these do not have complex counterparts
|
||||||
|
rot.c
|
||||||
|
asum.c
|
||||||
)
|
)
|
||||||
|
|
||||||
# these will have 'z' prepended for the complex version
|
# these will have 'z' prepended for the complex version
|
||||||
|
@ -15,7 +18,7 @@ set(BLAS1_MANGLED_SOURCES
|
||||||
axpy.c swap.c
|
axpy.c swap.c
|
||||||
scal.c
|
scal.c
|
||||||
dot.c
|
dot.c
|
||||||
rot.c rotg.c
|
rotg.c
|
||||||
axpby.c
|
axpby.c
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -31,6 +34,13 @@ set(BLAS2_SOURCES
|
||||||
tpsv.c tpmv.c
|
tpsv.c tpmv.c
|
||||||
)
|
)
|
||||||
|
|
||||||
|
set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES
|
||||||
|
hemv.c hbmv.c
|
||||||
|
her.c her2.c
|
||||||
|
hpmv.c hpr.c
|
||||||
|
hpr2.c
|
||||||
|
)
|
||||||
|
|
||||||
# these do not have separate 'z' sources
|
# these do not have separate 'z' sources
|
||||||
set(BLAS3_SOURCES
|
set(BLAS3_SOURCES
|
||||||
gemm.c symm.c
|
gemm.c symm.c
|
||||||
|
@ -39,6 +49,7 @@ set(BLAS3_SOURCES
|
||||||
|
|
||||||
set(BLAS3_MANGLED_SOURCES
|
set(BLAS3_MANGLED_SOURCES
|
||||||
omatcopy.c imatcopy.c
|
omatcopy.c imatcopy.c
|
||||||
|
geadd.c
|
||||||
)
|
)
|
||||||
|
|
||||||
# generate the BLAS objs once with and once without cblas
|
# generate the BLAS objs once with and once without cblas
|
||||||
|
@ -65,9 +76,14 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS})
|
||||||
GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1)
|
GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1)
|
||||||
GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
|
GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
|
||||||
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
|
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
|
||||||
|
GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4)
|
||||||
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
|
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
|
||||||
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
|
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
|
||||||
|
|
||||||
|
#sdsdot, dsdot
|
||||||
|
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
|
||||||
|
GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
|
||||||
|
|
||||||
# trmm is trsm with a compiler flag set
|
# trmm is trsm with a compiler flag set
|
||||||
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})
|
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})
|
||||||
|
|
||||||
|
@ -86,17 +102,36 @@ endforeach ()
|
||||||
|
|
||||||
# complex-specific sources
|
# complex-specific sources
|
||||||
foreach (float_type ${FLOAT_TYPES})
|
foreach (float_type ${FLOAT_TYPES})
|
||||||
|
|
||||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||||
GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type})
|
GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type})
|
GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("zdot.c" "CONJ" "dotc" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("zdot.c" "" "dotu" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
GenerateNamedObjects("symm.c" "HEMM" "hemm" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("syrk.c" "HEMM" "herk" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("syr2k.c" "HEMM" "her2k" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
if (USE_GEMM3M)
|
||||||
|
GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type})
|
||||||
|
endif()
|
||||||
endif ()
|
endif ()
|
||||||
if (${float_type} STREQUAL "COMPLEX")
|
if (${float_type} STREQUAL "COMPLEX")
|
||||||
GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX")
|
GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX")
|
||||||
GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX")
|
GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX")
|
||||||
|
GenerateNamedObjects("zrot.c" "" "csrot" false "" "" true "COMPLEX")
|
||||||
|
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" false "" "" true "COMPLEX")
|
||||||
|
GenerateNamedObjects("max.c" "USE_ABS" "scamax" false "" "" true "COMPLEX")
|
||||||
|
GenerateNamedObjects("asum.c" "" "scasum" false "" "" true "COMPLEX")
|
||||||
endif ()
|
endif ()
|
||||||
if (${float_type} STREQUAL "ZCOMPLEX")
|
if (${float_type} STREQUAL "ZCOMPLEX")
|
||||||
GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX")
|
GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX")
|
||||||
GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX")
|
GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX")
|
||||||
|
GenerateNamedObjects("zrot.c" "" "zdrot" false "" "" true "ZCOMPLEX")
|
||||||
|
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" false "" "" true "ZCOMPLEX")
|
||||||
|
GenerateNamedObjects("max.c" "USE_ABS" "dzamax" false "" "" true "ZCOMPLEX")
|
||||||
|
GenerateNamedObjects("asum.c" "" "dzasum" false "" "" true "ZCOMPLEX")
|
||||||
endif ()
|
endif ()
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
|
|
|
@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
|
|
||||||
|
|
||||||
long double da = *DA;
|
long double da = *DA;
|
||||||
long double db = *DB;
|
long double db = *DB;
|
||||||
|
|
|
@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (n <= 0) return;
|
|
||||||
|
|
||||||
FLOAT alpha_r = *(ALPHA + 0);
|
FLOAT alpha_r = *(ALPHA + 0);
|
||||||
FLOAT alpha_i = *(ALPHA + 1);
|
FLOAT alpha_i = *(ALPHA + 1);
|
||||||
FLOAT beta_r = *(BETA + 0);
|
FLOAT beta_r = *(BETA + 0);
|
||||||
FLOAT beta_i = *(BETA + 1);
|
FLOAT beta_i = *(BETA + 1);
|
||||||
|
|
||||||
|
if (n <= 0) return;
|
||||||
|
|
||||||
FUNCTION_PROFILE_START();
|
FUNCTION_PROFILE_START();
|
||||||
|
|
||||||
if (incx < 0) x -= (n - 1) * incx * 2;
|
if (incx < 0) x -= (n - 1) * incx * 2;
|
||||||
|
|
|
@ -57,21 +57,25 @@
|
||||||
#ifdef RETURN_BY_STRUCT
|
#ifdef RETURN_BY_STRUCT
|
||||||
MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
||||||
#elif defined RETURN_BY_STACK
|
#elif defined RETURN_BY_STACK
|
||||||
void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
||||||
#else
|
#else
|
||||||
FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
BLASLONG n = *N;
|
BLASLONG n = *N;
|
||||||
BLASLONG incx = *INCX;
|
BLASLONG incx = *INCX;
|
||||||
BLASLONG incy = *INCY;
|
BLASLONG incy = *INCY;
|
||||||
#ifndef RETURN_BY_STACK
|
#ifndef RETURN_BY_STACK
|
||||||
FLOAT _Complex ret;
|
OPENBLAS_COMPLEX_FLOAT ret;
|
||||||
#endif
|
#endif
|
||||||
#ifdef RETURN_BY_STRUCT
|
#ifdef RETURN_BY_STRUCT
|
||||||
MYTYPE myret;
|
MYTYPE myret;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef RETURN_BY_STRUCT
|
||||||
|
OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
|
||||||
|
#endif
|
||||||
|
|
||||||
PRINT_DEBUG_NAME;
|
PRINT_DEBUG_NAME;
|
||||||
|
|
||||||
if (n <= 0) {
|
if (n <= 0) {
|
||||||
|
@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
|
||||||
myret.i = 0.;
|
myret.i = 0.;
|
||||||
return myret;
|
return myret;
|
||||||
#elif defined RETURN_BY_STACK
|
#elif defined RETURN_BY_STACK
|
||||||
*result = ZERO;
|
*result = zero;
|
||||||
return;
|
return;
|
||||||
#else
|
#else
|
||||||
return ZERO;
|
return zero;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -144,21 +148,21 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#ifdef FORCE_USE_STACK
|
#ifdef FORCE_USE_STACK
|
||||||
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){
|
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){
|
||||||
#else
|
#else
|
||||||
FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
||||||
|
|
||||||
FLOAT _Complex ret;
|
OPENBLAS_COMPLEX_FLOAT ret;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
||||||
if (n <= 0) {
|
if (n <= 0) {
|
||||||
#ifdef FORCE_USE_STACK
|
#ifdef FORCE_USE_STACK
|
||||||
*result = ZERO;
|
*result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
|
||||||
return;
|
return;
|
||||||
#else
|
#else
|
||||||
return ZERO;
|
return OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
|
int nthreads_max;
|
||||||
|
int nthreads_avail;
|
||||||
|
double MNK;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
|
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
|
||||||
|
@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N,
|
||||||
blasint lenx, leny;
|
blasint lenx, leny;
|
||||||
blasint i;
|
blasint i;
|
||||||
|
|
||||||
PRINT_DEBUG_NAME;
|
|
||||||
|
|
||||||
FLOAT alpha_r = *(ALPHA + 0);
|
FLOAT alpha_r = *(ALPHA + 0);
|
||||||
FLOAT alpha_i = *(ALPHA + 1);
|
FLOAT alpha_i = *(ALPHA + 1);
|
||||||
|
|
||||||
FLOAT beta_r = *(BETA + 0);
|
FLOAT beta_r = *(BETA + 0);
|
||||||
FLOAT beta_i = *(BETA + 1);
|
FLOAT beta_i = *(BETA + 1);
|
||||||
|
|
||||||
|
PRINT_DEBUG_NAME;
|
||||||
|
|
||||||
TOUPPER(trans);
|
TOUPPER(trans);
|
||||||
|
|
||||||
info = 0;
|
info = 0;
|
||||||
|
@ -153,14 +156,14 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
GEMV_O, GEMV_U, GEMV_S, GEMV_D,
|
GEMV_O, GEMV_U, GEMV_S, GEMV_D,
|
||||||
};
|
};
|
||||||
|
|
||||||
PRINT_DEBUG_CNAME;
|
|
||||||
|
|
||||||
FLOAT alpha_r = *(ALPHA + 0);
|
FLOAT alpha_r = *(ALPHA + 0);
|
||||||
FLOAT alpha_i = *(ALPHA + 1);
|
FLOAT alpha_i = *(ALPHA + 1);
|
||||||
|
|
||||||
FLOAT beta_r = *(BETA + 0);
|
FLOAT beta_r = *(BETA + 0);
|
||||||
FLOAT beta_i = *(BETA + 1);
|
FLOAT beta_i = *(BETA + 1);
|
||||||
|
|
||||||
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
||||||
trans = -1;
|
trans = -1;
|
||||||
info = 0;
|
info = 0;
|
||||||
|
|
||||||
|
@ -234,10 +237,10 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
|
|
||||||
int nthreads_max = num_cpu_avail(2);
|
nthreads_max = num_cpu_avail(2);
|
||||||
int nthreads_avail = nthreads_max;
|
nthreads_avail = nthreads_max;
|
||||||
|
|
||||||
double MNK = (double) m * (double) n;
|
MNK = (double) m * (double) n;
|
||||||
if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) ))
|
if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) ))
|
||||||
nthreads_max = 1;
|
nthreads_max = 1;
|
||||||
|
|
||||||
|
|
|
@ -6,13 +6,7 @@
|
||||||
|
|
||||||
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
||||||
|
|
||||||
PRINT_DEBUG_NAME;
|
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
|
||||||
|
|
||||||
IDEBUG_START;
|
|
||||||
|
|
||||||
FUNCTION_PROFILE_START();
|
|
||||||
|
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
|
|
||||||
|
|
||||||
long double da_r = *(DA + 0);
|
long double da_r = *(DA + 0);
|
||||||
long double da_i = *(DA + 1);
|
long double da_i = *(DA + 1);
|
||||||
|
@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
||||||
|
|
||||||
long double ada = fabs(da_r) + fabs(da_i);
|
long double ada = fabs(da_r) + fabs(da_i);
|
||||||
|
|
||||||
|
PRINT_DEBUG_NAME;
|
||||||
|
|
||||||
|
IDEBUG_START;
|
||||||
|
|
||||||
|
FUNCTION_PROFILE_START();
|
||||||
|
|
||||||
if (ada == ZERO) {
|
if (ada == ZERO) {
|
||||||
*C = ZERO;
|
*C = ZERO;
|
||||||
*(S + 0) = ONE;
|
*(S + 0) = ONE;
|
||||||
|
@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
||||||
FLOAT ada = fabs(da_r) + fabs(da_i);
|
FLOAT ada = fabs(da_r) + fabs(da_i);
|
||||||
FLOAT adb;
|
FLOAT adb;
|
||||||
|
|
||||||
|
PRINT_DEBUG_NAME;
|
||||||
|
|
||||||
|
IDEBUG_START;
|
||||||
|
|
||||||
|
FUNCTION_PROFILE_START();
|
||||||
|
|
||||||
if (ada == ZERO) {
|
if (ada == ZERO) {
|
||||||
*C = ZERO;
|
*C = ZERO;
|
||||||
*(S + 0) = ONE;
|
*(S + 0) = ONE;
|
||||||
|
|
|
@ -17,6 +17,7 @@ endif ()
|
||||||
|
|
||||||
SetDefaultL1()
|
SetDefaultL1()
|
||||||
SetDefaultL2()
|
SetDefaultL2()
|
||||||
|
SetDefaultL3()
|
||||||
ParseMakefileVars("${KERNELDIR}/KERNEL")
|
ParseMakefileVars("${KERNELDIR}/KERNEL")
|
||||||
ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}")
|
ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}")
|
||||||
|
|
||||||
|
@ -65,8 +66,20 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
else ()
|
else ()
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type})
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (${float_type} STREQUAL "COMPLEX")
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type})
|
||||||
|
endif()
|
||||||
|
if (${float_type} STREQUAL "ZCOMPLEX")
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type})
|
||||||
|
endif()
|
||||||
|
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
|
#dsdot,sdsdot
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE")
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE")
|
||||||
|
|
||||||
# Makefile.L2
|
# Makefile.L2
|
||||||
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
|
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
|
||||||
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
|
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
|
||||||
|
@ -86,6 +99,12 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type})
|
||||||
|
|
||||||
else ()
|
else ()
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
|
||||||
|
@ -93,14 +112,9 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
# Makefile.L3
|
# Makefile.L3
|
||||||
set(USE_GEMM3M false)
|
|
||||||
set(USE_TRMM false)
|
set(USE_TRMM false)
|
||||||
|
|
||||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
|
if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic")
|
||||||
set(USE_GEMM3M true)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC")
|
|
||||||
set(USE_TRMM true)
|
set(USE_TRMM true)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
@ -155,6 +169,13 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
|
||||||
|
#hemm
|
||||||
|
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type})
|
||||||
|
|
||||||
else ()
|
else ()
|
||||||
GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type})
|
GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type})
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -241,11 +262,40 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "domatcopy_k_cn" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "domatcopy_k_rn" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "domatcopy_k_ct" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "domatcopy_k_rt" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type})
|
||||||
|
|
||||||
|
if (NOT DEFINED ${float_char}OMATCOPY_CNC)
|
||||||
|
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
|
||||||
|
set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c)
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
if (NOT DEFINED ${float_char}OMATCOPY_RNC)
|
||||||
|
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
|
||||||
|
set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c)
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
if (NOT DEFINED ${float_char}OMATCOPY_CTC)
|
||||||
|
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
|
||||||
|
set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c)
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
if (NOT DEFINED ${float_char}OMATCOPY_RTC)
|
||||||
|
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
|
||||||
|
set(${float_char}OMATCOPY_RTC ../arm/zomatcopy_rtc.c)
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type})
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
# Makefile.LA
|
# Makefile.LA
|
||||||
|
|
|
@ -3459,7 +3459,7 @@ ifndef DGEADD_K
|
||||||
DGEADD_K = ../generic/geadd.c
|
DGEADD_K = ../generic/geadd.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K)
|
$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K)
|
||||||
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@
|
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@
|
||||||
|
|
||||||
ifndef CGEADD_K
|
ifndef CGEADD_K
|
||||||
|
|
|
@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL
|
||||||
BLASLONG ix,iy;
|
BLASLONG ix,iy;
|
||||||
FLOAT temp;
|
FLOAT temp;
|
||||||
|
|
||||||
|
BLASLONG inc_x2;
|
||||||
|
BLASLONG inc_y2;
|
||||||
|
|
||||||
if ( n < 0 ) return(0);
|
if ( n < 0 ) return(0);
|
||||||
|
|
||||||
ix = 0;
|
ix = 0;
|
||||||
iy = 0;
|
iy = 0;
|
||||||
|
|
||||||
BLASLONG inc_x2 = 2 * inc_x;
|
inc_x2 = 2 * inc_x;
|
||||||
BLASLONG inc_y2 = 2 * inc_y;
|
inc_y2 = 2 * inc_y;
|
||||||
|
|
||||||
if ( beta_r == 0.0 && beta_i == 0.0)
|
if ( beta_r == 0.0 && beta_i == 0.0)
|
||||||
{
|
{
|
||||||
|
|
|
@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
{
|
{
|
||||||
BLASLONG i=0;
|
BLASLONG i=0;
|
||||||
BLASLONG ix,iy;
|
BLASLONG ix,iy;
|
||||||
|
BLASLONG inc_x2;
|
||||||
|
BLASLONG inc_y2;
|
||||||
|
|
||||||
if ( n < 0 ) return(0);
|
if ( n < 0 ) return(0);
|
||||||
if ( da_r == 0.0 && da_i == 0.0 ) return(0);
|
if ( da_r == 0.0 && da_i == 0.0 ) return(0);
|
||||||
|
@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
ix = 0;
|
ix = 0;
|
||||||
iy = 0;
|
iy = 0;
|
||||||
|
|
||||||
BLASLONG inc_x2 = 2 * inc_x;
|
inc_x2 = 2 * inc_x;
|
||||||
BLASLONG inc_y2 = 2 * inc_y;
|
inc_y2 = 2 * inc_y;
|
||||||
|
|
||||||
while(i < n)
|
while(i < n)
|
||||||
{
|
{
|
||||||
|
|
|
@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||||
{
|
{
|
||||||
BLASLONG i=0;
|
BLASLONG i=0;
|
||||||
BLASLONG ix=0,iy=0;
|
BLASLONG ix=0,iy=0;
|
||||||
|
BLASLONG inc_x2;
|
||||||
|
BLASLONG inc_y2;
|
||||||
|
|
||||||
if ( n < 0 ) return(0);
|
if ( n < 0 ) return(0);
|
||||||
|
|
||||||
BLASLONG inc_x2 = 2 * inc_x;
|
inc_x2 = 2 * inc_x;
|
||||||
BLASLONG inc_y2 = 2 * inc_y;
|
inc_y2 = 2 * inc_y;
|
||||||
|
|
||||||
while(i < n)
|
while(i < n)
|
||||||
{
|
{
|
||||||
|
|
|
@ -40,24 +40,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <complex.h>
|
#include <complex.h>
|
||||||
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||||
#else
|
#else
|
||||||
openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
BLASLONG i=0;
|
BLASLONG i=0;
|
||||||
BLASLONG ix=0,iy=0;
|
BLASLONG ix=0,iy=0;
|
||||||
FLOAT dot[2];
|
FLOAT dot[2];
|
||||||
FLOAT _Complex result;
|
OPENBLAS_COMPLEX_FLOAT result;
|
||||||
|
BLASLONG inc_x2;
|
||||||
|
BLASLONG inc_y2;
|
||||||
|
|
||||||
dot[0]=0.0;
|
dot[0]=0.0;
|
||||||
dot[1]=0.0;
|
dot[1]=0.0;
|
||||||
|
|
||||||
__real__ result = 0.0 ;
|
CREAL(result) = 0.0 ;
|
||||||
__imag__ result = 0.0 ;
|
CIMAG(result) = 0.0 ;
|
||||||
|
|
||||||
if ( n < 1 ) return(result);
|
if ( n < 1 ) return(result);
|
||||||
|
|
||||||
BLASLONG inc_x2 = 2 * inc_x ;
|
inc_x2 = 2 * inc_x ;
|
||||||
BLASLONG inc_y2 = 2 * inc_y ;
|
inc_y2 = 2 * inc_y ;
|
||||||
|
|
||||||
while(i < n)
|
while(i < n)
|
||||||
{
|
{
|
||||||
|
@ -73,8 +75,8 @@ openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BL
|
||||||
i++ ;
|
i++ ;
|
||||||
|
|
||||||
}
|
}
|
||||||
__real__ result = dot[0];
|
CREAL(result) = dot[0];
|
||||||
__imag__ result = dot[1];
|
CIMAG(result) = dot[1];
|
||||||
return(result);
|
return(result);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
|
||||||
BLASLONG i=0;
|
BLASLONG i=0;
|
||||||
BLASLONG ix=0,iy=0;
|
BLASLONG ix=0,iy=0;
|
||||||
FLOAT temp[2];
|
FLOAT temp[2];
|
||||||
|
BLASLONG inc_x2;
|
||||||
|
BLASLONG inc_y2;
|
||||||
|
|
||||||
if ( n <= 0 ) return(0);
|
if ( n <= 0 ) return(0);
|
||||||
|
|
||||||
BLASLONG inc_x2 = 2 * inc_x ;
|
inc_x2 = 2 * inc_x ;
|
||||||
BLASLONG inc_y2 = 2 * inc_y ;
|
inc_y2 = 2 * inc_y ;
|
||||||
|
|
||||||
while(i < n)
|
while(i < n)
|
||||||
{
|
{
|
||||||
|
|
|
@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
|
||||||
BLASLONG i=0;
|
BLASLONG i=0;
|
||||||
BLASLONG ix=0,iy=0;
|
BLASLONG ix=0,iy=0;
|
||||||
FLOAT temp[2];
|
FLOAT temp[2];
|
||||||
|
BLASLONG inc_x2;
|
||||||
|
BLASLONG inc_y2;
|
||||||
|
|
||||||
if ( n < 0 ) return(0);
|
if ( n < 0 ) return(0);
|
||||||
|
|
||||||
BLASLONG inc_x2 = 2 * inc_x;
|
inc_x2 = 2 * inc_x;
|
||||||
BLASLONG inc_y2 = 2 * inc_y;
|
inc_y2 = 2 * inc_y;
|
||||||
|
|
||||||
while(i < n)
|
while(i < n)
|
||||||
{
|
{
|
||||||
|
|
|
@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||||
ZHEMV_U_KERNEL = ../generic/zhemv_k.c
|
ZHEMV_U_KERNEL = ../generic/zhemv_k.c
|
||||||
ZHEMV_L_KERNEL = ../generic/zhemv_k.c
|
ZHEMV_L_KERNEL = ../generic/zhemv_k.c
|
||||||
|
|
||||||
|
LSAME_KERNEL = ../generic/lsame.c
|
||||||
|
SCABS_KERNEL = ../generic/cabs.c
|
||||||
|
DCABS_KERNEL = ../generic/cabs.c
|
||||||
|
QCABS_KERNEL = ../generic/cabs.c
|
||||||
|
|
||||||
|
#Dump kernel
|
||||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||||
|
|
|
@ -59,7 +59,8 @@ typedef int blasint;
|
||||||
extension since version 3.0. If neither are available, use a compatible
|
extension since version 3.0. If neither are available, use a compatible
|
||||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||||
(__GNUC__ >= 3 && !defined(__cplusplus)))
|
(__GNUC__ >= 3 && !defined(__cplusplus)) || \
|
||||||
|
_MSC_VER >= 1800) // Visual Studio 2013 supports complex
|
||||||
#define OPENBLAS_COMPLEX_C99
|
#define OPENBLAS_COMPLEX_C99
|
||||||
#ifndef __cplusplus
|
#ifndef __cplusplus
|
||||||
#include <complex.h>
|
#include <complex.h>
|
||||||
|
|
Loading…
Reference in New Issue