Use cmake to build OpenBLAS GENERIC Target on MSVC x86 64-bit.

Disable CBLAS and LAPACK.
This commit is contained in:
Zhang Xianyi 2015-08-10 14:10:44 -05:00
parent ab0a0a75fc
commit f874465bb8
51 changed files with 488 additions and 120 deletions

View File

@ -15,11 +15,13 @@ enable_language(C)
set(OpenBLAS_LIBNAME openblas) set(OpenBLAS_LIBNAME openblas)
####### #######
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS and CBLAS)" ON) option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
option(BUILD_WITHOUT_CBLAS "Without CBLAS" ON)
option(BUILD_DEBUG "Build Debug Version" OFF) option(BUILD_DEBUG "Build Debug Version" OFF)
####### #######
if(BUILD_WITHOUT_LAPACK) if(BUILD_WITHOUT_LAPACK)
set(NO_LAPACK 1) set(NO_LAPACK 1)
set(NO_LAPACKE 1)
endif() endif()
if(BUILD_DEBUG) if(BUILD_DEBUG)
@ -27,6 +29,11 @@ set(CMAKE_BUILD_TYPE Debug)
else() else()
set(CMAKE_BUILD_TYPE Release) set(CMAKE_BUILD_TYPE Release)
endif() endif()
if(BUILD_WITHOUT_CBLAS)
set(NO_CBLAS 1)
endif()
####### #######
@ -51,7 +58,6 @@ endif ()
set(SUBDIRS ${BLASDIRS}) set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK) if (NOT NO_LAPACK)
message ("error 1")
list(APPEND SUBDIRS lapack) list(APPEND SUBDIRS lapack)
endif () endif ()
@ -111,15 +117,21 @@ endforeach ()
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
if (NOT NOFORTRAN AND NOT NO_LAPACK) if (NOT NOFORTRAN AND NOT NO_LAPACK)
message ("error 2")
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE) if (NOT NO_LAPACKE)
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
endif () endif ()
endif () endif ()
#Only generate .def for dll on MSVC
if(MSVC)
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif()
# add objects to the openblas lib # add objects to the openblas lib
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${PROJECT_BINARY_DIR}/openblas.def)
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
#only build shared library for MSVC #only build shared library for MSVC
if(NOT MSVC) if(NOT MSVC)

60
cmake/export.cmake Normal file
View File

@ -0,0 +1,60 @@
# Generate the module-definition (.def) file used when linking the OpenBLAS
# DLL. Nothing is done for non-MSVC toolchains.
if(MSVC)
  # The .def file does not exist at configure time; it is written by the
  # PRE_LINK command below, so mark it as a generated source.
  set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)

  # Architecture name handed to gensymbol; falls back to x86_64 when ARCH is
  # not defined.
  if(NOT DEFINED ARCH)
    set(ARCH_IN "x86_64")
  else()
    set(ARCH_IN ${ARCH})
  endif()

  # A generic core overrides the architecture name. The expansion is quoted so
  # that an unset or empty CORE simply compares false instead of collapsing
  # the condition into a malformed if().
  if("${CORE}" STREQUAL "generic")
    set(ARCH_IN "GENERIC")
  endif()

  # Switches forwarded to gensymbol. Each <NAME>_IN mirrors <NAME> when that
  # variable is defined and falls back to 0 otherwise.
  foreach(export_switch
      EXPRECISION
      NO_CBLAS
      NO_LAPACK
      NO_LAPACKE
      NEED2UNDERSCORES
      ONLY_CBLAS)
    if(NOT DEFINED ${export_switch})
      set(${export_switch}_IN 0)
    else()
      set(${export_switch}_IN ${${export_switch}})
    endif()
  endforeach()

  # Run the gensymbol perl script right before the library is linked and
  # redirect its standard output into openblas.def in the build tree.
  add_custom_command(
    TARGET ${OpenBLAS_LIBNAME} PRE_LINK
    COMMAND perl
    ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
    COMMENT "Create openblas.def file"
    VERBATIM)
endif()

View File

@ -25,7 +25,10 @@ if (MSVC)
include(CMakeForceCompiler) include(CMakeForceCompiler)
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif () endif ()
if (NOT NO_LAPACK)
enable_language(Fortran) enable_language(Fortran)
endif()
if (NOT ONLY_CBLAS) if (NOT ONLY_CBLAS)
# N.B. f_check is not cross-platform, so instead try to use CMake variables # N.B. f_check is not cross-platform, so instead try to use CMake variables

View File

@ -99,10 +99,10 @@ macro(SetDefaultL1)
set(QGEMVTKERNEL gemv_t.S) set(QGEMVTKERNEL gemv_t.S)
set(XGEMVNKERNEL zgemv_n.S) set(XGEMVNKERNEL zgemv_n.S)
set(XGEMVTKERNEL zgemv_t.S) set(XGEMVTKERNEL zgemv_t.S)
set(SCABS_KERNEL cabs.S) set(SCABS_KERNEL ../generic/cabs.c)
set(DCABS_KERNEL cabs.S) set(DCABS_KERNEL ../generic/cabs.S)
set(QCABS_KERNEL cabs.S) set(QCABS_KERNEL ../generic/cabs.S)
set(LSAME_KERNEL lsame.S) set(LSAME_KERNEL ../generic/lsame.c)
set(SAXPBYKERNEL ../arm/axpby.c) set(SAXPBYKERNEL ../arm/axpby.c)
set(DAXPBYKERNEL ../arm/axpby.c) set(DAXPBYKERNEL ../arm/axpby.c)
set(CAXPBYKERNEL ../arm/zaxpby.c) set(CAXPBYKERNEL ../arm/zaxpby.c)
@ -156,3 +156,10 @@ macro(SetDefaultL2)
set(XHEMV_V_KERNEL ../generic/zhemv_k.c) set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
set(XHEMV_M_KERNEL ../generic/zhemv_k.c) set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
endmacro () endmacro ()
# Selects the generic C sources for the GEADD kernels of all four precisions.
# Being a macro, the variables are set directly in the caller's scope.
macro(SetDefaultL3)
  # Complex single/double precision share the z-prefixed source.
  set(ZGEADD_KERNEL ../generic/zgeadd.c)
  set(CGEADD_KERNEL ../generic/zgeadd.c)

  # Real single/double precision share the plain source.
  set(DGEADD_KERNEL ../generic/geadd.c)
  set(SGEADD_KERNEL ../generic/geadd.c)
endmacro ()

View File

@ -66,6 +66,11 @@ if (NOT MSVC)
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
endif () endif ()
if (MSVC)
#Use generic for MSVC now
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
endif()
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR}) file(MAKE_DIRECTORY ${GETARCH_DIR})
@ -73,7 +78,7 @@ try_compile(GETARCH_RESULT ${GETARCH_DIR}
SOURCES ${GETARCH_SRC} SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH_LOG OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE ${GETARCH_BIN} COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
) )
message(STATUS "Running getarch") message(STATUS "Running getarch")
@ -95,7 +100,7 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH2_LOG OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${GETARCH2_BIN} COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
) )
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way # use the cmake binary w/ the -E param to run a shell command in a cross-platform way

View File

@ -420,6 +420,21 @@ if (ONLY_CBLAS)
set(LIB_COMPONENTS CBLAS) set(LIB_COMPONENTS CBLAS)
endif () endif ()
# For GEMM3M
# USE_GEMM3M stays 0 unless ARCH is defined and names one of the architectures
# listed below; a "generic" CORE always switches it back off.
set(USE_GEMM3M 0)
if (DEFINED ARCH)
  # ARCH and CORE are expanded inside quotes so that an empty or unset value
  # compares false instead of collapsing the condition into a malformed if().
  if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS")
    set(USE_GEMM3M 1)
  endif ()

  if ("${CORE}" STREQUAL "generic")
    set(USE_GEMM3M 0)
  endif ()
endif ()
#export OSNAME #export OSNAME
#export ARCH #export ARCH
#export CORE #export CORE

View File

@ -102,6 +102,7 @@ endfunction ()
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) # 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) # 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) # 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
# STRING - compiles only the given type (e.g. DOUBLE) # STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in) function(GenerateNamedObjects sources_in)
@ -151,6 +152,9 @@ function(GenerateNamedObjects sources_in)
set(complex_only true) set(complex_only true)
elseif (${ARGV7} EQUAL 3) elseif (${ARGV7} EQUAL 3)
set(mangle_complex_sources true) set(mangle_complex_sources true)
elseif (${ARGV7} EQUAL 4)
set(mangle_complex_sources true)
set(complex_only true)
elseif (NOT ${ARGV7} EQUAL 0) elseif (NOT ${ARGV7} EQUAL 0)
set(float_list ${ARGV7}) set(float_list ${ARGV7})
endif () endif ()

View File

@ -296,13 +296,6 @@ typedef int blasint;
#define COMPSIZE 2 #define COMPSIZE 2
#endif #endif
#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X) (*((FLOAT *)&X + 0))
#define CIMAG(X) (*((FLOAT *)&X + 1))
#else
#define CREAL __real__
#define CIMAG __imag__
#endif
#define Address_H(x) (((x)+(1<<15))>>16) #define Address_H(x) (((x)+(1<<15))>>16)
#define Address_L(x) ((x)-((Address_H(x))<<16)) #define Address_L(x) ((x)-((Address_H(x))<<16))
@ -464,17 +457,49 @@ typedef char* env_var_t;
extension since version 3.0. If neither are available, use a compatible extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) (__GNUC__ >= 3 && !defined(__cplusplus)) || \
_MSC_VER >= 1800) // Visual Studio 2013 supports complex
#define OPENBLAS_COMPLEX_C99 #define OPENBLAS_COMPLEX_C99
typedef float _Complex openblas_complex_float; typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double; typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble; typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else #else
#define OPENBLAS_COMPLEX_STRUCT #define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float; typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double; typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble; typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif #endif
/* Map the precision-neutral complex type and its constructor macro onto the
   variant matching the precision selected at compile time: XDOUBLE first,
   then DOUBLE, otherwise single precision. */
#if defined(XDOUBLE)
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
#define OPENBLAS_MAKE_COMPLEX_FLOAT(re,im) openblas_make_complex_xdouble(re,im)
#elif defined(DOUBLE)
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
#define OPENBLAS_MAKE_COMPLEX_FLOAT(re,im) openblas_make_complex_double(re,im)
#else
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
#define OPENBLAS_MAKE_COMPLEX_FLOAT(re,im) openblas_make_complex_float(re,im)
#endif
/* Accessors for the real and imaginary parts of a complex value. */
#if defined(C_PGI) || defined(C_SUN)
/* Reinterpret the value's storage as consecutive FLOATs and pick element 0
   (real) or 1 (imaginary). The argument is parenthesized so the address-of
   operator applies to the whole expression passed in. */
#define CREAL(X)	(*((FLOAT *)&(X) + 0))
#define CIMAG(X)	(*((FLOAT *)&(X) + 1))
#else
#ifdef OPENBLAS_COMPLEX_STRUCT
/* Struct fallback: read the named members directly. */
#define CREAL(Z)	((Z).real)
#define CIMAG(Z)	((Z).imag)
#else
/* Otherwise use the compiler's __real__ / __imag__ operators. */
#define CREAL	__real__
#define CIMAG	__imag__
#endif
#endif
#endif // ASSEMBLER #endif // ASSEMBLER
#ifndef IFLUSH #ifndef IFLUSH
@ -491,6 +516,10 @@ typedef char* env_var_t;
#endif #endif
#endif #endif
#if defined(C_MSVC)
#define inline __inline
#endif
#ifndef ASSEMBLER #ifndef ASSEMBLER
#ifndef MIN #ifndef MIN

View File

@ -41,6 +41,10 @@
#ifndef ASSEMBLER #ifndef ASSEMBLER
#ifdef C_MSVC
#include <intrin.h>
#endif
#ifdef C_SUN #ifdef C_SUN
#define __asm__ __asm #define __asm__ __asm
#define __volatile__ #define __volatile__
@ -61,30 +65,39 @@
static void __inline blas_lock(volatile BLASULONG *address){ static void __inline blas_lock(volatile BLASULONG *address){
int ret; BLASULONG ret;
do { do {
while (*address) {YIELDING;}; while (*address) {YIELDING;};
#ifndef C_MSVC
__asm__ __volatile__( __asm__ __volatile__(
"xchgl %0, %1\n" "xchgl %0, %1\n"
: "=r"(ret), "=m"(*address) : "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address) : "0"(1), "m"(*address)
: "memory"); : "memory");
#else
ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
#endif
} while (ret); } while (ret);
} }
static __inline BLASULONG rpcc(void){ static __inline BLASULONG rpcc(void){
#ifdef C_MSVC
return __rdtsc();
#else
BLASULONG a, d; BLASULONG a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
return ((BLASULONG)a + ((BLASULONG)d << 32)); return ((BLASULONG)a + ((BLASULONG)d << 32));
#endif
} }
#define RPCC64BIT #define RPCC64BIT
#ifndef C_MSVC
static __inline BLASULONG getstackaddr(void){ static __inline BLASULONG getstackaddr(void){
BLASULONG addr; BLASULONG addr;
@ -93,22 +106,32 @@ static __inline BLASULONG getstackaddr(void){
return addr; return addr;
} }
#endif
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#ifdef C_MSVC
int cpuinfo[4];
__cpuid(cpuinfo, op);
*eax=cpuinfo[0];
*ebx=cpuinfo[1];
*ecx=cpuinfo[2];
*edx=cpuinfo[3];
#else
__asm__ __volatile__("cpuid" __asm__ __volatile__("cpuid"
: "=a" (*eax), : "=a" (*eax),
"=b" (*ebx), "=b" (*ebx),
"=c" (*ecx), "=c" (*ecx),
"=d" (*edx) "=d" (*edx)
: "0" (op)); : "0" (op));
#endif
} }
/* /*
#define WHEREAMI #define WHEREAMI
*/ */
static inline int WhereAmI(void){ static __inline int WhereAmI(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
int apicid; int apicid;
@ -150,10 +173,14 @@ static inline int WhereAmI(void){
#define GET_IMAGE_CANCEL #define GET_IMAGE_CANCEL
#ifdef SMP #ifdef SMP
#ifdef USE64BITINT #if defined(USE64BITINT)
static __inline blasint blas_quickdivide(blasint x, blasint y){ static __inline blasint blas_quickdivide(blasint x, blasint y){
return x / y; return x / y;
} }
#elif defined (C_MSVC)
static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){
return x / y;
}
#else #else
extern unsigned int blas_quick_divide_table[]; extern unsigned int blas_quick_divide_table[];

View File

@ -46,12 +46,28 @@ set(NU_SMP_SOURCES
tbmv_thread.c tbmv_thread.c
) )
set(ULVM_COMPLEX_SOURCES
hbmv_k.c
hpmv_k.c
hpr_k.c
hpr2_k.c
her_k.c
her2_k.c
)
# objects that need LOWER set # objects that need LOWER set
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)
# gbmv uses a lowercase n and t # gbmv uses a lowercase n and t
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
# c/zgbmv
GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2)
GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)
# special defines for complex # special defines for complex
foreach (float_type ${FLOAT_TYPES}) foreach (float_type ${FLOAT_TYPES})
@ -82,6 +98,14 @@ foreach (float_type ${FLOAT_TYPES})
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type}) GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
endforeach () endforeach ()
foreach (ulvm_source ${ULVM_COMPLEX_SOURCES})
string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source})
GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type})
GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type})
GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type})
GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
endforeach()
if (SMP) if (SMP)
GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
@ -103,6 +127,41 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "HEMVREV" "her_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("syr_thread.c" "LOWER;HEMVREV" "her_thread_M" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "HER2" "her2_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "HER2;LOWER" "her2_thread_L" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "HEMVREV" "her2_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("syr2_thread.c" "LOWER;HEMVREV" "her2_thread_M" false "" "" false ${float_type})
foreach (nu_smp_src ${NU_SMP_SOURCES}) foreach (nu_smp_src ${NU_SMP_SOURCES})
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src}) string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})

View File

@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif

View File

@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
a = (FLOAT *)args -> a; a = (FLOAT *)args -> a;

View File

@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
a = (FLOAT *)args -> a; a = (FLOAT *)args -> a;

View File

@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif

View File

@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif

View File

@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef COMPLEX #ifndef COMPLEX
FLOAT result; FLOAT result;
#else #else
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#endif #endif

View File

@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
FLOAT *bufferY = gemvbuffer; FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer; FLOAT *bufferX = gemvbuffer;
#ifdef TRANS #ifdef TRANS
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
if (incy != 1) { if (incy != 1) {

View File

@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = sbmvbuffer; FLOAT *bufferX = sbmvbuffer;
FLOAT temp[2]; FLOAT temp[2];
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);

View File

@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferX = gemvbuffer; FLOAT *bufferX = gemvbuffer;
FLOAT temp[2]; FLOAT temp[2];
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#ifndef HEMVREV #ifndef HEMVREV
#ifndef LOWER #ifndef LOWER
if (i > 0) { if (i > 0) {
FLOAT _Complex result = DOTC_K(i, a, 1, X, 1); result = DOTC_K(i, a, 1, X, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else #else
if (m - i > 1) { if (m - i > 1) {
FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else #else
#ifndef LOWER #ifndef LOWER
if (i > 0) { if (i > 0) {
FLOAT _Complex result = DOTU_K(i, a, 1, X, 1); result = DOTU_K(i, a, 1, X, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#else #else
if (m - i > 1) { if (m - i > 1) {
FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);

View File

@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *bufferY = sbmvbuffer; FLOAT *bufferY = sbmvbuffer;
FLOAT *bufferX = sbmvbuffer; FLOAT *bufferX = sbmvbuffer;
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
a, 1, Y + i * COMPSIZE, 1, NULL, 0); a, 1, Y + i * COMPSIZE, 1, NULL, 0);
if (length > 0) { if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);

View File

@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *gemvbuffer = (FLOAT *)buffer; FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *bufferY = gemvbuffer; FLOAT *bufferY = gemvbuffer;
FLOAT *bufferX = gemvbuffer; FLOAT *bufferX = gemvbuffer;
FLOAT _Complex result;
OPENBLAS_COMPLEX_FLOAT result;
if (incy != 1) { if (incy != 1) {
Y = bufferY; Y = bufferY;

View File

@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
FLOAT *B = b; FLOAT *B = b;
BLASLONG length; BLASLONG length;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
BLASLONG i; BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp; OPENBLAS_COMPLEX_FLOAT temp;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT atemp1, atemp2, btemp1, btemp2; FLOAT atemp1, atemp2, btemp1, btemp2;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
BLASLONG i, is, min_i; BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4) #if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
#endif #endif
#ifndef UNIT #ifndef UNIT
FLOAT ar, ai, br, bi, ratio, den; FLOAT ar, ai, br, bi, ratio, den;

View File

@ -1,13 +1,5 @@
include_directories(${CMAKE_SOURCE_DIR}) include_directories(${CMAKE_SOURCE_DIR})
set(USE_GEMM3M 0)
if (DEFINED ARCH)
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
set(USE_GEMM3M 1)
endif ()
endif ()
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa
# loop through gemm.c defines # loop through gemm.c defines
@ -54,12 +46,41 @@ foreach (float_type ${FLOAT_TYPES})
GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type}) GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type})
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type})
GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type})
# hemm drivers: zhemm_k.c built with NN (hemm_L) and with NC;RSIDE (hemm_R).
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type})
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type})

# her2k: the kernel combinations plus the four zher2k_k.c drivers, selected by
# the LOWER and TRANS;CONJ defines on top of HER2K.
GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type})
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})

if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
  # Threaded hemm drivers: same sources as above with THREADED_LEVEL3 added.
  GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
  GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
  # The four her2k_* drivers are intentionally not repeated here: this branch
  # used to register them again with identical source, defines and names,
  # which only duplicated the objects already registered above.
endif ()
# special gemm defines for complex # special gemm defines for complex
foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
string(TOLOWER ${gemm_define} gemm_define_LC) string(TOLOWER ${gemm_define} gemm_define_LC)
GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type}) GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type})
if(USE_GEMM3M)
GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type})
endif()
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type})
if(USE_GEMM3M)
GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type})
endif()
endif () endif ()
endforeach () endforeach ()
endif () endif ()

View File

@ -33,6 +33,8 @@ set(COMMON_SOURCES
xerbla.c xerbla.c
openblas_set_num_threads.c openblas_set_num_threads.c
openblas_error_handle.c openblas_error_handle.c
openblas_get_num_procs.c
openblas_get_num_threads.c
) )
# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling # these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling

View File

@ -1,13 +1,16 @@
include_directories(${CMAKE_SOURCE_DIR}) include_directories(${CMAKE_SOURCE_DIR})
set(BLAS1_SOURCES set(BLAS1_SOURCES
copy.c copy.c
asum.c nrm2.c nrm2.c
) )
set(BLAS1_REAL_ONLY_SOURCES set(BLAS1_REAL_ONLY_SOURCES
rotm.c rotmg.c # N.B. these do not have complex counterparts rotm.c rotmg.c # N.B. these do not have complex counterparts
rot.c
asum.c
) )
# these will have 'z' prepended for the complex version # these will have 'z' prepended for the complex version
@ -15,7 +18,7 @@ set(BLAS1_MANGLED_SOURCES
axpy.c swap.c axpy.c swap.c
scal.c scal.c
dot.c dot.c
rot.c rotg.c rotg.c
axpby.c axpby.c
) )
@ -31,6 +34,13 @@ set(BLAS2_SOURCES
tpsv.c tpmv.c tpsv.c tpmv.c
) )
# BLAS2 interface sources collected under the "complex only, mangled" list
# (hemv/hbmv, her/her2, hpmv/hpr/hpr2). Order is kept as originally written.
set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES
  hemv.c
  hbmv.c
  her.c
  her2.c
  hpmv.c
  hpr.c
  hpr2.c
)
# these do not have separate 'z' sources # these do not have separate 'z' sources
set(BLAS3_SOURCES set(BLAS3_SOURCES
gemm.c symm.c gemm.c symm.c
@ -39,6 +49,7 @@ set(BLAS3_SOURCES
set(BLAS3_MANGLED_SOURCES set(BLAS3_MANGLED_SOURCES
omatcopy.c imatcopy.c omatcopy.c imatcopy.c
geadd.c
) )
# generate the BLAS objs once with and once without cblas # generate the BLAS objs once with and once without cblas
@ -65,9 +76,14 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS})
GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1)
GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4)
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
# sdsdot and dsdot: each is built from its own source file under an explicit
# object name, with "SINGLE" passed as the float type and the current
# CBLAS_FLAG forwarded.
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
# trmm is trsm with a compiler flag set # trmm is trsm with a compiler flag set
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})
@ -86,17 +102,36 @@ endforeach ()
# complex-specific sources # complex-specific sources
foreach (float_type ${FLOAT_TYPES}) foreach (float_type ${FLOAT_TYPES})
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type}) GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type})
GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type}) GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type})
# dotc and dotu share zdot.c; only dotc is compiled with the CONJ define.
GenerateNamedObjects("zdot.c" "CONJ" "dotc" false "" "" false ${float_type})
GenerateNamedObjects("zdot.c" "" "dotu" false "" "" false ${float_type})
# hemm, herk and her2k reuse the symm/syrk/syr2k sources with the HEMM define.
GenerateNamedObjects("symm.c" "HEMM" "hemm" false "" "" false ${float_type})
GenerateNamedObjects("syrk.c" "HEMM" "herk" false "" "" false ${float_type})
GenerateNamedObjects("syr2k.c" "HEMM" "her2k" false "" "" false ${float_type})
# gemm3m is gemm.c built with the GEMM3M define; registered only when
# USE_GEMM3M evaluates true (it is not set in this part of the file).
if (USE_GEMM3M)
GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type})
endif()
endif () endif ()
if (${float_type} STREQUAL "COMPLEX") if (${float_type} STREQUAL "COMPLEX")
GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX")
GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX") GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX")
# COMPLEX-only objects with explicit names: csrot from zrot.c, scamin/scamax
# from max.c (USE_ABS, plus USE_MIN for the min variant), scasum from asum.c.
GenerateNamedObjects("zrot.c" "" "csrot" false "" "" true "COMPLEX")
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" false "" "" true "COMPLEX")
GenerateNamedObjects("max.c" "USE_ABS" "scamax" false "" "" true "COMPLEX")
GenerateNamedObjects("asum.c" "" "scasum" false "" "" true "COMPLEX")
endif () endif ()
if (${float_type} STREQUAL "ZCOMPLEX") if (${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX")
GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX") GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX")
# ZCOMPLEX-only objects with explicit names: zdrot from zrot.c, dzamin/dzamax
# from max.c (USE_ABS, plus USE_MIN for the min variant), dzasum from asum.c.
GenerateNamedObjects("zrot.c" "" "zdrot" false "" "" true "ZCOMPLEX")
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" false "" "" true "ZCOMPLEX")
GenerateNamedObjects("max.c" "USE_ABS" "dzamax" false "" "" true "ZCOMPLEX")
GenerateNamedObjects("asum.c" "" "dzasum" false "" "" true "ZCOMPLEX")
endif () endif ()
endforeach () endforeach ()

View File

@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
#endif #endif
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
long double da = *DA; long double da = *DA;
long double db = *DB; long double db = *DB;

View File

@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *
#endif #endif
if (n <= 0) return;
FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1); FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0); FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1); FLOAT beta_i = *(BETA + 1);
if (n <= 0) return;
FUNCTION_PROFILE_START(); FUNCTION_PROFILE_START();
if (incx < 0) x -= (n - 1) * incx * 2; if (incx < 0) x -= (n - 1) * incx * 2;

View File

@ -57,21 +57,25 @@
#ifdef RETURN_BY_STRUCT #ifdef RETURN_BY_STRUCT
MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#elif defined RETURN_BY_STACK #elif defined RETURN_BY_STACK
void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#else #else
FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) {
#endif #endif
BLASLONG n = *N; BLASLONG n = *N;
BLASLONG incx = *INCX; BLASLONG incx = *INCX;
BLASLONG incy = *INCY; BLASLONG incy = *INCY;
#ifndef RETURN_BY_STACK #ifndef RETURN_BY_STACK
FLOAT _Complex ret; OPENBLAS_COMPLEX_FLOAT ret;
#endif #endif
#ifdef RETURN_BY_STRUCT #ifdef RETURN_BY_STRUCT
MYTYPE myret; MYTYPE myret;
#endif #endif
#ifndef RETURN_BY_STRUCT
OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
#endif
PRINT_DEBUG_NAME; PRINT_DEBUG_NAME;
if (n <= 0) { if (n <= 0) {
@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
myret.i = 0.; myret.i = 0.;
return myret; return myret;
#elif defined RETURN_BY_STACK #elif defined RETURN_BY_STACK
*result = ZERO; *result = zero;
return; return;
#else #else
return ZERO; return zero;
#endif #endif
} }
@ -144,21 +148,21 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX,
#else #else
#ifdef FORCE_USE_STACK #ifdef FORCE_USE_STACK
void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){
#else #else
FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
FLOAT _Complex ret; OPENBLAS_COMPLEX_FLOAT ret;
#endif #endif
PRINT_DEBUG_CNAME; PRINT_DEBUG_CNAME;
if (n <= 0) { if (n <= 0) {
#ifdef FORCE_USE_STACK #ifdef FORCE_USE_STACK
*result = ZERO; *result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
return; return;
#else #else
return ZERO; return OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
#endif #endif
} }

View File

@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
FLOAT *buffer; FLOAT *buffer;
#ifdef SMP #ifdef SMP
int nthreads; int nthreads;
int nthreads_max;
int nthreads_avail;
double MNK;
#endif #endif
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N,
blasint lenx, leny; blasint lenx, leny;
blasint i; blasint i;
PRINT_DEBUG_NAME;
FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1); FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0); FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1); FLOAT beta_i = *(BETA + 1);
PRINT_DEBUG_NAME;
TOUPPER(trans); TOUPPER(trans);
info = 0; info = 0;
@ -153,14 +156,14 @@ void CNAME(enum CBLAS_ORDER order,
GEMV_O, GEMV_U, GEMV_S, GEMV_D, GEMV_O, GEMV_U, GEMV_S, GEMV_D,
}; };
PRINT_DEBUG_CNAME;
FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_r = *(ALPHA + 0);
FLOAT alpha_i = *(ALPHA + 1); FLOAT alpha_i = *(ALPHA + 1);
FLOAT beta_r = *(BETA + 0); FLOAT beta_r = *(BETA + 0);
FLOAT beta_i = *(BETA + 1); FLOAT beta_i = *(BETA + 1);
PRINT_DEBUG_CNAME;
trans = -1; trans = -1;
info = 0; info = 0;
@ -234,10 +237,10 @@ void CNAME(enum CBLAS_ORDER order,
#ifdef SMP #ifdef SMP
int nthreads_max = num_cpu_avail(2); nthreads_max = num_cpu_avail(2);
int nthreads_avail = nthreads_max; nthreads_avail = nthreads_max;
double MNK = (double) m * (double) n; MNK = (double) m * (double) n;
if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) ))
nthreads_max = 1; nthreads_max = 1;

View File

@ -6,13 +6,7 @@
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
PRINT_DEBUG_NAME; #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
IDEBUG_START;
FUNCTION_PROFILE_START();
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
long double da_r = *(DA + 0); long double da_r = *(DA + 0);
long double da_i = *(DA + 1); long double da_i = *(DA + 1);
@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
long double ada = fabs(da_r) + fabs(da_i); long double ada = fabs(da_r) + fabs(da_i);
PRINT_DEBUG_NAME;
IDEBUG_START;
FUNCTION_PROFILE_START();
if (ada == ZERO) { if (ada == ZERO) {
*C = ZERO; *C = ZERO;
*(S + 0) = ONE; *(S + 0) = ONE;
@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
FLOAT ada = fabs(da_r) + fabs(da_i); FLOAT ada = fabs(da_r) + fabs(da_i);
FLOAT adb; FLOAT adb;
PRINT_DEBUG_NAME;
IDEBUG_START;
FUNCTION_PROFILE_START();
if (ada == ZERO) { if (ada == ZERO) {
*C = ZERO; *C = ZERO;
*(S + 0) = ONE; *(S + 0) = ONE;

View File

@ -17,6 +17,7 @@ endif ()
SetDefaultL1() SetDefaultL1()
SetDefaultL2() SetDefaultL2()
SetDefaultL3()
ParseMakefileVars("${KERNELDIR}/KERNEL") ParseMakefileVars("${KERNELDIR}/KERNEL")
ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}")
@ -65,8 +66,20 @@ foreach (float_type ${FLOAT_TYPES})
else () else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type})
endif () endif ()
# For the complex float types, register ${float_char}ROTKERNEL under a
# type-specific object name: drot_k for ZCOMPLEX, srot_k for COMPLEX.
# Other float types register nothing here.
if (${float_type} STREQUAL "ZCOMPLEX")
  GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type})
elseif (${float_type} STREQUAL "COMPLEX")
  GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type})
endif ()
endforeach () endforeach ()
# dsdot / sdsdot kernels: both entries compile ${DSDOTKERNEL} with the DSDOT
# define and "SINGLE" as the float type; only the object name differs.
# NOTE(review): the first name contains a literal '*' ("d*dot_k") - presumably a
# placeholder that GenerateNamedObjects expands; confirm it is not a typo.
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE")
# Makefile.L2 # Makefile.L2
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
@ -86,6 +99,12 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type})
# hemv kernels: the U, L, V and M variants each come from their own
# ${float_char}HEMV_*_KERNEL variable, all built with HEMV; LOWER marks the
# L and M variants and HEMVREV marks the V and M variants.
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type})
else () else ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
@ -93,14 +112,9 @@ foreach (float_type ${FLOAT_TYPES})
endforeach () endforeach ()
# Makefile.L3 # Makefile.L3
set(USE_GEMM3M false)
set(USE_TRMM false) set(USE_TRMM false)
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic")
set(USE_GEMM3M true)
endif ()
if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC")
set(USE_TRMM true) set(USE_TRMM true)
endif () endif ()
@ -155,6 +169,13 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type})
# hemm copy routines from the generic zhemm_utcopy_* / zhemm_ltcopy_* sources.
# The "i"-prefixed objects pick the file by ${float_char}GEMM_UNROLL_M; the
# "o"-prefixed ones pick it by ${float_char}GEMM_UNROLL_N and add the OUTER
# define. The ltcopy variants are built with LOWER.
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type})
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type})
else () else ()
GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type})
endif () endif ()
@ -241,11 +262,40 @@ foreach (float_type ${FLOAT_TYPES})
endif () endif ()
endif () endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "domatcopy_k_cn" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "domatcopy_k_rn" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "domatcopy_k_ct" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "domatcopy_k_rt" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type})
# Conjugating omatcopy kernels exist only for the complex float chars (Z, C).
# Any ${float_char}OMATCOPY_{CNC,RNC,CTC,RTC} variable that is not already
# defined falls back to the matching ../arm/zomatcopy_*.c source; the four
# objects are then registered with CONJ (plus ROWM for the row-major variants).
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C")
  if (NOT DEFINED ${float_char}OMATCOPY_CNC)
    set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c)
  endif ()
  if (NOT DEFINED ${float_char}OMATCOPY_RNC)
    set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c)
  endif ()
  if (NOT DEFINED ${float_char}OMATCOPY_CTC)
    set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c)
  endif ()
  if (NOT DEFINED ${float_char}OMATCOPY_RTC)
    set(${float_char}OMATCOPY_RTC ../arm/zomatcopy_rtc.c)
  endif ()

  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type})
endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach () endforeach ()
# Makefile.LA # Makefile.LA

View File

@ -3459,7 +3459,7 @@ ifndef DGEADD_K
DGEADD_K = ../generic/geadd.c DGEADD_K = ../generic/geadd.c
endif endif
$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) $(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K)
$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@
ifndef CGEADD_K ifndef CGEADD_K

View File

@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL
BLASLONG ix,iy; BLASLONG ix,iy;
FLOAT temp; FLOAT temp;
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n < 0 ) return(0);
ix = 0; ix = 0;
iy = 0; iy = 0;
BLASLONG inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;
if ( beta_r == 0.0 && beta_i == 0.0) if ( beta_r == 0.0 && beta_i == 0.0)
{ {

View File

@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix,iy; BLASLONG ix,iy;
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n < 0 ) return(0);
if ( da_r == 0.0 && da_i == 0.0 ) return(0); if ( da_r == 0.0 && da_i == 0.0 ) return(0);
@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
ix = 0; ix = 0;
iy = 0; iy = 0;
BLASLONG inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;
while(i < n) while(i < n)
{ {

View File

@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n < 0 ) return(0);
BLASLONG inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;
while(i < n) while(i < n)
{ {

View File

@ -40,24 +40,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <complex.h> #include <complex.h>
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else #else
openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif #endif
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT dot[2]; FLOAT dot[2];
FLOAT _Complex result; OPENBLAS_COMPLEX_FLOAT result;
BLASLONG inc_x2;
BLASLONG inc_y2;
dot[0]=0.0; dot[0]=0.0;
dot[1]=0.0; dot[1]=0.0;
__real__ result = 0.0 ; CREAL(result) = 0.0 ;
__imag__ result = 0.0 ; CIMAG(result) = 0.0 ;
if ( n < 1 ) return(result); if ( n < 1 ) return(result);
BLASLONG inc_x2 = 2 * inc_x ; inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ; inc_y2 = 2 * inc_y ;
while(i < n) while(i < n)
{ {
@ -73,8 +75,8 @@ openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BL
i++ ; i++ ;
} }
__real__ result = dot[0]; CREAL(result) = dot[0];
__imag__ result = dot[1]; CIMAG(result) = dot[1];
return(result); return(result);
} }

View File

@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT temp[2]; FLOAT temp[2];
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n <= 0 ) return(0); if ( n <= 0 ) return(0);
BLASLONG inc_x2 = 2 * inc_x ; inc_x2 = 2 * inc_x ;
BLASLONG inc_y2 = 2 * inc_y ; inc_y2 = 2 * inc_y ;
while(i < n) while(i < n)
{ {

View File

@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT temp[2]; FLOAT temp[2];
BLASLONG inc_x2;
BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n < 0 ) return(0);
BLASLONG inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;
while(i < n) while(i < n)
{ {

View File

@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c
ZHEMV_U_KERNEL = ../generic/zhemv_k.c ZHEMV_U_KERNEL = ../generic/zhemv_k.c
ZHEMV_L_KERNEL = ../generic/zhemv_k.c ZHEMV_L_KERNEL = ../generic/zhemv_k.c
LSAME_KERNEL = ../generic/lsame.c
SCABS_KERNEL = ../generic/cabs.c
DCABS_KERNEL = ../generic/cabs.c
QCABS_KERNEL = ../generic/cabs.c
#Dump kernel
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c

View File

@ -59,7 +59,8 @@ typedef int blasint;
extension since version 3.0. If neither are available, use a compatible extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) (__GNUC__ >= 3 && !defined(__cplusplus)) || \
_MSC_VER >= 1800) // Visual Studio 2013 supports complex
#define OPENBLAS_COMPLEX_C99 #define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus #ifndef __cplusplus
#include <complex.h> #include <complex.h>