Merge branch 'develop' into develop

This commit is contained in:
Martin Kroeker 2020-10-11 23:34:14 +02:00 committed by GitHub
commit 1c0b03efb4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 129 additions and 105 deletions

View File

@ -29,8 +29,10 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc
else() else()
set(NO_AFFINITY 1) set(NO_AFFINITY 1)
endif() endif()
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF) option(BUILD_SINGLE "Single precision" OFF)
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF) option(BUILD_DOUBLE "Double precision" OFF)
option(BUILD_COMPLEX "Complex precision" OFF)
option(BUILD_COMPLEX16 "Double complex precision" OFF)
# Add a prefix or suffix to all exported symbol names in the shared library. # Add a prefix or suffix to all exported symbol names in the shared library.
# Avoids conflicts with other BLAS libraries, especially when using # Avoids conflicts with other BLAS libraries, especially when using
@ -108,28 +110,33 @@ endif()
set(FLOAT_TYPES "") set(FLOAT_TYPES "")
if (BUILD_SINGLE) if (BUILD_SINGLE)
message(STATUS "Building Single Precision") message(STATUS "Building Single Precision")
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing list(APPEND FLOAT_TYPES "SINGLE")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1")
endif () endif ()
if (BUILD_DOUBLE) if (BUILD_DOUBLE)
message(STATUS "Building Double Precision") message(STATUS "Building Double Precision")
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE list(APPEND FLOAT_TYPES "DOUBLE")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1")
endif () endif ()
if (BUILD_COMPLEX) if (BUILD_COMPLEX)
message(STATUS "Building Complex Precision") message(STATUS "Building Complex Precision")
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX list(APPEND FLOAT_TYPES "COMPLEX")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1")
endif () endif ()
if (BUILD_COMPLEX16) if (BUILD_COMPLEX16)
message(STATUS "Building Double Complex Precision") message(STATUS "Building Double Complex Precision")
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE list(APPEND FLOAT_TYPES "ZCOMPLEX")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1")
endif () endif ()
if (BUILD_HALF) if (BUILD_HALF)
message(STATUS "Building Half Precision") message(STATUS "Building Half Precision")
list(APPEND FLOAT_TYPES "HALF") # defines nothing list(APPEND FLOAT_TYPES "HALF")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_HALF")
endif () endif ()
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
@ -236,9 +243,6 @@ if (NOT MSVC AND NOT NOFORTRAN)
add_subdirectory(ctest) add_subdirectory(ctest)
endif() endif()
add_subdirectory(lapack-netlib/TESTING) add_subdirectory(lapack-netlib/TESTING)
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
add_subdirectory(cpp_thread_test)
endif()
endif() endif()
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES

View File

@ -146,9 +146,6 @@ ifneq ($(NO_CBLAS), 1)
ifeq ($(CPP_THREAD_SAFETY_TEST), 1) ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
$(MAKE) -C cpp_thread_test all $(MAKE) -C cpp_thread_test all
endif endif
ifeq ($(CPP_THREAD_SAFETY_GEMV), 1)
$(MAKE) -C cpp_thread_test dgemv_tester
endif
endif endif
endif endif

View File

@ -272,33 +272,17 @@ COMMON_PROF = -pg
# work at all. # work at all.
# #
# CPP_THREAD_SAFETY_TEST = 1 # CPP_THREAD_SAFETY_TEST = 1
#
# use this to run only the less memory-hungry GEMV test
# CPP_THREAD_SAFETY_GEMV = 1
# If you want to enable the experimental BFLOAT16 support # If you want to enable the experimental BFLOAT16 support
# BUILD_HALF = 1 # BUILD_HALF = 1
#
# Select if you need to build only select types
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS # BUILD_SINGLE = 1
# will be allocated on the heap rather than the stack. (This array alone requires # BUILD_DOUBLE = 1
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu # BUILD_COMPLEX = 1
# counts, but obviously it is not the only item that ends up on the stack. # BUILD_COMPLEX16 = 1
# The default value of 32 ensures that the overall requirement is compatible #
# with the default 1MB stacksize imposed by having the Java VM loaded without use #
# of its -Xss parameter.
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java
# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code
# BLAS3_MEM_ALLOC_THRESHOLD = 160
# the below is not yet configurable, use cmake if you need to build only select types
BUILD_SINGLE = 1
BUILD_DOUBLE = 1
BUILD_COMPLEX = 1
BUILD_COMPLEX16 = 1
# End of user configuration # End of user configuration
# #

View File

@ -70,9 +70,6 @@ if (DEFINED TARGET)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif() endif()
endif() endif()
if (DEFINED HAVE_SSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
endif() endif()
if (DEFINED TARGET) if (DEFINED TARGET)
@ -326,13 +323,7 @@ else ()
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_STACK_ALLOC=2048") set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_STACK_ALLOC=2048")
endif () endif ()
endif () endif ()
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
if (DEFINED BLAS3_MEM_ALLOC_THRESHOLD)
if (NOT ${BLAS3_MEM_ALLOC_THRESHOLD} EQUAL 32)
set(CCOMMON_OPT "${CCOMMON_OPT} -DBLAS3_MEM_ALLOC_THRESHOLD=${BLAS3_MEM_ALLOC_THRESHOLD}")
endif()
endif()
endif()
if (DEFINED LIBNAMESUFFIX) if (DEFINED LIBNAMESUFFIX)
set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}") set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
else () else ()
@ -410,16 +401,20 @@ if (NOT BUILD_SINGLE AND NOT BUILD_DOUBLE AND NOT BUILD_COMPLEX AND NOT BUILD_CO
set (BUILD_COMPLEX16 ON) set (BUILD_COMPLEX16 ON)
endif() endif()
if (BUILD_SINGLE) if (BUILD_SINGLE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_SINGLE") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_SINGLE=1")
set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1")
endif() endif()
if (BUILD_DOUBLE) if (BUILD_DOUBLE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1")
set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_DOUBLE=1")
endif() endif()
if (BUILD_COMPLEX) if (BUILD_COMPLEX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1")
set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_COMPLEX=1")
endif() endif()
if (BUILD_COMPLEX16) if (BUILD_COMPLEX16)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1")
set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_COMPLEX16=1")
endif() endif()
if(NOT MSVC) if(NOT MSVC)
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CCOMMON_OPT}") set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CCOMMON_OPT}")

View File

@ -121,6 +121,6 @@ endif()
include(CheckIncludeFile) include(CheckIncludeFile)
CHECK_INCLUDE_FILE("stdatomic.h" HAVE_C11) CHECK_INCLUDE_FILE("stdatomic.h" HAVE_C11)
if (HAVE_C11) if (HAVE_C11 EQUAL 1)
set (CCOMMON_OPT "${CCOMMON_OPT} -DHAVE_C11") set (CCOMMON_OPT "${CCOMMON_OPT} -DHAVE_C11")
endif() endif()

View File

@ -147,14 +147,14 @@ BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG);
#endif #endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) #if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) || (BUILD_COMPLEX16)
int sgemm_p, sgemm_q, sgemm_r; int sgemm_p, sgemm_q, sgemm_r;
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn; int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
#endif #endif
int exclusive_cache; int exclusive_cache;
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) #if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
float (*samax_k) (BLASLONG, float *, BLASLONG); float (*samax_k) (BLASLONG, float *, BLASLONG);
float (*samin_k) (BLASLONG, float *, BLASLONG); float (*samin_k) (BLASLONG, float *, BLASLONG);
float (*smax_k) (BLASLONG, float *, BLASLONG); float (*smax_k) (BLASLONG, float *, BLASLONG);
@ -167,10 +167,11 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
float (*snrm2_k) (BLASLONG, float *, BLASLONG); float (*snrm2_k) (BLASLONG, float *, BLASLONG);
float (*sasum_k) (BLASLONG, float *, BLASLONG); float (*sasum_k) (BLASLONG, float *, BLASLONG);
#endif #endif
#ifdef BUILD_SINGLE
#if BUILD_SINGLE
float (*ssum_k) (BLASLONG, float *, BLASLONG); float (*ssum_k) (BLASLONG, float *, BLASLONG);
#endif #endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) #if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); //double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
@ -178,20 +179,26 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) || (BUILD_COMPLEX16)
int (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
int (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif #endif
#ifdef BUILD_SINGLE
#if BUILD_SINGLE
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif #endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
#ifdef ARCH_X86_64 #ifdef ARCH_X86_64
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG); void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K); int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
@ -206,7 +213,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
#endif #endif
#ifdef BUILD_SINGLE
#if (BUILD_SINGLE) || (BUILD_DOUBLE)
int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
@ -228,7 +236,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); int (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); int (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); int (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
#endif
#if BUILD_SINGLE
int (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
@ -255,17 +264,18 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*ssymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); int (*ssymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
int (*ssymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); int (*ssymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
int (*ssymm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); int (*ssymm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
#endif
#if (BUILD_SINGLE) || (BUILD_DOUBLE)
int (*sneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
int dgemm_p, dgemm_q, dgemm_r; int dgemm_p, dgemm_q, dgemm_r;
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn; int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
double (*damax_k) (BLASLONG, double *, BLASLONG); double (*damax_k) (BLASLONG, double *, BLASLONG);
double (*damin_k) (BLASLONG, double *, BLASLONG); double (*damin_k) (BLASLONG, double *, BLASLONG);
double (*dmax_k) (BLASLONG, double *, BLASLONG); double (*dmax_k) (BLASLONG, double *, BLASLONG);
@ -276,21 +286,21 @@ BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG);
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
double (*dnrm2_k) (BLASLONG, double *, BLASLONG); double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
double (*dasum_k) (BLASLONG, double *, BLASLONG); double (*dasum_k) (BLASLONG, double *, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if BUILD_DOUBLE
double (*dsum_k) (BLASLONG, double *, BLASLONG); double (*dsum_k) (BLASLONG, double *, BLASLONG);
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
#endif #endif
#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE) #if (BUILD_SINGLE) || (BUILD_DOUBLE)
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) #if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@ -298,13 +308,15 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif #endif
#ifdef BUILD_DOUBLE
#if BUILD_DOUBLE
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif #endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG); int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@ -313,7 +325,8 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
#endif #endif
#ifdef BUILD_DOUBLE
#if BUILD_DOUBLE
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
@ -460,23 +473,30 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
int cgemm_p, cgemm_q, cgemm_r; int cgemm_p, cgemm_q, cgemm_r;
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn; int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
float (*camax_k) (BLASLONG, float *, BLASLONG); float (*camax_k) (BLASLONG, float *, BLASLONG);
float (*camin_k) (BLASLONG, float *, BLASLONG); float (*camin_k) (BLASLONG, float *, BLASLONG);
BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG); BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG); BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
#endif
#if BUILD_COMPLEX
float (*cnrm2_k) (BLASLONG, float *, BLASLONG); float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
float (*casum_k) (BLASLONG, float *, BLASLONG); float (*casum_k) (BLASLONG, float *, BLASLONG);
float (*csum_k) (BLASLONG, float *, BLASLONG); float (*csum_k) (BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_COMPLEX)|| (BUILD_COMPLEX16)
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if BUILD_COMPLEX
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
#endif
#if (BUILD_COMPLEX)|| (BUILD_COMPLEX16)
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
@ -490,6 +510,8 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
int (*cgemv_u) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*cgemv_u) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*cgemv_s) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*cgemv_s) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*cgemv_d) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*cgemv_d) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif
#if (BUILD_COMPLEX)
int (*cgeru_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*cgeru_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*cgerc_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*cgerc_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*cgerv_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*cgerv_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
@ -501,13 +523,14 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
int (*chemv_U) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*chemv_U) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*chemv_M) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*chemv_M) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*chemv_V) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*chemv_V) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
int (*cgemm_kernel_n )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG); int (*cgemm_kernel_n )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm_kernel_l )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG); int (*cgemm_kernel_l )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm_kernel_r )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG); int (*cgemm_kernel_r )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm_kernel_b )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG); int (*cgemm_kernel_b )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*cgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*cgemm_incopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*cgemm_incopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*cgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*cgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*cgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*cgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
@ -538,6 +561,8 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
int (*ctrsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); int (*ctrsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*ctrsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); int (*ctrsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*ctrsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); int (*ctrsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
#endif
#if (BUILD_COMPLEX)
int (*ctrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG); int (*ctrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*ctrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG); int (*ctrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
@ -621,12 +646,14 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
int (*chemm3m_olcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *); int (*chemm3m_olcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
int (*chemm3m_oucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *); int (*chemm3m_oucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
int (*chemm3m_olcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *); int (*chemm3m_olcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
#endif
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
int (*cneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*cneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif #endif
#ifdef BUILD_COMPLEX16
#if BUILD_COMPLEX16
int zgemm_p, zgemm_q, zgemm_r; int zgemm_p, zgemm_q, zgemm_r;
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn; int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
@ -964,34 +991,35 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
void (*init)(void); void (*init)(void);
int snum_opt, dnum_opt, qnum_opt; int snum_opt, dnum_opt, qnum_opt;
#ifdef BUILD_SINGLE
#if BUILD_SINGLE
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG); int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if BUILD_DOUBLE
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG); int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX #if BUILD_COMPLEX
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG); int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX16 #if BUILD_COMPLEX16
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG); int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
#endif #endif
#ifdef BUILD_SINGLE #if BUILD_SINGLE
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if BUILD_DOUBLE
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX #if BUILD_COMPLEX
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
@ -1003,7 +1031,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX16 #if BUILD_COMPLEX16
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
@ -1015,21 +1043,21 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
#endif #endif
#ifdef BUILD_SINGLE #if BUILD_SINGLE
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if BUILD_DOUBLE
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX #if BUILD_COMPLEX
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
@ -1041,7 +1069,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX16 #if BUILD_COMPLEX16
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
@ -1053,16 +1081,16 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
#endif #endif
#ifdef BUILD_SINGLE #if BUILD_SINGLE
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
#endif #endif
#ifdef BUILD_DOUBLE #if BUILD_DOUBLE
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX #if BUILD_COMPLEX
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX16 #if BUILD_COMPLEX16
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG); int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
#endif #endif
} gotoblas_t; } gotoblas_t;
@ -1085,7 +1113,7 @@ extern gotoblas_t *gotoblas;
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn #define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
#endif #endif
#if defined (BUILD_SINGLE) #if (BUILD_SINGLE)
#define SGEMM_P gotoblas -> sgemm_p #define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q #define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R gotoblas -> sgemm_r #define SGEMM_R gotoblas -> sgemm_r
@ -1094,13 +1122,21 @@ extern gotoblas_t *gotoblas;
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn #define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif #endif
#if defined (BUILD_DOUBLE) #if (BUILD_DOUBLE)
#define DGEMM_P gotoblas -> dgemm_p #define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q #define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R gotoblas -> dgemm_r #define DGEMM_R gotoblas -> dgemm_r
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m #define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n #define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn #define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
/*
 * Fallback: BUILD_DOUBLE is enabled but BUILD_SINGLE is 0 or undefined, so the
 * BUILD_SINGLE section did not define the SGEMM_* macros. Define them here so
 * they still resolve to the corresponding fields of the runtime `gotoblas`
 * table.
 * NOTE(review): presumably needed because some double-precision code paths
 * reference the SGEMM_* parameters - confirm against the users of these macros.
 */
#if ! (BUILD_SINGLE)
#define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R gotoblas -> sgemm_r
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif
#endif #endif
#define QGEMM_P gotoblas -> qgemm_p #define QGEMM_P gotoblas -> qgemm_p
@ -1110,7 +1146,7 @@ extern gotoblas_t *gotoblas;
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n #define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn #define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
#ifdef BUILD_COMPLEX #if BUILD_COMPLEX
#define CGEMM_P gotoblas -> cgemm_p #define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q #define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r #define CGEMM_R gotoblas -> cgemm_r
@ -1127,7 +1163,7 @@ extern gotoblas_t *gotoblas;
#endif #endif
#endif #endif
#ifdef BUILD_COMPLEX16 #if BUILD_COMPLEX16
#define ZGEMM_P gotoblas -> zgemm_p #define ZGEMM_P gotoblas -> zgemm_p
#define ZGEMM_Q gotoblas -> zgemm_q #define ZGEMM_Q gotoblas -> zgemm_q
#define ZGEMM_R gotoblas -> zgemm_r #define ZGEMM_R gotoblas -> zgemm_r
@ -1142,6 +1178,14 @@ extern gotoblas_t *gotoblas;
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n #define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn #define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif #endif
/*
 * Fallback: BUILD_COMPLEX16 is enabled but single-precision complex is not
 * built, so the BUILD_COMPLEX section did not define the CGEMM_* macros.
 * Define them here so they still resolve to fields of the runtime `gotoblas`
 * table.
 *
 * The guard tests the VALUE of BUILD_COMPLEX (like `#if BUILD_COMPLEX` and
 * `#if ! (BUILD_SINGLE)` elsewhere in this header) rather than whether it is
 * defined. With `#ifndef`, a build passing -DBUILD_COMPLEX=0 would skip both
 * the primary definitions and this fallback, leaving CGEMM_* undefined. An
 * undefined BUILD_COMPLEX evaluates to 0 in `#if`, so the undefined case
 * behaves exactly as before.
 */
#if ! (BUILD_COMPLEX)
#define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
#endif
#endif #endif
#define XGEMM_P gotoblas -> xgemm_p #define XGEMM_P gotoblas -> xgemm_p
@ -1310,7 +1354,7 @@ extern gotoblas_t *gotoblas;
#endif #endif
#ifndef COMPLEX #ifndef COMPLEX
#if defined(XDOUBLE) #if (XDOUBLE)
#define GEMM_P QGEMM_P #define GEMM_P QGEMM_P
#define GEMM_Q QGEMM_Q #define GEMM_Q QGEMM_Q
#define GEMM_R QGEMM_R #define GEMM_R QGEMM_R
@ -1334,7 +1378,7 @@ extern gotoblas_t *gotoblas;
#define GEMM_DEFAULT_R DGEMM_DEFAULT_R #define GEMM_DEFAULT_R DGEMM_DEFAULT_R
#define GEMM_DEFAULT_UNROLL_M DGEMM_DEFAULT_UNROLL_M #define GEMM_DEFAULT_UNROLL_M DGEMM_DEFAULT_UNROLL_M
#define GEMM_DEFAULT_UNROLL_N DGEMM_DEFAULT_UNROLL_N #define GEMM_DEFAULT_UNROLL_N DGEMM_DEFAULT_UNROLL_N
#elif defined(HALF) #elif (HALF)
#define GEMM_P SHGEMM_P #define GEMM_P SHGEMM_P
#define GEMM_Q SHGEMM_Q #define GEMM_Q SHGEMM_Q
#define GEMM_R SHGEMM_R #define GEMM_R SHGEMM_R
@ -1360,7 +1404,7 @@ extern gotoblas_t *gotoblas;
#define GEMM_DEFAULT_UNROLL_N SGEMM_DEFAULT_UNROLL_N #define GEMM_DEFAULT_UNROLL_N SGEMM_DEFAULT_UNROLL_N
#endif #endif
#else #else
#if defined(XDOUBLE) #if (XDOUBLE)
#define GEMM_P XGEMM_P #define GEMM_P XGEMM_P
#define GEMM_Q XGEMM_Q #define GEMM_Q XGEMM_Q
#define GEMM_R XGEMM_R #define GEMM_R XGEMM_R
@ -1474,7 +1518,7 @@ extern gotoblas_t *gotoblas;
#ifndef GEMM3M_P #ifndef GEMM3M_P
#ifdef XDOUBLE #ifdef XDOUBLE
#define GEMM3M_P XGEMM3M_P #define GEMM3M_P XGEMM3M_P
#elif defined(DOUBLE) #elif defined (DOUBLE)
#define GEMM3M_P ZGEMM3M_P #define GEMM3M_P ZGEMM3M_P
#else #else
#define GEMM3M_P CGEMM3M_P #define GEMM3M_P CGEMM3M_P
@ -1484,7 +1528,7 @@ extern gotoblas_t *gotoblas;
#ifndef GEMM3M_Q #ifndef GEMM3M_Q
#ifdef XDOUBLE #ifdef XDOUBLE
#define GEMM3M_Q XGEMM3M_Q #define GEMM3M_Q XGEMM3M_Q
#elif defined(DOUBLE) #elif defined (DOUBLE)
#define GEMM3M_Q ZGEMM3M_Q #define GEMM3M_Q ZGEMM3M_Q
#else #else
#define GEMM3M_Q CGEMM3M_Q #define GEMM3M_Q CGEMM3M_Q
@ -1494,7 +1538,7 @@ extern gotoblas_t *gotoblas;
#ifndef GEMM3M_R #ifndef GEMM3M_R
#ifdef XDOUBLE #ifdef XDOUBLE
#define GEMM3M_R XGEMM3M_R #define GEMM3M_R XGEMM3M_R
#elif defined(DOUBLE) #elif defined (DOUBLE)
#define GEMM3M_R ZGEMM3M_R #define GEMM3M_R ZGEMM3M_R
#else #else
#define GEMM3M_R CGEMM3M_R #define GEMM3M_R CGEMM3M_R