Allow building support for only a subset of variable types
This commit is contained in:
parent
68e6823d36
commit
e396ec8b56
|
@ -29,8 +29,10 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc
|
|||
else()
|
||||
set(NO_AFFINITY 1)
|
||||
endif()
|
||||
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
|
||||
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
|
||||
# Per-type build switches: each enabled option appends its type to FLOAT_TYPES below.
option(BUILD_SINGLE "Single precision" OFF)
|
||||
option(BUILD_DOUBLE "Double precision" OFF)
|
||||
option(BUILD_COMPLEX "Single precision complex" OFF)
|
||||
option(BUILD_COMPLEX16 "Double precision complex" OFF)
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoids conflicts with other BLAS libraries, especially when using
|
||||
|
@ -108,28 +110,33 @@ endif()
|
|||
|
||||
set(FLOAT_TYPES "")
|
||||
if (BUILD_SINGLE)
|
||||
message(STATUS "Building Single Precision")
|
||||
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
|
||||
message(STATUS "Building Single Precision")
|
||||
list(APPEND FLOAT_TYPES "SINGLE")
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1")
|
||||
endif ()
|
||||
|
||||
if (BUILD_DOUBLE)
|
||||
message(STATUS "Building Double Precision")
|
||||
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
|
||||
list(APPEND FLOAT_TYPES "DOUBLE")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1")
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX)
|
||||
message(STATUS "Building Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
|
||||
endif ()
|
||||
list(APPEND FLOAT_TYPES "COMPLEX")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1")
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX16)
|
||||
message(STATUS "Building Double Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1")
|
||||
endif ()
|
||||
|
||||
if (BUILD_HALF)
|
||||
message(STATUS "Building Half Precision")
|
||||
list(APPEND FLOAT_TYPES "HALF") # defines nothing
|
||||
list(APPEND FLOAT_TYPES "HALF")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_HALF")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||
|
@ -236,9 +243,6 @@ if (NOT MSVC AND NOT NOFORTRAN)
|
|||
add_subdirectory(ctest)
|
||||
endif()
|
||||
add_subdirectory(lapack-netlib/TESTING)
|
||||
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
|
||||
add_subdirectory(cpp_thread_test)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
|
|
15
Makefile
15
Makefile
|
@ -146,9 +146,6 @@ ifneq ($(NO_CBLAS), 1)
|
|||
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
|
||||
$(MAKE) -C cpp_thread_test all
|
||||
endif
|
||||
ifeq ($(CPP_THREAD_SAFETY_GEMV), 1)
|
||||
$(MAKE) -C cpp_thread_test dgemv_tester
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -304,6 +301,18 @@ else
|
|||
endif
|
||||
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
|
||||
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE), 1)
|
||||
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_DOUBLE), 1)
|
||||
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_COMPLEX), 1)
|
||||
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_COMPLEX16), 1)
|
||||
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
|
|
|
@ -272,33 +272,17 @@ COMMON_PROF = -pg
|
|||
# work at all.
|
||||
#
|
||||
# CPP_THREAD_SAFETY_TEST = 1
|
||||
#
|
||||
# use this to run only the less memory-hungry GEMV test
|
||||
# CPP_THREAD_SAFETY_GEMV = 1
|
||||
|
||||
|
||||
# If you want to enable the experimental BFLOAT16 support
|
||||
# BUILD_HALF = 1
|
||||
|
||||
|
||||
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
|
||||
# will be allocated on the heap rather than the stack. (This array alone requires
|
||||
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu
|
||||
# counts, but obviously it is not the only item that ends up on the stack.
|
||||
# The default value of 32 ensures that the overall requirement is compatible
|
||||
# with the default 1MB stacksize imposed by having the Java VM loaded without use
|
||||
# of its -Xss parameter.
|
||||
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
|
||||
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java
|
||||
# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code
|
||||
# BLAS3_MEM_ALLOC_THRESHOLD = 160
|
||||
|
||||
|
||||
|
||||
# the below is not yet configurable, use cmake if you need to build only select types
|
||||
BUILD_SINGLE = 1
|
||||
BUILD_DOUBLE = 1
|
||||
BUILD_COMPLEX = 1
|
||||
BUILD_COMPLEX16 = 1
|
||||
#
|
||||
# Uncomment the entries below if you need to build only select types
|
||||
# BUILD_SINGLE = 1
|
||||
# BUILD_DOUBLE = 1
|
||||
# BUILD_COMPLEX = 1
|
||||
# BUILD_COMPLEX16 = 1
|
||||
#
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
|
|
@ -11,8 +11,8 @@ COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
|||
|
||||
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
||||
BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
|
||||
BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
|
||||
BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
|
||||
|
||||
ifdef EXPRECISION
|
||||
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
|
|
166
common_param.h
166
common_param.h
|
@ -146,40 +146,56 @@ BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*shlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
|
||||
|
||||
#endif
|
||||
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
int sgemm_p, sgemm_q, sgemm_r;
|
||||
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
|
||||
#endif
|
||||
|
||||
int exclusive_cache;
|
||||
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
|
||||
float (*samax_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*samin_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*smax_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*smin_k) (BLASLONG, float *, BLASLONG);
|
||||
|
||||
BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
||||
|
||||
float (*snrm2_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*sasum_k) (BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_SINGLE
|
||||
float (*ssum_k) (BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
|
||||
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
|
||||
int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
int (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
|
||||
int (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
#if BUILD_SINGLE
|
||||
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
|
||||
#ifdef ARCH_X86_64
|
||||
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
|
||||
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
|
||||
|
@ -193,7 +209,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*sgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE)
|
||||
int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
@ -215,7 +232,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
|
||||
#endif
|
||||
#if BUILD_SINGLE
|
||||
int (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
@ -242,13 +260,18 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*ssymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ssymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ssymm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE)
|
||||
int (*sneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
|
||||
#endif
|
||||
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
int dgemm_p, dgemm_q, dgemm_r;
|
||||
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
|
||||
#endif
|
||||
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
double (*damax_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*damin_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*dmax_k) (BLASLONG, double *, BLASLONG);
|
||||
|
@ -257,25 +280,37 @@ BLASLONG (*idamax_k)(BLASLONG, double *, BLASLONG);
|
|||
BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG);
|
||||
BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG);
|
||||
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*dasum_k) (BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_DOUBLE
|
||||
double (*dsum_k) (BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE)
|
||||
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
|
||||
|
||||
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
#endif
|
||||
#if BUILD_DOUBLE
|
||||
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
|
||||
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
|
||||
#endif
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
|
||||
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
|
@ -283,7 +318,8 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
|||
int (*dgemm_itcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
|
||||
#endif
|
||||
#if BUILD_DOUBLE
|
||||
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
|
@ -335,7 +371,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
|||
|
||||
int (*dneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *);
|
||||
|
||||
#endif
|
||||
#ifdef EXPRECISION
|
||||
|
||||
int qgemm_p, qgemm_q, qgemm_r;
|
||||
|
@ -430,22 +466,29 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
|
|||
|
||||
#endif
|
||||
|
||||
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
int cgemm_p, cgemm_q, cgemm_r;
|
||||
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
|
||||
|
||||
float (*camax_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*camin_k) (BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX
|
||||
|
||||
float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*casum_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*csum_k) (BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX)|| (BUILD_COMPLEX16)
|
||||
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX
|
||||
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
|
||||
#endif
|
||||
#if (BUILD_COMPLEX)|| (BUILD_COMPLEX16)
|
||||
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
@ -459,6 +502,8 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
|||
int (*cgemv_u) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemv_s) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemv_d) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX)
|
||||
int (*cgeru_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgerc_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgerv_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
|
@ -470,13 +515,14 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
|||
int (*chemv_U) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*chemv_M) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*chemv_V) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
|
||||
int (*cgemm_kernel_n )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
|
||||
int (*cgemm_kernel_l )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
|
||||
int (*cgemm_kernel_r )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
|
||||
int (*cgemm_kernel_b )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
|
||||
int (*cgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*cgemm_incopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
|
@ -507,6 +553,8 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
|||
int (*ctrsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX)
|
||||
|
||||
int (*ctrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*ctrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
@ -590,10 +638,13 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
|||
int (*chemm3m_olcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*chemm3m_oucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*chemm3m_olcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
|
||||
#endif
|
||||
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
int (*cneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX16
|
||||
int zgemm_p, zgemm_q, zgemm_r;
|
||||
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
|
||||
|
||||
|
@ -757,6 +808,7 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
|||
|
||||
int (*zneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*zlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *);
|
||||
#endif
|
||||
|
||||
#ifdef EXPRECISION
|
||||
|
||||
|
@ -930,22 +982,34 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
void (*init)(void);
|
||||
|
||||
int snum_opt, dnum_opt, qnum_opt;
|
||||
|
||||
#if BUILD_SINGLE
|
||||
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_DOUBLE
|
||||
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX
|
||||
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX16
|
||||
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_SINGLE
|
||||
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_DOUBLE
|
||||
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX
|
||||
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
|
@ -955,7 +1019,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX16
|
||||
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
@ -965,17 +1031,23 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_SINGLE
|
||||
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_DOUBLE
|
||||
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX
|
||||
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
|
@ -985,7 +1057,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX16
|
||||
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
|
@ -995,12 +1069,20 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_SINGLE
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_DOUBLE
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX16
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
#endif
|
||||
} gotoblas_t;
|
||||
|
||||
extern gotoblas_t *gotoblas;
|
||||
|
@ -1021,19 +1103,31 @@ extern gotoblas_t *gotoblas;
|
|||
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
|
||||
#endif
|
||||
|
||||
#if (BUILD_SINGLE)
|
||||
#define SGEMM_P gotoblas -> sgemm_p
|
||||
#define SGEMM_Q gotoblas -> sgemm_q
|
||||
#define SGEMM_R gotoblas -> sgemm_r
|
||||
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
|
||||
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
|
||||
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
|
||||
#endif
|
||||
|
||||
#if (BUILD_DOUBLE)
|
||||
#define DGEMM_P gotoblas -> dgemm_p
|
||||
#define DGEMM_Q gotoblas -> dgemm_q
|
||||
#define DGEMM_R gotoblas -> dgemm_r
|
||||
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
|
||||
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
|
||||
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
|
||||
#if ! (BUILD_SINGLE)
|
||||
#define SGEMM_P gotoblas -> sgemm_p
|
||||
#define SGEMM_Q gotoblas -> sgemm_q
|
||||
#define SGEMM_R gotoblas -> sgemm_r
|
||||
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
|
||||
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
|
||||
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define QGEMM_P gotoblas -> qgemm_p
|
||||
#define QGEMM_Q gotoblas -> qgemm_q
|
||||
|
@ -1042,19 +1136,47 @@ extern gotoblas_t *gotoblas;
|
|||
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
|
||||
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
|
||||
|
||||
#if BUILD_COMPLEX
|
||||
#define CGEMM_P gotoblas -> cgemm_p
|
||||
#define CGEMM_Q gotoblas -> cgemm_q
|
||||
#define CGEMM_R gotoblas -> cgemm_r
|
||||
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
|
||||
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
|
||||
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
|
||||
/* Fallback SGEMM_* parameters for a BUILD_COMPLEX build without BUILD_SINGLE.
 * Tested by value (#if) rather than by existence (#ifndef) so that
 * BUILD_SINGLE=0 counts as disabled, like the other BUILD_* checks in this header.
 * NOTE(review): the BUILD_DOUBLE branch of this header also defines SGEMM_R
 * (as gotoblas -> sgemm_r) when BUILD_SINGLE is off; with BUILD_DOUBLE and
 * BUILD_COMPLEX both enabled the two definitions differ - confirm which is intended. */
#if !(BUILD_SINGLE)
|
||||
#define SGEMM_P gotoblas -> sgemm_p
|
||||
#define SGEMM_Q gotoblas -> sgemm_q
|
||||
#define SGEMM_R 1024
|
||||
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
|
||||
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
|
||||
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX16
|
||||
#define ZGEMM_P gotoblas -> zgemm_p
|
||||
#define ZGEMM_Q gotoblas -> zgemm_q
|
||||
#define ZGEMM_R gotoblas -> zgemm_r
|
||||
#define ZGEMM_UNROLL_M gotoblas -> zgemm_unroll_m
|
||||
#define ZGEMM_UNROLL_N gotoblas -> zgemm_unroll_n
|
||||
#define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn
|
||||
/* Fallback DGEMM_* parameters for a BUILD_COMPLEX16 build without BUILD_DOUBLE.
 * Tested by value (#if) rather than by existence (#ifndef) so that
 * BUILD_DOUBLE=0 counts as disabled, like the other BUILD_* checks in this header. */
#if !(BUILD_DOUBLE)
|
||||
#define DGEMM_P gotoblas -> dgemm_p
|
||||
#define DGEMM_Q gotoblas -> dgemm_q
|
||||
#define DGEMM_R 1024
|
||||
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
|
||||
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
|
||||
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
|
||||
#endif
|
||||
/* Fallback CGEMM_* parameters for a BUILD_COMPLEX16 build without BUILD_COMPLEX.
 * Tested by value (#if) rather than by existence (#ifndef) so that
 * BUILD_COMPLEX=0 counts as disabled, like the other BUILD_* checks in this header. */
#if !(BUILD_COMPLEX)
|
||||
#define CGEMM_P gotoblas -> cgemm_p
|
||||
#define CGEMM_Q gotoblas -> cgemm_q
|
||||
#define CGEMM_R gotoblas -> cgemm_r
|
||||
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
|
||||
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
|
||||
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define XGEMM_P gotoblas -> xgemm_p
|
||||
#define XGEMM_Q gotoblas -> xgemm_q
|
||||
|
@ -1222,7 +1344,7 @@ extern gotoblas_t *gotoblas;
|
|||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#if defined(XDOUBLE)
|
||||
#if (XDOUBLE)
|
||||
#define GEMM_P QGEMM_P
|
||||
#define GEMM_Q QGEMM_Q
|
||||
#define GEMM_R QGEMM_R
|
||||
|
@ -1246,7 +1368,7 @@ extern gotoblas_t *gotoblas;
|
|||
#define GEMM_DEFAULT_R DGEMM_DEFAULT_R
|
||||
#define GEMM_DEFAULT_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM_DEFAULT_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
||||
#elif defined(HALF)
|
||||
#elif (HALF)
|
||||
#define GEMM_P SHGEMM_P
|
||||
#define GEMM_Q SHGEMM_Q
|
||||
#define GEMM_R SHGEMM_R
|
||||
|
@ -1272,7 +1394,7 @@ extern gotoblas_t *gotoblas;
|
|||
#define GEMM_DEFAULT_UNROLL_N SGEMM_DEFAULT_UNROLL_N
|
||||
#endif
|
||||
#else
|
||||
#if defined(XDOUBLE)
|
||||
#if (XDOUBLE)
|
||||
#define GEMM_P XGEMM_P
|
||||
#define GEMM_Q XGEMM_Q
|
||||
#define GEMM_R XGEMM_R
|
||||
|
@ -1386,7 +1508,7 @@ extern gotoblas_t *gotoblas;
|
|||
#ifndef GEMM3M_P
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_P XGEMM3M_P
|
||||
#elif defined(DOUBLE)
|
||||
#elif defined (DOUBLE)
|
||||
#define GEMM3M_P ZGEMM3M_P
|
||||
#else
|
||||
#define GEMM3M_P CGEMM3M_P
|
||||
|
@ -1396,7 +1518,7 @@ extern gotoblas_t *gotoblas;
|
|||
#ifndef GEMM3M_Q
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_Q XGEMM3M_Q
|
||||
#elif defined(DOUBLE)
|
||||
#elif defined (DOUBLE)
|
||||
#define GEMM3M_Q ZGEMM3M_Q
|
||||
#else
|
||||
#define GEMM3M_Q CGEMM3M_Q
|
||||
|
@ -1406,7 +1528,7 @@ extern gotoblas_t *gotoblas;
|
|||
#ifndef GEMM3M_R
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_R XGEMM3M_R
|
||||
#elif defined(DOUBLE)
|
||||
#elif defined (DOUBLE)
|
||||
#define GEMM3M_R ZGEMM3M_R
|
||||
#else
|
||||
#define GEMM3M_R CGEMM3M_R
|
||||
|
|
Loading…
Reference in New Issue