Merge branch 'develop'
This commit is contained in:
commit
da3d70420a
|
@ -21,8 +21,10 @@ lapack-netlib/TESTING/testing_results.txt
|
|||
lib.grd
|
||||
nohup.out
|
||||
config.h
|
||||
config_kernel.h
|
||||
Makefile.conf
|
||||
Makefile.conf_last
|
||||
Makefile_kernel.conf
|
||||
config_last.h
|
||||
getarch
|
||||
getarch_2nd
|
||||
|
@ -41,6 +43,8 @@ ctest/xzcblat2
|
|||
ctest/xzcblat3
|
||||
exports/linktest.c
|
||||
exports/linux.def
|
||||
kernel/setparam_*.c
|
||||
kernel/kernel_*.h
|
||||
test/CBLAT2.SUMM
|
||||
test/CBLAT3.SUMM
|
||||
test/DBLAT2.SUMM
|
||||
|
|
11
Makefile
11
Makefile
|
@ -23,7 +23,7 @@ endif
|
|||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install
|
||||
.NOTPARALLEL : all libs prof lapack-test install blas-test
|
||||
|
||||
all :: libs netlib tests shared
|
||||
@echo
|
||||
|
@ -36,9 +36,13 @@ ifndef BINARY64
|
|||
else
|
||||
@echo " BINARY ... 64bit "
|
||||
endif
|
||||
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
|
||||
endif
|
||||
endif
|
||||
|
||||
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
|
||||
ifndef NOFORTRAN
|
||||
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
|
||||
|
@ -278,6 +282,11 @@ lapack-test :
|
|||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
|
||||
# blas-test: remove stale x* files and *.out result files from the BLAS
# directory of $(NETLIB_LAPACK_DIR), run that tree's blas_testing target
# serially, then print every *.out file it produced.
# (x* is presumably the set of test executables -- TODO confirm.)
#
# The target names a command rather than a file, so it is declared phony;
# otherwise a stray file called "blas-test" would silently disable it.
# The sub-make is invoked through $(MAKE) so that -n, -k and command-line
# variable overrides are propagated to the child make.
.PHONY : blas-test
blas-test:
	(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
	$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
	(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
|
||||
|
||||
|
||||
dummy :
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ ifeq ($(OSNAME), Darwin)
|
|||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBPREFIX).lib $(OPENBLAS_LIBRARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.9
|
||||
VERSION = 0.2.10.rc1
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
@ -133,7 +133,8 @@ NO_AFFINITY = 1
|
|||
# COMMON_OPT = -O2
|
||||
|
||||
# gfortran option for LAPACK
|
||||
FCOMMON_OPT = -frecursive
|
||||
# Enable this flag only on 64-bit Linux, and only if you need a thread-safe LAPACK library.
|
||||
# FCOMMON_OPT = -frecursive
|
||||
|
||||
# Profiling flags
|
||||
COMMON_PROF = -pg
|
||||
|
|
|
@ -46,15 +46,55 @@ ifdef TARGET
|
|||
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
||||
endif
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), BULLDOZER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
#TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
|
||||
#
|
||||
ifdef TARGET_CORE
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET_CORE)
|
||||
endif
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET_CORE), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), BULLDOZER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
GETARCH_FLAGS += -DUSE64BITINT
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef GEMM_MULTITHREAD_THRESHOLD
|
||||
GEMM_MULTITHREAD_THRESHOLD=4
|
||||
|
@ -65,6 +105,10 @@ ifeq ($(NO_AVX), 1)
|
|||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
GETARCH_FLAGS += -g
|
||||
endif
|
||||
|
@ -336,9 +380,6 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
|||
ifeq ($(ARCH), x86)
|
||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
|
@ -503,8 +544,10 @@ else
|
|||
ifdef BINARY64
|
||||
FCOMMON_OPT += -m64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -fdefault-integer-8
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
|
@ -517,8 +560,10 @@ endif
|
|||
ifeq ($(F_COMPILER), INTEL)
|
||||
CCOMMON_OPT += -DF_INTERFACE_INTEL
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
|
@ -537,8 +582,10 @@ CCOMMON_OPT += -DF_INTERFACE_IBM
|
|||
ifdef BINARY64
|
||||
FCOMMON_OPT += -q64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -qintsize=8
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -q32
|
||||
endif
|
||||
|
@ -552,8 +599,10 @@ CCOMMON_OPT += -DF_INTERFACE_PGI
|
|||
COMMON_PROF += -DPGICOMPILER
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
FCOMMON_OPT += -tp p7-64
|
||||
else
|
||||
FCOMMON_OPT += -tp p7
|
||||
|
@ -567,9 +616,11 @@ ifeq ($(F_COMPILER), PATHSCALE)
|
|||
CCOMMON_OPT += -DF_INTERFACE_PATHSCALE
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
|
@ -594,9 +645,11 @@ ifeq ($(F_COMPILER), OPEN64)
|
|||
CCOMMON_OPT += -DF_INTERFACE_OPEN64
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
|
@ -682,10 +735,12 @@ endif
|
|||
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
CCOMMON_OPT +=
|
||||
#-DUSE64BITINT
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(NEED_PIC), 1)
|
||||
ifeq ($(C_COMPILER), IBM)
|
||||
|
@ -718,6 +773,10 @@ ifeq ($(NO_AVX), 1)
|
|||
CCOMMON_OPT += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
CCOMMON_OPT += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
|
@ -872,8 +931,11 @@ endif
|
|||
LAPACK_CFLAGS = $(CFLAGS)
|
||||
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
LAPACK_CFLAGS += -DLAPACK_ILP64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef OS_WINDOWS
|
||||
LAPACK_CFLAGS += -DOPENBLAS_OS_WINDOWS
|
||||
endif
|
||||
|
|
10
cblas.h
10
cblas.h
|
@ -305,6 +305,16 @@ void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBL
|
|||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
/*** BLAS extensions ***/
|
||||
|
||||
void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
|
|
@ -296,6 +296,17 @@ void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANS
|
|||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
/*** BLAS extensions ***/
|
||||
|
||||
void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy);
|
||||
|
||||
void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy);
|
||||
|
||||
void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy);
|
||||
|
||||
void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
|
23
common.h
23
common.h
|
@ -388,6 +388,15 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
|||
#include "common_arm64.h"
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#ifdef OS_WINDOWS
|
||||
typedef char env_var_t[MAX_PATH];
|
||||
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
|
||||
#else
|
||||
typedef char* env_var_t;
|
||||
#define readenv(p, n) ((p)=getenv(n))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include "common_linux.h"
|
||||
|
@ -515,13 +524,9 @@ static __inline void blas_unlock(volatile BLASULONG *address){
|
|||
*address = 0;
|
||||
}
|
||||
|
||||
static __inline int readenv(char *env) {
|
||||
|
||||
char *p;
|
||||
|
||||
p = getenv(env);
|
||||
|
||||
if (p == NULL) return 0; else return atoi(p);
|
||||
static __inline int readenv_atoi(char *env) {
|
||||
env_var_t p;
|
||||
return readenv(p,env) ? 0 : atoi(p);
|
||||
}
|
||||
|
||||
|
||||
|
@ -687,8 +692,8 @@ extern int gotoblas_profile;
|
|||
#define PRINT_DEBUG_CNAME
|
||||
#define PRINT_DEBUG_NAME
|
||||
#else
|
||||
#define PRINT_DEBUG_CNAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
|
||||
#define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
||||
#define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
|
||||
#define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
23
common_c.h
23
common_c.h
|
@ -209,6 +209,18 @@
|
|||
#define CNEG_TCOPY cneg_tcopy
|
||||
#define CLASWP_NCOPY claswp_ncopy
|
||||
|
||||
#define CAXPBY_K caxpby_k
|
||||
|
||||
#define COMATCOPY_K_CN comatcopy_k_cn
|
||||
#define COMATCOPY_K_RN comatcopy_k_rn
|
||||
#define COMATCOPY_K_CT comatcopy_k_ct
|
||||
#define COMATCOPY_K_RT comatcopy_k_rt
|
||||
#define COMATCOPY_K_CNC comatcopy_k_cnc
|
||||
#define COMATCOPY_K_RNC comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC comatcopy_k_rtc
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#define CAMAX_K gotoblas -> camax_k
|
||||
|
@ -380,6 +392,17 @@
|
|||
#define CNEG_TCOPY gotoblas -> cneg_tcopy
|
||||
#define CLASWP_NCOPY gotoblas -> claswp_ncopy
|
||||
|
||||
#define CAXPBY_K gotoblas -> caxpby_k
|
||||
|
||||
#define COMATCOPY_K_CN gotoblas -> comatcopy_k_cn
|
||||
#define COMATCOPY_K_RN gotoblas -> comatcopy_k_rn
|
||||
#define COMATCOPY_K_CT gotoblas -> comatcopy_k_ct
|
||||
#define COMATCOPY_K_RT gotoblas -> comatcopy_k_rt
|
||||
#define COMATCOPY_K_CNC gotoblas -> comatcopy_k_cnc
|
||||
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
|
||||
|
||||
#endif
|
||||
|
||||
#define CGEMM_NN cgemm_nn
|
||||
|
|
12
common_d.h
12
common_d.h
|
@ -144,6 +144,12 @@
|
|||
#define DNEG_TCOPY dneg_tcopy
|
||||
#define DLASWP_NCOPY dlaswp_ncopy
|
||||
|
||||
#define DAXPBY_K daxpby_k
|
||||
#define DOMATCOPY_K_CN domatcopy_k_cn
|
||||
#define DOMATCOPY_K_RN domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT domatcopy_k_rt
|
||||
|
||||
#else
|
||||
|
||||
#define DAMAX_K gotoblas -> damax_k
|
||||
|
@ -255,6 +261,12 @@
|
|||
#define DNEG_TCOPY gotoblas -> dneg_tcopy
|
||||
#define DLASWP_NCOPY gotoblas -> dlaswp_ncopy
|
||||
|
||||
#define DAXPBY_K gotoblas -> daxpby_k
|
||||
#define DOMATCOPY_K_CN gotoblas -> domatcopy_k_cn
|
||||
#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
|
||||
|
||||
#endif
|
||||
|
||||
#define DGEMM_NN dgemm_nn
|
||||
|
|
|
@ -757,6 +757,23 @@ FLOATRET BLASFUNC(slamc3)(float *, float *);
|
|||
double BLASFUNC(dlamc3)(double *, double *);
|
||||
xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *);
|
||||
|
||||
/* BLAS extensions */
|
||||
|
||||
void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(caxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zaxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(comatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zomatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(simatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
|
|
@ -204,6 +204,13 @@ int srotm_k (BLASLONG, float, BLASLONG, float, BLASLONG, float);
|
|||
int drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double);
|
||||
int qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble);
|
||||
|
||||
|
||||
int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
int daxpby_k (BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
int caxpby_k (BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
int zaxpby_k (BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1732,6 +1732,37 @@ int zgemc_otcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b, BLA
|
|||
int xgemc_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c);
|
||||
int xgemc_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c);
|
||||
|
||||
int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -628,6 +628,13 @@
|
|||
#define HERK_THREAD_LR DSYRK_THREAD_LN
|
||||
#define HERK_THREAD_LC DSYRK_THREAD_LT
|
||||
|
||||
#define AXPBY_K DAXPBY_K
|
||||
|
||||
#define OMATCOPY_K_CN DOMATCOPY_K_CN
|
||||
#define OMATCOPY_K_RN DOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT DOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT DOMATCOPY_K_RT
|
||||
|
||||
#else
|
||||
|
||||
#define AMAX_K SAMAX_K
|
||||
|
@ -918,6 +925,13 @@
|
|||
#define HERK_THREAD_LR SSYRK_THREAD_LN
|
||||
#define HERK_THREAD_LC SSYRK_THREAD_LT
|
||||
|
||||
#define AXPBY_K SAXPBY_K
|
||||
|
||||
#define OMATCOPY_K_CN SOMATCOPY_K_CN
|
||||
#define OMATCOPY_K_RN SOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT SOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT SOMATCOPY_K_RT
|
||||
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
|
@ -1722,6 +1736,17 @@
|
|||
#define SYMM_OUTCOPY ZSYMM_OUTCOPY
|
||||
#define SYMM_OLTCOPY ZSYMM_OLTCOPY
|
||||
|
||||
#define AXPBY_K ZAXPBY_K
|
||||
|
||||
#define OMATCOPY_K_CN ZOMATCOPY_K_CN
|
||||
#define OMATCOPY_K_RN ZOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT ZOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT ZOMATCOPY_K_RT
|
||||
#define OMATCOPY_K_CNC ZOMATCOPY_K_CNC
|
||||
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
|
||||
|
||||
#else
|
||||
|
||||
#define AMAX_K CAMAX_K
|
||||
|
@ -2123,6 +2148,17 @@
|
|||
#define SYMM_OUTCOPY CSYMM_OUTCOPY
|
||||
#define SYMM_OLTCOPY CSYMM_OLTCOPY
|
||||
|
||||
#define AXPBY_K CAXPBY_K
|
||||
|
||||
#define OMATCOPY_K_CN COMATCOPY_K_CN
|
||||
#define OMATCOPY_K_RN COMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT COMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT COMATCOPY_K_RT
|
||||
#define OMATCOPY_K_CNC COMATCOPY_K_CNC
|
||||
#define OMATCOPY_K_RNC COMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -806,10 +806,47 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
|
||||
#endif
|
||||
|
||||
|
||||
void (*init)(void);
|
||||
|
||||
int snum_opt, dnum_opt, qnum_opt;
|
||||
|
||||
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
|
||||
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
|
||||
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
|
||||
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
|
||||
|
||||
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
|
||||
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
|
||||
int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
|
||||
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
|
||||
} gotoblas_t;
|
||||
|
||||
extern gotoblas_t *gotoblas;
|
||||
|
|
16
common_s.h
16
common_s.h
|
@ -146,6 +146,14 @@
|
|||
#define SNEG_TCOPY sneg_tcopy
|
||||
#define SLASWP_NCOPY slaswp_ncopy
|
||||
|
||||
#define SAXPBY_K saxpby_k
|
||||
|
||||
#define SOMATCOPY_K_CN somatcopy_k_cn
|
||||
#define SOMATCOPY_K_RN somatcopy_k_rn
|
||||
#define SOMATCOPY_K_CT somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT somatcopy_k_rt
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#define SAMAX_K gotoblas -> samax_k
|
||||
|
@ -259,6 +267,14 @@
|
|||
#define SNEG_TCOPY gotoblas -> sneg_tcopy
|
||||
#define SLASWP_NCOPY gotoblas -> slaswp_ncopy
|
||||
|
||||
#define SAXPBY_K gotoblas -> saxpby_k
|
||||
|
||||
#define SOMATCOPY_K_CN gotoblas -> somatcopy_k_cn
|
||||
#define SOMATCOPY_K_RN gotoblas -> somatcopy_k_rn
|
||||
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#define SGEMM_NN sgemm_nn
|
||||
|
|
23
common_z.h
23
common_z.h
|
@ -209,6 +209,18 @@
|
|||
#define ZNEG_TCOPY zneg_tcopy
|
||||
#define ZLASWP_NCOPY zlaswp_ncopy
|
||||
|
||||
#define ZAXPBY_K zaxpby_k
|
||||
|
||||
#define ZOMATCOPY_K_CN zomatcopy_k_cn
|
||||
#define ZOMATCOPY_K_RN zomatcopy_k_rn
|
||||
#define ZOMATCOPY_K_CT zomatcopy_k_ct
|
||||
#define ZOMATCOPY_K_RT zomatcopy_k_rt
|
||||
#define ZOMATCOPY_K_CNC zomatcopy_k_cnc
|
||||
#define ZOMATCOPY_K_RNC zomatcopy_k_rnc
|
||||
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#define ZAMAX_K gotoblas -> zamax_k
|
||||
|
@ -380,6 +392,17 @@
|
|||
#define ZNEG_TCOPY gotoblas -> zneg_tcopy
|
||||
#define ZLASWP_NCOPY gotoblas -> zlaswp_ncopy
|
||||
|
||||
#define ZAXPBY_K gotoblas -> zaxpby_k
|
||||
|
||||
#define ZOMATCOPY_K_CN gotoblas -> zomatcopy_k_cn
|
||||
#define ZOMATCOPY_K_RN gotoblas -> zomatcopy_k_rn
|
||||
#define ZOMATCOPY_K_CT gotoblas -> zomatcopy_k_ct
|
||||
#define ZOMATCOPY_K_RT gotoblas -> zomatcopy_k_rt
|
||||
#define ZOMATCOPY_K_CNC gotoblas -> zomatcopy_k_cnc
|
||||
#define ZOMATCOPY_K_RNC gotoblas -> zomatcopy_k_rnc
|
||||
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
|
||||
|
||||
#endif
|
||||
|
||||
#define ZGEMM_NN zgemm_nn
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include <string.h>
|
||||
#include "cpuid.h"
|
||||
|
||||
/*
|
||||
#ifdef NO_AVX
|
||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
|
||||
#define CORE_HASWELL CORE_NEHALEM
|
||||
|
@ -50,6 +51,7 @@
|
|||
#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
|
||||
#define CORE_PILEDRIVER CORE_BARCELONA
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifndef CPUIDEMU
|
||||
|
||||
|
|
|
@ -533,18 +533,15 @@ int blas_thread_init(void){
|
|||
|
||||
if (!blas_server_avail){
|
||||
|
||||
char *p;
|
||||
env_var_t p;
|
||||
|
||||
p = getenv("THREAD_TIMEOUT");
|
||||
|
||||
if (p) {
|
||||
if (readenv(p,"THREAD_TIMEOUT")) {
|
||||
thread_timeout = atoi(p);
|
||||
if (thread_timeout < 4) thread_timeout = 4;
|
||||
if (thread_timeout > 30) thread_timeout = 30;
|
||||
thread_timeout = (1 << thread_timeout);
|
||||
}else{
|
||||
p = getenv("GOTO_THREAD_TIMEOUT");
|
||||
if (p) {
|
||||
if (readenv(p,"GOTO_THREAD_TIMEOUT")) {
|
||||
thread_timeout = atoi(p);
|
||||
if (thread_timeout < 4) thread_timeout = 4;
|
||||
if (thread_timeout > 30) thread_timeout = 30;
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
#include "common.h"
|
||||
|
||||
#ifdef SMP
|
||||
#ifndef USE64BITINT
|
||||
#if !defined(USE64BITINT) || defined(ARCH_X86)
|
||||
unsigned int blas_quick_divide_table[] = {
|
||||
0x00000000, 0x00000001, 0x80000001, 0x55555556,
|
||||
0x40000001, 0x33333334, 0x2aaaaaab, 0x24924925,
|
||||
|
|
|
@ -116,18 +116,24 @@ extern void openblas_warning(int verbose, const char * msg);
|
|||
|
||||
static int get_vendor(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
char vendor[13];
|
||||
|
||||
union
|
||||
{
|
||||
char vchar[16];
|
||||
int vint[4];
|
||||
} vendor;
|
||||
|
||||
cpuid(0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
*(int *)(&vendor[0]) = ebx;
|
||||
*(int *)(&vendor[4]) = edx;
|
||||
*(int *)(&vendor[8]) = ecx;
|
||||
vendor[12] = (char)0;
|
||||
*(&vendor.vint[0]) = ebx;
|
||||
*(&vendor.vint[1]) = edx;
|
||||
*(&vendor.vint[2]) = ecx;
|
||||
|
||||
if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL;
|
||||
if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD;
|
||||
if (!strcmp(vendor, "CentaurHauls")) return VENDOR_CENTAUR;
|
||||
vendor.vchar[12] = '\0';
|
||||
|
||||
if (!strcmp(vendor.vchar, "GenuineIntel")) return VENDOR_INTEL;
|
||||
if (!strcmp(vendor.vchar, "AuthenticAMD")) return VENDOR_AMD;
|
||||
if (!strcmp(vendor.vchar, "CentaurHauls")) return VENDOR_CENTAUR;
|
||||
|
||||
if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
|
||||
|
||||
|
@ -232,7 +238,7 @@ static gotoblas_t *get_coretype(void){
|
|||
if (family <= 0xe) {
|
||||
// Verify that CPU has 3dnow and 3dnowext before claiming it is Athlon
|
||||
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
|
||||
if (eax & 0xffff >= 0x01) {
|
||||
if ( (eax & 0xffff) >= 0x01) {
|
||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
||||
if ((edx & (1 << 30)) == 0 || (edx & (1 << 31)) == 0)
|
||||
return NULL;
|
||||
|
@ -333,11 +339,79 @@ char *gotoblas_corename(void) {
|
|||
return corename[0];
|
||||
}
|
||||
|
||||
|
||||
static gotoblas_t *force_coretype(char *coretype){
|
||||
|
||||
int i ;
|
||||
int found = -1;
|
||||
char message[128];
|
||||
char mname[20];
|
||||
|
||||
for ( i=1 ; i <= 20; i++)
|
||||
{
|
||||
if (!strncasecmp(coretype,corename[i],20))
|
||||
{
|
||||
found = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found < 0)
|
||||
{
|
||||
strncpy(mname,coretype,20);
|
||||
sprintf(message, "Core not found: %s\n",mname);
|
||||
openblas_warning(1, message);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
switch (found)
|
||||
{
|
||||
|
||||
case 20: return (&gotoblas_HASWELL);
|
||||
case 19: return (&gotoblas_PILEDRIVER);
|
||||
case 18: return (&gotoblas_BULLDOZER);
|
||||
case 17: return (&gotoblas_BOBCAT);
|
||||
case 16: return (&gotoblas_SANDYBRIDGE);
|
||||
case 15: return (&gotoblas_NANO);
|
||||
case 14: return (&gotoblas_BARCELONA);
|
||||
case 13: return (&gotoblas_OPTERON);
|
||||
case 12: return (&gotoblas_OPTERON_SSE3);
|
||||
case 11: return (&gotoblas_ATHLON);
|
||||
case 10: return (&gotoblas_NEHALEM);
|
||||
case 9: return (&gotoblas_DUNNINGTON);
|
||||
case 8: return (&gotoblas_PENRYN);
|
||||
case 7: return (&gotoblas_CORE2);
|
||||
case 6: return (&gotoblas_ATOM);
|
||||
case 5: return (&gotoblas_BANIAS);
|
||||
case 4: return (&gotoblas_PRESCOTT);
|
||||
case 3: return (&gotoblas_NORTHWOOD);
|
||||
case 2: return (&gotoblas_COPPERMINE);
|
||||
case 1: return (&gotoblas_KATMAI);
|
||||
}
|
||||
return(NULL);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void gotoblas_dynamic_init(void) {
|
||||
|
||||
char coremsg[128];
|
||||
char coren[22];
|
||||
char *p;
|
||||
|
||||
|
||||
if (gotoblas) return;
|
||||
|
||||
p = getenv("OPENBLAS_CORETYPE");
|
||||
if ( p )
|
||||
{
|
||||
gotoblas = force_coretype(p);
|
||||
}
|
||||
else
|
||||
{
|
||||
gotoblas = get_coretype();
|
||||
}
|
||||
|
||||
#ifdef ARCH_X86
|
||||
if (gotoblas == NULL) gotoblas = &gotoblas_KATMAI;
|
||||
|
@ -355,6 +429,9 @@ void gotoblas_dynamic_init(void) {
|
|||
#endif
|
||||
|
||||
if (gotoblas && gotoblas -> init) {
|
||||
strncpy(coren,gotoblas_corename(),20);
|
||||
sprintf(coremsg, "Core: %s\n",coren);
|
||||
openblas_warning(2, coremsg);
|
||||
gotoblas -> init();
|
||||
} else {
|
||||
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||
|
|
|
@ -698,11 +698,11 @@ void gotoblas_affinity_init(void) {
|
|||
#ifdef USE_OPENMP
|
||||
numprocs = 0;
|
||||
#else
|
||||
numprocs = readenv("OPENBLAS_NUM_THREADS");
|
||||
if (numprocs == 0) numprocs = readenv("GOTO_NUM_THREADS");
|
||||
numprocs = readenv_atoi("OPENBLAS_NUM_THREADS");
|
||||
if (numprocs == 0) numprocs = readenv_atoi("GOTO_NUM_THREADS");
|
||||
#endif
|
||||
|
||||
if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS");
|
||||
if (numprocs == 0) numprocs = readenv_atoi("OMP_NUM_THREADS");
|
||||
|
||||
numnodes = 1;
|
||||
|
||||
|
@ -793,7 +793,7 @@ void gotoblas_affinity_init(void) {
|
|||
|
||||
setup_mempolicy();
|
||||
|
||||
if (readenv("OPENBLAS_MAIN_FREE") || readenv("GOTOBLAS_MAIN_FREE")) {
|
||||
if (readenv_atoi("OPENBLAS_MAIN_FREE") || readenv_atoi("GOTOBLAS_MAIN_FREE")) {
|
||||
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
||||
}
|
||||
|
||||
|
|
|
@ -273,7 +273,7 @@ void openblas_fork_handler()
|
|||
}
|
||||
|
||||
int blas_get_cpu_number(void){
|
||||
char *p;
|
||||
env_var_t p;
|
||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
int max_num;
|
||||
#endif
|
||||
|
@ -288,21 +288,18 @@ int blas_get_cpu_number(void){
|
|||
|
||||
blas_goto_num = 0;
|
||||
#ifndef USE_OPENMP
|
||||
p = getenv("OPENBLAS_NUM_THREADS");
|
||||
if (p) blas_goto_num = atoi(p);
|
||||
if (readenv(p,"OPENBLAS_NUM_THREADS")) blas_goto_num = atoi(p);
|
||||
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||
|
||||
if (blas_goto_num == 0) {
|
||||
p = getenv("GOTO_NUM_THREADS");
|
||||
if (p) blas_goto_num = atoi(p);
|
||||
if (readenv(p,"GOTO_NUM_THREADS")) blas_goto_num = atoi(p);
|
||||
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
blas_omp_num = 0;
|
||||
p = getenv("OMP_NUM_THREADS");
|
||||
if (p) blas_omp_num = atoi(p);
|
||||
if (readenv(p,"OMP_NUM_THREADS")) blas_omp_num = atoi(p);
|
||||
if (blas_omp_num < 0) blas_omp_num = 0;
|
||||
|
||||
if (blas_goto_num > 0) blas_num_threads = blas_goto_num;
|
||||
|
@ -769,16 +766,23 @@ static void *alloc_hugetlb(void *address){
|
|||
tp.PrivilegeCount = 1;
|
||||
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
|
||||
|
||||
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) return (void *) -1;
|
||||
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) {
|
||||
CloseHandle(hToken);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (AdjustTokenPrivileges(hToken, FALSE, (PTOKEN_PRIVILEGES)&tp, 0, NULL, NULL) != TRUE) return (void *) -1;
|
||||
if (AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) != TRUE) {
|
||||
CloseHandle(hToken);
|
||||
return -1;
|
||||
}
|
||||
|
||||
map_address = (void *)VirtualAlloc(address,
|
||||
BUFFER_SIZE,
|
||||
MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT,
|
||||
PAGE_READWRITE);
|
||||
|
||||
AdjustTokenPrivileges(hToken, TRUE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, NULL);
|
||||
tp.Privileges[0].Attributes = 0;
|
||||
AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL);
|
||||
|
||||
if (map_address == (void *)NULL) map_address = (void *)-1;
|
||||
|
||||
|
|
|
@ -35,9 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
int openblas_verbose() {
|
||||
int ret=0;
|
||||
char *p;
|
||||
p = getenv("OPENBLAS_VERBOSE");
|
||||
if (p) ret = atoi(p);
|
||||
env_var_t p;
|
||||
if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p);
|
||||
if(ret<0) ret=0;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -248,7 +248,7 @@ int get_L2_size(void){
|
|||
|
||||
void blas_set_parameter(void){
|
||||
|
||||
char *p;
|
||||
env_var_t p;
|
||||
int factor;
|
||||
int size = get_L2_size();
|
||||
|
||||
|
@ -463,9 +463,8 @@ void blas_set_parameter(void){
|
|||
#endif
|
||||
#endif
|
||||
|
||||
p = getenv("GOTO_BLOCK_FACTOR");
|
||||
|
||||
if (p) {
|
||||
if (readenv(p,"GOTO_BLOCK_FACTOR")) {
|
||||
factor = atoi(p);
|
||||
if (factor < 10) factor = 10;
|
||||
if (factor > 200) factor = 200;
|
||||
|
|
|
@ -84,8 +84,8 @@ dll : ../$(LIBDLLNAME)
|
|||
../$(LIBDLLNAME) : ../$(LIBNAME) libopenblas.def dllinit.$(SUFFIX)
|
||||
$(RANLIB) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) libopenblas.def dllinit.$(SUFFIX) \
|
||||
-shared -o ../$(LIBDLLNAME) -Wl,--out-implib,../$(LIBPREFIX).lib \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB)
|
||||
-shared -o ../$(LIBDLLNAME) -Wl,--out-implib,../$(LIBDLLNAME).a \
|
||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
|
||||
|
||||
libopenblas.def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||
|
|
|
@ -22,7 +22,9 @@
|
|||
zhbmv,zhemm,zhemv,zher2,zher2k,zher,zherk,zhpmv,zhpr2,
|
||||
zhpr,zrotg,zscal,zswap,zsymm,zsyr2k,zsyrk,ztbmv,
|
||||
ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv, zsymv,
|
||||
xerbla);
|
||||
xerbla,
|
||||
saxpby,daxpby,caxpby,zaxpby
|
||||
);
|
||||
|
||||
@cblasobjs = (
|
||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
||||
|
@ -49,7 +51,9 @@
|
|||
cblas_zhemv, cblas_zher2, cblas_zher2k, cblas_zher, cblas_zherk, cblas_zhpmv, cblas_zhpr2,
|
||||
cblas_zhpr, cblas_zscal, cblas_zswap, cblas_zsymm, cblas_zsyr2k, cblas_zsyrk,
|
||||
cblas_ztbmv, cblas_ztbsv, cblas_ztpmv, cblas_ztpsv, cblas_ztrmm, cblas_ztrmv, cblas_ztrsm,
|
||||
cblas_ztrsv, cblas_cdotc_sub, cblas_cdotu_sub, cblas_zdotc_sub, cblas_zdotu_sub );
|
||||
cblas_ztrsv, cblas_cdotc_sub, cblas_cdotu_sub, cblas_zdotc_sub, cblas_zdotu_sub,
|
||||
cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby
|
||||
);
|
||||
|
||||
@exblasobjs = (
|
||||
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
|
||||
|
|
|
@ -27,6 +27,7 @@ SBLAS1OBJS = \
|
|||
smax.$(SUFFIX) samax.$(SUFFIX) ismax.$(SUFFIX) isamax.$(SUFFIX) \
|
||||
smin.$(SUFFIX) samin.$(SUFFIX) ismin.$(SUFFIX) isamin.$(SUFFIX) \
|
||||
srot.$(SUFFIX) srotg.$(SUFFIX) srotm.$(SUFFIX) srotmg.$(SUFFIX) \
|
||||
saxpby.$(SUFFIX)
|
||||
|
||||
SBLAS2OBJS = \
|
||||
sgemv.$(SUFFIX) sger.$(SUFFIX) \
|
||||
|
@ -39,7 +40,9 @@ SBLAS2OBJS = \
|
|||
|
||||
SBLAS3OBJS = \
|
||||
sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \
|
||||
strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX)
|
||||
strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \
|
||||
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)
|
||||
|
||||
|
||||
DBLAS1OBJS = \
|
||||
daxpy.$(SUFFIX) dswap.$(SUFFIX) \
|
||||
|
@ -49,6 +52,7 @@ DBLAS1OBJS = \
|
|||
dmax.$(SUFFIX) damax.$(SUFFIX) idmax.$(SUFFIX) idamax.$(SUFFIX) \
|
||||
dmin.$(SUFFIX) damin.$(SUFFIX) idmin.$(SUFFIX) idamin.$(SUFFIX) \
|
||||
drot.$(SUFFIX) drotg.$(SUFFIX) drotm.$(SUFFIX) drotmg.$(SUFFIX) \
|
||||
daxpby.$(SUFFIX)
|
||||
|
||||
DBLAS2OBJS = \
|
||||
dgemv.$(SUFFIX) dger.$(SUFFIX) \
|
||||
|
@ -61,7 +65,8 @@ DBLAS2OBJS = \
|
|||
|
||||
DBLAS3OBJS = \
|
||||
dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \
|
||||
dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX)
|
||||
dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \
|
||||
domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)
|
||||
|
||||
CBLAS1OBJS = \
|
||||
caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \
|
||||
|
@ -71,6 +76,7 @@ CBLAS1OBJS = \
|
|||
scamax.$(SUFFIX) icamax.$(SUFFIX) \
|
||||
scamin.$(SUFFIX) icamin.$(SUFFIX) \
|
||||
csrot.$(SUFFIX) crotg.$(SUFFIX) \
|
||||
caxpby.$(SUFFIX)
|
||||
|
||||
CBLAS2OBJS = \
|
||||
cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \
|
||||
|
@ -87,7 +93,8 @@ CBLAS2OBJS = \
|
|||
CBLAS3OBJS = \
|
||||
cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \
|
||||
ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \
|
||||
chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX)
|
||||
chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \
|
||||
comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)
|
||||
|
||||
ZBLAS1OBJS = \
|
||||
zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \
|
||||
|
@ -97,6 +104,7 @@ ZBLAS1OBJS = \
|
|||
dzamax.$(SUFFIX) izamax.$(SUFFIX) \
|
||||
dzamin.$(SUFFIX) izamin.$(SUFFIX) \
|
||||
zdrot.$(SUFFIX) zrotg.$(SUFFIX) \
|
||||
zaxpby.$(SUFFIX)
|
||||
|
||||
ZBLAS2OBJS = \
|
||||
zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \
|
||||
|
@ -113,7 +121,8 @@ ZBLAS2OBJS = \
|
|||
ZBLAS3OBJS = \
|
||||
zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \
|
||||
ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \
|
||||
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX)
|
||||
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
|
||||
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)
|
||||
|
||||
ifdef SUPPORT_GEMM3M
|
||||
|
||||
|
@ -246,7 +255,7 @@ CSBLAS1OBJS = \
|
|||
cblas_isamax.$(SUFFIX) cblas_sasum.$(SUFFIX) cblas_saxpy.$(SUFFIX) \
|
||||
cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \
|
||||
cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \
|
||||
cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX)
|
||||
cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) cblas_saxpby.$(SUFFIX)
|
||||
|
||||
CSBLAS2OBJS = \
|
||||
cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \
|
||||
|
@ -262,7 +271,7 @@ CDBLAS1OBJS = \
|
|||
cblas_idamax.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \
|
||||
cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \
|
||||
cblas_drot.$(SUFFIX) cblas_drotg.$(SUFFIX) cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \
|
||||
cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX)
|
||||
cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) cblas_daxpby.$(SUFFIX)
|
||||
|
||||
CDBLAS2OBJS = \
|
||||
cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \
|
||||
|
@ -280,7 +289,8 @@ CCBLAS1OBJS = \
|
|||
cblas_cdotc.$(SUFFIX) cblas_cdotu.$(SUFFIX) \
|
||||
cblas_cdotc_sub.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) \
|
||||
cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \
|
||||
cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX)
|
||||
cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \
|
||||
cblas_caxpby.$(SUFFIX)
|
||||
|
||||
CCBLAS2OBJS = \
|
||||
cblas_cgemv.$(SUFFIX) cblas_cgerc.$(SUFFIX) cblas_cgeru.$(SUFFIX) \
|
||||
|
@ -301,7 +311,8 @@ CZBLAS1OBJS = \
|
|||
cblas_zdotc.$(SUFFIX) cblas_zdotu.$(SUFFIX) \
|
||||
cblas_zdotc_sub.$(SUFFIX) cblas_zdotu_sub.$(SUFFIX) \
|
||||
cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \
|
||||
cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX)
|
||||
cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \
|
||||
cblas_zaxpby.$(SUFFIX)
|
||||
|
||||
CZBLAS2OBJS = \
|
||||
cblas_zgemv.$(SUFFIX) cblas_zgerc.$(SUFFIX) cblas_zgeru.$(SUFFIX) \
|
||||
|
@ -1991,3 +2002,55 @@ zlarf.$(SUFFIX) zlarf.$(PSUFFIX) : larf.c
|
|||
xlarf.$(SUFFIX) xlarf.$(PSUFFIX) : larf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
||||
############# BLAS EXTENSIONS #####################################
|
||||
|
||||
# Each rule below lists several targets that share one source file and one
# recipe.  Make treats every listed target as an independent rule, and the
# recipe writes only $(@F), so each object is still compiled separately.

# axpby: real (s/d) objects come from axpby.c, complex (c/z) from zaxpby.c.
saxpby.$(SUFFIX) saxpby.$(PSUFFIX) \
daxpby.$(SUFFIX) daxpby.$(PSUFFIX) : axpby.c
	$(CC) $(CFLAGS) -c $< -o $(@F)

caxpby.$(SUFFIX) caxpby.$(PSUFFIX) \
zaxpby.$(SUFFIX) zaxpby.$(PSUFFIX) : zaxpby.c
	$(CC) $(CFLAGS) -c $< -o $(@F)

# CBLAS entry points: same sources, compiled with -DCBLAS.
cblas_saxpby.$(SUFFIX) cblas_saxpby.$(PSUFFIX) \
cblas_daxpby.$(SUFFIX) cblas_daxpby.$(PSUFFIX) : axpby.c
	$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)

cblas_caxpby.$(SUFFIX) cblas_caxpby.$(PSUFFIX) \
cblas_zaxpby.$(SUFFIX) cblas_zaxpby.$(PSUFFIX) : zaxpby.c
	$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)

# omatcopy: out-of-place matrix copy objects.
somatcopy.$(SUFFIX) somatcopy.$(PSUFFIX) \
domatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c
	$(CC) -c $(CFLAGS) $< -o $(@F)

comatcopy.$(SUFFIX) comatcopy.$(PSUFFIX) \
zomatcopy.$(SUFFIX) zomatcopy.$(PSUFFIX) : zomatcopy.c
	$(CC) -c $(CFLAGS) $< -o $(@F)

# imatcopy: in-place matrix copy objects.
simatcopy.$(SUFFIX) simatcopy.$(PSUFFIX) \
dimatcopy.$(SUFFIX) dimatcopy.$(PSUFFIX) : imatcopy.c
	$(CC) -c $(CFLAGS) $< -o $(@F)

cimatcopy.$(SUFFIX) cimatcopy.$(PSUFFIX) \
zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c
	$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/******************************************************************
|
||||
2014/06/07 Saar
|
||||
******************************************************************/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY)
|
||||
{
|
||||
|
||||
BLASLONG n = *N;
|
||||
BLASLONG incx = *INCX;
|
||||
BLASLONG incy = *INCY;
|
||||
FLOAT alpha = *ALPHA;
|
||||
FLOAT beta = *BETA;
|
||||
|
||||
#else
|
||||
|
||||
void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT beta, FLOAT *y, blasint incy)
|
||||
{
|
||||
|
||||
#endif
|
||||
|
||||
if (n <= 0) return;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
if (incx < 0) x -= (n - 1) * incx;
|
||||
if (incy < 0) y -= (n - 1) * incy;
|
||||
|
||||
AXPBY_K(n, alpha, x, incx, beta, y, incy);
|
||||
|
||||
FUNCTION_PROFILE_END(1, 2 * n, 2 * n);
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -400,14 +400,63 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
|||
mode |= (transa << BLAS_TRANSA_SHIFT);
|
||||
mode |= (transb << BLAS_TRANSB_SHIFT);
|
||||
|
||||
int nthreads_max = num_cpu_avail(3);
|
||||
int nthreads_avail = nthreads_max;
|
||||
|
||||
#ifndef COMPLEX
|
||||
double MNK = (double) args.m * (double) args.n * (double) args.k;
|
||||
if ( MNK <= (1024.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
|
||||
nthreads_max = 1;
|
||||
else
|
||||
{
|
||||
if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
|
||||
{
|
||||
nthreads_max = 4;
|
||||
if ( args.m < 16 * GEMM_MULTITHREAD_THRESHOLD )
|
||||
{
|
||||
nthreads_max = 2;
|
||||
if ( args.m < 3 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1;
|
||||
if ( args.n < 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1;
|
||||
if ( args.k < 3 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( args.n <= 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
double MNK = (double) args.m * (double) args.n * (double) args.k;
|
||||
if ( MNK <= (256.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
|
||||
nthreads_max = 1;
|
||||
else
|
||||
{
|
||||
if ( MNK <= (16384.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
|
||||
{
|
||||
nthreads_max = 4;
|
||||
if ( args.m < 3 * GEMM_MULTITHREAD_THRESHOLD )
|
||||
{
|
||||
nthreads_max = 2;
|
||||
if ( args.m <= 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1;
|
||||
if ( args.n < 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1;
|
||||
if ( args.k < 1 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( args.n < 2 * GEMM_MULTITHREAD_THRESHOLD ) nthreads_max = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
args.common = NULL;
|
||||
|
||||
if(args.m <= GEMM_MULTITHREAD_THRESHOLD || args.n <= GEMM_MULTITHREAD_THRESHOLD
|
||||
|| args.k <=GEMM_MULTITHREAD_THRESHOLD){
|
||||
args.nthreads = 1;
|
||||
}else{
|
||||
args.nthreads = num_cpu_avail(3);
|
||||
}
|
||||
if ( nthreads_max > nthreads_avail )
|
||||
args.nthreads = nthreads_avail;
|
||||
else
|
||||
args.nthreads = nthreads_max;
|
||||
|
||||
|
||||
if (args.nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
|
|||
blasint incy = *INCY;
|
||||
blasint lda = *LDA;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -107,7 +107,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
FLOAT *buffer;
|
||||
blasint info, t;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -167,15 +167,16 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
} else {
|
||||
|
||||
GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads);
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***********************************************************
|
||||
* 2014/06/10 Saar
|
||||
***********************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define ERROR_NAME "DIMATCOPY"
|
||||
#else
|
||||
#define ERROR_NAME "SIMATCOPY"
|
||||
#endif
|
||||
|
||||
#define BlasRowMajor 0
|
||||
#define BlasColMajor 1
|
||||
#define BlasNoTrans 0
|
||||
#define BlasTrans 1
|
||||
|
||||
#undef malloc
|
||||
#undef free
|
||||
|
||||
/*
 * In-place scaled matrix copy / transpose (real precisions).
 *
 * ORDER : 'C' (column major) or 'R' (row major), case-insensitive.
 * TRANS : 'N' or 'R' -> no transpose, 'T' or 'C' -> transpose.
 * rows, cols : dimensions of the input matrix held in a.
 * alpha : scale factor applied during the copy.
 * a     : matrix, overwritten with the result.
 * lda   : leading dimension of a on input.
 * ldb   : leading dimension of a on output.
 *
 * Invalid arguments are reported through xerbla with the 1-based position
 * of the first offending argument and the routine returns without touching
 * a.  The operation goes through a temporary buffer: the kernel writes the
 * scaled (and possibly transposed) matrix into the buffer, which is then
 * copied back over a with unit scale.
 */
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
{

	char Order, Trans;
	int order=-1,trans=-1;
	blasint info = -1;
	FLOAT *b;
	size_t msize;

	Order = *ORDER;
	Trans = *TRANS;

	TOUPPER(Order);
	TOUPPER(Trans);

	if ( Order == 'C' ) order = BlasColMajor;
	if ( Order == 'R' ) order = BlasRowMajor;
	if ( Trans == 'N' ) trans = BlasNoTrans;
	if ( Trans == 'R' ) trans = BlasNoTrans;
	if ( Trans == 'T' ) trans = BlasTrans;
	if ( Trans == 'C' ) trans = BlasTrans;

	/* Checks run from the last argument to the first so that the lowest
	   numbered invalid argument is the one finally left in info. */
	if ( order == BlasColMajor)
	{
		if ( trans == BlasNoTrans  &&  *ldb < *rows ) info = 9;
		if ( trans == BlasTrans    &&  *ldb < *cols ) info = 9;
	}
	if ( order == BlasRowMajor)
	{
		if ( trans == BlasNoTrans  &&  *ldb < *cols ) info = 9;
		if ( trans == BlasTrans    &&  *ldb < *rows ) info = 9;
	}

	if ( order == BlasColMajor  &&  *lda < *rows ) info = 7;
	if ( order == BlasRowMajor  &&  *lda < *cols ) info = 7;
	if ( *cols <= 0 ) info = 4;
	if ( *rows <= 0 ) info = 3;
	if ( trans < 0 )  info = 2;
	if ( order < 0 )  info = 1;

	if (info >= 0) {
		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
		return;
	}

	/* The scratch matrix is stored with leading dimension ldb and holds
	   either `cols` or `rows` lines of that length depending on layout and
	   transposition, so max(rows, cols) * ldb elements always suffice.
	   Sizing it from lda/ldb alone (as before) under-allocates when one
	   dimension is much larger than the leading dimensions.  The product is
	   evaluated in size_t to avoid overflow in blasint arithmetic. */
	if ( *rows > *cols )
		msize = (size_t)(*rows) * (size_t)(*ldb) * sizeof(FLOAT);
	else
		msize = (size_t)(*cols) * (size_t)(*ldb) * sizeof(FLOAT);

	b = malloc(msize);
	if ( b == NULL )
	{
		printf("Memory alloc failed\n");
		exit(1);
	}

	if ( order == BlasColMajor )
	{
		if ( trans == BlasNoTrans )
		{
			OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb );
			OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0 , b, *ldb, a, *ldb );
		}
		else
		{
			OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb );
			/* b now holds the transposed (cols x rows) matrix, so the
			   copy-back must use the swapped dimensions. */
			OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
		}
	}
	else
	{
		if ( trans == BlasNoTrans )
		{
			OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb );
			OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *ldb, a, *ldb );
		}
		else
		{
			OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb );
			/* Same as above: the scratch matrix is cols x rows here. */
			OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
		}
	}

	free(b);
	return;

}
|
||||
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***********************************************************
|
||||
* 2014/06/09 Saar
|
||||
***********************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define ERROR_NAME "DOMATCOPY"
|
||||
#else
|
||||
#define ERROR_NAME "SOMATCOPY"
|
||||
#endif
|
||||
|
||||
#define BlasRowMajor 0
|
||||
#define BlasColMajor 1
|
||||
#define BlasNoTrans 0
|
||||
#define BlasTrans 1
|
||||
|
||||
/*
 * Out-of-place scaled matrix copy / transpose (real precisions).
 *
 * ORDER : 'C' (column major) or 'R' (row major), case-insensitive.
 * TRANS : 'N' or 'R' -> no transpose, 'T' or 'C' -> transpose.
 * rows, cols : dimensions of the source matrix a.
 * alpha : scale factor handed to the copy kernel.
 * a, lda : source matrix and its leading dimension.
 * b, ldb : destination matrix and its leading dimension.
 *
 * The first invalid argument (by position) is reported through xerbla and
 * the routine returns without calling a kernel.
 */
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb)
{

	char order_ch = *ORDER;
	char trans_ch = *TRANS;
	int order = -1;
	int trans = -1;
	blasint info = -1;

	TOUPPER(order_ch);
	TOUPPER(trans_ch);

	switch ( order_ch )
	{
		case 'C': order = BlasColMajor; break;
		case 'R': order = BlasRowMajor; break;
		default:  break;
	}

	switch ( trans_ch )
	{
		case 'N':
		case 'R': trans = BlasNoTrans; break;
		case 'T':
		case 'C': trans = BlasTrans;   break;
		default:  break;
	}

	/* Validate in argument order; the first failure decides info. */
	if ( order < 0 )
		info = 1;
	else if ( trans < 0 )
		info = 2;
	else if ( *rows <= 0 )
		info = 3;
	else if ( *cols <= 0 )
		info = 4;
	else if ( *lda < ( order == BlasColMajor ? *rows : *cols ) )
		info = 7;
	else
	{
		/* Minimum ldb is rows for (column major, no transpose) and
		   (row major, transpose); it is cols for the other two cases. */
		int same = ( order == BlasColMajor ) == ( trans == BlasNoTrans );
		if ( *ldb < ( same ? *rows : *cols ) ) info = 9;
	}

	if (info >= 0) {
		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
		return;
	}

	/* Dispatch to the kernel matching layout and transposition. */
	if ( order == BlasColMajor  &&  trans == BlasNoTrans )
	{
		OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb );
		return;
	}

	if ( order == BlasColMajor )
	{
		OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb );
		return;
	}

	if ( trans == BlasNoTrans )
	{
		OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb );
		return;
	}

	OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb );

}
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
|
|||
|
||||
#endif
|
||||
|
||||
FLOAT du, dp1, dp2, dq2, dq1, dh11, dh21, dh12, dh22, dflag, dtemp;
|
||||
FLOAT du, dp1, dp2, dq2, dq1, dh11=ZERO, dh21=ZERO, dh12=ZERO, dh22=ZERO, dflag=-ONE, dtemp;
|
||||
|
||||
if(*dd1 < ZERO)
|
||||
{
|
||||
|
|
|
@ -78,6 +78,9 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
|
|||
#ifdef SMP
|
||||
nthreads = num_cpu_avail(1);
|
||||
|
||||
if (n <= 1048576 )
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/**********************************************************************
|
||||
2014/06/07 Saar
|
||||
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY)
|
||||
{
|
||||
|
||||
blasint n = *N;
|
||||
blasint incx = *INCX;
|
||||
blasint incy = *INCY;
|
||||
|
||||
#else
|
||||
|
||||
void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *y, blasint incy)
|
||||
{
|
||||
|
||||
#endif
|
||||
|
||||
if (n <= 0) return;
|
||||
|
||||
FLOAT alpha_r = *(ALPHA + 0);
|
||||
FLOAT alpha_i = *(ALPHA + 1);
|
||||
FLOAT beta_r = *(BETA + 0);
|
||||
FLOAT beta_i = *(BETA + 1);
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
if (incx < 0) x -= (n - 1) * incx * 2;
|
||||
if (incy < 0) y -= (n - 1) * incy * 2;
|
||||
|
||||
AXPBY_K (n, alpha_r, alpha_i, x, incx, beta_r, beta_i, y, incy);
|
||||
|
||||
FUNCTION_PROFILE_END(4, 2 * n, 2 * n);
|
||||
|
||||
return;
|
||||
|
||||
}
|
|
@ -109,7 +109,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
|
|||
blasint incy = *INCY;
|
||||
blasint lda = *LDA;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -144,7 +144,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
FLOAT *buffer;
|
||||
blasint info, t;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -205,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
@ -221,7 +221,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
|
||||
} else {
|
||||
|
||||
|
|
|
@ -0,0 +1,185 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***********************************************************
|
||||
* 2014/06/10 Saar
|
||||
***********************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define ERROR_NAME "ZIMATCOPY"
|
||||
#else
|
||||
#define ERROR_NAME "CIMATCOPY"
|
||||
#endif
|
||||
|
||||
#define BlasRowMajor 0
|
||||
#define BlasColMajor 1
|
||||
#define BlasNoTrans 0
|
||||
#define BlasTrans 1
|
||||
#define BlasTransConj 2
|
||||
#define BlasConj 3
|
||||
|
||||
/*
 * In-place scaled copy / transposition of a complex matrix (by-reference
 * interface).
 *
 *   ORDER  'C' selects column-major, 'R' row-major (after TOUPPER).
 *   TRANS  'N' plain copy, 'T' transpose, 'C' conjugate transpose,
 *          'R' conjugate without transposing (after TOUPPER).
 *   rows, cols  dimensions of the input matrix; both must be > 0.
 *   alpha  two FLOATs, forwarded to the kernels as alpha[0], alpha[1].
 *   a      matrix; read with leading dimension *lda, overwritten with the
 *          result laid out with leading dimension *ldb.
 *
 * Argument errors are reported through xerbla with these codes and the
 * routine returns without modifying a:
 *   1 order, 2 trans, 3 rows, 4 cols, 7 lda, 9 ldb.
 * NOTE(review): ldb is the 8th parameter of this routine but is reported as
 * code 9 (the value used by the out-of-place variant); kept unchanged for
 * compatibility -- confirm whether callers rely on it.
 *
 * The operation is performed out of place into a scratch buffer b and then
 * copied back into a with alpha = 1.  If the scratch buffer cannot be
 * allocated the process is terminated with exit(1).
 */
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
{

	char Order, Trans;
	int order=-1,trans=-1;
	blasint info = -1;
	FLOAT *b;
	size_t msize;

	Order = *ORDER;
	Trans = *TRANS;

	TOUPPER(Order);
	TOUPPER(Trans);

	if ( Order == 'C' ) order = BlasColMajor;
	if ( Order == 'R' ) order = BlasRowMajor;
	if ( Trans == 'N' ) trans = BlasNoTrans;
	if ( Trans == 'T' ) trans = BlasTrans;
	if ( Trans == 'C' ) trans = BlasTransConj;
	if ( Trans == 'R' ) trans = BlasConj;

	/* ldb must cover the length of one stored vector of the RESULT. */
	if ( order == BlasColMajor)
	{
        	if ( trans == BlasNoTrans      &&  *ldb < *rows ) info = 9;
        	if ( trans == BlasConj         &&  *ldb < *rows ) info = 9;
        	if ( trans == BlasTrans        &&  *ldb < *cols ) info = 9;
        	if ( trans == BlasTransConj    &&  *ldb < *cols ) info = 9;
	}
	if ( order == BlasRowMajor)
	{
        	if ( trans == BlasNoTrans    &&  *ldb < *cols ) info = 9;
        	if ( trans == BlasConj       &&  *ldb < *cols ) info = 9;
        	if ( trans == BlasTrans      &&  *ldb < *rows ) info = 9;
        	if ( trans == BlasTransConj  &&  *ldb < *rows ) info = 9;
	}

	/* Later assignments win, so the lowest-numbered error is reported. */
	if ( order == BlasColMajor &&  *lda < *rows ) info = 7;
	if ( order == BlasRowMajor &&  *lda < *cols ) info = 7;
	if ( *cols <= 0 ) info = 4;
	if ( *rows <= 0 ) info = 3;
	if ( trans < 0  ) info = 2;
	if ( order < 0  ) info = 1;

	if (info >= 0) {
    		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    		return;
  	}

	/*
	 * The scratch matrix holds the result with leading dimension *ldb.
	 * Depending on order/trans it consists of either *rows or *cols stored
	 * vectors, so max(rows, cols) * ldb complex elements is always enough.
	 * (Sizing it from lda/ldb alone, as was done before, overflowed the
	 * buffer whenever the other dimension was larger.)  The product is
	 * evaluated in size_t so that it cannot wrap in blasint arithmetic.
	 */
	msize = (size_t)( ( *rows > *cols ) ? *rows : *cols )
	      * (size_t)(*ldb) * sizeof(FLOAT) * 2;

	b = malloc(msize);
	if ( b == NULL )
	{
		printf("Memory alloc failed\n");
		exit(1);
	}

	/*
	 * Step 1: out-of-place operation a -> b.
	 * Step 2: plain copy b -> a with alpha = (1, 0).  For the transposing
	 * variants the result is cols x rows, so the copy-back swaps the two
	 * dimensions.
	 */
	if ( order == BlasColMajor )
	{

		if ( trans == BlasNoTrans )
		{
			OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
			OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
		}
		else if ( trans == BlasConj )
		{
			OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
			OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
		}
		else if ( trans == BlasTrans )
		{
			OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
			OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
		}
		else if ( trans == BlasTransConj )
		{
			OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
			OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
		}

	}
	else
	{

		if ( trans == BlasNoTrans )
		{
			OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
			OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
		}
		else if ( trans == BlasConj )
		{
			OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
			OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
		}
		else if ( trans == BlasTrans )
		{
			OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
			OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
		}
		else if ( trans == BlasTransConj )
		{
			OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
			OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
		}

	}

	/* Single release point for the scratch buffer. */
	free(b);
	return;

}
|
||||
|
||||
|
|
@ -0,0 +1,154 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***********************************************************
|
||||
* 2014/06/09 Saar
|
||||
***********************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define ERROR_NAME "ZOMATCOPY"
|
||||
#else
|
||||
#define ERROR_NAME "COMATCOPY"
|
||||
#endif
|
||||
|
||||
#define BlasRowMajor 0
|
||||
#define BlasColMajor 1
|
||||
#define BlasNoTrans 0
|
||||
#define BlasTrans 1
|
||||
#define BlasTransConj 2
|
||||
#define BlasConj 3
|
||||
|
||||
/*
 * Out-of-place scaled copy / transposition of a complex matrix (by-reference
 * interface): b receives the result of applying TRANS and alpha to a.
 *
 *   ORDER  'C' column-major, 'R' row-major (after TOUPPER).
 *   TRANS  'N' copy, 'T' transpose, 'C' conjugate transpose,
 *          'R' conjugate without transposing (after TOUPPER).
 *   alpha  two FLOATs, forwarded to the kernel as alpha[0], alpha[1].
 *
 * Argument errors are reported through xerbla (1 order, 2 trans, 3 rows,
 * 4 cols, 7 lda, 9 ldb); the lowest code wins and b is left untouched.
 */
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb)
{

	blasint info = -1;
	int order = -1;
	int trans = -1;
	char ord_ch = *ORDER;
	char trn_ch = *TRANS;

	TOUPPER(ord_ch);
	TOUPPER(trn_ch);

	switch ( ord_ch )
	{
		case 'C': order = BlasColMajor; break;
		case 'R': order = BlasRowMajor; break;
		default:  break;
	}

	switch ( trn_ch )
	{
		case 'N': trans = BlasNoTrans;   break;
		case 'T': trans = BlasTrans;     break;
		case 'C': trans = BlasTransConj; break;
		case 'R': trans = BlasConj;      break;
		default:  break;
	}

	/*
	 * ldb has to cover one stored vector of the result: *rows for
	 * column-major without transposition or row-major with transposition,
	 * *cols in the two remaining combinations.
	 */
	if ( order >= 0 && trans >= 0 )
	{
		int swaps_dims = ( trans == BlasTrans || trans == BlasTransConj );
		blasint ldb_min = ( ( order == BlasColMajor ) != swaps_dims ) ? *rows : *cols;

		if ( *ldb < ldb_min ) info = 9;
	}

	/* lda has to cover one stored vector of the input. */
	if ( order >= 0 )
	{
		blasint lda_min = ( order == BlasColMajor ) ? *rows : *cols;

		if ( *lda < lda_min ) info = 7;
	}

	/* Checked last so that the lowest-numbered problem is the one reported. */
	if ( *cols <= 0 ) info = 4;
	if ( *rows <= 0 ) info = 3;
	if ( trans < 0  ) info = 2;
	if ( order < 0  ) info = 1;

	if (info >= 0) {
    		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    		return;
  	}

	if ( order == BlasColMajor )
	{
		switch ( trans )
		{
			case BlasNoTrans:
				OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
				break;
			case BlasConj:
				OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
				break;
			case BlasTrans:
				OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
				break;
			case BlasTransConj:
				OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
				break;
			default:
				break;
		}
	}
	else
	{
		switch ( trans )
		{
			case BlasNoTrans:
				OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
				break;
			case BlasConj:
				OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
				break;
			case BlasTrans:
				OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
				break;
			case BlasTransConj:
				OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
				break;
			default:
				break;
		}
	}

	return;

}
|
||||
|
||||
|
|
@ -90,6 +90,9 @@ void CNAME(blasint n, FLOAT alpha_r, FLOAT *x, blasint incx){
|
|||
#ifdef SMP
|
||||
nthreads = num_cpu_avail(1);
|
||||
|
||||
if ( n <= 1048576 )
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -432,18 +432,38 @@ ifndef LSAME_KERNEL
|
|||
LSAME_KERNEL = lsame.S
|
||||
endif
|
||||
|
||||
### AXPBY ###

# Default kernel sources for the four AXPBY variants.  Each default applies
# only when the variable has no (non-empty) value yet, so a makefile read
# earlier can select a different implementation.

# real kernels (single / double precision share one source)
ifndef SAXPBYKERNEL
SAXPBYKERNEL = ../arm/axpby.c
endif

ifndef DAXPBYKERNEL
DAXPBYKERNEL = ../arm/axpby.c
endif

# complex kernels (single / double precision share one source)
ifndef CAXPBYKERNEL
CAXPBYKERNEL = ../arm/zaxpby.c
endif

ifndef ZAXPBYKERNEL
ZAXPBYKERNEL = ../arm/zaxpby.c
endif
|
||||
|
||||
# Single-precision real level-1 kernel objects.  The list previously contained
# the snrm2/srot/sscal/sswap line twice (once without a trailing backslash),
# which terminated the assignment early and left the following lines dangling;
# only the continued form is kept so that saxpby_k is part of the list.
SBLASOBJS += \
	samax_k$(TSUFFIX).$(SUFFIX) samin_k$(TSUFFIX).$(SUFFIX) smax_k$(TSUFFIX).$(SUFFIX) smin_k$(TSUFFIX).$(SUFFIX) \
	isamax_k$(TSUFFIX).$(SUFFIX) isamin_k$(TSUFFIX).$(SUFFIX) ismax_k$(TSUFFIX).$(SUFFIX) ismin_k$(TSUFFIX).$(SUFFIX) \
	sasum_k$(TSUFFIX).$(SUFFIX) saxpy_k$(TSUFFIX).$(SUFFIX) scopy_k$(TSUFFIX).$(SUFFIX) \
	sdot_k$(TSUFFIX).$(SUFFIX) sdsdot_k$(TSUFFIX).$(SUFFIX) dsdot_k$(TSUFFIX).$(SUFFIX) \
	snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) \
	saxpby_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
# Double-precision real level-1 kernel objects.  The dnrm2/drot/dscal/dswap
# line appeared twice (once without a trailing backslash), which ended the
# assignment before daxpby_k; only the continued form is kept.
DBLASOBJS += \
	damax_k$(TSUFFIX).$(SUFFIX) damin_k$(TSUFFIX).$(SUFFIX) dmax_k$(TSUFFIX).$(SUFFIX) dmin_k$(TSUFFIX).$(SUFFIX) \
	idamax_k$(TSUFFIX).$(SUFFIX) idamin_k$(TSUFFIX).$(SUFFIX) idmax_k$(TSUFFIX).$(SUFFIX) idmin_k$(TSUFFIX).$(SUFFIX) \
	dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \
	dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \
	daxpby_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
QBLASOBJS += \
|
||||
qamax_k$(TSUFFIX).$(SUFFIX) qamin_k$(TSUFFIX).$(SUFFIX) qmax_k$(TSUFFIX).$(SUFFIX) qmin_k$(TSUFFIX).$(SUFFIX) \
|
||||
|
@ -455,13 +475,13 @@ CBLASOBJS += \
|
|||
camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \
|
||||
casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \
|
||||
cdotc_k$(TSUFFIX).$(SUFFIX) cdotu_k$(TSUFFIX).$(SUFFIX) cnrm2_k$(TSUFFIX).$(SUFFIX) csrot_k$(TSUFFIX).$(SUFFIX) \
|
||||
cscal_k$(TSUFFIX).$(SUFFIX) cswap_k$(TSUFFIX).$(SUFFIX)
|
||||
cscal_k$(TSUFFIX).$(SUFFIX) cswap_k$(TSUFFIX).$(SUFFIX) caxpby_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZBLASOBJS += \
|
||||
zamax_k$(TSUFFIX).$(SUFFIX) zamin_k$(TSUFFIX).$(SUFFIX) izamax_k$(TSUFFIX).$(SUFFIX) izamin_k$(TSUFFIX).$(SUFFIX) \
|
||||
zasum_k$(TSUFFIX).$(SUFFIX) zaxpy_k$(TSUFFIX).$(SUFFIX) zaxpyc_k$(TSUFFIX).$(SUFFIX) zcopy_k$(TSUFFIX).$(SUFFIX) \
|
||||
zdotc_k$(TSUFFIX).$(SUFFIX) zdotu_k$(TSUFFIX).$(SUFFIX) znrm2_k$(TSUFFIX).$(SUFFIX) zdrot_k$(TSUFFIX).$(SUFFIX) \
|
||||
zscal_k$(TSUFFIX).$(SUFFIX) zswap_k$(TSUFFIX).$(SUFFIX)
|
||||
zscal_k$(TSUFFIX).$(SUFFIX) zswap_k$(TSUFFIX).$(SUFFIX) zaxpby_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
XBLASOBJS += \
|
||||
xamax_k$(TSUFFIX).$(SUFFIX) xamin_k$(TSUFFIX).$(SUFFIX) ixamax_k$(TSUFFIX).$(SUFFIX) ixamin_k$(TSUFFIX).$(SUFFIX) \
|
||||
|
@ -667,15 +687,27 @@ $(KDIR)ddot_k$(TSUFFIX).$(SUFFIX) $(KDIR)ddot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
|
|||
$(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
|
||||
|
||||
# Single-precision dot product kernel.
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
	$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@

# dsdot_k and sdsdot_k are both built with -DDSDOT.  They come from
# DSDOTKERNEL when that variable has a value and fall back to SDOTKERNEL
# otherwise.  (An additional unconditional dsdot_k rule used to precede this
# block; it defined a second recipe for the same target and has been removed.)
ifdef DSDOTKERNEL

$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSDOTKERNEL)
	$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@

$(KDIR)sdsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSDOTKERNEL)
	$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@

else

$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
	$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@

$(KDIR)sdsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
	$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@

endif
|
||||
|
||||
$(KDIR)zdotu_k$(TSUFFIX).$(SUFFIX) $(KDIR)zdotu_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UCONJ $< -o $@
|
||||
|
||||
|
@ -765,3 +797,17 @@ $(KDIR)zswap_k$(TSUFFIX).$(SUFFIX) $(KDIR)zswap_k$(TPSUFFIX).$(PSUFFIX) : $(KE
|
|||
|
||||
$(KDIR)xswap_k$(TSUFFIX).$(SUFFIX) $(KDIR)xswap_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSWAPKERNEL)
|
||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE $< -o $@
|
||||
|
||||
# AXPBY kernels.  Each rule builds the regular and the profiling object from
# the same source; precision and real/complex are selected purely through
# -D/-U flags.  Each flag is given once (the COMPLEX switch used to be
# repeated on every command line, which had no effect).

# real, single precision
$(KDIR)saxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)saxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SAXPBYKERNEL)
	$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@

# real, double precision
$(KDIR)daxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)daxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DAXPBYKERNEL)
	$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE $< -o $@

# complex, single precision
$(KDIR)caxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPBYKERNEL)
	$(CC) -c $(CFLAGS) -DCOMPLEX -UCONJ -UDOUBLE $< -o $@

# complex, double precision
$(KDIR)zaxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)zaxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZAXPBYKERNEL)
	$(CC) -c $(CFLAGS) -DCOMPLEX -UCONJ -DDOUBLE $< -o $@
|
||||
|
||||
|
||||
|
|
|
@ -320,6 +320,28 @@ XBLASOBJS += \
|
|||
|
||||
endif
|
||||
|
||||
###### BLAS extensions #####

# omatcopy kernel objects, one per storage order (c/r) and operation
# (n = copy, t = transpose).  The complex lists additionally carry the
# conjugating variants (nc / tc).

SBLASOBJS += \
	somatcopy_k_cn$(TSUFFIX).$(SUFFIX) \
	somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
	somatcopy_k_ct$(TSUFFIX).$(SUFFIX) \
	somatcopy_k_rt$(TSUFFIX).$(SUFFIX)

DBLASOBJS += \
	domatcopy_k_cn$(TSUFFIX).$(SUFFIX) \
	domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
	domatcopy_k_ct$(TSUFFIX).$(SUFFIX) \
	domatcopy_k_rt$(TSUFFIX).$(SUFFIX)

CBLASOBJS += \
	comatcopy_k_cn$(TSUFFIX).$(SUFFIX) \
	comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
	comatcopy_k_ct$(TSUFFIX).$(SUFFIX) \
	comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
	comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) \
	comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
	comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) \
	comatcopy_k_rtc$(TSUFFIX).$(SUFFIX)

ZBLASOBJS += \
	zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) \
	zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
	zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) \
	zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
	zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) \
	zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
	zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) \
	zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
@ -3237,3 +3259,178 @@ $(KDIR)xtrsm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL
|
|||
|
||||
$(KDIR)xtrsm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_N).c
|
||||
$(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@
|
||||
|
||||
|
||||
##### BLAS extensions ######
|
||||
|
||||
# --- double precision real omatcopy kernels ---------------------------------
# Source defaults: applied only when the variable has no value yet.  They are
# set before the rules because prerequisites are expanded when a rule is read.

ifndef DOMATCOPY_CN
DOMATCOPY_CN = ../arm/omatcopy_cn.c
endif

ifndef DOMATCOPY_RN
DOMATCOPY_RN = ../arm/omatcopy_rn.c
endif

ifndef DOMATCOPY_CT
DOMATCOPY_CT = ../arm/omatcopy_ct.c
endif

ifndef DOMATCOPY_RT
DOMATCOPY_RT = ../arm/omatcopy_rt.c
endif

# Column-order variants are compiled with -UROWM, row-order variants with -DROWM.

$(KDIR)domatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CN)
	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@

$(KDIR)domatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RN)
	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@

$(KDIR)domatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CT)
	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@

$(KDIR)domatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RT)
	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@
|
||||
|
||||
# --- single precision real omatcopy kernels ---------------------------------
# Source defaults: applied only when the variable has no value yet.  They are
# set before the rules because prerequisites are expanded when a rule is read.

ifndef SOMATCOPY_CN
SOMATCOPY_CN = ../arm/omatcopy_cn.c
endif

ifndef SOMATCOPY_RN
SOMATCOPY_RN = ../arm/omatcopy_rn.c
endif

ifndef SOMATCOPY_CT
SOMATCOPY_CT = ../arm/omatcopy_ct.c
endif

ifndef SOMATCOPY_RT
SOMATCOPY_RT = ../arm/omatcopy_rt.c
endif

# Column-order variants are compiled with -UROWM, row-order variants with -DROWM.

$(KDIR)somatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_CN)
	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@

$(KDIR)somatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_RN)
	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@

$(KDIR)somatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_CT)
	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@

$(KDIR)somatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_RT)
	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@
|
||||
|
||||
|
||||
# --- single precision complex omatcopy kernels ------------------------------
# Source defaults: applied only when the variable has no value yet.  They are
# set before the rules because prerequisites are expanded when a rule is read.

ifndef COMATCOPY_CN
COMATCOPY_CN = ../arm/zomatcopy_cn.c
endif

ifndef COMATCOPY_RN
COMATCOPY_RN = ../arm/zomatcopy_rn.c
endif

ifndef COMATCOPY_CT
COMATCOPY_CT = ../arm/zomatcopy_ct.c
endif

ifndef COMATCOPY_RT
COMATCOPY_RT = ../arm/zomatcopy_rt.c
endif

ifndef COMATCOPY_CNC
COMATCOPY_CNC = ../arm/zomatcopy_cnc.c
endif

ifndef COMATCOPY_RNC
COMATCOPY_RNC = ../arm/zomatcopy_rnc.c
endif

ifndef COMATCOPY_CTC
COMATCOPY_CTC = ../arm/zomatcopy_ctc.c
endif

ifndef COMATCOPY_RTC
COMATCOPY_RTC = ../arm/zomatcopy_rtc.c
endif

# Flags: -UROWM / -DROWM select column / row order,
#        -UCONJ / -DCONJ select the plain / conjugating variant.

# non-conjugating variants
$(KDIR)comatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CN)
	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@

$(KDIR)comatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RN)
	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@

$(KDIR)comatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CT)
	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@

$(KDIR)comatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RT)
	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@

# conjugating variants
$(KDIR)comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CNC)
	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@

$(KDIR)comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RNC)
	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@

$(KDIR)comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CTC)
	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@

$(KDIR)comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RTC)
	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@
|
||||
|
||||
|
||||
# --- double precision complex omatcopy kernels ------------------------------
# Source defaults: applied only when the variable has no value yet.  They are
# set before the rules because prerequisites are expanded when a rule is read.

ifndef ZOMATCOPY_CN
ZOMATCOPY_CN = ../arm/zomatcopy_cn.c
endif

ifndef ZOMATCOPY_RN
ZOMATCOPY_RN = ../arm/zomatcopy_rn.c
endif

ifndef ZOMATCOPY_CT
ZOMATCOPY_CT = ../arm/zomatcopy_ct.c
endif

ifndef ZOMATCOPY_RT
ZOMATCOPY_RT = ../arm/zomatcopy_rt.c
endif

ifndef ZOMATCOPY_CNC
ZOMATCOPY_CNC = ../arm/zomatcopy_cnc.c
endif

ifndef ZOMATCOPY_RNC
ZOMATCOPY_RNC = ../arm/zomatcopy_rnc.c
endif

ifndef ZOMATCOPY_CTC
ZOMATCOPY_CTC = ../arm/zomatcopy_ctc.c
endif

ifndef ZOMATCOPY_RTC
ZOMATCOPY_RTC = ../arm/zomatcopy_rtc.c
endif

# Flags: -UROWM / -DROWM select column / row order,
#        -UCONJ / -DCONJ select the plain / conjugating variant.

# non-conjugating variants
$(KDIR)zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CN)
	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@

$(KDIR)zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RN)
	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@

$(KDIR)zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CT)
	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@

$(KDIR)zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RT)
	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@

# conjugating variants
$(KDIR)zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CNC)
	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@

$(KDIR)zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RNC)
	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@

$(KDIR)zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CTC)
	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@

$(KDIR)zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RTC)
	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*
 * Reference AXPBY kernel: for i in [0, n) computes
 *     y[i * inc_y] = alpha * x[i * inc_x] + beta * y[i * inc_y]
 *
 * A zero alpha or beta is special-cased so that the corresponding operand is
 * never read: with beta == 0 the old contents of y are ignored, with
 * alpha == 0 x is not touched at all.  A negative n is a no-op.
 * Always returns 0.
 */
int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
{
	BLASLONG k;
	BLASLONG xpos = 0;
	BLASLONG ypos = 0;

	if ( n < 0 ) return(0);

	if ( beta == 0.0 )
	{
		if ( alpha == 0.0 )
		{
			/* y := 0 */
			for ( k = 0; k < n; k++ )
			{
				y[ypos] = 0.0 ;
				ypos += inc_y ;
			}
		}
		else
		{
			/* y := alpha * x */
			for ( k = 0; k < n; k++ )
			{
				y[ypos] = alpha * x[xpos] ;
				xpos += inc_x ;
				ypos += inc_y ;
			}
		}

		return(0);
	}

	if ( alpha == 0.0 )
	{
		/* y := beta * y */
		for ( k = 0; k < n; k++ )
		{
			y[ypos] = beta * y[ypos] ;
			ypos += inc_y ;
		}
	}
	else
	{
		/* general case: y := alpha * x + beta * y */
		for ( k = 0; k < n; k++ )
		{
			y[ypos] = alpha * x[xpos] + beta * y[ypos] ;
			xpos += inc_x ;
			ypos += inc_y ;
		}
	}

	return(0);

}
|
||||
|
||||
|
|
@ -61,7 +61,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
|||
a_ptr += lda;
|
||||
ix += inc_x;
|
||||
}
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -61,6 +61,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
|||
iy += inc_y;
|
||||
a_ptr += lda;
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* No Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Reference omatcopy kernel, column-major, no transposition:
 *     b[j + i * ldb] = alpha * a[j + i * lda]   for 0 <= j < rows, 0 <= i < cols
 *
 * alpha == 0 zero-fills b without reading a, alpha == 1 is a plain copy
 * without the multiplication.  Non-positive rows or cols is a no-op.
 * Always returns 0.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG col, row;
	FLOAT *src = a;		/* start of the current source column      */
	FLOAT *dst = b;		/* start of the current destination column */

	if ( rows <= 0 || cols <= 0 ) return(0);

	if ( alpha == 0.0 )
	{
		/* Zero fill: the source matrix is never accessed. */
		for ( col = 0; col < cols; col++, dst += ldb )
		{
			for ( row = 0; row < rows; row++ )
				dst[row] = 0.0;
		}
		return(0);
	}

	if ( alpha == 1.0 )
	{
		/* Plain copy, no scaling. */
		for ( col = 0; col < cols; col++, src += lda, dst += ldb )
		{
			for ( row = 0; row < rows; row++ )
				dst[row] = src[row];
		}
		return(0);
	}

	/* General case: scaled copy. */
	for ( col = 0; col < cols; col++, src += lda, dst += ldb )
	{
		for ( row = 0; row < rows; row++ )
			dst[row] = alpha * src[row];
	}

	return(0);

}
|
||||
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Out-of-place scaled transpose, column-major: B := alpha * A**T.
 *
 * Column `col` of A (starting at a + col*lda, `rows` elements long) is
 * scattered into B with a stride of ldb, beginning at b[col]; i.e. element
 * `row` of that column lands at b[col + row*ldb].
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG col, row;
	FLOAT *src, *dst;

	if ( rows <= 0 || cols <= 0 ) return(0);

	if ( alpha == 0.0 )
	{
		/* A is not read on this path: the target elements are cleared. */
		for ( col = 0; col < cols; col++ )
		{
			dst = b + col;
			for ( row = 0; row < rows; row++ ) dst[row * ldb] = 0.0;
		}
		return(0);
	}

	src = a;

	if ( alpha == 1.0 )
	{
		/* Unit scale: transpose without multiplying. */
		for ( col = 0; col < cols; col++, src += lda )
		{
			dst = b + col;
			for ( row = 0; row < rows; row++ ) dst[row * ldb] = src[row];
		}
		return(0);
	}

	/* General case. */
	for ( col = 0; col < cols; col++, src += lda )
	{
		dst = b + col;
		for ( row = 0; row < rows; row++ ) dst[row * ldb] = alpha * src[row];
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* No Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Out-of-place scaled copy, row-major, not transposed: B := alpha * A.
 *
 * The matrices are walked one row at a time.  Successive rows start lda
 * (for A) and ldb (for B) elements apart and each row contributes `cols`
 * consecutive elements.
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG row, col;
	FLOAT *src, *dst;

	if ( rows <= 0 || cols <= 0 ) return(0);

	src = a;
	dst = b;

	if ( alpha == 0.0 )
	{
		/* A is not read on this path: B is simply cleared. */
		for ( row = 0; row < rows; row++, dst += ldb )
		{
			for ( col = 0; col < cols; col++ ) dst[col] = 0.0;
		}
		return(0);
	}

	if ( alpha == 1.0 )
	{
		/* Unit scale: straight element copy, no multiplication. */
		for ( row = 0; row < rows; row++, src += lda, dst += ldb )
		{
			for ( col = 0; col < cols; col++ ) dst[col] = src[col];
		}
		return(0);
	}

	/* General case. */
	for ( row = 0; row < rows; row++, src += lda, dst += ldb )
	{
		for ( col = 0; col < cols; col++ ) dst[col] = alpha * src[col];
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Out-of-place scaled transpose, row-major: B := alpha * A**T.
 *
 * Row `i` of A (starting at a + i*lda, `cols` elements long) is scattered
 * into B with a stride of ldb, beginning at b[i]; i.e. element `j` of that
 * row lands at b[i + j*ldb].
 *
 * alpha == 0 and alpha == 1 get their own paths, matching the other real
 * omatcopy kernels:
 *   - alpha == 0: the target elements are set to zero and A is never read,
 *     so NaN/Inf values in A cannot propagate into B;
 *   - alpha == 1: elements are copied without a multiplication.
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG i,j;
	FLOAT *aptr,*bptr;

	if ( rows <= 0 ) return(0);
	if ( cols <= 0 ) return(0);

	aptr = a;

	if ( alpha == 0.0 )
	{
		for ( i=0; i<rows ; i++ )
		{
			bptr = &b[i];
			for(j=0; j<cols; j++)
			{
				bptr[j*ldb] = 0.0;
			}
		}
		return(0);
	}

	if ( alpha == 1.0 )
	{
		for ( i=0; i<rows ; i++ )
		{
			bptr = &b[i];
			for(j=0; j<cols; j++)
			{
				bptr[j*ldb] = aptr[j];
			}
			aptr += lda;
		}
		return(0);
	}

	for ( i=0; i<rows ; i++ )
	{
		bptr = &b[i];
		for(j=0; j<cols; j++)
		{
			bptr[j*ldb] = alpha * aptr[j];
		}
		aptr += lda;
	}

	return(0);

}
|
||||
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***************************************************************************
|
||||
* 2014/06/07 Saar
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*
 * Complex axpby: y := alpha * x + beta * y.
 *
 * x and y hold interleaved (real, imaginary) pairs; element k of x lives at
 * x[2*k*inc_x] / x[2*k*inc_x + 1], and likewise for y with inc_y.
 * alpha = alpha_r + i*alpha_i, beta = beta_r + i*beta_i.
 *
 * Four cases are distinguished so that an operand whose coefficient is
 * exactly zero is never read:
 *   beta == 0, alpha == 0 :  y := 0
 *   beta == 0, alpha != 0 :  y := alpha * x
 *   beta != 0, alpha == 0 :  y := beta * y
 *   otherwise             :  y := alpha * x + beta * y
 *
 * In every case the new real part is staged in `temp` until the imaginary
 * part has been computed, so an element is fully read before it is
 * overwritten.  This keeps the result correct when the element being read
 * and the element being written are the same storage (x aliasing y with
 * equal increments).
 *
 * Returns 0 in every case; n <= 0 is a no-op.
 */
int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i,FLOAT *y, BLASLONG inc_y)
{
	BLASLONG i=0;
	BLASLONG ix,iy;
	FLOAT temp;

	if ( n <= 0 ) return(0);

	ix = 0;
	iy = 0;

	/* Strides in units of FLOAT: each complex element occupies two. */
	BLASLONG inc_x2 = 2 * inc_x;
	BLASLONG inc_y2 = 2 * inc_y;

	if ( beta_r == 0.0 && beta_i == 0.0)
	{
		if ( alpha_r == 0.0 && alpha_i == 0.0 )
		{

			while(i < n)
			{
				y[iy]   = 0.0 ;
				y[iy+1] = 0.0 ;
				iy += inc_y2 ;
				i++ ;
			}

		}
		else
		{

			while(i < n)
			{
				/* Stage the real part: x[ix] is still needed below. */
				temp    = ( alpha_r * x[ix]   - alpha_i * x[ix+1] ) ;
				y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix]   ) ;
				y[iy]   = temp;
				ix += inc_x2 ;
				iy += inc_y2 ;
				i++ ;
			}

		}

	}
	else
	{
		if ( alpha_r == 0.0 && alpha_i == 0.0 )
		{

			while(i < n)
			{
				temp    = ( beta_r * y[iy]   - beta_i * y[iy+1] ) ;
				y[iy+1] = ( beta_r * y[iy+1] + beta_i * y[iy]   ) ;
				y[iy]   = temp;
				iy += inc_y2 ;
				i++ ;
			}

		}
		else
		{

			while(i < n)
			{
				temp    = ( alpha_r * x[ix]   - alpha_i * x[ix+1] ) + ( beta_r * y[iy]   - beta_i * y[iy+1] ) ;
				y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix]   ) + ( beta_r * y[iy+1] + beta_i * y[iy]   ) ;
				y[iy]   = temp;
				ix += inc_x2 ;
				iy += inc_y2 ;
				i++ ;
			}

		}

	}
	return(0);

}
|
||||
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* No Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Complex out-of-place scaled copy, column-major, not transposed:
 * B := alpha * A, with alpha = alpha_r + i*alpha_i.
 *
 * Elements are interleaved (real, imaginary) pairs, so a column of `rows`
 * complex values spans 2*rows FLOATs and consecutive columns are 2*lda
 * (A) or 2*ldb (B) FLOATs apart.
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG col, row;
	BLASLONG a_step, b_step;
	FLOAT *a_col, *b_col;
	FLOAT *src, *dst;

	if ( rows <= 0 || cols <= 0 ) return(0);

	/* Column strides expressed in FLOATs rather than complex elements. */
	a_step = 2 * lda;
	b_step = 2 * ldb;

	a_col = a;
	b_col = b;

	for ( col = 0; col < cols; col++ )
	{
		src = a_col;
		dst = b_col;

		for ( row = 0; row < rows; row++ )
		{
			/* (alpha_r + i*alpha_i) * (src[0] + i*src[1]) */
			dst[0] = alpha_r * src[0] - alpha_i * src[1];
			dst[1] = alpha_r * src[1] + alpha_i * src[0];
			src += 2;
			dst += 2;
		}

		a_col += a_step;
		b_col += b_step;
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* No Trans, conjugate
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Complex out-of-place scaled conjugate copy, column-major, not
 * transposed: B := alpha * conj(A), with alpha = alpha_r + i*alpha_i.
 *
 * Elements are interleaved (real, imaginary) pairs, so a column of `rows`
 * complex values spans 2*rows FLOATs and consecutive columns are 2*lda
 * (A) or 2*ldb (B) FLOATs apart.
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG col, row;
	BLASLONG a_step, b_step;
	FLOAT *a_col, *b_col;
	FLOAT *src, *dst;

	if ( rows <= 0 || cols <= 0 ) return(0);

	/* Column strides expressed in FLOATs rather than complex elements. */
	a_step = 2 * lda;
	b_step = 2 * ldb;

	a_col = a;
	b_col = b;

	for ( col = 0; col < cols; col++ )
	{
		src = a_col;
		dst = b_col;

		for ( row = 0; row < rows; row++ )
		{
			/* (alpha_r + i*alpha_i) * (src[0] - i*src[1]) */
			dst[0] =   alpha_r * src[0] + alpha_i * src[1];
			dst[1] = - alpha_r * src[1] + alpha_i * src[0];
			src += 2;
			dst += 2;
		}

		a_col += a_step;
		b_col += b_step;
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Complex out-of-place scaled transpose, column-major:
 * B := alpha * A**T, with alpha = alpha_r + i*alpha_i.
 *
 * Elements are interleaved (real, imaginary) pairs.  Column `col` of A
 * (starting 2*lda*col FLOATs into a) is read contiguously and written to
 * B starting at b[2*col], advancing 2*ldb FLOATs per element.
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG col, row;
	BLASLONG a_step, b_step, b_start;
	FLOAT *a_col;
	FLOAT *src, *dst;

	if ( rows <= 0 || cols <= 0 ) return(0);

	/* Strides expressed in FLOATs rather than complex elements. */
	a_step = 2 * lda;
	b_step = 2 * ldb;

	a_col   = a;
	b_start = 0;

	for ( col = 0; col < cols; col++ )
	{
		src = a_col;
		dst = &b[b_start];

		for ( row = 0; row < rows; row++ )
		{
			/* (alpha_r + i*alpha_i) * (src[0] + i*src[1]) */
			dst[0] = alpha_r * src[0] - alpha_i * src[1];
			dst[1] = alpha_r * src[1] + alpha_i * src[0];
			src += 2;
			dst += b_step;
		}

		a_col   += a_step;
		b_start += 2;
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order ColMajor
|
||||
* Trans, conjugate
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Complex out-of-place scaled conjugate transpose, column-major:
 * B := alpha * conj(A)**T, with alpha = alpha_r + i*alpha_i.
 *
 * Elements are interleaved (real, imaginary) pairs.  Column `col` of A
 * (starting 2*lda*col FLOATs into a) is read contiguously and written to
 * B starting at b[2*col], advancing 2*ldb FLOATs per element.
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG col, row;
	BLASLONG a_step, b_step, b_start;
	FLOAT *a_col;
	FLOAT *src, *dst;

	if ( rows <= 0 || cols <= 0 ) return(0);

	/* Strides expressed in FLOATs rather than complex elements. */
	a_step = 2 * lda;
	b_step = 2 * ldb;

	a_col   = a;
	b_start = 0;

	for ( col = 0; col < cols; col++ )
	{
		src = a_col;
		dst = &b[b_start];

		for ( row = 0; row < rows; row++ )
		{
			/* (alpha_r + i*alpha_i) * (src[0] - i*src[1]) */
			dst[0] =   alpha_r * src[0] + alpha_i * src[1];
			dst[1] = - alpha_r * src[1] + alpha_i * src[0];
			src += 2;
			dst += b_step;
		}

		a_col   += a_step;
		b_start += 2;
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* No Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Complex out-of-place scaled copy, row-major, not transposed:
 * B := alpha * A, with alpha = alpha_r + i*alpha_i.
 *
 * Elements are interleaved (real, imaginary) pairs, so a row of `cols`
 * complex values spans 2*cols FLOATs and consecutive rows are 2*lda (A)
 * or 2*ldb (B) FLOATs apart.
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG row, col;
	BLASLONG a_step, b_step;
	FLOAT *a_row, *b_row;
	FLOAT *src, *dst;

	if ( rows <= 0 || cols <= 0 ) return(0);

	/* Row strides expressed in FLOATs rather than complex elements. */
	a_step = 2 * lda;
	b_step = 2 * ldb;

	a_row = a;
	b_row = b;

	for ( row = 0; row < rows; row++ )
	{
		src = a_row;
		dst = b_row;

		for ( col = 0; col < cols; col++ )
		{
			/* (alpha_r + i*alpha_i) * (src[0] + i*src[1]) */
			dst[0] = alpha_r * src[0] - alpha_i * src[1];
			dst[1] = alpha_r * src[1] + alpha_i * src[0];
			src += 2;
			dst += 2;
		}

		a_row += a_step;
		b_row += b_step;
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* No Trans , conjugate
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Complex out-of-place scaled conjugate copy, row-major, not transposed:
 * B := alpha * conj(A), with alpha = alpha_r + i*alpha_i.
 *
 * Elements are interleaved (real, imaginary) pairs, so a row of `cols`
 * complex values spans 2*cols FLOATs and consecutive rows are 2*lda (A)
 * or 2*ldb (B) FLOATs apart.
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG row, col;
	BLASLONG a_step, b_step;
	FLOAT *a_row, *b_row;
	FLOAT *src, *dst;

	if ( rows <= 0 || cols <= 0 ) return(0);

	/* Row strides expressed in FLOATs rather than complex elements. */
	a_step = 2 * lda;
	b_step = 2 * ldb;

	a_row = a;
	b_row = b;

	for ( row = 0; row < rows; row++ )
	{
		src = a_row;
		dst = b_row;

		for ( col = 0; col < cols; col++ )
		{
			/* (alpha_r + i*alpha_i) * (src[0] - i*src[1]) */
			dst[0] =   alpha_r * src[0] + alpha_i * src[1];
			dst[1] = - alpha_r * src[1] + alpha_i * src[0];
			src += 2;
			dst += 2;
		}

		a_row += a_step;
		b_row += b_step;
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* Trans
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Complex out-of-place scaled transpose, row-major:
 * B := alpha * A**T, with alpha = alpha_r + i*alpha_i.
 *
 * Elements are interleaved (real, imaginary) pairs.  Row `row` of A
 * (starting 2*lda*row FLOATs into a) is read contiguously and written to
 * B starting at b[2*row], advancing 2*ldb FLOATs per element.
 *
 * Returns 0 in every case; if rows or cols is not positive nothing is
 * touched.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG row, col;
	BLASLONG a_step, b_step, b_start;
	FLOAT *a_row;
	FLOAT *src, *dst;

	if ( rows <= 0 || cols <= 0 ) return(0);

	/* Strides expressed in FLOATs rather than complex elements. */
	a_step = 2 * lda;
	b_step = 2 * ldb;

	a_row   = a;
	b_start = 0;

	for ( row = 0; row < rows; row++ )
	{
		src = a_row;
		dst = &b[b_start];

		for ( col = 0; col < cols; col++ )
		{
			/* (alpha_r + i*alpha_i) * (src[0] + i*src[1]) */
			dst[0] = alpha_r * src[0] - alpha_i * src[1];
			dst[1] = alpha_r * src[1] + alpha_i * src[0];
			src += 2;
			dst += b_step;
		}

		a_row   += a_step;
		b_start += 2;
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*****************************************************
|
||||
* 2014/06/09 Saar
|
||||
*
|
||||
* Order rowMajor
|
||||
* Trans, conjugate
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
/*
 * Out-of-place scaled conjugate transpose of a complex matrix (row-major
 * source).
 *
 * Both matrices store each element as an interleaved (real, imag) pair of
 * FLOATs.  For every source element at row r, column c (rows are lda
 * complex elements apart) the product alpha * conj(a(r,c)), with
 * alpha = alpha_r + i*alpha_i, is written to row c, column r of b (rows
 * are ldb complex elements apart).
 *
 * Always returns 0; nothing is written when rows or cols is not positive.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
	BLASLONG r, c;
	BLASLONG a_stride, b_stride;
	FLOAT *src, *dst;

	/* Nothing to copy for an empty matrix. */
	if ( rows <= 0 || cols <= 0 ) return(0);

	/* Leading dimensions are given in complex elements; double them so
	   they can be used directly as offsets into the FLOAT arrays. */
	a_stride = 2 * lda;
	b_stride = 2 * ldb;

	for ( r = 0; r < rows; r++ )
	{
		src = a + r * a_stride;		/* first element of source row r        */
		dst = b + 2 * r;		/* element r of the first destination row */

		for ( c = 0; c < cols; c++ )
		{
			/* (alpha_r + i*alpha_i) * (src[0] - i*src[1]) */
			dst[0] = alpha_r * src[0] + alpha_i * src[1];
			dst[1] = - alpha_r * src[1] + alpha_i * src[0];

			src += 2;		/* next element along the source row      */
			dst += b_stride;	/* same column, next destination row      */
		}
	}

	return(0);
}
|
||||
|
||||
|
|
@ -500,6 +500,16 @@ gotoblas_t TABLE_NAME = {
|
|||
|
||||
SNUMOPT, DNUMOPT, QNUMOPT,
|
||||
|
||||
saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
|
||||
|
||||
somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
|
||||
domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
|
||||
comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
|
||||
comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
|
||||
zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
|
||||
zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS
|
||||
|
||||
|
||||
};
|
||||
|
||||
#ifdef ARCH_X86
|
||||
|
|
|
@ -119,9 +119,15 @@ XCOPYKERNEL = zcopy.S
|
|||
endif
|
||||
|
||||
ifndef SDOTKERNEL
|
||||
SDOTKERNEL = ../arm/dot.c
|
||||
SDOTKERNEL = dot_sse.S
|
||||
endif
|
||||
|
||||
|
||||
ifndef DSDOTKERNEL
|
||||
DSDOTKERNEL = ../arm/dot.c
|
||||
endif
|
||||
|
||||
|
||||
ifndef DDOTKERNEL
|
||||
DDOTKERNEL = dot_sse2.S
|
||||
endif
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
SGEMVNKERNEL = sgemv_n.S
|
||||
SGEMVTKERNEL = sgemv_t.S
|
||||
|
||||
ZGEMVNKERNEL = zgemv_n_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
SGEMMKERNEL = gemm_kernel_8x4_barcelona.S
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
ZGEMVNKERNEL = zgemv_n_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
SGEMMKERNEL = gemm_kernel_8x4_barcelona.S
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
SGEMVNKERNEL = sgemv_n.S
|
||||
SGEMVTKERNEL = sgemv_t.S
|
||||
|
||||
ZGEMVNKERNEL = zgemv_n_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
DGEMVNKERNEL = dgemv_n_bulldozer.S
|
||||
DGEMVTKERNEL = dgemv_t_bulldozer.S
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
SGEMVNKERNEL = sgemv_n.S
|
||||
SGEMVTKERNEL = sgemv_t.S
|
||||
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
SGEMVNKERNEL = sgemv_n.S
|
||||
SGEMVTKERNEL = sgemv_t.S
|
||||
|
||||
|
||||
SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
|
||||
SGEMMINCOPY = gemm_ncopy_4.S
|
||||
SGEMMITCOPY = gemm_tcopy_4.S
|
||||
|
@ -9,13 +13,13 @@ SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
|||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
DGEMMKERNEL = gemm_kernel_4x4_core2.S
|
||||
DGEMMINCOPY =
|
||||
DGEMMITCOPY =
|
||||
DGEMMONCOPY = gemm_ncopy_4.S
|
||||
DGEMMOTCOPY = gemm_tcopy_4.S
|
||||
DGEMMINCOPYOBJ =
|
||||
DGEMMITCOPYOBJ =
|
||||
DGEMMKERNEL = gemm_kernel_2x8_nehalem.S
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
@ -44,11 +48,10 @@ STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
|
|||
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_core2.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_core2.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_core2.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_core2.S
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S
|
||||
|
||||
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
|
||||
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
ZGEMVNKERNEL = zgemv_n_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
SGEMMKERNEL = gemm_kernel_8x4_sse.S
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
SGEMVNKERNEL = sgemv_n.S
|
||||
SGEMVTKERNEL = sgemv_t.S
|
||||
|
||||
ZGEMVNKERNEL = zgemv_n_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
DGEMVNKERNEL = dgemv_n_bulldozer.S
|
||||
DGEMVTKERNEL = dgemv_t_bulldozer.S
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
ZGEMVNKERNEL = zgemv_n_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t_dup.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
SGEMMKERNEL = gemm_kernel_8x4_sse3.S
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
|
||||
SGEMMINCOPY = gemm_ncopy_4.S
|
||||
SGEMMITCOPY = gemm_tcopy_4.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
SGEMVNKERNEL = sgemv_n.S
|
||||
SGEMVTKERNEL = sgemv_t.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_16x4_sandy.S
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||
|
@ -19,11 +21,11 @@ DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
||||
CGEMMINCOPY = zgemm_ncopy_2.S
|
||||
CGEMMITCOPY = zgemm_tcopy_2.S
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMKERNEL = cgemm_kernel_8x2_sandy.S
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
|
|
|
@ -79,8 +79,7 @@
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 8192
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
@ -523,16 +522,16 @@
|
|||
#ifdef WINDOWS_ABI
|
||||
movq %rdi, 48(%rsp)
|
||||
movq %rsi, 56(%rsp)
|
||||
movups %xmm6, 64(%rsp)
|
||||
movups %xmm7, 80(%rsp)
|
||||
movups %xmm8, 96(%rsp)
|
||||
movups %xmm9, 112(%rsp)
|
||||
movups %xmm10, 128(%rsp)
|
||||
movups %xmm11, 144(%rsp)
|
||||
movups %xmm12, 160(%rsp)
|
||||
movups %xmm13, 176(%rsp)
|
||||
movups %xmm14, 192(%rsp)
|
||||
movups %xmm15, 208(%rsp)
|
||||
vmovups %xmm6, 64(%rsp)
|
||||
vmovups %xmm7, 80(%rsp)
|
||||
vmovups %xmm8, 96(%rsp)
|
||||
vmovups %xmm9, 112(%rsp)
|
||||
vmovups %xmm10, 128(%rsp)
|
||||
vmovups %xmm11, 144(%rsp)
|
||||
vmovups %xmm12, 160(%rsp)
|
||||
vmovups %xmm13, 176(%rsp)
|
||||
vmovups %xmm14, 192(%rsp)
|
||||
vmovups %xmm15, 208(%rsp)
|
||||
|
||||
movq ARG1, OLD_M
|
||||
movq ARG2, OLD_N
|
||||
|
@ -542,14 +541,15 @@
|
|||
movq OLD_C, C
|
||||
movq OLD_LDC, LDC
|
||||
#ifdef TRMMKERNEL
|
||||
movsd OLD_OFFSET, %xmm12
|
||||
vmovsd OLD_OFFSET, %xmm12
|
||||
#endif
|
||||
vmovaps %xmm3, %xmm0
|
||||
vmovsd OLD_ALPHA_I, %xmm1
|
||||
|
||||
#else
|
||||
movq STACKSIZE + 8(%rsp), LDC
|
||||
#ifdef TRMMKERNEL
|
||||
movsd STACKSIZE + 16(%rsp), %xmm12
|
||||
vmovsd STACKSIZE + 16(%rsp), %xmm12
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -1866,6 +1866,8 @@
|
|||
|
||||
|
||||
.L999:
|
||||
vzeroupper
|
||||
|
||||
movq SP, %rsp
|
||||
movq (%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
|
@ -1877,16 +1879,16 @@
|
|||
#ifdef WINDOWS_ABI
|
||||
movq 48(%rsp), %rdi
|
||||
movq 56(%rsp), %rsi
|
||||
movups 64(%rsp), %xmm6
|
||||
movups 80(%rsp), %xmm7
|
||||
movups 96(%rsp), %xmm8
|
||||
movups 112(%rsp), %xmm9
|
||||
movups 128(%rsp), %xmm10
|
||||
movups 144(%rsp), %xmm11
|
||||
movups 160(%rsp), %xmm12
|
||||
movups 176(%rsp), %xmm13
|
||||
movups 192(%rsp), %xmm14
|
||||
movups 208(%rsp), %xmm15
|
||||
vmovups 64(%rsp), %xmm6
|
||||
vmovups 80(%rsp), %xmm7
|
||||
vmovups 96(%rsp), %xmm8
|
||||
vmovups 112(%rsp), %xmm9
|
||||
vmovups 128(%rsp), %xmm10
|
||||
vmovups 144(%rsp), %xmm11
|
||||
vmovups 160(%rsp), %xmm12
|
||||
vmovups 176(%rsp), %xmm13
|
||||
vmovups 192(%rsp), %xmm14
|
||||
vmovups 208(%rsp), %xmm15
|
||||
#endif
|
||||
|
||||
addq $STACKSIZE, %rsp
|
||||
|
|
|
@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*****************************************************************************/
|
||||
/*********************************************************************
|
||||
*
|
||||
* 2013/10/31 Saar
|
||||
* 2014/06/28 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
|
@ -104,8 +104,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 256*8*4
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
@ -116,7 +115,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define KK 72(%rsp)
|
||||
#define KKK 80(%rsp)
|
||||
#define BUFFER1 128(%rsp)
|
||||
#define BUFFER2 LB2_OFFSET+128(%rsp)
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#if L_BUFFER_SIZE > 16384
|
||||
|
@ -548,16 +546,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifdef WINDOWS_ABI
|
||||
movq %rdi, 48(%rsp)
|
||||
movq %rsi, 56(%rsp)
|
||||
movups %xmm6, 64(%rsp)
|
||||
movups %xmm7, 80(%rsp)
|
||||
movups %xmm8, 96(%rsp)
|
||||
movups %xmm9, 112(%rsp)
|
||||
movups %xmm10, 128(%rsp)
|
||||
movups %xmm11, 144(%rsp)
|
||||
movups %xmm12, 160(%rsp)
|
||||
movups %xmm13, 176(%rsp)
|
||||
movups %xmm14, 192(%rsp)
|
||||
movups %xmm15, 208(%rsp)
|
||||
vmovups %xmm6, 64(%rsp)
|
||||
vmovups %xmm7, 80(%rsp)
|
||||
vmovups %xmm8, 96(%rsp)
|
||||
vmovups %xmm9, 112(%rsp)
|
||||
vmovups %xmm10, 128(%rsp)
|
||||
vmovups %xmm11, 144(%rsp)
|
||||
vmovups %xmm12, 160(%rsp)
|
||||
vmovups %xmm13, 176(%rsp)
|
||||
vmovups %xmm14, 192(%rsp)
|
||||
vmovups %xmm15, 208(%rsp)
|
||||
|
||||
movq ARG1, OLD_M
|
||||
movq ARG2, OLD_N
|
||||
|
@ -570,6 +568,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
movsd OLD_OFFSET, %xmm12
|
||||
#endif
|
||||
vmovaps %xmm3, %xmm0
|
||||
vmovsd OLD_ALPHA_I, %xmm1
|
||||
|
||||
#else
|
||||
movq STACKSIZE + 8(%rsp), LDC
|
||||
|
@ -1891,6 +1890,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
.L999:
|
||||
vzeroupper
|
||||
|
||||
movq SP, %rsp
|
||||
movq (%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
|
@ -1902,16 +1903,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifdef WINDOWS_ABI
|
||||
movq 48(%rsp), %rdi
|
||||
movq 56(%rsp), %rsi
|
||||
movups 64(%rsp), %xmm6
|
||||
movups 80(%rsp), %xmm7
|
||||
movups 96(%rsp), %xmm8
|
||||
movups 112(%rsp), %xmm9
|
||||
movups 128(%rsp), %xmm10
|
||||
movups 144(%rsp), %xmm11
|
||||
movups 160(%rsp), %xmm12
|
||||
movups 176(%rsp), %xmm13
|
||||
movups 192(%rsp), %xmm14
|
||||
movups 208(%rsp), %xmm15
|
||||
vmovups 64(%rsp), %xmm6
|
||||
vmovups 80(%rsp), %xmm7
|
||||
vmovups 96(%rsp), %xmm8
|
||||
vmovups 112(%rsp), %xmm9
|
||||
vmovups 128(%rsp), %xmm10
|
||||
vmovups 144(%rsp), %xmm11
|
||||
vmovups 160(%rsp), %xmm12
|
||||
vmovups 176(%rsp), %xmm13
|
||||
vmovups 192(%rsp), %xmm14
|
||||
vmovups 208(%rsp), %xmm15
|
||||
#endif
|
||||
|
||||
addq $STACKSIZE, %rsp
|
||||
|
|
|
@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************
|
||||
* 2013/11/13 Saar
|
||||
* 2014/06/28 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
|
@ -93,8 +93,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 8192
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
@ -105,7 +104,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define KK 72(%rsp)
|
||||
#define KKK 80(%rsp)
|
||||
#define BUFFER1 128(%rsp)
|
||||
#define BUFFER2 LB2_OFFSET+128(%rsp)
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#if L_BUFFER_SIZE > 16384
|
||||
|
@ -818,16 +816,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifdef WINDOWS_ABI
|
||||
movq %rdi, 48(%rsp)
|
||||
movq %rsi, 56(%rsp)
|
||||
movups %xmm6, 64(%rsp)
|
||||
movups %xmm7, 80(%rsp)
|
||||
movups %xmm8, 96(%rsp)
|
||||
movups %xmm9, 112(%rsp)
|
||||
movups %xmm10, 128(%rsp)
|
||||
movups %xmm11, 144(%rsp)
|
||||
movups %xmm12, 160(%rsp)
|
||||
movups %xmm13, 176(%rsp)
|
||||
movups %xmm14, 192(%rsp)
|
||||
movups %xmm15, 208(%rsp)
|
||||
vmovups %xmm6, 64(%rsp)
|
||||
vmovups %xmm7, 80(%rsp)
|
||||
vmovups %xmm8, 96(%rsp)
|
||||
vmovups %xmm9, 112(%rsp)
|
||||
vmovups %xmm10, 128(%rsp)
|
||||
vmovups %xmm11, 144(%rsp)
|
||||
vmovups %xmm12, 160(%rsp)
|
||||
vmovups %xmm13, 176(%rsp)
|
||||
vmovups %xmm14, 192(%rsp)
|
||||
vmovups %xmm15, 208(%rsp)
|
||||
|
||||
movq ARG1, OLD_M
|
||||
movq ARG2, OLD_N
|
||||
|
@ -840,6 +838,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
movsd OLD_OFFSET, %xmm12
|
||||
#endif
|
||||
vmovaps %xmm3, %xmm0
|
||||
vmovsd OLD_ALPHA_I, %xmm1
|
||||
|
||||
#else
|
||||
movq STACKSIZE + 8(%rsp), LDC
|
||||
|
@ -2255,6 +2254,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
.L999:
|
||||
vzeroupper
|
||||
|
||||
movq SP, %rsp
|
||||
movq (%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
|
@ -2266,16 +2267,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifdef WINDOWS_ABI
|
||||
movq 48(%rsp), %rdi
|
||||
movq 56(%rsp), %rsi
|
||||
movups 64(%rsp), %xmm6
|
||||
movups 80(%rsp), %xmm7
|
||||
movups 96(%rsp), %xmm8
|
||||
movups 112(%rsp), %xmm9
|
||||
movups 128(%rsp), %xmm10
|
||||
movups 144(%rsp), %xmm11
|
||||
movups 160(%rsp), %xmm12
|
||||
movups 176(%rsp), %xmm13
|
||||
movups 192(%rsp), %xmm14
|
||||
movups 208(%rsp), %xmm15
|
||||
vmovups 64(%rsp), %xmm6
|
||||
vmovups 80(%rsp), %xmm7
|
||||
vmovups 96(%rsp), %xmm8
|
||||
vmovups 112(%rsp), %xmm9
|
||||
vmovups 128(%rsp), %xmm10
|
||||
vmovups 144(%rsp), %xmm11
|
||||
vmovups 160(%rsp), %xmm12
|
||||
vmovups 176(%rsp), %xmm13
|
||||
vmovups 192(%rsp), %xmm14
|
||||
vmovups 208(%rsp), %xmm15
|
||||
#endif
|
||||
|
||||
addq $ STACKSIZE, %rsp
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
#define L_BUFFER_SIZE 128*8*12+4096
|
||||
#define L_BUFFER_SIZE 128*8*12+512
|
||||
|
||||
#define OLD_A 40 + STACKSIZE(%rsp)
|
||||
#define OLD_B 48 + STACKSIZE(%rsp)
|
||||
|
|
|
@ -148,8 +148,8 @@
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 8192
|
||||
#define LB2_OFFSET 4096
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
|
|
@ -105,8 +105,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 8192
|
||||
#define LB2_OFFSET 4096
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
|
|
@ -78,8 +78,8 @@
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 8192
|
||||
#define LB2_OFFSET 4096
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
|
|
@ -105,8 +105,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 8192
|
||||
#define LB2_OFFSET 4096
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
|
|
@ -90,8 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 8192
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
@ -101,7 +100,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define KK 64(%rsp)
|
||||
#define KKK 72(%rsp)
|
||||
#define BUFFER1 128(%rsp)
|
||||
#define BUFFER2 LB2_OFFSET+128(%rsp)
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#if L_BUFFER_SIZE > 16384
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -79,8 +79,7 @@
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 8192
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
@ -91,7 +90,6 @@
|
|||
#define KK 72(%rsp)
|
||||
#define KKK 80(%rsp)
|
||||
#define BUFFER1 128(%rsp)
|
||||
#define BUFFER2 LB2_OFFSET+128(%rsp)
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#if L_BUFFER_SIZE > 16384
|
||||
|
@ -414,16 +412,16 @@
|
|||
#ifdef WINDOWS_ABI
|
||||
movq %rdi, 48(%rsp)
|
||||
movq %rsi, 56(%rsp)
|
||||
movups %xmm6, 64(%rsp)
|
||||
movups %xmm7, 80(%rsp)
|
||||
movups %xmm8, 96(%rsp)
|
||||
movups %xmm9, 112(%rsp)
|
||||
movups %xmm10, 128(%rsp)
|
||||
movups %xmm11, 144(%rsp)
|
||||
movups %xmm12, 160(%rsp)
|
||||
movups %xmm13, 176(%rsp)
|
||||
movups %xmm14, 192(%rsp)
|
||||
movups %xmm15, 208(%rsp)
|
||||
vmovups %xmm6, 64(%rsp)
|
||||
vmovups %xmm7, 80(%rsp)
|
||||
vmovups %xmm8, 96(%rsp)
|
||||
vmovups %xmm9, 112(%rsp)
|
||||
vmovups %xmm10, 128(%rsp)
|
||||
vmovups %xmm11, 144(%rsp)
|
||||
vmovups %xmm12, 160(%rsp)
|
||||
vmovups %xmm13, 176(%rsp)
|
||||
vmovups %xmm14, 192(%rsp)
|
||||
vmovups %xmm15, 208(%rsp)
|
||||
|
||||
movq ARG1, OLD_M
|
||||
movq ARG2, OLD_N
|
||||
|
@ -433,14 +431,15 @@
|
|||
movq OLD_C, C
|
||||
movq OLD_LDC, LDC
|
||||
#ifdef TRMMKERNEL
|
||||
movsd OLD_OFFSET, %xmm12
|
||||
vmovsd OLD_OFFSET, %xmm12
|
||||
#endif
|
||||
vmovaps %xmm3, %xmm0
|
||||
vmovsd OLD_ALPHA_I, %xmm1
|
||||
|
||||
#else
|
||||
movq STACKSIZE + 8(%rsp), LDC
|
||||
#ifdef TRMMKERNEL
|
||||
movsd STACKSIZE + 16(%rsp), %xmm12
|
||||
vmovsd STACKSIZE + 16(%rsp), %xmm12
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -1374,6 +1373,8 @@
|
|||
|
||||
|
||||
.L999:
|
||||
vzeroupper
|
||||
|
||||
movq SP, %rsp
|
||||
movq (%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
|
@ -1385,16 +1386,16 @@
|
|||
#ifdef WINDOWS_ABI
|
||||
movq 48(%rsp), %rdi
|
||||
movq 56(%rsp), %rsi
|
||||
movups 64(%rsp), %xmm6
|
||||
movups 80(%rsp), %xmm7
|
||||
movups 96(%rsp), %xmm8
|
||||
movups 112(%rsp), %xmm9
|
||||
movups 128(%rsp), %xmm10
|
||||
movups 144(%rsp), %xmm11
|
||||
movups 160(%rsp), %xmm12
|
||||
movups 176(%rsp), %xmm13
|
||||
movups 192(%rsp), %xmm14
|
||||
movups 208(%rsp), %xmm15
|
||||
vmovups 64(%rsp), %xmm6
|
||||
vmovups 80(%rsp), %xmm7
|
||||
vmovups 96(%rsp), %xmm8
|
||||
vmovups 112(%rsp), %xmm9
|
||||
vmovups 128(%rsp), %xmm10
|
||||
vmovups 144(%rsp), %xmm11
|
||||
vmovups 160(%rsp), %xmm12
|
||||
vmovups 176(%rsp), %xmm13
|
||||
vmovups 192(%rsp), %xmm14
|
||||
vmovups 208(%rsp), %xmm15
|
||||
#endif
|
||||
|
||||
addq $STACKSIZE, %rsp
|
||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/*********************************************************************
|
||||
*
|
||||
* 2013/10/30 Saar
|
||||
* 2014/06/28 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
|
@ -104,8 +104,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 256*8*4
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
@ -116,7 +115,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define KK 72(%rsp)
|
||||
#define KKK 80(%rsp)
|
||||
#define BUFFER1 128(%rsp)
|
||||
#define BUFFER2 LB2_OFFSET+128(%rsp)
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#if L_BUFFER_SIZE > 16384
|
||||
|
@ -439,16 +437,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifdef WINDOWS_ABI
|
||||
movq %rdi, 48(%rsp)
|
||||
movq %rsi, 56(%rsp)
|
||||
movups %xmm6, 64(%rsp)
|
||||
movups %xmm7, 80(%rsp)
|
||||
movups %xmm8, 96(%rsp)
|
||||
movups %xmm9, 112(%rsp)
|
||||
movups %xmm10, 128(%rsp)
|
||||
movups %xmm11, 144(%rsp)
|
||||
movups %xmm12, 160(%rsp)
|
||||
movups %xmm13, 176(%rsp)
|
||||
movups %xmm14, 192(%rsp)
|
||||
movups %xmm15, 208(%rsp)
|
||||
vmovups %xmm6, 64(%rsp)
|
||||
vmovups %xmm7, 80(%rsp)
|
||||
vmovups %xmm8, 96(%rsp)
|
||||
vmovups %xmm9, 112(%rsp)
|
||||
vmovups %xmm10, 128(%rsp)
|
||||
vmovups %xmm11, 144(%rsp)
|
||||
vmovups %xmm12, 160(%rsp)
|
||||
vmovups %xmm13, 176(%rsp)
|
||||
vmovups %xmm14, 192(%rsp)
|
||||
vmovups %xmm15, 208(%rsp)
|
||||
|
||||
movq ARG1, OLD_M
|
||||
movq ARG2, OLD_N
|
||||
|
@ -458,14 +456,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
movq OLD_C, C
|
||||
movq OLD_LDC, LDC
|
||||
#ifdef TRMMKERNEL
|
||||
movsd OLD_OFFSET, %xmm12
|
||||
vmovsd OLD_OFFSET, %xmm12
|
||||
#endif
|
||||
vmovaps %xmm3, %xmm0
|
||||
vmovsd OLD_ALPHA_I, %xmm1
|
||||
|
||||
#else
|
||||
movq STACKSIZE + 8(%rsp), LDC
|
||||
#ifdef TRMMKERNEL
|
||||
movsd STACKSIZE + 16(%rsp), %xmm12
|
||||
vmovsd STACKSIZE + 16(%rsp), %xmm12
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -1399,6 +1398,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
.L999:
|
||||
vzeroupper
|
||||
|
||||
movq SP, %rsp
|
||||
movq (%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
|
@ -1410,16 +1411,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifdef WINDOWS_ABI
|
||||
movq 48(%rsp), %rdi
|
||||
movq 56(%rsp), %rsi
|
||||
movups 64(%rsp), %xmm6
|
||||
movups 80(%rsp), %xmm7
|
||||
movups 96(%rsp), %xmm8
|
||||
movups 112(%rsp), %xmm9
|
||||
movups 128(%rsp), %xmm10
|
||||
movups 144(%rsp), %xmm11
|
||||
movups 160(%rsp), %xmm12
|
||||
movups 176(%rsp), %xmm13
|
||||
movups 192(%rsp), %xmm14
|
||||
movups 208(%rsp), %xmm15
|
||||
vmovups 64(%rsp), %xmm6
|
||||
vmovups 80(%rsp), %xmm7
|
||||
vmovups 96(%rsp), %xmm8
|
||||
vmovups 112(%rsp), %xmm9
|
||||
vmovups 128(%rsp), %xmm10
|
||||
vmovups 144(%rsp), %xmm11
|
||||
vmovups 160(%rsp), %xmm12
|
||||
vmovups 176(%rsp), %xmm13
|
||||
vmovups 192(%rsp), %xmm14
|
||||
vmovups 208(%rsp), %xmm15
|
||||
#endif
|
||||
|
||||
addq $STACKSIZE, %rsp
|
||||
|
|
|
@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
**********************************************************************************/
|
||||
|
||||
/********************************************************************************
|
||||
* 2013/11/13 Saar
|
||||
* 2014/06/28 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
|
@ -92,8 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#define L_BUFFER_SIZE 512*8*4
|
||||
#define LB2_OFFSET 512*8*2
|
||||
#define L_BUFFER_SIZE 8192
|
||||
|
||||
#define Ndiv6 24(%rsp)
|
||||
#define Nmod6 32(%rsp)
|
||||
|
@ -104,7 +103,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define KK 72(%rsp)
|
||||
#define KKK 80(%rsp)
|
||||
#define BUFFER1 128(%rsp)
|
||||
#define BUFFER2 LB2_OFFSET+128(%rsp)
|
||||
|
||||
#if defined(OS_WINDOWS)
|
||||
#if L_BUFFER_SIZE > 16384
|
||||
|
@ -695,16 +693,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifdef WINDOWS_ABI
|
||||
movq %rdi, 48(%rsp)
|
||||
movq %rsi, 56(%rsp)
|
||||
movups %xmm6, 64(%rsp)
|
||||
movups %xmm7, 80(%rsp)
|
||||
movups %xmm8, 96(%rsp)
|
||||
movups %xmm9, 112(%rsp)
|
||||
movups %xmm10, 128(%rsp)
|
||||
movups %xmm11, 144(%rsp)
|
||||
movups %xmm12, 160(%rsp)
|
||||
movups %xmm13, 176(%rsp)
|
||||
movups %xmm14, 192(%rsp)
|
||||
movups %xmm15, 208(%rsp)
|
||||
vmovups %xmm6, 64(%rsp)
|
||||
vmovups %xmm7, 80(%rsp)
|
||||
vmovups %xmm8, 96(%rsp)
|
||||
vmovups %xmm9, 112(%rsp)
|
||||
vmovups %xmm10, 128(%rsp)
|
||||
vmovups %xmm11, 144(%rsp)
|
||||
vmovups %xmm12, 160(%rsp)
|
||||
vmovups %xmm13, 176(%rsp)
|
||||
vmovups %xmm14, 192(%rsp)
|
||||
vmovups %xmm15, 208(%rsp)
|
||||
|
||||
movq ARG1, OLD_M
|
||||
movq ARG2, OLD_N
|
||||
|
@ -717,6 +715,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
movsd OLD_OFFSET, %xmm12
|
||||
#endif
|
||||
vmovaps %xmm3, %xmm0
|
||||
vmovsd OLD_ALPHA_I, %xmm1
|
||||
|
||||
#else
|
||||
movq STACKSIZE + 8(%rsp), LDC
|
||||
|
@ -1783,6 +1782,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
|
||||
.L999:
|
||||
vzeroupper
|
||||
|
||||
movq SP, %rsp
|
||||
movq (%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
|
@ -1794,16 +1795,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifdef WINDOWS_ABI
|
||||
movq 48(%rsp), %rdi
|
||||
movq 56(%rsp), %rsi
|
||||
movups 64(%rsp), %xmm6
|
||||
movups 80(%rsp), %xmm7
|
||||
movups 96(%rsp), %xmm8
|
||||
movups 112(%rsp), %xmm9
|
||||
movups 128(%rsp), %xmm10
|
||||
movups 144(%rsp), %xmm11
|
||||
movups 160(%rsp), %xmm12
|
||||
movups 176(%rsp), %xmm13
|
||||
movups 192(%rsp), %xmm14
|
||||
movups 208(%rsp), %xmm15
|
||||
vmovups 64(%rsp), %xmm6
|
||||
vmovups 80(%rsp), %xmm7
|
||||
vmovups 96(%rsp), %xmm8
|
||||
vmovups 112(%rsp), %xmm9
|
||||
vmovups 128(%rsp), %xmm10
|
||||
vmovups 144(%rsp), %xmm11
|
||||
vmovups 160(%rsp), %xmm12
|
||||
vmovups 176(%rsp), %xmm13
|
||||
vmovups 192(%rsp), %xmm14
|
||||
vmovups 208(%rsp), %xmm15
|
||||
#endif
|
||||
|
||||
addq $ STACKSIZE, %rsp
|
||||
|
|
|
@ -10,7 +10,7 @@ NEP: Data file for testing Nonsymmetric Eigenvalue Problem routines
|
|||
0 5 7 3 200 Values of INIBL (nibble crossover point)
|
||||
1 2 4 2 1 Values of ISHFTS (number of simultaneous shifts)
|
||||
0 1 2 0 1 Values of IACC22 (select structured matrix multiply: 0, 1 or 2)
|
||||
20.0 Threshold value
|
||||
70.0 Threshold value
|
||||
T Put T to test the error exits
|
||||
1 Code to interpret the seed
|
||||
NEP 21
|
||||
|
|
2
make.inc
2
make.inc
|
@ -1,7 +1,7 @@
|
|||
SHELL = /bin/sh
|
||||
PLAT = _LINUX
|
||||
DRVOPTS = $(OPTS)
|
||||
LOADER = $(FORTRAN) -pthread
|
||||
LOADER = $(FORTRAN)
|
||||
ARCHFLAGS= -ru
|
||||
#RANLIB = ranlib
|
||||
|
||||
|
|
29
param.h
29
param.h
|
@ -1032,14 +1032,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
#else
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -1073,6 +1073,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define GETRF_FACTOR 0.72
|
||||
|
||||
#define CGEMM3M_DEFAULT_UNROLL_N 4
|
||||
#define CGEMM3M_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_M 8
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -1104,22 +1108,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
#else
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
#endif
|
||||
|
||||
#define SGEMM_DEFAULT_P 512
|
||||
#define SGEMM_DEFAULT_P 768
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
//#define SGEMM_DEFAULT_R 1024
|
||||
|
||||
|
@ -1130,7 +1134,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define QGEMM_DEFAULT_P 504
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
|
||||
#define CGEMM_DEFAULT_P 128
|
||||
#define CGEMM_DEFAULT_P 384
|
||||
//#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define CGEMM_DEFAULT_R 1024
|
||||
|
||||
|
@ -1141,13 +1145,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define XGEMM_DEFAULT_P 252
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
|
||||
#define SGEMM_DEFAULT_Q 256
|
||||
#define SGEMM_DEFAULT_Q 384
|
||||
#define DGEMM_DEFAULT_Q 256
|
||||
#define QGEMM_DEFAULT_Q 128
|
||||
#define CGEMM_DEFAULT_Q 256
|
||||
#define CGEMM_DEFAULT_Q 192
|
||||
#define ZGEMM_DEFAULT_Q 192
|
||||
#define XGEMM_DEFAULT_Q 128
|
||||
|
||||
#define CGEMM3M_DEFAULT_UNROLL_N 4
|
||||
#define CGEMM3M_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_M 8
|
||||
|
||||
#define GETRF_FACTOR 0.72
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue