Merge branch 'develop' of github.com:xianyi/OpenBLAS into bulldozer
This commit is contained in:
commit
299b5a44dc
2
Makefile
2
Makefile
|
@ -314,7 +314,7 @@ clean ::
|
||||||
#endif
|
#endif
|
||||||
@$(MAKE) -C reference clean
|
@$(MAKE) -C reference clean
|
||||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
|
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
|
||||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
@rm -f Makefile.conf config.h cblas_noconst.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||||
@if test -d $(NETLIB_LAPACK_DIR); then \
|
@if test -d $(NETLIB_LAPACK_DIR); then \
|
||||||
echo deleting $(NETLIB_LAPACK_DIR); \
|
echo deleting $(NETLIB_LAPACK_DIR); \
|
||||||
rm -rf $(NETLIB_LAPACK_DIR) ;\
|
rm -rf $(NETLIB_LAPACK_DIR) ;\
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
# This is triggered by Makefile.system and runs before any of the code is built.
|
||||||
|
|
||||||
export BINARY
|
export BINARY
|
||||||
export USE_OPENMP
|
export USE_OPENMP
|
||||||
|
|
||||||
|
@ -15,7 +17,7 @@ ifdef CPUIDEMU
|
||||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
||||||
endif
|
endif
|
||||||
|
|
||||||
all: getarch_2nd
|
all: getarch_2nd cblas_noconst.h
|
||||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||||
|
|
||||||
|
@ -36,4 +38,7 @@ else
|
||||||
$(HOSTCC) -I. $(CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c
|
$(HOSTCC) -I. $(CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
cblas_noconst.h : cblas.h
|
||||||
|
perl -ane ' s/\bconst\b\s*//g; print; ' < cblas.h > cblas_noconst.h
|
||||||
|
|
||||||
dummy:
|
dummy:
|
|
@ -70,7 +70,7 @@ ifndef GOTOBLAS_MAKEFILE
|
||||||
export GOTOBLAS_MAKEFILE = 1
|
export GOTOBLAS_MAKEFILE = 1
|
||||||
|
|
||||||
# Generating Makefile.conf and config.h
|
# Generating Makefile.conf and config.h
|
||||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.getarch CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all)
|
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all)
|
||||||
|
|
||||||
ifndef TARGET_CORE
|
ifndef TARGET_CORE
|
||||||
include $(TOPDIR)/Makefile.conf
|
include $(TOPDIR)/Makefile.conf
|
||||||
|
|
448
cblas.h
448
cblas.h
|
@ -1,291 +1,293 @@
|
||||||
#ifndef CBLAS_H
|
#ifndef CBLAS_H
|
||||||
#define CBLAS_H
|
#define CBLAS_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
/* Assume C declarations for C++ */
|
/* Assume C declarations for C++ */
|
||||||
#endif /* __cplusplus */
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
/*Set the number of threads on runtime.*/
|
/*Set the number of threads on runtime.*/
|
||||||
void openblas_set_num_threads(int num_threads);
|
void openblas_set_num_threads(int num_threads);
|
||||||
void goto_set_num_threads(int num_threads);
|
void goto_set_num_threads(int num_threads);
|
||||||
|
|
||||||
|
/*Get the build configure on runtime.*/
|
||||||
|
char* openblas_get_config(void);
|
||||||
|
|
||||||
#define CBLAS_INDEX size_t
|
#define CBLAS_INDEX size_t
|
||||||
|
|
||||||
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
|
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||||
enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114};
|
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||||
enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
|
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||||
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
|
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||||
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
|
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||||
|
|
||||||
float cblas_sdsdot(blasint n, float, float *x, blasint incx, float *y, blasint incy);
|
float cblas_sdsdot(const blasint n, const float alpha, const float *x, const blasint incx, const float *y, const blasint incy);
|
||||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy);
|
double cblas_dsdot (const blasint n, const float *x, const blasint incx, const float *y, const blasint incy);
|
||||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy);
|
float cblas_sdot(const blasint n, const float *x, const blasint incx, const float *y, const blasint incy);
|
||||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy);
|
double cblas_ddot(const blasint n, const double *x, const blasint incx, const double *y, const blasint incy);
|
||||||
|
|
||||||
openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy);
|
openblas_complex_float cblas_cdotu(const blasint n, const float *x, const blasint incx, const float *y, const blasint incy);
|
||||||
openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy);
|
openblas_complex_float cblas_cdotc(const blasint n, const float *x, const blasint incx, const float *y, const blasint incy);
|
||||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy);
|
openblas_complex_double cblas_zdotu(const blasint n, const double *x, const blasint incx, const double *y, const blasint incy);
|
||||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy);
|
openblas_complex_double cblas_zdotc(const blasint n, const double *x, const blasint incx, const double *y, const blasint incy);
|
||||||
|
|
||||||
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
void cblas_cdotu_sub(const blasint n, const float *x, const blasint incx, const float *y, const blasint incy, openblas_complex_float *ret);
|
||||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret);
|
void cblas_cdotc_sub(const blasint n, const float *x, const blasint incx, const float *y, const blasint incy, openblas_complex_float *ret);
|
||||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
void cblas_zdotu_sub(const blasint n, const double *x, const blasint incx, const double *y, const blasint incy, openblas_complex_double *ret);
|
||||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret);
|
void cblas_zdotc_sub(const blasint n, const double *x, const blasint incx, const double *y, const blasint incy, openblas_complex_double *ret);
|
||||||
|
|
||||||
float cblas_sasum (blasint n, float *x, blasint incx);
|
float cblas_sasum (const blasint n, const float *x, const blasint incx);
|
||||||
double cblas_dasum (blasint n, double *x, blasint incx);
|
double cblas_dasum (const blasint n, const double *x, const blasint incx);
|
||||||
float cblas_scasum(blasint n, float *x, blasint incx);
|
float cblas_scasum(const blasint n, const float *x, const blasint incx);
|
||||||
double cblas_dzasum(blasint n, double *x, blasint incx);
|
double cblas_dzasum(const blasint n, const double *x, const blasint incx);
|
||||||
|
|
||||||
float cblas_snrm2 (blasint N, float *X, blasint incX);
|
float cblas_snrm2 (const blasint N, const float *X, const blasint incX);
|
||||||
double cblas_dnrm2 (blasint N, double *X, blasint incX);
|
double cblas_dnrm2 (const blasint N, const double *X, const blasint incX);
|
||||||
float cblas_scnrm2(blasint N, float *X, blasint incX);
|
float cblas_scnrm2(const blasint N, const float *X, const blasint incX);
|
||||||
double cblas_dznrm2(blasint N, double *X, blasint incX);
|
double cblas_dznrm2(const blasint N, const double *X, const blasint incX);
|
||||||
|
|
||||||
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx);
|
CBLAS_INDEX cblas_isamax(const blasint n, const float *x, const blasint incx);
|
||||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx);
|
CBLAS_INDEX cblas_idamax(const blasint n, const double *x, const blasint incx);
|
||||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx);
|
CBLAS_INDEX cblas_icamax(const blasint n, const float *x, const blasint incx);
|
||||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx);
|
CBLAS_INDEX cblas_izamax(const blasint n, const double *x, const blasint incx);
|
||||||
|
|
||||||
void cblas_saxpy(blasint n, float, float *x, blasint incx, float *y, blasint incy);
|
void cblas_saxpy(const blasint n, const float alpha, const float *x, const blasint incx, float *y, const blasint incy);
|
||||||
void cblas_daxpy(blasint n, double, double *x, blasint incx, double *y, blasint incy);
|
void cblas_daxpy(const blasint n, const double alpha, const double *x, const blasint incx, double *y, const blasint incy);
|
||||||
void cblas_caxpy(blasint n, float *, float *x, blasint incx, float *y, blasint incy);
|
void cblas_caxpy(const blasint n, const float *alpha, const float *x, const blasint incx, float *y, const blasint incy);
|
||||||
void cblas_zaxpy(blasint n, double *, double *x, blasint incx, double *y, blasint incy);
|
void cblas_zaxpy(const blasint n, const double *alpha, const double *x, const blasint incx, double *y, const blasint incy);
|
||||||
|
|
||||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
void cblas_scopy(const blasint n, const float *x, const blasint incx, float *y, const blasint incy);
|
||||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
void cblas_dcopy(const blasint n, const double *x, const blasint incx, double *y, const blasint incy);
|
||||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy);
|
void cblas_ccopy(const blasint n, const float *x, const blasint incx, float *y, const blasint incy);
|
||||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy);
|
void cblas_zcopy(const blasint n, const double *x, const blasint incx, double *y, const blasint incy);
|
||||||
|
|
||||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
void cblas_sswap(const blasint n, float *x, const blasint incx, float *y, const blasint incy);
|
||||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
void cblas_dswap(const blasint n, double *x, const blasint incx, double *y, const blasint incy);
|
||||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy);
|
void cblas_cswap(const blasint n, float *x, const blasint incx, float *y, const blasint incy);
|
||||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy);
|
void cblas_zswap(const blasint n, double *x, const blasint incx, double *y, const blasint incy);
|
||||||
|
|
||||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s);
|
void cblas_srot(const blasint N, float *X, const blasint incX, float *Y, const blasint incY, const float c, const float s);
|
||||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s);
|
void cblas_drot(const blasint N, double *X, const blasint incX, double *Y, const blasint incY, const double c, const double s);
|
||||||
|
|
||||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||||
|
|
||||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P);
|
void cblas_srotm(const blasint N, float *X, const blasint incX, float *Y, const blasint incY, const float *P);
|
||||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P);
|
void cblas_drotm(const blasint N, double *X, const blasint incX, double *Y, const blasint incY, const double *P);
|
||||||
|
|
||||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P);
|
void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P);
|
||||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P);
|
void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P);
|
||||||
|
|
||||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX);
|
void cblas_sscal(const blasint N, const float alpha, float *X, const blasint incX);
|
||||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX);
|
void cblas_dscal(const blasint N, const double alpha, double *X, const blasint incX);
|
||||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX);
|
void cblas_cscal(const blasint N, const float *alpha, float *X, const blasint incX);
|
||||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX);
|
void cblas_zscal(const blasint N, const double *alpha, double *X, const blasint incX);
|
||||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX);
|
void cblas_csscal(const blasint N, const float alpha, float *X, const blasint incX);
|
||||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX);
|
void cblas_zdscal(const blasint N, const double alpha, double *X, const blasint incX);
|
||||||
|
|
||||||
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
void cblas_sgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const blasint m, const blasint n,
|
||||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy);
|
const float alpha, const float *a, const blasint lda, const float *x, const blasint incx, const float beta, float *y, const blasint incy);
|
||||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
void cblas_dgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const blasint m, const blasint n,
|
||||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy);
|
const double alpha, const double *a, const blasint lda, const double *x, const blasint incx, const double beta, double *y, const blasint incy);
|
||||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
void cblas_cgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const blasint m, const blasint n,
|
||||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy);
|
const float *alpha, const float *a, const blasint lda, const float *x, const blasint incx, const float *beta, float *y, const blasint incy);
|
||||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n,
|
void cblas_zgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const blasint m, const blasint n,
|
||||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy);
|
const double *alpha, const double *a, const blasint lda, const double *x, const blasint incx, const double *beta, double *y, const blasint incy);
|
||||||
|
|
||||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
void cblas_sger (const enum CBLAS_ORDER order, const blasint M, const blasint N, const float alpha, const float *X, const blasint incX, const float *Y, const blasint incY, float *A, const blasint lda);
|
||||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
void cblas_dger (const enum CBLAS_ORDER order, const blasint M, const blasint N, const double alpha, const double *X, const blasint incX, const double *Y, const blasint incY, double *A, const blasint lda);
|
||||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
void cblas_cgeru(const enum CBLAS_ORDER order, const blasint M, const blasint N, const float *alpha, const float *X, const blasint incX, const float *Y, const blasint incY, float *A, const blasint lda);
|
||||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
void cblas_cgerc(const enum CBLAS_ORDER order, const blasint M, const blasint N, const float *alpha, const float *X, const blasint incX, const float *Y, const blasint incY, float *A, const blasint lda);
|
||||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
void cblas_zgeru(const enum CBLAS_ORDER order, const blasint M, const blasint N, const double *alpha, const double *X, const blasint incX, const double *Y, const blasint incY, double *A, const blasint lda);
|
||||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
void cblas_zgerc(const enum CBLAS_ORDER order, const blasint M, const blasint N, const double *alpha, const double *X, const blasint incX, const double *Y, const blasint incY, double *A, const blasint lda);
|
||||||
|
|
||||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
void cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const float *A, const blasint lda, float *X, const blasint incX);
|
||||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const double *A, const blasint lda, double *X, const blasint incX);
|
||||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const float *A, const blasint lda, float *X, const blasint incX);
|
||||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
void cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const double *A, const blasint lda, double *X, const blasint incX);
|
||||||
|
|
||||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
void cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const float *A, const blasint lda, float *X, const blasint incX);
|
||||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
void cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const double *A, const blasint lda, double *X, const blasint incX);
|
||||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX);
|
void cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const float *A, const blasint lda, float *X, const blasint incX);
|
||||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX);
|
void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const double *A, const blasint lda, double *X, const blasint incX);
|
||||||
|
|
||||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
void cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float alpha, const float *X, const blasint incX, float *A, const blasint lda);
|
||||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
void cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double alpha, const double *X, const blasint incX, double *A, const blasint lda);
|
||||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda);
|
void cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float alpha, const float *X, const blasint incX, float *A, const blasint lda);
|
||||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda);
|
void cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double alpha, const double *X, const blasint incX, double *A, const blasint lda);
|
||||||
|
|
||||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X,
|
void cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,const blasint N, const float alpha, const float *X,
|
||||||
blasint incX, float *Y, blasint incY, float *A, blasint lda);
|
const blasint incX, const float *Y, const blasint incY, float *A, const blasint lda);
|
||||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,
|
void cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double alpha, const double *X,
|
||||||
blasint incX, double *Y, blasint incY, double *A, blasint lda);
|
const blasint incX, const double *Y, const blasint incY, double *A, const blasint lda);
|
||||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX,
|
void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float *alpha, const float *X, const blasint incX,
|
||||||
float *Y, blasint incY, float *A, blasint lda);
|
const float *Y, const blasint incY, float *A, const blasint lda);
|
||||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX,
|
void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double *alpha, const double *X, const blasint incX,
|
||||||
double *Y, blasint incY, double *A, blasint lda);
|
const double *Y, const blasint incY, double *A, const blasint lda);
|
||||||
|
|
||||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
void cblas_sgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const blasint M, const blasint N,
|
||||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
const blasint KL, const blasint KU, const float alpha, const float *A, const blasint lda, const float *X, const blasint incX, const float beta, float *Y, const blasint incY);
|
||||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
void cblas_dgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const blasint M, const blasint N,
|
||||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
const blasint KL, const blasint KU, const double alpha, const double *A, const blasint lda, const double *X, const blasint incX, const double beta, double *Y, const blasint incY);
|
||||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
void cblas_cgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const blasint M, const blasint N,
|
||||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
const blasint KL, const blasint KU, const float *alpha, const float *A, const blasint lda, const float *X, const blasint incX, const float *beta, float *Y, const blasint incY);
|
||||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N,
|
void cblas_zgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const blasint M, const blasint N,
|
||||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
const blasint KL, const blasint KU, const double *alpha, const double *A, const blasint lda, const double *X, const blasint incX, const double *beta, double *Y, const blasint incY);
|
||||||
|
|
||||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A,
|
void cblas_ssbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const blasint K, const float alpha, const float *A,
|
||||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
const blasint lda, const float *X, const blasint incX, const float beta, float *Y, const blasint incY);
|
||||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A,
|
void cblas_dsbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const blasint K, const double alpha, const double *A,
|
||||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
const blasint lda, const double *X, const blasint incX, const double beta, double *Y, const blasint incY);
|
||||||
|
|
||||||
|
|
||||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
const blasint N, const blasint K, const float *A, const blasint lda, float *X, const blasint incX);
|
||||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
const blasint N, const blasint K, const double *A, const blasint lda, double *X, const blasint incX);
|
||||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
const blasint N, const blasint K, const float *A, const blasint lda, float *X, const blasint incX);
|
||||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
const blasint N, const blasint K, const double *A, const blasint lda, double *X, const blasint incX);
|
||||||
|
|
||||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_stbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
const blasint N, const blasint K, const float *A, const blasint lda, float *X, const blasint incX);
|
||||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_dtbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
const blasint N, const blasint K, const double *A, const blasint lda, double *X, const blasint incX);
|
||||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX);
|
const blasint N, const blasint K, const float *A, const blasint lda, float *X, const blasint incX);
|
||||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX);
|
const blasint N, const blasint K, const double *A, const blasint lda, double *X, const blasint incX);
|
||||||
|
|
||||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, float *Ap, float *X, blasint incX);
|
const blasint N, const float *Ap, float *X, const blasint incX);
|
||||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, double *Ap, double *X, blasint incX);
|
const blasint N, const double *Ap, double *X, const blasint incX);
|
||||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, float *Ap, float *X, blasint incX);
|
const blasint N, const float *Ap, float *X, const blasint incX);
|
||||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, double *Ap, double *X, blasint incX);
|
const blasint N, const double *Ap, double *X, const blasint incX);
|
||||||
|
|
||||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, float *Ap, float *X, blasint incX);
|
const blasint N, const float *Ap, float *X, const blasint incX);
|
||||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, double *Ap, double *X, blasint incX);
|
const blasint N, const double *Ap, double *X, const blasint incX);
|
||||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, float *Ap, float *X, blasint incX);
|
const blasint N, const float *Ap, float *X, const blasint incX);
|
||||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
|
void cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
|
||||||
blasint N, double *Ap, double *X, blasint incX);
|
const blasint N, const double *Ap, double *X, const blasint incX);
|
||||||
|
|
||||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A,
|
void cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float alpha, const float *A,
|
||||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY);
|
const blasint lda, const float *X, const blasint incX, const float beta, float *Y, const blasint incY);
|
||||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A,
|
void cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double alpha, const double *A,
|
||||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY);
|
const blasint lda, const double *X, const blasint incX, const double beta, double *Y, const blasint incY);
|
||||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A,
|
void cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float *alpha, const float *A,
|
||||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
const blasint lda, const float *X, const blasint incX, const float *beta, float *Y, const blasint incY);
|
||||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A,
|
void cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double *alpha, const double *A,
|
||||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
const blasint lda, const double *X, const blasint incX, const double *beta, double *Y, const blasint incY);
|
||||||
|
|
||||||
|
|
||||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap,
|
void cblas_sspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float alpha, const float *Ap,
|
||||||
float *X, blasint incX, float beta, float *Y, blasint incY);
|
const float *X, const blasint incX, const float beta, float *Y, const blasint incY);
|
||||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap,
|
void cblas_dspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double alpha, const double *Ap,
|
||||||
double *X, blasint incX, double beta, double *Y, blasint incY);
|
const double *X, const blasint incX, const double beta, double *Y, const blasint incY);
|
||||||
|
|
||||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap);
|
void cblas_sspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float alpha, const float *X, const blasint incX, float *Ap);
|
||||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap);
|
void cblas_dspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double alpha, const double *X, const blasint incX, double *Ap);
|
||||||
|
|
||||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A);
|
void cblas_chpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float alpha, const float *X, const blasint incX, float *A);
|
||||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A);
|
void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double alpha, const double *X,const blasint incX, double *A);
|
||||||
|
|
||||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A);
|
void cblas_sspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float alpha, const float *X, const blasint incX, const float *Y, const blasint incY, float *A);
|
||||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A);
|
void cblas_dspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double alpha, const double *X, const blasint incX, const double *Y, const blasint incY, double *A);
|
||||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap);
|
void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const float *alpha, const float *X, const blasint incX, const float *Y, const blasint incY, float *Ap);
|
||||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap);
|
void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const double *alpha, const double *X, const blasint incX, const double *Y, const blasint incY, double *Ap);
|
||||||
|
|
||||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
void cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const blasint K,
|
||||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
const float *alpha, const float *A, const blasint lda, const float *X, const blasint incX, const float *beta, float *Y, const blasint incY);
|
||||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K,
|
void cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N, const blasint K,
|
||||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
const double *alpha, const double *A, const blasint lda, const double *X, const blasint incX, const double *beta, double *Y, const blasint incY);
|
||||||
|
|
||||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
void cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N,
|
||||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY);
|
const float *alpha, const float *Ap, const float *X, const blasint incX, const float *beta, float *Y, const blasint incY);
|
||||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N,
|
void cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const blasint N,
|
||||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY);
|
const double *alpha, const double *Ap, const double *X, const blasint incX, const double *beta, double *Y, const blasint incY);
|
||||||
|
|
||||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const blasint M, const blasint N, const blasint K,
|
||||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
const float alpha, const float *A, const blasint lda, const float *B, const blasint ldb, const float beta, float *C, const blasint ldc);
|
||||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const blasint M, const blasint N, const blasint K,
|
||||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
const double alpha, const double *A, const blasint lda, const double *B, const blasint ldb, const double beta, double *C, const blasint ldc);
|
||||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const blasint M, const blasint N, const blasint K,
|
||||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
const float *alpha, const float *A, const blasint lda, const float *B, const blasint ldb, const float *beta, float *C, const blasint ldc);
|
||||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K,
|
void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const blasint M, const blasint N, const blasint K,
|
||||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
const double *alpha, const double *A, const blasint lda, const double *B, const blasint ldb, const double *beta, double *C, const blasint ldc);
|
||||||
|
|
||||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const blasint M, const blasint N,
|
||||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
const float alpha, const float *A, const blasint lda, const float *B, const blasint ldb, const float beta, float *C, const blasint ldc);
|
||||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const blasint M, const blasint N,
|
||||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
const double alpha, const double *A, const blasint lda, const double *B, const blasint ldb, const double beta, double *C, const blasint ldc);
|
||||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const blasint M, const blasint N,
|
||||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
const float *alpha, const float *A, const blasint lda, const float *B, const blasint ldb, const float *beta, float *C, const blasint ldc);
|
||||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const blasint M, const blasint N,
|
||||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
const double *alpha, const double *A, const blasint lda, const double *B, const blasint ldb, const double *beta, double *C, const blasint ldc);
|
||||||
|
|
||||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans,
|
||||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
const blasint N, const blasint K, const float alpha, const float *A, const blasint lda, const float beta, float *C, const blasint ldc);
|
||||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans,
|
||||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
const blasint N, const blasint K, const double alpha, const double *A, const blasint lda, const double beta, double *C, const blasint ldc);
|
||||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans,
|
||||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc);
|
const blasint N, const blasint K, const float *alpha, const float *A, const blasint lda, const float *beta, float *C, const blasint ldc);
|
||||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans,
|
||||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc);
|
const blasint N, const blasint K, const double *alpha, const double *A, const blasint lda, const double *beta, double *C, const blasint ldc);
|
||||||
|
|
||||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans,
|
||||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
const blasint N, const blasint K, const float alpha, const float *A, const blasint lda, const float *B, const blasint ldb, const float beta, float *C, const blasint ldc);
|
||||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans,
|
||||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
const blasint N, const blasint K, const double alpha, const double *A, const blasint lda, const double *B, const blasint ldb, const double beta, double *C, const blasint ldc);
|
||||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans,
|
||||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
const blasint N, const blasint K, const float *alpha, const float *A, const blasint lda, const float *B, const blasint ldb, const float *beta, float *C, const blasint ldc);
|
||||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
|
void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans,
|
||||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
const blasint N, const blasint K, const double *alpha, const double *A, const blasint lda, const double *B, const blasint ldb, const double *beta, double *C, const blasint ldc);
|
||||||
|
|
||||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
|
||||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
const enum CBLAS_DIAG Diag, const blasint M, const blasint N, const float alpha, const float *A, const blasint lda, float *B, const blasint ldb);
|
||||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
|
||||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
const enum CBLAS_DIAG Diag, const blasint M, const blasint N, const double alpha, const double *A, const blasint lda, double *B, const blasint ldb);
|
||||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
|
||||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
const enum CBLAS_DIAG Diag, const blasint M, const blasint N, const float *alpha, const float *A, const blasint lda, float *B, const blasint ldb);
|
||||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
|
||||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
const enum CBLAS_DIAG Diag, const blasint M, const blasint N, const double *alpha, const double *A, const blasint lda, double *B, const blasint ldb);
|
||||||
|
|
||||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
|
||||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb);
|
const enum CBLAS_DIAG Diag, const blasint M, const blasint N, const float alpha, const float *A, const blasint lda, float *B, const blasint ldb);
|
||||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
|
||||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb);
|
const enum CBLAS_DIAG Diag, const blasint M, const blasint N, const double alpha, const double *A, const blasint lda, double *B, const blasint ldb);
|
||||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
|
||||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb);
|
const enum CBLAS_DIAG Diag, const blasint M, const blasint N, const float *alpha, const float *A, const blasint lda, float *B, const blasint ldb);
|
||||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
|
void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
|
||||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb);
|
const enum CBLAS_DIAG Diag, const blasint M, const blasint N, const double *alpha, const double *A, const blasint lda, double *B, const blasint ldb);
|
||||||
|
|
||||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const blasint M, const blasint N,
|
||||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc);
|
const float *alpha, const float *A, const blasint lda, const float *B, const blasint ldb, const float *beta, float *C, const blasint ldc);
|
||||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N,
|
void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const blasint M, const blasint N,
|
||||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc);
|
const double *alpha, const double *A, const blasint lda, const double *B, const blasint ldb, const double *beta, double *C, const blasint ldc);
|
||||||
|
|
||||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const blasint N, const blasint K,
|
||||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc);
|
const float alpha, const float *A, const blasint lda, const float beta, float *C, const blasint ldc);
|
||||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const blasint N, const blasint K,
|
||||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc);
|
const double alpha, const double *A, const blasint lda, const double beta, double *C, const blasint ldc);
|
||||||
|
|
||||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const blasint N, const blasint K,
|
||||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc);
|
const float *alpha, const float *A, const blasint lda, const float *B, const blasint ldb, const float beta, float *C, const blasint ldc);
|
||||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K,
|
void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const blasint N, const blasint K,
|
||||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
const double *alpha, const double *A, const blasint lda, const double *B, const blasint ldb, const double beta, double *C, const blasint ldc);
|
||||||
|
|
||||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __cplusplus */
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
6
common.h
6
common.h
|
@ -390,7 +390,8 @@ typedef int blasint;
|
||||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||||
extension since version 3.0. If neither are available, use a compatible
|
extension since version 3.0. If neither are available, use a compatible
|
||||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||||
#if defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || __GNUC__ >= 3
|
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||||
|
(__GNUC__ >= 3 && !defined(__cplusplus)))
|
||||||
#define OPENBLAS_COMPLEX_C99
|
#define OPENBLAS_COMPLEX_C99
|
||||||
typedef float _Complex openblas_complex_float;
|
typedef float _Complex openblas_complex_float;
|
||||||
typedef double _Complex openblas_complex_double;
|
typedef double _Complex openblas_complex_double;
|
||||||
|
@ -557,7 +558,8 @@ typedef struct {
|
||||||
#include "common_level3.h"
|
#include "common_level3.h"
|
||||||
#include "common_lapack.h"
|
#include "common_lapack.h"
|
||||||
#ifdef CBLAS
|
#ifdef CBLAS
|
||||||
#include "cblas.h"
|
/* This header file is generated from "cblas.h" (see Makefile.prebuild). */
|
||||||
|
#include "cblas_noconst.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
10
cpuid_x86.c
10
cpuid_x86.c
|
@ -118,8 +118,9 @@ static inline int have_excpuid(void){
|
||||||
|
|
||||||
#ifndef NO_AVX
|
#ifndef NO_AVX
|
||||||
static inline void xgetbv(int op, int * eax, int * edx){
|
static inline void xgetbv(int op, int * eax, int * edx){
|
||||||
|
//Use binary code for xgetbv
|
||||||
__asm__ __volatile__
|
__asm__ __volatile__
|
||||||
("xgetbv": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
|
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1035,6 +1036,8 @@ int get_cpuname(void){
|
||||||
return CPUTYPE_SANDYBRIDGE;
|
return CPUTYPE_SANDYBRIDGE;
|
||||||
else
|
else
|
||||||
return CPUTYPE_NEHALEM;
|
return CPUTYPE_NEHALEM;
|
||||||
|
case 14:
|
||||||
|
// Xeon E7540
|
||||||
case 15:
|
case 15:
|
||||||
//Xeon Processor E7 (Westmere-EX)
|
//Xeon Processor E7 (Westmere-EX)
|
||||||
return CPUTYPE_NEHALEM;
|
return CPUTYPE_NEHALEM;
|
||||||
|
@ -1407,6 +1410,8 @@ int get_coretype(void){
|
||||||
return CORE_SANDYBRIDGE;
|
return CORE_SANDYBRIDGE;
|
||||||
else
|
else
|
||||||
return CORE_NEHALEM; //OS doesn't support AVX
|
return CORE_NEHALEM; //OS doesn't support AVX
|
||||||
|
case 14:
|
||||||
|
//Xeon E7540
|
||||||
case 15:
|
case 15:
|
||||||
//Xeon Processor E7 (Westmere-EX)
|
//Xeon Processor E7 (Westmere-EX)
|
||||||
return CORE_NEHALEM;
|
return CORE_NEHALEM;
|
||||||
|
@ -1508,6 +1513,9 @@ void get_cpuconfig(void){
|
||||||
printf("#define DTB_SIZE %d\n", info.size * 1024);
|
printf("#define DTB_SIZE %d\n", info.size * 1024);
|
||||||
printf("#define DTB_ASSOCIATIVE %d\n", info.associative);
|
printf("#define DTB_ASSOCIATIVE %d\n", info.associative);
|
||||||
printf("#define DTB_DEFAULT_ENTRIES %d\n", info.linesize);
|
printf("#define DTB_DEFAULT_ENTRIES %d\n", info.linesize);
|
||||||
|
} else {
|
||||||
|
//fall back for some virtual machines.
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
features = get_cputype(GET_FEATURE);
|
features = get_cputype(GET_FEATURE);
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
TOPDIR = ../..
|
TOPDIR = ../..
|
||||||
include ../../Makefile.system
|
include ../../Makefile.system
|
||||||
|
|
||||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX)
|
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX)
|
||||||
|
|
||||||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||||
|
|
||||||
|
@ -103,6 +103,9 @@ blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../.
|
||||||
openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
|
openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
|
||||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
|
openblas_get_config.$(SUFFIX) : openblas_get_config.c
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
|
|
|
@ -80,8 +80,9 @@ extern gotoblas_t gotoblas_BULLDOZER;
|
||||||
|
|
||||||
#ifndef NO_AVX
|
#ifndef NO_AVX
|
||||||
static inline void xgetbv(int op, int * eax, int * edx){
|
static inline void xgetbv(int op, int * eax, int * edx){
|
||||||
|
//Use binary code for xgetbv
|
||||||
__asm__ __volatile__
|
__asm__ __volatile__
|
||||||
("xgetbv": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
|
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -165,7 +166,8 @@ static gotoblas_t *get_coretype(void){
|
||||||
|
|
||||||
//Intel Xeon Processor 5600 (Westmere-EP)
|
//Intel Xeon Processor 5600 (Westmere-EP)
|
||||||
//Xeon Processor E7 (Westmere-EX)
|
//Xeon Processor E7 (Westmere-EX)
|
||||||
if (model == 12 || model == 15) return &gotoblas_NEHALEM;
|
//Xeon E7540
|
||||||
|
if (model == 12 || model == 14 || model == 15) return &gotoblas_NEHALEM;
|
||||||
|
|
||||||
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
|
//Intel Core i5-2000 /i7-2000 (Sandy Bridge)
|
||||||
//Intel Core i7-3000 / Xeon E5
|
//Intel Core i7-3000 / Xeon E5
|
||||||
|
@ -285,6 +287,15 @@ void gotoblas_dynamic_init(void) {
|
||||||
if (gotoblas == NULL) gotoblas = &gotoblas_KATMAI;
|
if (gotoblas == NULL) gotoblas = &gotoblas_KATMAI;
|
||||||
#else
|
#else
|
||||||
if (gotoblas == NULL) gotoblas = &gotoblas_PRESCOTT;
|
if (gotoblas == NULL) gotoblas = &gotoblas_PRESCOTT;
|
||||||
|
/* sanity check, if 64bit pointer we can't have a 32 bit cpu */
|
||||||
|
if (sizeof(void*) == 8) {
|
||||||
|
if (gotoblas == &gotoblas_KATMAI ||
|
||||||
|
gotoblas == &gotoblas_COPPERMINE ||
|
||||||
|
gotoblas == &gotoblas_NORTHWOOD ||
|
||||||
|
gotoblas == &gotoblas_BANIAS ||
|
||||||
|
gotoblas == &gotoblas_ATHLON)
|
||||||
|
gotoblas = &gotoblas_PRESCOTT;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (gotoblas && gotoblas -> init) {
|
if (gotoblas && gotoblas -> init) {
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
static char* openblas_config_str=""
|
||||||
|
#ifdef USE64BITINT
|
||||||
|
"USE64BITINT "
|
||||||
|
#endif
|
||||||
|
#ifdef NO_CBLAS
|
||||||
|
"NO_CBLAS "
|
||||||
|
#endif
|
||||||
|
#ifdef NO_LAPACK
|
||||||
|
"NO_LAPACK "
|
||||||
|
#endif
|
||||||
|
#ifdef NO_LAPACKE
|
||||||
|
"NO_LAPACKE "
|
||||||
|
#endif
|
||||||
|
#ifdef DYNAMIC_ARCH
|
||||||
|
"DYNAMIC_ARCH "
|
||||||
|
#endif
|
||||||
|
#ifdef NO_AFFINITY
|
||||||
|
"NO_AFFINITY "
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
|
||||||
|
char* CNAME() {
|
||||||
|
return openblas_config_str;
|
||||||
|
}
|
||||||
|
|
|
@ -74,6 +74,7 @@
|
||||||
|
|
||||||
@misc_no_underscore_objs = (
|
@misc_no_underscore_objs = (
|
||||||
openblas_set_num_threads, goto_set_num_threads,
|
openblas_set_num_threads, goto_set_num_threads,
|
||||||
|
openblas_get_config,
|
||||||
);
|
);
|
||||||
|
|
||||||
@misc_underscore_objs = (
|
@misc_underscore_objs = (
|
||||||
|
|
|
@ -34,7 +34,7 @@ int main(int argc, char **argv) {
|
||||||
#ifdef USE64BITINT
|
#ifdef USE64BITINT
|
||||||
printf("#define USE64BITINT\n");
|
printf("#define USE64BITINT\n");
|
||||||
#endif
|
#endif
|
||||||
printf("#define GEMM_MULTITHREAD_THRESHOLD\t%ld\n", GEMM_MULTITHREAD_THRESHOLD);
|
printf("#define GEMM_MULTITHREAD_THRESHOLD\t%ld\n", (long int)GEMM_MULTITHREAD_THRESHOLD);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -634,10 +634,10 @@ static void init_parameter(void) {
|
||||||
TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
|
TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH)
|
#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
fprintf(stderr, "Katmai, Coppermine, Banias\n");
|
fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
|
TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
|
||||||
|
|
|
@ -89,17 +89,22 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define STACKSIZE 16
|
#define STACKSIZE 16
|
||||||
|
#define ARGS 16
|
||||||
|
|
||||||
#define M 4 + STACKSIZE(%esp)
|
#define M 4 + STACKSIZE+ARGS(%esp)
|
||||||
#define N 8 + STACKSIZE(%esp)
|
#define N 8 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA 16 + STACKSIZE(%esp)
|
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
|
||||||
#define A 20 + STACKSIZE(%esp)
|
#define A 20 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_LDA 24 + STACKSIZE(%esp)
|
#define STACK_LDA 24 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_X 28 + STACKSIZE(%esp)
|
#define STACK_X 28 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCX 32 + STACKSIZE(%esp)
|
#define STACK_INCX 32 + STACKSIZE+ARGS(%esp)
|
||||||
#define Y 36 + STACKSIZE(%esp)
|
#define Y 36 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCY 40 + STACKSIZE(%esp)
|
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 44 + STACKSIZE(%esp)
|
#define BUFFER 44 + STACKSIZE+ARGS(%esp)
|
||||||
|
#define MMM 0+ARGS(%esp)
|
||||||
|
#define YY 4+ARGS(%esp)
|
||||||
|
#define AA 8+ARGS(%esp)
|
||||||
|
#define LDAX 12+ARGS(%esp)
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -114,6 +119,7 @@
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
subl $ARGS,%esp
|
||||||
pushl %ebp
|
pushl %ebp
|
||||||
pushl %edi
|
pushl %edi
|
||||||
pushl %esi
|
pushl %esi
|
||||||
|
@ -121,7 +127,34 @@
|
||||||
|
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
|
movl Y,J
|
||||||
|
movl J,YY # backup Y
|
||||||
|
movl A,J
|
||||||
|
movl J,AA # backup A
|
||||||
|
movl M,J
|
||||||
|
movl J,MMM # backup M
|
||||||
|
.L0t:
|
||||||
|
xorl J,J
|
||||||
|
addl $1,J
|
||||||
|
sall $21,J
|
||||||
|
subl J,MMM
|
||||||
|
movl J,M
|
||||||
|
jge .L00t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
movl MMM,%eax
|
||||||
|
addl J,%eax
|
||||||
|
jle .L999x
|
||||||
|
movl %eax,M
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movl AA,%eax
|
||||||
|
movl %eax,A
|
||||||
|
|
||||||
|
movl YY,J
|
||||||
|
movl J,Y
|
||||||
movl STACK_LDA, LDA
|
movl STACK_LDA, LDA
|
||||||
|
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
|
|
||||||
|
@ -651,12 +684,22 @@
|
||||||
addss 0 * SIZE(X), %xmm0
|
addss 0 * SIZE(X), %xmm0
|
||||||
movss %xmm0, (Y1)
|
movss %xmm0, (Y1)
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movl M,J
|
||||||
|
leal (,J,SIZE),%eax
|
||||||
|
addl %eax,AA
|
||||||
|
movl YY,J
|
||||||
|
addl %eax,J
|
||||||
|
movl J,YY
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
popl %ebp
|
popl %ebp
|
||||||
|
addl $ARGS,%esp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -76,17 +76,22 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define STACKSIZE 16
|
#define STACKSIZE 16
|
||||||
|
#define ARGS 16
|
||||||
|
|
||||||
#define M 4 + STACKSIZE(%esp)
|
#define M 4 + STACKSIZE+ARGS(%esp)
|
||||||
#define N 8 + STACKSIZE(%esp)
|
#define N 8 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA 16 + STACKSIZE(%esp)
|
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
|
||||||
#define A 24 + STACKSIZE(%esp)
|
#define A 24 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_LDA 28 + STACKSIZE(%esp)
|
#define STACK_LDA 28 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_X 32 + STACKSIZE(%esp)
|
#define STACK_X 32 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCX 36 + STACKSIZE(%esp)
|
#define STACK_INCX 36 + STACKSIZE+ARGS(%esp)
|
||||||
#define Y 40 + STACKSIZE(%esp)
|
#define Y 40 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCY 44 + STACKSIZE(%esp)
|
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 48 + STACKSIZE(%esp)
|
#define BUFFER 48 + STACKSIZE+ARGS(%esp)
|
||||||
|
|
||||||
|
#define MMM 0+ARGS(%esp)
|
||||||
|
#define YY 4+ARGS(%esp)
|
||||||
|
#define AA 8+ARGS(%esp)
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -101,6 +106,8 @@
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
|
||||||
|
subl $ARGS,%esp
|
||||||
pushl %ebp
|
pushl %ebp
|
||||||
pushl %edi
|
pushl %edi
|
||||||
pushl %esi
|
pushl %esi
|
||||||
|
@ -108,6 +115,33 @@
|
||||||
|
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
|
movl Y,J
|
||||||
|
movl J,YY # backup Y
|
||||||
|
movl A,J
|
||||||
|
movl J,AA # backup A
|
||||||
|
movl M,J
|
||||||
|
movl J,MMM # backup M
|
||||||
|
.L0t:
|
||||||
|
xorl J,J
|
||||||
|
addl $1,J
|
||||||
|
sall $20,J
|
||||||
|
subl J,MMM
|
||||||
|
movl J,M
|
||||||
|
jge .L00t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
movl MMM,%eax
|
||||||
|
addl J,%eax
|
||||||
|
jle .L999x
|
||||||
|
movl %eax,M
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movl AA,%eax
|
||||||
|
movl %eax,A
|
||||||
|
|
||||||
|
movl YY,J
|
||||||
|
movl J,Y
|
||||||
|
|
||||||
movl STACK_LDA, LDA
|
movl STACK_LDA, LDA
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
|
@ -677,10 +711,22 @@
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movl M,J
|
||||||
|
leal (,J,SIZE),%eax
|
||||||
|
addl %eax,AA
|
||||||
|
movl YY,J
|
||||||
|
addl %eax,J
|
||||||
|
movl J,YY
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
|
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
popl %ebp
|
popl %ebp
|
||||||
|
addl $ARGS,%esp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -89,17 +89,24 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define STACKSIZE 16
|
#define STACKSIZE 16
|
||||||
|
#define ARGS 20
|
||||||
|
|
||||||
#define M 4 + STACKSIZE(%esp)
|
#define M 4 + STACKSIZE+ARGS(%esp)
|
||||||
#define N 8 + STACKSIZE(%esp)
|
#define N 8 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA 16 + STACKSIZE(%esp)
|
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
|
||||||
#define A 20 + STACKSIZE(%esp)
|
#define A 20 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_LDA 24 + STACKSIZE(%esp)
|
#define STACK_LDA 24 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_X 28 + STACKSIZE(%esp)
|
#define STACK_X 28 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCX 32 + STACKSIZE(%esp)
|
#define STACK_INCX 32 + STACKSIZE+ARGS(%esp)
|
||||||
#define Y 36 + STACKSIZE(%esp)
|
#define Y 36 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCY 40 + STACKSIZE(%esp)
|
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 44 + STACKSIZE(%esp)
|
#define BUFFER 44 + STACKSIZE+ARGS(%esp)
|
||||||
|
|
||||||
|
#define MMM 0+STACKSIZE(%esp)
|
||||||
|
#define NN 4+STACKSIZE(%esp)
|
||||||
|
#define AA 8+STACKSIZE(%esp)
|
||||||
|
#define LDAX 12+STACKSIZE(%esp)
|
||||||
|
#define XX 16+STACKSIZE(%esp)
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -114,6 +121,7 @@
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
subl $ARGS,%esp
|
||||||
pushl %ebp
|
pushl %ebp
|
||||||
pushl %edi
|
pushl %edi
|
||||||
pushl %esi
|
pushl %esi
|
||||||
|
@ -122,7 +130,42 @@
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
movl STACK_LDA, LDA
|
movl STACK_LDA, LDA
|
||||||
|
movl LDA,LDAX # backup LDA
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
|
movl X,XX
|
||||||
|
movl N,J
|
||||||
|
movl J,NN # backup N
|
||||||
|
movl A,J
|
||||||
|
movl J,AA # backup A
|
||||||
|
movl M,J
|
||||||
|
movl J,MMM # mov M to MMM
|
||||||
|
.L0t:
|
||||||
|
xorl J,J
|
||||||
|
addl $1,J
|
||||||
|
sall $22,J # J=2^22; 2^22*sizeof(float)=buffer size(16MB)
|
||||||
|
subl $8, J # Don't use the last 8 floats in the buffer.
|
||||||
|
# Now, split M by block J
|
||||||
|
subl J,MMM # MMM=MMM-J
|
||||||
|
movl J,M
|
||||||
|
jge .L00t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
movl MMM,%eax
|
||||||
|
addl J,%eax
|
||||||
|
jle .L999x
|
||||||
|
movl %eax,M
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movl AA,%eax
|
||||||
|
movl %eax,A # mov AA to A
|
||||||
|
|
||||||
|
movl NN,%eax
|
||||||
|
movl %eax,N # reset N
|
||||||
|
|
||||||
|
|
||||||
|
movl LDAX, LDA # reset LDA
|
||||||
|
movl XX,X
|
||||||
|
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
movl STACK_INCY, INCY
|
movl STACK_INCY, INCY
|
||||||
|
|
||||||
|
@ -198,6 +241,20 @@
|
||||||
jg .L06
|
jg .L06
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
|
//Pad with zeros to prevent loading stale values from the buffer.
|
||||||
|
movl M, I
|
||||||
|
movl $8, J
|
||||||
|
andl $7, I
|
||||||
|
xorps %xmm0, %xmm0
|
||||||
|
subl I, J
|
||||||
|
ALIGN_2
|
||||||
|
.L07:
|
||||||
|
movss %xmm0, 0 * SIZE(Y1)
|
||||||
|
addl $SIZE, Y1
|
||||||
|
decl J
|
||||||
|
jg .L07
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
.L10:
|
.L10:
|
||||||
movl Y, Y1
|
movl Y, Y1
|
||||||
|
|
||||||
|
@ -628,10 +685,22 @@
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movl M,J
|
||||||
|
leal (,J,SIZE),%eax
|
||||||
|
addl %eax,AA
|
||||||
|
movl XX,J
|
||||||
|
addl %eax,J
|
||||||
|
movl J,XX
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
popl %ebp
|
popl %ebp
|
||||||
|
|
||||||
|
addl $ARGS,%esp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -76,17 +76,23 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define STACKSIZE 16
|
#define STACKSIZE 16
|
||||||
|
#define ARGS 16
|
||||||
|
|
||||||
#define M 4 + STACKSIZE(%esp)
|
#define M 4 + STACKSIZE+ARGS(%esp)
|
||||||
#define N 8 + STACKSIZE(%esp)
|
#define N 8 + STACKSIZE+ARGS(%esp)
|
||||||
#define ALPHA 16 + STACKSIZE(%esp)
|
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
|
||||||
#define A 24 + STACKSIZE(%esp)
|
#define A 24 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_LDA 28 + STACKSIZE(%esp)
|
#define STACK_LDA 28 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_X 32 + STACKSIZE(%esp)
|
#define STACK_X 32 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCX 36 + STACKSIZE(%esp)
|
#define STACK_INCX 36 + STACKSIZE+ARGS(%esp)
|
||||||
#define Y 40 + STACKSIZE(%esp)
|
#define Y 40 + STACKSIZE+ARGS(%esp)
|
||||||
#define STACK_INCY 44 + STACKSIZE(%esp)
|
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
|
||||||
#define BUFFER 48 + STACKSIZE(%esp)
|
#define BUFFER 48 + STACKSIZE+ARGS(%esp)
|
||||||
|
|
||||||
|
#define MMM 0+STACKSIZE(%esp)
|
||||||
|
#define AA 4+STACKSIZE(%esp)
|
||||||
|
#define LDAX 8+STACKSIZE(%esp)
|
||||||
|
#define NN 12+STACKSIZE(%esp)
|
||||||
|
|
||||||
#define I %eax
|
#define I %eax
|
||||||
#define J %ebx
|
#define J %ebx
|
||||||
|
@ -101,6 +107,8 @@
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
subl $ARGS,%esp
|
||||||
|
|
||||||
pushl %ebp
|
pushl %ebp
|
||||||
pushl %edi
|
pushl %edi
|
||||||
pushl %esi
|
pushl %esi
|
||||||
|
@ -108,7 +116,40 @@
|
||||||
|
|
||||||
PROFCODE
|
PROFCODE
|
||||||
|
|
||||||
|
|
||||||
movl STACK_LDA, LDA
|
movl STACK_LDA, LDA
|
||||||
|
movl LDA,LDAX # backup LDA
|
||||||
|
movl N,J
|
||||||
|
movl J,NN # backup N
|
||||||
|
movl A,J
|
||||||
|
movl J,AA # backup A
|
||||||
|
movl M,J
|
||||||
|
movl J,MMM # mov M to MMM
|
||||||
|
.L0t:
|
||||||
|
xorl J,J
|
||||||
|
addl $1,J
|
||||||
|
sall $21,J # J=2^21; 2^21*sizeof(double)=buffer size(16MB)
|
||||||
|
subl $4, J # Don't use the last 4 doubles in the buffer.
|
||||||
|
# Now, split M by block J
|
||||||
|
subl J,MMM # MMM=MMM-J
|
||||||
|
movl J,M
|
||||||
|
jge .L00t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
movl MMM,%eax
|
||||||
|
addl J,%eax
|
||||||
|
jle .L999x
|
||||||
|
movl %eax,M
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movl AA,%eax
|
||||||
|
movl %eax,A # mov AA to A
|
||||||
|
|
||||||
|
movl NN,%eax
|
||||||
|
movl %eax,N # reset N
|
||||||
|
|
||||||
|
|
||||||
|
movl LDAX, LDA # reset LDA
|
||||||
movl STACK_X, X
|
movl STACK_X, X
|
||||||
movl STACK_INCX, INCX
|
movl STACK_INCX, INCX
|
||||||
movl STACK_INCY, INCY
|
movl STACK_INCY, INCY
|
||||||
|
@ -117,6 +158,7 @@
|
||||||
leal (,INCY, SIZE), INCY
|
leal (,INCY, SIZE), INCY
|
||||||
leal (,LDA, SIZE), LDA
|
leal (,LDA, SIZE), LDA
|
||||||
|
|
||||||
|
|
||||||
subl $-16 * SIZE, A
|
subl $-16 * SIZE, A
|
||||||
|
|
||||||
cmpl $0, N
|
cmpl $0, N
|
||||||
|
@ -560,10 +602,19 @@
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
movl M,J
|
||||||
|
leal (,J,SIZE),%eax
|
||||||
|
addl %eax,AA
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
popl %ebp
|
popl %ebp
|
||||||
|
|
||||||
|
addl $ARGS,%esp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -530,7 +530,7 @@
|
||||||
#endif
|
#endif
|
||||||
movsd -32 * SIZE(Y), %xmm8
|
movsd -32 * SIZE(Y), %xmm8
|
||||||
|
|
||||||
pshufd $0x39, %xmm4, %xmm5
|
pshufd $0x29, %xmm4, %xmm5
|
||||||
|
|
||||||
mulps %xmm8, %xmm5
|
mulps %xmm8, %xmm5
|
||||||
addps %xmm5, %xmm3
|
addps %xmm5, %xmm3
|
||||||
|
@ -750,7 +750,8 @@
|
||||||
xorps %xmm5, %xmm5
|
xorps %xmm5, %xmm5
|
||||||
movhlps %xmm4, %xmm5
|
movhlps %xmm4, %xmm5
|
||||||
|
|
||||||
mulps -32 * SIZE(Y), %xmm5
|
movlps -32 * SIZE(Y), %xmm4
|
||||||
|
mulps %xmm4, %xmm5
|
||||||
addps %xmm5, %xmm0
|
addps %xmm5, %xmm0
|
||||||
|
|
||||||
addq $2 * SIZE, X
|
addq $2 * SIZE, X
|
||||||
|
@ -992,7 +993,7 @@
|
||||||
movsd -32 * SIZE(Y), %xmm8
|
movsd -32 * SIZE(Y), %xmm8
|
||||||
|
|
||||||
movss %xmm5, %xmm4
|
movss %xmm5, %xmm4
|
||||||
shufps $0x93, %xmm5, %xmm4
|
shufps $0x93, %xmm4, %xmm4
|
||||||
|
|
||||||
mulps %xmm8, %xmm4
|
mulps %xmm8, %xmm4
|
||||||
addps %xmm4, %xmm3
|
addps %xmm4, %xmm3
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
/*********************************************************************/
|
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
|
@ -47,7 +46,7 @@
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
|
|
||||||
#define STACKSIZE 64
|
#define STACKSIZE 128
|
||||||
|
|
||||||
#define OLD_M %rdi
|
#define OLD_M %rdi
|
||||||
#define OLD_N %rsi
|
#define OLD_N %rsi
|
||||||
|
@ -57,6 +56,10 @@
|
||||||
#define STACK_Y 16 + STACKSIZE(%rsp)
|
#define STACK_Y 16 + STACKSIZE(%rsp)
|
||||||
#define STACK_INCY 24 + STACKSIZE(%rsp)
|
#define STACK_INCY 24 + STACKSIZE(%rsp)
|
||||||
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
||||||
|
#define MMM 56(%rsp)
|
||||||
|
#define NN 64(%rsp)
|
||||||
|
#define AA 72(%rsp)
|
||||||
|
#define LDAX 80(%rsp)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
@ -71,6 +74,10 @@
|
||||||
#define STACK_Y 72 + STACKSIZE(%rsp)
|
#define STACK_Y 72 + STACKSIZE(%rsp)
|
||||||
#define STACK_INCY 80 + STACKSIZE(%rsp)
|
#define STACK_INCY 80 + STACKSIZE(%rsp)
|
||||||
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
|
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
|
||||||
|
#define MMM 216(%rsp)
|
||||||
|
#define NN 224(%rsp)
|
||||||
|
#define AA 232(%rsp)
|
||||||
|
#define LDAX 240(%rsp)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -127,29 +134,48 @@
|
||||||
movups %xmm14, 192(%rsp)
|
movups %xmm14, 192(%rsp)
|
||||||
movups %xmm15, 208(%rsp)
|
movups %xmm15, 208(%rsp)
|
||||||
|
|
||||||
movq OLD_M, M
|
movq OLD_M, MMM
|
||||||
movq OLD_N, N
|
movq OLD_N, NN
|
||||||
movq OLD_A, A
|
movq OLD_A, X
|
||||||
movq OLD_LDA, LDA
|
movq X, AA
|
||||||
|
movq OLD_LDA, X
|
||||||
|
movq X, LDAX
|
||||||
movq OLD_X, X
|
movq OLD_X, X
|
||||||
#else
|
#else
|
||||||
movq OLD_M, M
|
movq OLD_M, MMM
|
||||||
movq OLD_N, N
|
movq OLD_N, NN
|
||||||
movq OLD_A, A
|
movq OLD_A, AA
|
||||||
movq OLD_LDA, LDA
|
movq OLD_LDA, LDAX
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
movq STACK_INCX, INCX
|
|
||||||
movq STACK_Y, Y
|
|
||||||
movq STACK_INCY, INCY
|
|
||||||
movq STACK_BUFFER, BUFFER
|
|
||||||
|
|
||||||
#ifndef WINDOWS_ABI
|
#ifndef WINDOWS_ABI
|
||||||
pshufd $0, %xmm0, ALPHA
|
pshufd $0, %xmm0, ALPHA
|
||||||
#else
|
#else
|
||||||
pshufd $0, %xmm3, ALPHA
|
pshufd $0, %xmm3, ALPHA
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
.L0t:
|
||||||
|
xorq M,M
|
||||||
|
addq $1,M
|
||||||
|
salq $22,M
|
||||||
|
subq M,MMM
|
||||||
|
jge .L00t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
movq MMM,%rax
|
||||||
|
addq M,%rax
|
||||||
|
jle .L999x
|
||||||
|
movq %rax,M
|
||||||
|
|
||||||
|
.L00t:
|
||||||
|
movq LDAX,LDA
|
||||||
|
movq NN,N
|
||||||
|
movq AA,A
|
||||||
|
movq STACK_INCX, INCX
|
||||||
|
movq STACK_Y, Y
|
||||||
|
movq STACK_INCY, INCY
|
||||||
|
movq STACK_BUFFER, BUFFER
|
||||||
|
|
||||||
leaq (,INCX, SIZE), INCX
|
leaq (,INCX, SIZE), INCX
|
||||||
leaq (,INCY, SIZE), INCY
|
leaq (,INCY, SIZE), INCY
|
||||||
leaq (,LDA, SIZE), LDA
|
leaq (,LDA, SIZE), LDA
|
||||||
|
@ -6341,6 +6367,12 @@
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
leaq (,M,SIZE),%rax
|
||||||
|
addq %rax,AA
|
||||||
|
jmp .L0t
|
||||||
|
ALIGN_4
|
||||||
|
|
||||||
|
.L999x:
|
||||||
movq 0(%rsp), %rbx
|
movq 0(%rsp), %rbx
|
||||||
movq 8(%rsp), %rbp
|
movq 8(%rsp), %rbp
|
||||||
movq 16(%rsp), %r12
|
movq 16(%rsp), %r12
|
||||||
|
|
|
@ -699,7 +699,7 @@
|
||||||
movsd -32 * SIZE(X), %xmm4
|
movsd -32 * SIZE(X), %xmm4
|
||||||
|
|
||||||
pshufd $0xb1, %xmm4, %xmm12
|
pshufd $0xb1, %xmm4, %xmm12
|
||||||
shufps $0x39, %xmm8, %xmm8
|
shufps $0x59, %xmm8, %xmm8
|
||||||
mulps %xmm8, %xmm4
|
mulps %xmm8, %xmm4
|
||||||
addps %xmm4, %xmm0
|
addps %xmm4, %xmm0
|
||||||
mulps %xmm8, %xmm12
|
mulps %xmm8, %xmm12
|
||||||
|
@ -1336,7 +1336,7 @@
|
||||||
|
|
||||||
movss %xmm9, %xmm8
|
movss %xmm9, %xmm8
|
||||||
pshufd $0xb1, %xmm4, %xmm12
|
pshufd $0xb1, %xmm4, %xmm12
|
||||||
shufps $0x93, %xmm8, %xmm8
|
shufps $0x03, %xmm8, %xmm8
|
||||||
mulps %xmm8, %xmm4
|
mulps %xmm8, %xmm4
|
||||||
addps %xmm4, %xmm0
|
addps %xmm4, %xmm0
|
||||||
mulps %xmm8, %xmm12
|
mulps %xmm8, %xmm12
|
||||||
|
@ -1697,7 +1697,7 @@
|
||||||
movsd -32 * SIZE(Y), %xmm4
|
movsd -32 * SIZE(Y), %xmm4
|
||||||
|
|
||||||
pshufd $0xb1, %xmm4, %xmm12
|
pshufd $0xb1, %xmm4, %xmm12
|
||||||
shufps $0x39, %xmm8, %xmm8
|
shufps $0xa9, %xmm8, %xmm8
|
||||||
mulps %xmm8, %xmm4
|
mulps %xmm8, %xmm4
|
||||||
addps %xmm4, %xmm0
|
addps %xmm4, %xmm0
|
||||||
mulps %xmm8, %xmm12
|
mulps %xmm8, %xmm12
|
||||||
|
@ -2024,7 +2024,7 @@
|
||||||
|
|
||||||
movss %xmm9, %xmm8
|
movss %xmm9, %xmm8
|
||||||
pshufd $0xb1, %xmm4, %xmm12
|
pshufd $0xb1, %xmm4, %xmm12
|
||||||
shufps $0x93, %xmm8, %xmm8
|
shufps $0x03, %xmm8, %xmm8
|
||||||
mulps %xmm8, %xmm4
|
mulps %xmm8, %xmm4
|
||||||
addps %xmm4, %xmm0
|
addps %xmm4, %xmm0
|
||||||
mulps %xmm8, %xmm12
|
mulps %xmm8, %xmm12
|
||||||
|
|
2
make.inc
2
make.inc
|
@ -4,7 +4,7 @@ DRVOPTS = $(OPTS)
|
||||||
LOADER = $(FORTRAN)
|
LOADER = $(FORTRAN)
|
||||||
TIMER = NONE
|
TIMER = NONE
|
||||||
ARCHFLAGS= -ru
|
ARCHFLAGS= -ru
|
||||||
RANLIB = ranlib
|
#RANLIB = ranlib
|
||||||
BLASLIB =
|
BLASLIB =
|
||||||
TMGLIB = tmglib.a
|
TMGLIB = tmglib.a
|
||||||
EIGSRCLIB = eigsrc.a
|
EIGSRCLIB = eigsrc.a
|
||||||
|
|
|
@ -48,7 +48,8 @@ typedef int blasint;
|
||||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||||
extension since version 3.0. If neither are available, use a compatible
|
extension since version 3.0. If neither are available, use a compatible
|
||||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||||
#if defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || __GNUC__ >= 3
|
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||||
|
(__GNUC__ >= 3 && !defined(__cplusplus)))
|
||||||
#define OPENBLAS_COMPLEX_C99
|
#define OPENBLAS_COMPLEX_C99
|
||||||
#include <complex.h>
|
#include <complex.h>
|
||||||
typedef float _Complex openblas_complex_float;
|
typedef float _Complex openblas_complex_float;
|
||||||
|
|
Loading…
Reference in New Issue