commit
b31ec99372
|
@ -51,7 +51,7 @@ common:
|
||||||
parallelization model is used by OpenBLAS. (Thanks grisuthedragon)
|
parallelization model is used by OpenBLAS. (Thanks grisuthedragon)
|
||||||
* Detect LLVM/Clang compiler. The default compiler is Clang on Mac OS X.
|
* Detect LLVM/Clang compiler. The default compiler is Clang on Mac OS X.
|
||||||
* Change LIBSUFFIX from .lib to .a on windows.
|
* Change LIBSUFFIX from .lib to .a on windows.
|
||||||
* A walk round for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
|
* A work-around for dtrti_U single thread bug. Replace it with LAPACK codes. (#191)
|
||||||
|
|
||||||
x86/x86-64:
|
x86/x86-64:
|
||||||
* Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on
|
* Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on
|
||||||
|
@ -284,7 +284,7 @@ x86/x86_64:
|
||||||
* Fixed #28 a wrong result of dsdot on x86_64.
|
* Fixed #28 a wrong result of dsdot on x86_64.
|
||||||
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
|
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
|
||||||
* Fixed #33 ztrmm bug on Nehalem.
|
* Fixed #33 ztrmm bug on Nehalem.
|
||||||
* Walk round #27 the low performance axpy issue with small imput size & multithreads.
|
* Work-around #27 the low performance axpy issue with small input size & multithreads.
|
||||||
|
|
||||||
MIPS64:
|
MIPS64:
|
||||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||||
|
@ -308,7 +308,7 @@ common:
|
||||||
|
|
||||||
x86/x86_64:
|
x86/x86_64:
|
||||||
* On x86 32bits, fixed a bug in zdot_sse2.S line 191. This would cause
|
* On x86 32bits, fixed a bug in zdot_sse2.S line 191. This would cause
|
||||||
zdotu & zdotc failures.Instead,Walk around it. (Refs issue #8 #9 on github)
|
zdotu & zdotc failures. Instead, work-around it. (Refs issue #8 #9 on github)
|
||||||
* Modified ?axpy functions to return same netlib BLAS results
|
* Modified ?axpy functions to return same netlib BLAS results
|
||||||
when incx==0 or incy==0 (Refs issue #7 on github)
|
when incx==0 or incy==0 (Refs issue #7 on github)
|
||||||
* Modified ?swap functions to return same netlib BLAS results
|
* Modified ?swap functions to return same netlib BLAS results
|
||||||
|
|
5
Makefile
5
Makefile
|
@ -128,6 +128,11 @@ ifeq ($(CORE), UNKOWN)
|
||||||
endif
|
endif
|
||||||
ifeq ($(NOFORTRAN), 1)
|
ifeq ($(NOFORTRAN), 1)
|
||||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
||||||
|
endif
|
||||||
|
ifeq ($(NO_STATIC), 1)
|
||||||
|
ifeq ($(NO_SHARED), 1)
|
||||||
|
$(error OpenBLAS: neither static nor shared are enabled.)
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||||
@for d in $(SUBDIRS) ; \
|
@for d in $(SUBDIRS) ; \
|
||||||
|
|
|
@ -50,10 +50,12 @@ ifndef NO_LAPACKE
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#for install static library
|
#for install static library
|
||||||
|
ifndef NO_STATIC
|
||||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||||
|
endif
|
||||||
#for install shared library
|
#for install shared library
|
||||||
ifndef NO_SHARED
|
ifndef NO_SHARED
|
||||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||||
|
@ -80,6 +82,7 @@ ifeq ($(OSNAME), Darwin)
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||||
|
@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR)
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||||
|
|
|
@ -48,6 +48,9 @@ VERSION = 0.2.9.rc2
|
||||||
# automatically detected by the script.
|
# automatically detected by the script.
|
||||||
# NUM_THREADS = 24
|
# NUM_THREADS = 24
|
||||||
|
|
||||||
|
# If you don't need to install the static library, please uncomment the line below.
|
||||||
|
# NO_STATIC = 1
|
||||||
|
|
||||||
# If you don't need to generate the shared library, please uncomment the line below.
|
# If you don't need to generate the shared library, please uncomment the line below.
|
||||||
# NO_SHARED = 1
|
# NO_SHARED = 1
|
||||||
|
|
||||||
|
|
|
@ -83,15 +83,9 @@ dll : ../$(LIBDLLNAME)
|
||||||
# For more details see: https://github.com/xianyi/OpenBLAS/issues/127.
|
# For more details see: https://github.com/xianyi/OpenBLAS/issues/127.
|
||||||
../$(LIBDLLNAME) : ../$(LIBNAME) libopenblas.def dllinit.$(SUFFIX)
|
../$(LIBDLLNAME) : ../$(LIBNAME) libopenblas.def dllinit.$(SUFFIX)
|
||||||
$(RANLIB) ../$(LIBNAME)
|
$(RANLIB) ../$(LIBNAME)
|
||||||
ifeq ($(BINARY32), 1)
|
$(CC) $(CFLAGS) $(LDFLAGS) libopenblas.def dllinit.$(SUFFIX) \
|
||||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
|
-shared -o ../$(LIBDLLNAME) -Wl,--out-implib,../$(LIBDLLNAME).a \
|
||||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
|
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB)
|
||||||
-lib /machine:i386 /def:libopenblas.def
|
|
||||||
else
|
|
||||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \
|
|
||||||
--entry $(FU)dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB)
|
|
||||||
-lib /machine:X64 /def:libopenblas.def
|
|
||||||
endif
|
|
||||||
|
|
||||||
libopenblas.def : gensymbol
|
libopenblas.def : gensymbol
|
||||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
|
||||||
|
|
|
@ -41,7 +41,7 @@
|
||||||
void gotoblas_init(void);
|
void gotoblas_init(void);
|
||||||
void gotoblas_quit(void);
|
void gotoblas_quit(void);
|
||||||
|
|
||||||
BOOL APIENTRY dllinit(HINSTANCE hInst, DWORD reason, LPVOID reserved) {
|
BOOL APIENTRY DllMain(HINSTANCE hInst, DWORD reason, LPVOID reserved) {
|
||||||
|
|
||||||
if (reason == DLL_PROCESS_ATTACH) {
|
if (reason == DLL_PROCESS_ATTACH) {
|
||||||
gotoblas_init();
|
gotoblas_init();
|
||||||
|
|
|
@ -86,7 +86,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||||
if (incx == 0 || incy == 0)
|
if (incx == 0 || incy == 0)
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
|
||||||
//Temporarily walk around the low performance issue with small imput size & multithreads.
|
//Temporarily work-around the low performance issue with small input size &
|
||||||
|
//multithreads.
|
||||||
if (n <= 10000)
|
if (n <= 10000)
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue