diff --git a/Changelog.txt b/Changelog.txt index 198c2d8f1..f53cadc61 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -51,7 +51,7 @@ common: parallelization model is used by OpenBLAS. (Thank grisuthedragon) * Detect LLVM/Clang compiler. The default compiler is Clang on Mac OS X. * Change LIBSUFFIX from .lib to .a on windows. - * A walk round for dtrti_U single thread bug. Replace it with LAPACK codes. (#191) + * A work-around for dtrti_U single thread bug. Replace it with LAPACK codes. (#191) x86/x86-64: * Optimize c/zgemm, trsm, dgemv_n, ddot, daxpy, dcopy on @@ -284,7 +284,7 @@ x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. * Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6. * Fixed #33 ztrmm bug on Nehalem. - * Walk round #27 the low performance axpy issue with small imput size & multithreads. + * Work around #27 the low performance axpy issue with small input size & multithreads. MIPS64: * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. @@ -308,7 +308,7 @@ common: x86/x86_64: * On x86 32bits, fixed a bug in zdot_sse2.S line 191. This would casue - zdotu & zdotc failures.Instead,Walk around it. (Refs issue #8 #9 on github) + zdotu & zdotc failures. Instead, work around it. (Refs issue #8 #9 on github) * Modified ?axpy functions to return same netlib BLAS results when incx==0 or incy==0 (Refs issue #7 on github) * Modified ?swap functions to return same netlib BLAS results diff --git a/Makefile b/Makefile index 01bedaf4d..2f5d032cf 100644 --- a/Makefile +++ b/Makefile @@ -128,6 +128,11 @@ ifeq ($(CORE), UNKOWN) endif ifeq ($(NOFORTRAN), 1) $(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.) +endif +ifeq ($(NO_STATIC), 1) +ifeq ($(NO_SHARED), 1) + $(error OpenBLAS: neither static nor shared are enabled.) 
+endif endif @-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) @for d in $(SUBDIRS) ; \ diff --git a/Makefile.install b/Makefile.install index 6fcbcc3e1..e04112089 100644 --- a/Makefile.install +++ b/Makefile.install @@ -50,10 +50,12 @@ ifndef NO_LAPACKE endif #for install static library +ifndef NO_STATIC @echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) +endif #for install shared library ifndef NO_SHARED @echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) @@ -80,6 +82,7 @@ ifeq ($(OSNAME), Darwin) endif ifeq ($(OSNAME), WINNT) @-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR) + @-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR) endif ifeq ($(OSNAME), CYGWIN_NT) @-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR) diff --git a/Makefile.rule b/Makefile.rule index 6b604b781..cf631d843 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -48,6 +48,9 @@ VERSION = 0.2.9.rc2 # automatically detected by the the script. # NUM_THREADS = 24 +# if you don't need to install the static library, please comment it in. +# NO_STATIC = 1 + # if you don't need generate the shared library, please comment it in. # NO_SHARED = 1 diff --git a/exports/Makefile b/exports/Makefile index e09b3c3ad..ce28ab5cc 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -83,15 +83,9 @@ dll : ../$(LIBDLLNAME) # For more details see: https://github.com/xianyi/OpenBLAS/issues/127. 
../$(LIBDLLNAME) : ../$(LIBNAME) libopenblas.def dllinit.$(SUFFIX) $(RANLIB) ../$(LIBNAME) -ifeq ($(BINARY32), 1) - $(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \ - --entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB) - -lib /machine:i386 /def:libopenblas.def -else - $(DLLWRAP) -o ../$(LIBDLLNAME) --def libopenblas.def \ - --entry $(FU)dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(EXTRALIB) - -lib /machine:X64 /def:libopenblas.def -endif + $(CC) $(CFLAGS) $(LDFLAGS) libopenblas.def dllinit.$(SUFFIX) \ + -shared -o ../$(LIBDLLNAME) -Wl,--out-implib,../$(LIBDLLNAME).a \ + -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) libopenblas.def : gensymbol perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) diff --git a/exports/dllinit.c b/exports/dllinit.c index 54ec1c36c..0f25824f1 100644 --- a/exports/dllinit.c +++ b/exports/dllinit.c @@ -41,7 +41,7 @@ void gotoblas_init(void); void gotoblas_quit(void); -BOOL APIENTRY dllinit(HINSTANCE hInst, DWORD reason, LPVOID reserved) { +BOOL APIENTRY DllMain(HINSTANCE hInst, DWORD reason, LPVOID reserved) { if (reason == DLL_PROCESS_ATTACH) { gotoblas_init(); diff --git a/interface/axpy.c b/interface/axpy.c index 82b0ee234..5e288e3b9 100644 --- a/interface/axpy.c +++ b/interface/axpy.c @@ -86,7 +86,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc if (incx == 0 || incy == 0) nthreads = 1; - //Temporarily walk around the low performance issue with small imput size & multithreads. + //Temporarily work around the low performance issue with small input size & + //multithreads. if (n <= 10000) nthreads = 1;