Merge branch 'release-v0.1alpha2' into develop

This commit is contained in:
Xianyi Zhang 2011-06-23 16:07:34 +08:00
commit ca8bf5abb0
15 changed files with 12578 additions and 22 deletions

View File

@ -1,7 +1,8 @@
OpenBLAS ChangeLog OpenBLAS ChangeLog
==================================================================== ====================================================================
Version 0.1 alpha2(in development) Version 0.1 alpha2
0;136;0c 23-Jun-2011
common: common:
* Fixed blasint undefined bug in <cblas.h> file. Other software * Fixed blasint undefined bug in <cblas.h> file. Other software
could include this header successfully(Refs issue #13 on github) could include this header successfully(Refs issue #13 on github)
@ -31,6 +32,8 @@ x86/x86_64:
MIPS64: MIPS64:
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
==================================================================== ====================================================================
Version 0.1 alpha1 Version 0.1 alpha1

View File

@ -74,7 +74,7 @@ ifeq ($(OSNAME), Darwin)
endif endif
ifeq ($(OSNAME), WINNT) ifeq ($(OSNAME), WINNT)
$(MAKE) -C exports dll $(MAKE) -C exports dll
# -ln -fs $(LIBDLLNAME) libopenblas.dll -ln -fs $(LIBDLLNAME) libopenblas.dll
endif endif
ifeq ($(OSNAME), CYGWIN_NT) ifeq ($(OSNAME), CYGWIN_NT)
$(MAKE) -C exports dll $(MAKE) -C exports dll

3
README
View File

@ -72,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve
9.Known Issues 9.Known Issues
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit * The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
is 64. On 32 bits, it is 32. is 64. On 32 bits, it is 32.
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) can compile the code successfully.
10. Specification of Git Branches 10. Specification of Git Branches
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
@ -79,4 +80,4 @@ Now, there are 4 branches in github.com.
* The master branch. This is the main branch that reflects a production-ready state. * The master branch. This is the main branch that reflects a production-ready state.
* The develop branch. This is the main branch that reflects the latest delivered development changes for the next release. * The develop branch. This is the main branch that reflects the latest delivered development changes for the next release.
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future. * The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future.
* The gh-pages branch. This is for web pages * The gh-pages branch. This is for web pages

View File

@ -220,6 +220,11 @@ REALNAME: ;\
#define BUFFER_SIZE ( 8 << 20) #define BUFFER_SIZE ( 8 << 20)
#if defined(LOONGSON3A)
#define PAGESIZE (16UL << 10)
#define FIXED_PAGESIZE (16UL << 10)
#endif
#ifndef PAGESIZE #ifndef PAGESIZE
#define PAGESIZE (64UL << 10) #define PAGESIZE (64UL << 10)
#endif #endif

View File

@ -38,7 +38,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <sys/mman.h> //#include <sys/mman.h>
#include "common.h" #include "common.h"
#ifndef USE_OPENMP #ifndef USE_OPENMP

View File

@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME)
zip : dll zip : dll
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
dll : libgoto2.dll dll : ../$(LIBDLLNAME)
#libgoto2.dll
dll2 : libgoto2_shared.dll dll2 : libgoto2_shared.dll
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) ../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
$(RANLIB) ../$(LIBNAME) $(RANLIB) ../$(LIBNAME)
ifeq ($(BINARY32), 1) ifeq ($(BINARY32), 1)
$(DLLWRAP) -o $(@F) --def libgoto2.def \ $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) --entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
-lib /machine:i386 /def:libgoto2.def -lib /machine:i386 /def:libgoto2.def
else else
$(DLLWRAP) -o $(@F) --def libgoto2.def \ $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
-lib /machine:X64 /def:libgoto2.def -lib /machine:X64 /def:libgoto2.def
endif endif

View File

@ -91,15 +91,37 @@ ifndef ZGEMM_BETA
ZGEMM_BETA = ../generic/zgemm_beta.c ZGEMM_BETA = ../generic/zgemm_beta.c
endif endif
ifndef STRSMKERNEL_LN
STRSMKERNEL_LN = trsm_kernel_LN.S STRSMKERNEL_LN = trsm_kernel_LN.S
STRSMKERNEL_LT = trsm_kernel_LT.S endif
STRSMKERNEL_RN = trsm_kernel_LT.S
STRSMKERNEL_RT = trsm_kernel_RT.S
ifndef STRSMKERNEL_LT
STRSMKERNEL_LT = trsm_kernel_LT.S
endif
ifndef STRSMKERNEL_RN
STRSMKERNEL_RN = trsm_kernel_LT.S
endif
ifndef STRSMKERNEL_RT
STRSMKERNEL_RT = trsm_kernel_RT.S
endif
ifndef DTRSMKERNEL_LN
DTRSMKERNEL_LN = trsm_kernel_LN.S DTRSMKERNEL_LN = trsm_kernel_LN.S
endif
ifndef DTRSMKERNEL_LT
DTRSMKERNEL_LT = trsm_kernel_LT.S DTRSMKERNEL_LT = trsm_kernel_LT.S
endif
ifndef DTRSMKERNEL_RN
DTRSMKERNEL_RN = trsm_kernel_LT.S DTRSMKERNEL_RN = trsm_kernel_LT.S
endif
ifndef DTRSMKERNEL_RT
DTRSMKERNEL_RT = trsm_kernel_RT.S DTRSMKERNEL_RT = trsm_kernel_RT.S
endif
CTRSMKERNEL_LN = ztrsm_kernel_LT.S CTRSMKERNEL_LN = ztrsm_kernel_LT.S
CTRSMKERNEL_LT = ztrsm_kernel_LT.S CTRSMKERNEL_LT = ztrsm_kernel_LT.S

View File

@ -1,2 +1,24 @@
SAXPYKERNEL=axpy_loongson3a.S SAXPYKERNEL=axpy_loongson3a.S
DAXPYKERNEL=daxpy_loongson3a_simd.S DAXPYKERNEL=daxpy_loongson3a_simd.S
SGEMMKERNEL = sgemm_kernel_loongson3a.S
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = gemm_kernel_loongson3a.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

22
param.h
View File

@ -1480,27 +1480,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL #define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 2 #define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 8 #define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 8 #define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 1 #define CGEMM_DEFAULT_UNROLL_M 1
#define CGEMM_DEFAULT_UNROLL_N 4 #define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 1 #define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 108 #define SGEMM_DEFAULT_P 32
#define DGEMM_DEFAULT_P 112 #define DGEMM_DEFAULT_P 32
#define CGEMM_DEFAULT_P 108 #define CGEMM_DEFAULT_P 108
#define ZGEMM_DEFAULT_P 112 #define ZGEMM_DEFAULT_P 112
#define SGEMM_DEFAULT_Q 288 #define SGEMM_DEFAULT_Q 116
#define DGEMM_DEFAULT_Q 144 #define DGEMM_DEFAULT_Q 116
#define CGEMM_DEFAULT_Q 144 #define CGEMM_DEFAULT_Q 144
#define ZGEMM_DEFAULT_Q 72 #define ZGEMM_DEFAULT_Q 72
#define SGEMM_DEFAULT_R 2000 #define SGEMM_DEFAULT_R 1000
#define DGEMM_DEFAULT_R 2000 #define DGEMM_DEFAULT_R 1000
#define CGEMM_DEFAULT_R 2000 #define CGEMM_DEFAULT_R 2000
#define ZGEMM_DEFAULT_R 2000 #define ZGEMM_DEFAULT_R 2000