Merge branch 'release-v0.1alpha2' into develop
This commit is contained in:
commit
ca8bf5abb0
|
@ -1,7 +1,8 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.1 alpha2(in development)
|
||||
0;136;0c
|
||||
Version 0.1 alpha2
|
||||
23-Jun-2011
|
||||
|
||||
common:
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
can now include this header successfully. (Refs issue #13 on GitHub)
|
||||
|
@ -31,6 +32,8 @@ x86/x86_64:
|
|||
|
||||
MIPS64:
|
||||
* Fixed #28: a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
|
||||
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha1
|
||||
|
|
2
Makefile
2
Makefile
|
@ -74,7 +74,7 @@ ifeq ($(OSNAME), Darwin)
|
|||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
$(MAKE) -C exports dll
|
||||
# -ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
-ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
$(MAKE) -C exports dll
|
||||
|
|
3
README
3
README
|
@ -72,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve
|
|||
9.Known Issues
|
||||
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) can compile the code successfully.
|
||||
|
||||
10. Specification of Git Branches
|
||||
We use the git branching model described in this article (http://nvie.com/posts/a-successful-git-branching-model/).
|
||||
|
@ -79,4 +80,4 @@ Now, there are 4 branches in github.com.
|
|||
* The master branch. This is a main branch to reflect a production-ready state.
|
||||
* The develop branch. This is a main branch to reflect a state with the latest delivered development changes for the next release.
|
||||
* The loongson3a branch. This is a feature branch. We develop Loongson3A code on this branch. We will merge this feature into the develop branch in the future.
|
||||
* The gh-pages branch. This is for web pages
|
||||
* The gh-pages branch. This is for web pages
|
||||
|
|
|
@ -220,6 +220,11 @@ REALNAME: ;\
|
|||
|
||||
#define BUFFER_SIZE ( 8 << 20)
|
||||
|
||||
#if defined(LOONGSON3A)
|
||||
#define PAGESIZE (16UL << 10)
|
||||
#define FIXED_PAGESIZE (16UL << 10)
|
||||
#endif
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE (64UL << 10)
|
||||
#endif
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
//#include <sys/mman.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
|
|
|
@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME)
|
|||
zip : dll
|
||||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
||||
|
||||
dll : libgoto2.dll
|
||||
dll : ../$(LIBDLLNAME)
|
||||
#libgoto2.dll
|
||||
|
||||
dll2 : libgoto2_shared.dll
|
||||
|
||||
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
$(RANLIB) ../$(LIBNAME)
|
||||
ifeq ($(BINARY32), 1)
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:i386 /def:libgoto2.def
|
||||
else
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:X64 /def:libgoto2.def
|
||||
endif
|
||||
|
|
|
@ -91,15 +91,37 @@ ifndef ZGEMM_BETA
|
|||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_LN
|
||||
STRSMKERNEL_LN = trsm_kernel_LN.S
|
||||
STRSMKERNEL_LT = trsm_kernel_LT.S
|
||||
STRSMKERNEL_RN = trsm_kernel_LT.S
|
||||
STRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_LT
|
||||
STRSMKERNEL_LT = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_RN
|
||||
STRSMKERNEL_RN = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_RT
|
||||
STRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_LN
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_LT
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_RN
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_RT
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
||||
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
||||
|
|
|
@ -1,2 +1,24 @@
|
|||
SAXPYKERNEL=axpy_loongson3a.S
|
||||
DAXPYKERNEL=daxpy_loongson3a_simd.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_loongson3a.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = gemm_kernel_loongson3a.S
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
22
param.h
22
param.h
|
@ -1480,27 +1480,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 1
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define SGEMM_DEFAULT_P 108
|
||||
#define DGEMM_DEFAULT_P 112
|
||||
#define SGEMM_DEFAULT_P 32
|
||||
#define DGEMM_DEFAULT_P 32
|
||||
#define CGEMM_DEFAULT_P 108
|
||||
#define ZGEMM_DEFAULT_P 112
|
||||
|
||||
#define SGEMM_DEFAULT_Q 288
|
||||
#define DGEMM_DEFAULT_Q 144
|
||||
#define SGEMM_DEFAULT_Q 116
|
||||
#define DGEMM_DEFAULT_Q 116
|
||||
#define CGEMM_DEFAULT_Q 144
|
||||
#define ZGEMM_DEFAULT_Q 72
|
||||
|
||||
#define SGEMM_DEFAULT_R 2000
|
||||
#define DGEMM_DEFAULT_R 2000
|
||||
#define SGEMM_DEFAULT_R 1000
|
||||
#define DGEMM_DEFAULT_R 1000
|
||||
#define CGEMM_DEFAULT_R 2000
|
||||
#define ZGEMM_DEFAULT_R 2000
|
||||
|
||||
|
|
Loading…
Reference in New Issue