Merge branch 'release-v0.1alpha2' into develop
This commit is contained in:
@@ -1,7 +1,8 @@
|
|||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.1 alpha2(in development)
|
Version 0.1 alpha2
|
||||||
0;136;0c
|
23-Jun-2011
|
||||||
|
|
||||||
common:
|
common:
|
||||||
* Fixed the undefined blasint bug in the <cblas.h> file. Other software
|
* Fixed the undefined blasint bug in the <cblas.h> file. Other software
|
||||||
can now include this header successfully (Refs issue #13 on GitHub)
|
can now include this header successfully (Refs issue #13 on GitHub)
|
||||||
@@ -31,6 +32,8 @@ x86/x86_64:
|
|||||||
|
|
||||||
MIPS64:
|
MIPS64:
|
||||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||||
|
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
|
||||||
|
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.1 alpha1
|
Version 0.1 alpha1
|
||||||
|
|||||||
2
Makefile
2
Makefile
@@ -74,7 +74,7 @@ ifeq ($(OSNAME), Darwin)
|
|||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
$(MAKE) -C exports dll
|
$(MAKE) -C exports dll
|
||||||
# -ln -fs $(LIBDLLNAME) libopenblas.dll
|
-ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
$(MAKE) -C exports dll
|
$(MAKE) -C exports dll
|
||||||
|
|||||||
3
README
3
README
@@ -72,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve
|
|||||||
9.Known Issues
|
9.Known Issues
|
||||||
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||||
is 64. On 32 bits, it is 32.
|
is 64. On 32 bits, it is 32.
|
||||||
|
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) can compile the code successfully.
|
||||||
|
|
||||||
10. Specification of Git Branches
|
10. Specification of Git Branches
|
||||||
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
|
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
|
||||||
@@ -79,4 +80,4 @@ Now, there are 4 branches in github.com.
|
|||||||
* The master branch. This is the main branch, reflecting a production-ready state.
|
* The master branch. This is the main branch, reflecting a production-ready state.
|
||||||
* The develop branch. This is the main branch reflecting the latest delivered development changes for the next release.
|
* The develop branch. This is the main branch reflecting the latest delivered development changes for the next release.
|
||||||
* The loongson3a branch. This is a feature branch. We develop Loongson3A code on this branch and will merge this feature into the develop branch in the future.
|
* The loongson3a branch. This is a feature branch. We develop Loongson3A code on this branch and will merge this feature into the develop branch in the future.
|
||||||
* The gh-pages branch. This is for web pages
|
* The gh-pages branch. This is for web pages
|
||||||
|
|||||||
@@ -220,6 +220,11 @@ REALNAME: ;\
|
|||||||
|
|
||||||
#define BUFFER_SIZE ( 8 << 20)
|
#define BUFFER_SIZE ( 8 << 20)
|
||||||
|
|
||||||
|
#if defined(LOONGSON3A)
|
||||||
|
#define PAGESIZE (16UL << 10)
|
||||||
|
#define FIXED_PAGESIZE (16UL << 10)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef PAGESIZE
|
#ifndef PAGESIZE
|
||||||
#define PAGESIZE (64UL << 10)
|
#define PAGESIZE (64UL << 10)
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -38,7 +38,7 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <sys/mman.h>
|
//#include <sys/mman.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#ifndef USE_OPENMP
|
#ifndef USE_OPENMP
|
||||||
|
|||||||
@@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME)
|
|||||||
zip : dll
|
zip : dll
|
||||||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
||||||
|
|
||||||
dll : libgoto2.dll
|
dll : ../$(LIBDLLNAME)
|
||||||
|
#libgoto2.dll
|
||||||
|
|
||||||
dll2 : libgoto2_shared.dll
|
dll2 : libgoto2_shared.dll
|
||||||
|
|
||||||
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||||
$(RANLIB) ../$(LIBNAME)
|
$(RANLIB) ../$(LIBNAME)
|
||||||
ifeq ($(BINARY32), 1)
|
ifeq ($(BINARY32), 1)
|
||||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||||
-lib /machine:i386 /def:libgoto2.def
|
-lib /machine:i386 /def:libgoto2.def
|
||||||
else
|
else
|
||||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||||
-lib /machine:X64 /def:libgoto2.def
|
-lib /machine:X64 /def:libgoto2.def
|
||||||
endif
|
endif
|
||||||
|
|||||||
@@ -91,15 +91,37 @@ ifndef ZGEMM_BETA
|
|||||||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef STRSMKERNEL_LN
|
||||||
STRSMKERNEL_LN = trsm_kernel_LN.S
|
STRSMKERNEL_LN = trsm_kernel_LN.S
|
||||||
STRSMKERNEL_LT = trsm_kernel_LT.S
|
endif
|
||||||
STRSMKERNEL_RN = trsm_kernel_LT.S
|
|
||||||
STRSMKERNEL_RT = trsm_kernel_RT.S
|
|
||||||
|
|
||||||
|
ifndef STRSMKERNEL_LT
|
||||||
|
STRSMKERNEL_LT = trsm_kernel_LT.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef STRSMKERNEL_RN
|
||||||
|
STRSMKERNEL_RN = trsm_kernel_LT.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef STRSMKERNEL_RT
|
||||||
|
STRSMKERNEL_RT = trsm_kernel_RT.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef DTRSMKERNEL_LN
|
||||||
DTRSMKERNEL_LN = trsm_kernel_LN.S
|
DTRSMKERNEL_LN = trsm_kernel_LN.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef DTRSMKERNEL_LT
|
||||||
DTRSMKERNEL_LT = trsm_kernel_LT.S
|
DTRSMKERNEL_LT = trsm_kernel_LT.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef DTRSMKERNEL_RN
|
||||||
DTRSMKERNEL_RN = trsm_kernel_LT.S
|
DTRSMKERNEL_RN = trsm_kernel_LT.S
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef DTRSMKERNEL_RT
|
||||||
DTRSMKERNEL_RT = trsm_kernel_RT.S
|
DTRSMKERNEL_RT = trsm_kernel_RT.S
|
||||||
|
endif
|
||||||
|
|
||||||
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
||||||
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
||||||
|
|||||||
@@ -1,2 +1,24 @@
|
|||||||
SAXPYKERNEL=axpy_loongson3a.S
|
SAXPYKERNEL=axpy_loongson3a.S
|
||||||
DAXPYKERNEL=daxpy_loongson3a_simd.S
|
DAXPYKERNEL=daxpy_loongson3a_simd.S
|
||||||
|
|
||||||
|
SGEMMKERNEL = sgemm_kernel_loongson3a.S
|
||||||
|
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
|
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
|
DGEMMKERNEL = gemm_kernel_loongson3a.S
|
||||||
|
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
|
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|||||||
2390
kernel/mips64/gemm_kernel_loongson3a.S
Normal file
2390
kernel/mips64/gemm_kernel_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
2579
kernel/mips64/sgemm_kernel_loongson3a.S
Normal file
2579
kernel/mips64/sgemm_kernel_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
1938
kernel/mips64/trsm_kernel_LN_loongson3a.S
Normal file
1938
kernel/mips64/trsm_kernel_LN_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
1783
kernel/mips64/trsm_kernel_LT_loongson3a.S
Normal file
1783
kernel/mips64/trsm_kernel_LT_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
1852
kernel/mips64/trsm_kernel_RN_loongson3a.S
Normal file
1852
kernel/mips64/trsm_kernel_RN_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
1958
kernel/mips64/trsm_kernel_RT_loongson3a.S
Normal file
1958
kernel/mips64/trsm_kernel_RT_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
22
param.h
22
param.h
@@ -1480,27 +1480,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#define GEMM_DEFAULT_OFFSET_B 0
|
#define GEMM_DEFAULT_OFFSET_B 0
|
||||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 2
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 1
|
#define CGEMM_DEFAULT_UNROLL_M 1
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 108
|
#define SGEMM_DEFAULT_P 32
|
||||||
#define DGEMM_DEFAULT_P 112
|
#define DGEMM_DEFAULT_P 32
|
||||||
#define CGEMM_DEFAULT_P 108
|
#define CGEMM_DEFAULT_P 108
|
||||||
#define ZGEMM_DEFAULT_P 112
|
#define ZGEMM_DEFAULT_P 112
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 288
|
#define SGEMM_DEFAULT_Q 116
|
||||||
#define DGEMM_DEFAULT_Q 144
|
#define DGEMM_DEFAULT_Q 116
|
||||||
#define CGEMM_DEFAULT_Q 144
|
#define CGEMM_DEFAULT_Q 144
|
||||||
#define ZGEMM_DEFAULT_Q 72
|
#define ZGEMM_DEFAULT_Q 72
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_R 2000
|
#define SGEMM_DEFAULT_R 1000
|
||||||
#define DGEMM_DEFAULT_R 2000
|
#define DGEMM_DEFAULT_R 1000
|
||||||
#define CGEMM_DEFAULT_R 2000
|
#define CGEMM_DEFAULT_R 2000
|
||||||
#define ZGEMM_DEFAULT_R 2000
|
#define ZGEMM_DEFAULT_R 2000
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user