Merge branch 'release-v0.1alpha2' into develop
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.1 alpha2(in development)
|
||||
0;136;0c
|
||||
Version 0.1 alpha2
|
||||
23-Jun-2011
|
||||
|
||||
common:
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
could include this header successfully(Refs issue #13 on github)
|
||||
@@ -31,6 +32,8 @@ x86/x86_64:
|
||||
|
||||
MIPS64:
|
||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
|
||||
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha1
|
||||
|
||||
2
Makefile
2
Makefile
@@ -74,7 +74,7 @@ ifeq ($(OSNAME), Darwin)
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
$(MAKE) -C exports dll
|
||||
# -ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
-ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
$(MAKE) -C exports dll
|
||||
|
||||
3
README
3
README
@@ -72,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve
|
||||
9.Known Issues
|
||||
* The number of CPUs/Cores should less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully.
|
||||
|
||||
10. Specification of Git Branches
|
||||
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
|
||||
@@ -79,4 +80,4 @@ Now, there are 4 branches in github.com.
|
||||
* The master branch. This a main branch to reflect a production-ready state.
|
||||
* The develop branch. This a main branch to reflect a state with the latest delivered development changes for the next release.
|
||||
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future.
|
||||
* The gh-pages branch. This is for web pages
|
||||
* The gh-pages branch. This is for web pages
|
||||
|
||||
@@ -220,6 +220,11 @@ REALNAME: ;\
|
||||
|
||||
#define BUFFER_SIZE ( 8 << 20)
|
||||
|
||||
#if defined(LOONGSON3A)
|
||||
#define PAGESIZE (16UL << 10)
|
||||
#define FIXED_PAGESIZE (16UL << 10)
|
||||
#endif
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE (64UL << 10)
|
||||
#endif
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
//#include <sys/mman.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
|
||||
@@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME)
|
||||
zip : dll
|
||||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
||||
|
||||
dll : libgoto2.dll
|
||||
dll : ../$(LIBDLLNAME)
|
||||
#libgoto2.dll
|
||||
|
||||
dll2 : libgoto2_shared.dll
|
||||
|
||||
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
$(RANLIB) ../$(LIBNAME)
|
||||
ifeq ($(BINARY32), 1)
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:i386 /def:libgoto2.def
|
||||
else
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:X64 /def:libgoto2.def
|
||||
endif
|
||||
|
||||
@@ -91,15 +91,37 @@ ifndef ZGEMM_BETA
|
||||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_LN
|
||||
STRSMKERNEL_LN = trsm_kernel_LN.S
|
||||
STRSMKERNEL_LT = trsm_kernel_LT.S
|
||||
STRSMKERNEL_RN = trsm_kernel_LT.S
|
||||
STRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_LT
|
||||
STRSMKERNEL_LT = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_RN
|
||||
STRSMKERNEL_RN = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_RT
|
||||
STRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_LN
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_LT
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_RN
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_RT
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
||||
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
||||
|
||||
@@ -1,2 +1,24 @@
|
||||
SAXPYKERNEL=axpy_loongson3a.S
|
||||
DAXPYKERNEL=daxpy_loongson3a_simd.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_loongson3a.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = gemm_kernel_loongson3a.S
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
2390
kernel/mips64/gemm_kernel_loongson3a.S
Normal file
2390
kernel/mips64/gemm_kernel_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
2579
kernel/mips64/sgemm_kernel_loongson3a.S
Normal file
2579
kernel/mips64/sgemm_kernel_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
1938
kernel/mips64/trsm_kernel_LN_loongson3a.S
Normal file
1938
kernel/mips64/trsm_kernel_LN_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
1783
kernel/mips64/trsm_kernel_LT_loongson3a.S
Normal file
1783
kernel/mips64/trsm_kernel_LT_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
1852
kernel/mips64/trsm_kernel_RN_loongson3a.S
Normal file
1852
kernel/mips64/trsm_kernel_RN_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
1958
kernel/mips64/trsm_kernel_RT_loongson3a.S
Normal file
1958
kernel/mips64/trsm_kernel_RT_loongson3a.S
Normal file
File diff suppressed because it is too large
Load Diff
22
param.h
22
param.h
@@ -1480,27 +1480,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 1
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define SGEMM_DEFAULT_P 108
|
||||
#define DGEMM_DEFAULT_P 112
|
||||
#define SGEMM_DEFAULT_P 32
|
||||
#define DGEMM_DEFAULT_P 32
|
||||
#define CGEMM_DEFAULT_P 108
|
||||
#define ZGEMM_DEFAULT_P 112
|
||||
|
||||
#define SGEMM_DEFAULT_Q 288
|
||||
#define DGEMM_DEFAULT_Q 144
|
||||
#define SGEMM_DEFAULT_Q 116
|
||||
#define DGEMM_DEFAULT_Q 116
|
||||
#define CGEMM_DEFAULT_Q 144
|
||||
#define ZGEMM_DEFAULT_Q 72
|
||||
|
||||
#define SGEMM_DEFAULT_R 2000
|
||||
#define DGEMM_DEFAULT_R 2000
|
||||
#define SGEMM_DEFAULT_R 1000
|
||||
#define DGEMM_DEFAULT_R 1000
|
||||
#define CGEMM_DEFAULT_R 2000
|
||||
#define ZGEMM_DEFAULT_R 2000
|
||||
|
||||
|
||||
Reference in New Issue
Block a user