Merge branch 'release-v0.1alpha2'

This commit is contained in:
traits 2011-06-23 15:18:40 +08:00
commit 4a73f5c5ea
41 changed files with 12961 additions and 53 deletions

5
.gitignore vendored
View File

@ -1,8 +1,13 @@
*.obj
*.lib
*.dll
*.def
*.o
lapack-3.1.1
lapack-3.1.1.tgz
*.so
*.a
.svn
*~
config.h
Makefile.conf

View File

@ -1,6 +1,7 @@
OpenBLAS ChangeLog
====================================================================
Version 0.1 alpha2(in development)
Version 0.1 alpha2
23-Jun-2011
common:
* Fixed blasint undefined bug in <cblas.h> file. Other software
@ -15,11 +16,25 @@ common:
* Provided an error message when the arch is not supported.(Refs
issue #19 on github)
* Fixed issue #23. Fixed a bug of f_check script about generating link flags.
* Added openblas_set_num_threads for Fortran.
* Fixed #25 a wrong result of rotmg.
* Fixed a bug about detecting underscore prefix in c_check.
* Print the wall time (cycles) with enabling FUNCTION_PROFILE
* Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1
* Added install target. You can use "make install". (Refs #20)
x86/x86_64:
*
* Fixed #28 a wrong result of dsdot on x86_64.
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
* Fixed #33 ztrmm bug on Nehalem.
* Walk round #27 the low performance axpy issue with small imput size & multithreads.
MIPS64:
*
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
====================================================================
Version 0.1 alpha1
20-Mar-2011

View File

@ -15,6 +15,10 @@ ifdef SANITY_CHECK
BLASDIRS += reference
endif
ifndef PREFIX
PREFIX = /opt/OpenBLAS
endif
SUBDIRS = $(BLASDIRS)
ifneq ($(NO_LAPACK), 1)
SUBDIRS += lapack
@ -22,8 +26,8 @@ endif
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
.PHONY : all libs netlib test ctest shared
.NOTPARALLEL : all libs prof lapack-test
.PHONY : all libs netlib test ctest shared install
.NOTPARALLEL : all libs prof lapack-test install
all :: libs netlib tests shared
@echo
@ -70,7 +74,7 @@ ifeq ($(OSNAME), Darwin)
endif
ifeq ($(OSNAME), WINNT)
$(MAKE) -C exports dll
# -ln -fs $(LIBDLLNAME) libopenblas.dll
-ln -fs $(LIBDLLNAME) libopenblas.dll
endif
ifeq ($(OSNAME), CYGWIN_NT)
$(MAKE) -C exports dll
@ -105,12 +109,17 @@ endif
$(MAKE) -C $$d $(@F) || exit 1 ; \
fi; \
done
#Save the config files for installation
cp Makefile.conf Makefile.conf_last
cp config.h config_last.h
ifdef DYNAMIC_ARCH
$(MAKE) -C kernel commonlibs || exit 1
for d in $(DYNAMIC_CORE) ; \
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
done
echo DYNAMIC_ARCH=1 >> Makefile.conf_last
endif
touch lib.grd
prof : prof_blas prof_lapack
@ -230,19 +239,23 @@ lapack-test :
dummy :
install :
$(MAKE) -f Makefile.install install
clean ::
@for d in $(SUBDIRS_ALL) ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
fi; \
done
ifdef DYNAMIC_ARCH
#ifdef DYNAMIC_ARCH
@$(MAKE) -C kernel clean
endif
#endif
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
@if test -d lapack-3.1.1; then \
echo deleting lapack-3.1.1; \
rm -rf lapack-3.1.1 ;\
fi
@rm -f *.grd Makefile.conf_last config_last.h
@echo Done.

65
Makefile.install Normal file
View File

@ -0,0 +1,65 @@
TOPDIR = .
export GOTOBLAS_MAKEFILE = 1
-include $(TOPDIR)/Makefile.conf_last
include ./Makefile.system
.PHONY : install
.NOTPARALLEL : install
lib.grd :
$(error OpenBLAS: Please run "make" firstly)
install : lib.grd
@-mkdir -p $(PREFIX)
@echo Generating openblas_config.h in $(PREFIX)
#for inc
@echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h
@echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h
@cat config_last.h >> $(PREFIX)/openblas_config.h
@echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h
@cat openblas_config_template.h >> $(PREFIX)/openblas_config.h
@echo \#endif >> $(PREFIX)/openblas_config.h
@echo Generating f77blas.h in $(PREFIX)
@echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h
@echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h
@echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h
@cat common_interface.h >> $(PREFIX)/f77blas.h
@echo \#endif >> $(PREFIX)/f77blas.h
@echo Generating cblas.h in $(PREFIX)
@sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h
#for install static library
@echo Copy the static library to $(PREFIX)
@cp $(LIBNAME) $(PREFIX)
@-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX)
#for install shared library
@echo Copy the shared library to $(PREFIX)
ifeq ($(OSNAME), Linux)
-cp $(LIBSONAME) $(PREFIX)
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
endif
ifeq ($(OSNAME), FreeBSD)
-cp $(LIBSONAME) $(PREFIX)
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
endif
ifeq ($(OSNAME), NetBSD)
-cp $(LIBSONAME) $(PREFIX)
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
endif
ifeq ($(OSNAME), Darwin)
-cp $(LIBDYNNAME) $(PREFIX)
-ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib
endif
ifeq ($(OSNAME), WINNT)
-cp $(LIBDLLNAME) $(PREFIX)
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
endif
ifeq ($(OSNAME), CYGWIN_NT)
-cp $(LIBDLLNAME) $(PREFIX)
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
endif
@echo Install OK!

View File

@ -91,6 +91,9 @@ VERSION = 0.1alpha2
# SANITY_CHECK to compare the result with reference BLAS.
# UTEST_CHECK = 1
# The installation directory.
# PREFIX = /opt/OpenBLAS
# Common Optimization Flag; -O2 is enough.
# DEBUG = 1

View File

@ -515,6 +515,10 @@ ifeq ($(DYNAMIC_ARCH), 1)
CCOMMON_OPT += -DDYNAMIC_ARCH
endif
ifeq ($(NO_LAPACK), 1)
CCOMMON_OPT += -DNO_LAPACK
endif
ifdef SMP
CCOMMON_OPT += -DSMP_SERVER

8
README
View File

@ -22,6 +22,11 @@ make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-g
3)Debug version
make DEBUG=1
4)Intall to the directory (Optional)
e.g.
make install PREFIX=your_installation_directory
The default directory is /opt/OpenBLAS
3.Support CPU & OS
Please read GotoBLAS_01Readme.txt
@ -67,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve
9.Known Issues
* The number of CPUs/Cores should less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
is 64. On 32 bits, it is 32.
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully.
10. Specification of Git Branches
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
@ -74,4 +80,4 @@ Now, there are 4 branches in github.com.
* The master branch. This a main branch to reflect a production-ready state.
* The develop branch. This a main branch to reflect a state with the latest delivered development changes for the next release.
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future.
* The gh-pages branch. This is for web pages
* The gh-pages branch. This is for web pages

View File

@ -149,7 +149,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/);
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
$data =~ /globl\ ([_\.]*)(.*)/;
$data =~ /globl\s([_\.]*)(.*)/;
$need_fu = $1;

View File

@ -220,6 +220,11 @@ REALNAME: ;\
#define BUFFER_SIZE ( 8 << 20)
#if defined(LOONGSON3A)
#define PAGESIZE (16UL << 10)
#define FIXED_PAGESIZE (16UL << 10)
#endif
#ifndef PAGESIZE
#define PAGESIZE (64UL << 10)
#endif

View File

@ -60,4 +60,8 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *,
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *);
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *);
void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *);
double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*);
#endif

View File

@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX)
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
ifdef SMP
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX)
ifndef NO_AFFINITY
COMMONOBJS += init.$(SUFFIX)
endif
@ -100,6 +100,9 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h
blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h
$(CC) $(CFLAGS) -c $< -o $(@F)
openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
$(CC) $(CFLAGS) -c $< -o $(@F)
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
$(CC) $(CFLAGS) -c $< -o $(@F)

View File

@ -38,7 +38,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
//#include <sys/mman.h>
#include "common.h"
#ifndef USE_OPENMP

View File

@ -0,0 +1,45 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include "common.h"
#ifdef SMP_SERVER
#ifdef OS_LINUX
extern void openblas_set_num_threads(int num_threads) ;
void NAME(int* num_threads){
openblas_set_num_threads(*num_threads);
}
#endif
#endif

View File

@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) {
if (cycles > 0) {
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n");
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n");
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n");
for (i = 0; i < MAX_PROF_TABLE; i ++) {
if (function_profile_table[i].calls) {
#ifndef OS_WINDOWS
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n",
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n",
#else
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n",
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n",
#endif
func_table[i],
function_profile_table[i].calls,
(double)function_profile_table[i].cycles / (double)cycles * 100.,
(double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100.,
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles,
function_profile_table[i].cycles
);
}
}

View File

@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME)
zip : dll
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
dll : libgoto2.dll
dll : ../$(LIBDLLNAME)
#libgoto2.dll
dll2 : libgoto2_shared.dll
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
$(RANLIB) ../$(LIBNAME)
ifeq ($(BINARY32), 1)
$(DLLWRAP) -o $(@F) --def libgoto2.def \
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
-lib /machine:i386 /def:libgoto2.def
else
$(DLLWRAP) -o $(@F) --def libgoto2.def \
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
-lib /machine:X64 /def:libgoto2.def
endif
@ -84,7 +85,7 @@ libgoto_hpl.def : gensymbol
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o $(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
symbol.$(SUFFIX) : symbol.S
$(CC) $(CFLAGS) -c -o $(@F) $^

View File

@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
nthreads = 1;
//Temporarily walk around the low performance issue with small imput size & multithreads.
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
#endif

0
interface/create Normal file → Executable file
View File

View File

@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
BLASLONG n = *N;
BLASLONG incx = *INCX;
BLASLONG incy = *INCY;
double ret = 0.0;
PRINT_DEBUG_NAME;
@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
if (incx < 0) x -= (n - 1) * incx;
if (incy < 0) y -= (n - 1) * incy;
return DSDOT_K(n, x, incx, y, incy);
ret=DSDOT_K(n, x, incx, y, incy);
FUNCTION_PROFILE_END(1, n, n);
IDEBUG_END;
return 0;
return ret;
}
#else
double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
double ret = 0.0;
PRINT_DEBUG_CNAME;
@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
if (incx < 0) x -= (n - 1) * incx;
if (incy < 0) y -= (n - 1) * incy;
return DSDOT_K(n, x, incx, y, incy);
ret=DSDOT_K(n, x, incx, y, incy);
FUNCTION_PROFILE_END(1, n, n);
IDEBUG_END;
return 0;
return ret;
}

View File

@ -7,6 +7,12 @@
#define GAMSQ 16777216.e0
#define RGAMSQ 5.9604645e-8
#ifdef DOUBLE
#define ABS(x) fabs(x)
#else
#define ABS(x) fabsf(x)
#endif
#ifndef CBLAS
void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){
@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
dq2 = dp2 * dy1;
dq1 = dp1 * *dx1;
if (! (abs(dq1) > abs(dq2))) goto L40;
if (! (ABS(dq1) > ABS(dq2))) goto L40;
dh21 = -(dy1) / *dx1;
dh12 = dp2 / dp1;
@ -140,7 +146,7 @@ L150:
goto L130;
L160:
if (! (abs(*dd2) <= RGAMSQ)) {
if (! (ABS(*dd2) <= RGAMSQ)) {
goto L190;
}
if (*dd2 == ZERO) {
@ -157,7 +163,7 @@ L180:
goto L160;
L190:
if (! (abs(*dd2) >= GAMSQ)) {
if (! (ABS(*dd2) >= GAMSQ)) {
goto L220;
}
igo = 3;

View File

@ -53,6 +53,11 @@ SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX)
CCOMMON_OPT += -DTS=$(TSUFFIX)
endif
KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h
ifneq ($(NO_LAPACK), 1)
KERNEL_INTERFACE += ../common_lapack.h
endif
ifeq ($(ARCH), x86)
COMMONOBJS += cpuid.$(SUFFIX)
endif
@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h
setparam$(TSUFFIX).c : setparam-ref.c
sed 's/TS/$(TSUFFIX)/g' $< > $(@F)
kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h
kernel$(TSUFFIX).h : $(KERNEL_INTERFACE)
sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F)
cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S
$(CC) -c $(CFLAGS) $< -o $(@F)
@ -112,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S
$(CC) -c $(PFLAGS) $< -o $(@F)
ifdef DYNAMIC_ARCH
#ifdef DYNAMIC_ARCH
clean ::
@rm -f setparam_*.c kernel_*.h setparam.h kernel.h
endif
#endif
include $(TOPDIR)/Makefile.tail

View File

@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@

View File

@ -91,15 +91,37 @@ ifndef ZGEMM_BETA
ZGEMM_BETA = ../generic/zgemm_beta.c
endif
ifndef STRSMKERNEL_LN
STRSMKERNEL_LN = trsm_kernel_LN.S
STRSMKERNEL_LT = trsm_kernel_LT.S
STRSMKERNEL_RN = trsm_kernel_LT.S
STRSMKERNEL_RT = trsm_kernel_RT.S
endif
ifndef STRSMKERNEL_LT
STRSMKERNEL_LT = trsm_kernel_LT.S
endif
ifndef STRSMKERNEL_RN
STRSMKERNEL_RN = trsm_kernel_LT.S
endif
ifndef STRSMKERNEL_RT
STRSMKERNEL_RT = trsm_kernel_RT.S
endif
ifndef DTRSMKERNEL_LN
DTRSMKERNEL_LN = trsm_kernel_LN.S
endif
ifndef DTRSMKERNEL_LT
DTRSMKERNEL_LT = trsm_kernel_LT.S
endif
ifndef DTRSMKERNEL_RN
DTRSMKERNEL_RN = trsm_kernel_LT.S
endif
ifndef DTRSMKERNEL_RT
DTRSMKERNEL_RT = trsm_kernel_RT.S
endif
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
CTRSMKERNEL_LT = ztrsm_kernel_LT.S

View File

@ -1,2 +1,24 @@
SAXPYKERNEL=axpy_loongson3a.S
DAXPYKERNEL=daxpy_loongson3a_simd.S
SGEMMKERNEL = sgemm_kernel_loongson3a.S
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = gemm_kernel_loongson3a.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

View File

@ -300,7 +300,11 @@
.align 3
.L999:
j $31
ADD s1, s1, s2
#ifdef DSDOT
cvt.d.s s1, s1
#endif
j $31
NOP
EPILOGUE

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -101,7 +101,11 @@ gotoblas_t TABLE_NAME = {
#endif
ssymm_outcopyTS, ssymm_oltcopyTS,
#ifndef NO_LAPACK
sneg_tcopyTS, slaswp_ncopyTS,
#else
NULL,NULL,
#endif
0, 0, 0,
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
@ -147,7 +151,11 @@ gotoblas_t TABLE_NAME = {
#endif
dsymm_outcopyTS, dsymm_oltcopyTS,
#ifndef NO_LAPACK
dneg_tcopyTS, dlaswp_ncopyTS,
#else
NULL, NULL,
#endif
#ifdef EXPRECISION
@ -195,7 +203,11 @@ gotoblas_t TABLE_NAME = {
#endif
qsymm_outcopyTS, qsymm_oltcopyTS,
#ifndef NO_LAPACK
qneg_tcopyTS, qlaswp_ncopyTS,
#else
NULL, NULL,
#endif
#endif
@ -286,7 +298,11 @@ gotoblas_t TABLE_NAME = {
chemm3m_oucopyrTS, chemm3m_olcopyrTS,
chemm3m_oucopyiTS, chemm3m_olcopyiTS,
#ifndef NO_LAPACK
cneg_tcopyTS, claswp_ncopyTS,
#else
NULL, NULL,
#endif
0, 0, 0,
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
@ -375,7 +391,11 @@ gotoblas_t TABLE_NAME = {
zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
#ifndef NO_LAPACK
zneg_tcopyTS, zlaswp_ncopyTS,
#else
NULL, NULL,
#endif
#ifdef EXPRECISION
@ -466,7 +486,11 @@ gotoblas_t TABLE_NAME = {
xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
#ifndef NO_LAPACK
xneg_tcopyTS, xlaswp_ncopyTS,
#else
NULL, NULL,
#endif
#endif

View File

@ -1541,5 +1541,8 @@
popl %ebx
popl %esi
popl %edi
/*remove the hidden return value address from the stack.*/
popl %ecx
xchgl %ecx, 0(%esp)
ret
EPILOGUE

View File

@ -1286,6 +1286,10 @@
haddps %xmm0, %xmm0
#endif
#ifdef DSDOT
cvtss2sd %xmm0, %xmm0
#endif
RESTOREREGISTERS
ret

View File

@ -544,7 +544,7 @@
jg .L11
#if defined(TRMMKERNEL) && !defined(LEFT)
addq $1, KK
addq $4, KK
#endif
leaq (C, LDC, 4), C
@ -594,7 +594,7 @@
jg .L11
#if defined(TRMMKERNEL) && !defined(LEFT)
addq $1, KK
addq $4, KK
#endif
leaq (C, LDC, 4), C

View File

@ -0,0 +1,21 @@
/*This is only for "make install" target.*/
#ifdef NEEDBUNDERSCORE
#define BLASFUNC(FUNC) FUNC##_
#else
#define BLASFUNC(FUNC) FUNC
#endif
#if defined(OS_WINDOWS) && defined(__64BIT__)
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
#ifdef USE64BITINT
typedef BLASLONG blasint;
#else
typedef int blasint;
#endif

22
param.h
View File

@ -1480,27 +1480,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 8
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 1
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 108
#define DGEMM_DEFAULT_P 112
#define SGEMM_DEFAULT_P 32
#define DGEMM_DEFAULT_P 32
#define CGEMM_DEFAULT_P 108
#define ZGEMM_DEFAULT_P 112
#define SGEMM_DEFAULT_Q 288
#define DGEMM_DEFAULT_Q 144
#define SGEMM_DEFAULT_Q 116
#define DGEMM_DEFAULT_Q 116
#define CGEMM_DEFAULT_Q 144
#define ZGEMM_DEFAULT_Q 72
#define SGEMM_DEFAULT_R 2000
#define DGEMM_DEFAULT_R 2000
#define SGEMM_DEFAULT_R 1000
#define DGEMM_DEFAULT_R 1000
#define CGEMM_DEFAULT_R 2000
#define ZGEMM_DEFAULT_R 2000

View File

@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system
TARGET=openblas_utest
CUNIT_LIB=/usr/local/lib/libcunit.a
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o
all : run_test
$(TARGET): $(OBJS)
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
$(FC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
run_test: $(TARGET)
./$(TARGET)

View File

@ -57,4 +57,8 @@ void test_caxpy_inc_0(void);
void test_zdotu_n_1(void);
void test_zdotu_offset_1(void);
void test_drotmg(void);
void test_dsdot_n_1(void);
#endif

View File

@ -54,7 +54,10 @@ CU_TestInfo test_level1[]={
{"Testing zdotu with n == 1",test_zdotu_n_1},
{"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1},
{"Testing drotmg",test_drotmg},
{"Testing dsdot with n == 1",test_dsdot_n_1},
CU_TEST_INFO_NULL,
};

50
utest/test_dsdot.c Normal file
View File

@ -0,0 +1,50 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include "common_utest.h"
void test_dsdot_n_1()
{
float x= 0.172555164;
float y= -0.0138700781;
int incx=1;
int incy=1;
int n=1;
double res1=0.0f, res2=0.0f;
res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy);
res2=BLASFUNC_REF(dsdot)(&n, &x, &incx, &y, &incy);
CU_ASSERT_DOUBLE_EQUAL(res1, res2, CHECK_EPS);
}

60
utest/test_rotmg.c Normal file
View File

@ -0,0 +1,60 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include "common_utest.h"
void test_drotmg()
{
double te_d1, tr_d1;
double te_d2, tr_d2;
double te_x1, tr_x1;
double te_y1, tr_y1;
double te_param[5],tr_param[5];
int i=0;
te_d1= tr_d1=0.21149573940783739;
te_d2= tr_d2=0.046892057172954082;
te_x1= tr_x1=-0.42272687517106533;
te_y1= tr_y1=0.42211309121921659;
//OpenBLAS
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
//reference
BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param);
CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS);
CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS);
CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS);
CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS);
for(i=0; i<5; i++){
CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS);
}
}