Merge branch 'release-v0.1alpha2'
This commit is contained in:
commit
4a73f5c5ea
|
@ -1,8 +1,13 @@
|
|||
*.obj
|
||||
*.lib
|
||||
*.dll
|
||||
*.def
|
||||
*.o
|
||||
lapack-3.1.1
|
||||
lapack-3.1.1.tgz
|
||||
*.so
|
||||
*.a
|
||||
.svn
|
||||
*~
|
||||
config.h
|
||||
Makefile.conf
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.1 alpha2(in development)
|
||||
Version 0.1 alpha2
|
||||
23-Jun-2011
|
||||
|
||||
common:
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
|
@ -15,11 +16,25 @@ common:
|
|||
* Provided an error message when the arch is not supported.(Refs
|
||||
issue #19 on github)
|
||||
* Fixed issue #23. Fixed a bug of f_check script about generating link flags.
|
||||
* Added openblas_set_num_threads for Fortran.
|
||||
* Fixed #25 a wrong result of rotmg.
|
||||
* Fixed a bug about detecting underscore prefix in c_check.
|
||||
* Print the wall time (cycles) when FUNCTION_PROFILE is enabled.
|
||||
* Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1
|
||||
* Added install target. You can use "make install". (Refs #20)
|
||||
|
||||
|
||||
x86/x86_64:
|
||||
*
|
||||
* Fixed #28 a wrong result of dsdot on x86_64.
|
||||
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
|
||||
* Fixed #33 ztrmm bug on Nehalem.
|
||||
* Worked around #27, the low performance axpy issue with small input size & multithreads.
|
||||
|
||||
MIPS64:
|
||||
*
|
||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
|
||||
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha1
|
||||
20-Mar-2011
|
||||
|
|
23
Makefile
23
Makefile
|
@ -15,6 +15,10 @@ ifdef SANITY_CHECK
|
|||
BLASDIRS += reference
|
||||
endif
|
||||
|
||||
ifndef PREFIX
|
||||
PREFIX = /opt/OpenBLAS
|
||||
endif
|
||||
|
||||
SUBDIRS = $(BLASDIRS)
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
SUBDIRS += lapack
|
||||
|
@ -22,8 +26,8 @@ endif
|
|||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared
|
||||
.NOTPARALLEL : all libs prof lapack-test
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install
|
||||
|
||||
all :: libs netlib tests shared
|
||||
@echo
|
||||
|
@ -70,7 +74,7 @@ ifeq ($(OSNAME), Darwin)
|
|||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
$(MAKE) -C exports dll
|
||||
# -ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
-ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
$(MAKE) -C exports dll
|
||||
|
@ -105,12 +109,17 @@ endif
|
|||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
#Save the config files for installation
|
||||
cp Makefile.conf Makefile.conf_last
|
||||
cp config.h config_last.h
|
||||
ifdef DYNAMIC_ARCH
|
||||
$(MAKE) -C kernel commonlibs || exit 1
|
||||
for d in $(DYNAMIC_CORE) ; \
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
done
|
||||
echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
endif
|
||||
touch lib.grd
|
||||
|
||||
prof : prof_blas prof_lapack
|
||||
|
||||
|
@ -230,19 +239,23 @@ lapack-test :
|
|||
|
||||
dummy :
|
||||
|
||||
install :
|
||||
$(MAKE) -f Makefile.install install
|
||||
|
||||
clean ::
|
||||
@for d in $(SUBDIRS_ALL) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
#ifdef DYNAMIC_ARCH
|
||||
@$(MAKE) -C kernel clean
|
||||
endif
|
||||
#endif
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h
|
||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||
@if test -d lapack-3.1.1; then \
|
||||
echo deleting lapack-3.1.1; \
|
||||
rm -rf lapack-3.1.1 ;\
|
||||
fi
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@echo Done.
|
|
@ -0,0 +1,65 @@
|
|||
TOPDIR = .
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
-include $(TOPDIR)/Makefile.conf_last
|
||||
include ./Makefile.system
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
|
||||
lib.grd :
|
||||
$(error OpenBLAS: Please run "make" firstly)
|
||||
|
||||
install : lib.grd
|
||||
@-mkdir -p $(PREFIX)
|
||||
@echo Generating openblas_config.h in $(PREFIX)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h
|
||||
@cat config_last.h >> $(PREFIX)/openblas_config.h
|
||||
@echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(PREFIX)/openblas_config.h
|
||||
@echo \#endif >> $(PREFIX)/openblas_config.h
|
||||
|
||||
@echo Generating f77blas.h in $(PREFIX)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h
|
||||
@cat common_interface.h >> $(PREFIX)/f77blas.h
|
||||
@echo \#endif >> $(PREFIX)/f77blas.h
|
||||
|
||||
@echo Generating cblas.h in $(PREFIX)
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h
|
||||
|
||||
#for install static library
|
||||
@echo Copy the static library to $(PREFIX)
|
||||
@cp $(LIBNAME) $(PREFIX)
|
||||
@-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX)
|
||||
#for install shared library
|
||||
@echo Copy the shared library to $(PREFIX)
|
||||
ifeq ($(OSNAME), Linux)
|
||||
-cp $(LIBSONAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
-cp $(LIBSONAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
-cp $(LIBSONAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
-cp $(LIBDYNNAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-cp $(LIBDLLNAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
-cp $(LIBDLLNAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
|
||||
endif
|
||||
|
||||
@echo Install OK!
|
||||
|
|
@ -91,6 +91,9 @@ VERSION = 0.1alpha2
|
|||
# SANITY_CHECK to compare the result with reference BLAS.
|
||||
# UTEST_CHECK = 1
|
||||
|
||||
# The installation directory.
|
||||
# PREFIX = /opt/OpenBLAS
|
||||
|
||||
# Common Optimization Flag; -O2 is enough.
|
||||
# DEBUG = 1
|
||||
|
||||
|
|
|
@ -515,6 +515,10 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
|||
CCOMMON_OPT += -DDYNAMIC_ARCH
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
CCOMMON_OPT += -DNO_LAPACK
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
|
|
8
README
8
README
|
@ -22,6 +22,11 @@ make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-g
|
|||
3)Debug version
|
||||
make DEBUG=1
|
||||
|
||||
4)Install to the directory (Optional)
|
||||
e.g.
|
||||
make install PREFIX=your_installation_directory
|
||||
The default directory is /opt/OpenBLAS
|
||||
|
||||
3.Support CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
|
@ -67,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve
|
|||
9.Known Issues
|
||||
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully.
|
||||
|
||||
10. Specification of Git Branches
|
||||
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
|
||||
|
@ -74,4 +80,4 @@ Now, there are 4 branches in github.com.
|
|||
* The master branch. This is a main branch to reflect a production-ready state.
|
||||
* The develop branch. This is a main branch to reflect a state with the latest delivered development changes for the next release.
|
||||
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future.
|
||||
* The gh-pages branch. This is for web pages
|
||||
* The gh-pages branch. This is for web pages
|
||||
|
|
2
c_check
2
c_check
|
@ -149,7 +149,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/);
|
|||
|
||||
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
||||
|
||||
$data =~ /globl\ ([_\.]*)(.*)/;
|
||||
$data =~ /globl\s([_\.]*)(.*)/;
|
||||
|
||||
$need_fu = $1;
|
||||
|
||||
|
|
|
@ -220,6 +220,11 @@ REALNAME: ;\
|
|||
|
||||
#define BUFFER_SIZE ( 8 << 20)
|
||||
|
||||
#if defined(LOONGSON3A)
|
||||
#define PAGESIZE (16UL << 10)
|
||||
#define FIXED_PAGESIZE (16UL << 10)
|
||||
#endif
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE (64UL << 10)
|
||||
#endif
|
||||
|
|
|
@ -60,4 +60,8 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *,
|
|||
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
|
||||
void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *);
|
||||
|
||||
double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX)
|
|||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||
|
||||
ifdef SMP
|
||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
|
||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX)
|
||||
ifndef NO_AFFINITY
|
||||
COMMONOBJS += init.$(SUFFIX)
|
||||
endif
|
||||
|
@ -100,6 +100,9 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h
|
|||
blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
//#include <sys/mman.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#ifdef SMP_SERVER
|
||||
#ifdef OS_LINUX
|
||||
|
||||
extern void openblas_set_num_threads(int num_threads) ;
|
||||
|
||||
void NAME(int* num_threads){
|
||||
openblas_set_num_threads(*num_threads);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) {
|
|||
if (cycles > 0) {
|
||||
|
||||
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n");
|
||||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n");
|
||||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n");
|
||||
|
||||
for (i = 0; i < MAX_PROF_TABLE; i ++) {
|
||||
if (function_profile_table[i].calls) {
|
||||
#ifndef OS_WINDOWS
|
||||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n",
|
||||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n",
|
||||
#else
|
||||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n",
|
||||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n",
|
||||
#endif
|
||||
func_table[i],
|
||||
function_profile_table[i].calls,
|
||||
(double)function_profile_table[i].cycles / (double)cycles * 100.,
|
||||
(double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100.,
|
||||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles
|
||||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles,
|
||||
function_profile_table[i].cycles
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME)
|
|||
zip : dll
|
||||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
||||
|
||||
dll : libgoto2.dll
|
||||
dll : ../$(LIBDLLNAME)
|
||||
#libgoto2.dll
|
||||
|
||||
dll2 : libgoto2_shared.dll
|
||||
|
||||
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
$(RANLIB) ../$(LIBNAME)
|
||||
ifeq ($(BINARY32), 1)
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:i386 /def:libgoto2.def
|
||||
else
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:X64 /def:libgoto2.def
|
||||
endif
|
||||
|
@ -84,7 +85,7 @@ libgoto_hpl.def : gensymbol
|
|||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
|
||||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
||||
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o $(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
|
||||
symbol.$(SUFFIX) : symbol.S
|
||||
$(CC) $(CFLAGS) -c -o $(@F) $^
|
||||
|
|
|
@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
|||
//In that case, the threads would be dependent.
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
|
||||
//Temporarily walk around the low performance issue with small imput size & multithreads.
|
||||
if (n <= 10000)
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
|
|||
BLASLONG n = *N;
|
||||
BLASLONG incx = *INCX;
|
||||
BLASLONG incy = *INCY;
|
||||
double ret = 0.0;
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
|
@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
|
|||
if (incx < 0) x -= (n - 1) * incx;
|
||||
if (incy < 0) y -= (n - 1) * incy;
|
||||
|
||||
return DSDOT_K(n, x, incx, y, incy);
|
||||
ret=DSDOT_K(n, x, incx, y, incy);
|
||||
|
||||
FUNCTION_PROFILE_END(1, n, n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
|
||||
|
||||
double ret = 0.0;
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
|
@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
|
|||
if (incx < 0) x -= (n - 1) * incx;
|
||||
if (incy < 0) y -= (n - 1) * incy;
|
||||
|
||||
return DSDOT_K(n, x, incx, y, incy);
|
||||
ret=DSDOT_K(n, x, incx, y, incy);
|
||||
|
||||
FUNCTION_PROFILE_END(1, n, n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,12 @@
|
|||
#define GAMSQ 16777216.e0
|
||||
#define RGAMSQ 5.9604645e-8
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ABS(x) fabs(x)
|
||||
#else
|
||||
#define ABS(x) fabsf(x)
|
||||
#endif
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){
|
||||
|
@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
|
|||
dq2 = dp2 * dy1;
|
||||
dq1 = dp1 * *dx1;
|
||||
|
||||
if (! (abs(dq1) > abs(dq2))) goto L40;
|
||||
if (! (ABS(dq1) > ABS(dq2))) goto L40;
|
||||
|
||||
dh21 = -(dy1) / *dx1;
|
||||
dh12 = dp2 / dp1;
|
||||
|
@ -140,7 +146,7 @@ L150:
|
|||
goto L130;
|
||||
|
||||
L160:
|
||||
if (! (abs(*dd2) <= RGAMSQ)) {
|
||||
if (! (ABS(*dd2) <= RGAMSQ)) {
|
||||
goto L190;
|
||||
}
|
||||
if (*dd2 == ZERO) {
|
||||
|
@ -157,7 +163,7 @@ L180:
|
|||
goto L160;
|
||||
|
||||
L190:
|
||||
if (! (abs(*dd2) >= GAMSQ)) {
|
||||
if (! (ABS(*dd2) >= GAMSQ)) {
|
||||
goto L220;
|
||||
}
|
||||
igo = 3;
|
||||
|
|
|
@ -53,6 +53,11 @@ SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX)
|
|||
CCOMMON_OPT += -DTS=$(TSUFFIX)
|
||||
endif
|
||||
|
||||
KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
KERNEL_INTERFACE += ../common_lapack.h
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
COMMONOBJS += cpuid.$(SUFFIX)
|
||||
endif
|
||||
|
@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h
|
|||
setparam$(TSUFFIX).c : setparam-ref.c
|
||||
sed 's/TS/$(TSUFFIX)/g' $< > $(@F)
|
||||
|
||||
kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h
|
||||
kernel$(TSUFFIX).h : $(KERNEL_INTERFACE)
|
||||
sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F)
|
||||
|
||||
|
||||
cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
@ -112,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
|
|||
cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S
|
||||
$(CC) -c $(PFLAGS) $< -o $(@F)
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
#ifdef DYNAMIC_ARCH
|
||||
clean ::
|
||||
@rm -f setparam_*.c kernel_*.h setparam.h kernel.h
|
||||
|
||||
endif
|
||||
#endif
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
|
|
@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
|
|||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
|
||||
|
||||
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
|
||||
|
||||
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||
|
|
|
@ -91,15 +91,37 @@ ifndef ZGEMM_BETA
|
|||
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_LN
|
||||
STRSMKERNEL_LN = trsm_kernel_LN.S
|
||||
STRSMKERNEL_LT = trsm_kernel_LT.S
|
||||
STRSMKERNEL_RN = trsm_kernel_LT.S
|
||||
STRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_LT
|
||||
STRSMKERNEL_LT = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_RN
|
||||
STRSMKERNEL_RN = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef STRSMKERNEL_RT
|
||||
STRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_LN
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_LT
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_RN
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT.S
|
||||
endif
|
||||
|
||||
ifndef DTRSMKERNEL_RT
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT.S
|
||||
endif
|
||||
|
||||
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
|
||||
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
|
||||
|
|
|
@ -1,2 +1,24 @@
|
|||
SAXPYKERNEL=axpy_loongson3a.S
|
||||
DAXPYKERNEL=daxpy_loongson3a_simd.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_loongson3a.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||
|
||||
DGEMMKERNEL = gemm_kernel_loongson3a.S
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
|
|
@ -300,7 +300,11 @@
|
|||
.align 3
|
||||
|
||||
.L999:
|
||||
j $31
|
||||
ADD s1, s1, s2
|
||||
|
||||
#ifdef DSDOT
|
||||
cvt.d.s s1, s1
|
||||
#endif
|
||||
j $31
|
||||
NOP
|
||||
|
||||
EPILOGUE
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -101,7 +101,11 @@ gotoblas_t TABLE_NAME = {
|
|||
#endif
|
||||
ssymm_outcopyTS, ssymm_oltcopyTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
sneg_tcopyTS, slaswp_ncopyTS,
|
||||
#else
|
||||
NULL,NULL,
|
||||
#endif
|
||||
|
||||
0, 0, 0,
|
||||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
|
||||
|
@ -147,7 +151,11 @@ gotoblas_t TABLE_NAME = {
|
|||
#endif
|
||||
dsymm_outcopyTS, dsymm_oltcopyTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
dneg_tcopyTS, dlaswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#ifdef EXPRECISION
|
||||
|
||||
|
@ -195,7 +203,11 @@ gotoblas_t TABLE_NAME = {
|
|||
#endif
|
||||
qsymm_outcopyTS, qsymm_oltcopyTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
qneg_tcopyTS, qlaswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -286,7 +298,11 @@ gotoblas_t TABLE_NAME = {
|
|||
chemm3m_oucopyrTS, chemm3m_olcopyrTS,
|
||||
chemm3m_oucopyiTS, chemm3m_olcopyiTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
cneg_tcopyTS, claswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
0, 0, 0,
|
||||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
|
||||
|
@ -375,7 +391,11 @@ gotoblas_t TABLE_NAME = {
|
|||
zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
|
||||
zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
zneg_tcopyTS, zlaswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#ifdef EXPRECISION
|
||||
|
||||
|
@ -466,7 +486,11 @@ gotoblas_t TABLE_NAME = {
|
|||
xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
|
||||
xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
xneg_tcopyTS, xlaswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1541,5 +1541,8 @@
|
|||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
/*remove the hidden return value address from the stack.*/
|
||||
popl %ecx
|
||||
xchgl %ecx, 0(%esp)
|
||||
ret
|
||||
EPILOGUE
|
||||
|
|
|
@ -1286,6 +1286,10 @@
|
|||
haddps %xmm0, %xmm0
|
||||
#endif
|
||||
|
||||
#ifdef DSDOT
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
#endif
|
||||
|
||||
RESTOREREGISTERS
|
||||
|
||||
ret
|
||||
|
|
|
@ -544,7 +544,7 @@
|
|||
jg .L11
|
||||
|
||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||
addq $1, KK
|
||||
addq $4, KK
|
||||
#endif
|
||||
|
||||
leaq (C, LDC, 4), C
|
||||
|
@ -594,7 +594,7 @@
|
|||
jg .L11
|
||||
|
||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||
addq $1, KK
|
||||
addq $4, KK
|
||||
#endif
|
||||
|
||||
leaq (C, LDC, 4), C
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
/*This is only for "make install" target.*/
|
||||
|
||||
#ifdef NEEDBUNDERSCORE
|
||||
#define BLASFUNC(FUNC) FUNC##_
|
||||
#else
|
||||
#define BLASFUNC(FUNC) FUNC
|
||||
#endif
|
||||
|
||||
#if defined(OS_WINDOWS) && defined(__64BIT__)
|
||||
typedef long long BLASLONG;
|
||||
typedef unsigned long long BLASULONG;
|
||||
#else
|
||||
typedef long BLASLONG;
|
||||
typedef unsigned long BLASULONG;
|
||||
#endif
|
||||
|
||||
#ifdef USE64BITINT
|
||||
typedef BLASLONG blasint;
|
||||
#else
|
||||
typedef int blasint;
|
||||
#endif
|
22
param.h
22
param.h
|
@ -1480,27 +1480,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 1
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define SGEMM_DEFAULT_P 108
|
||||
#define DGEMM_DEFAULT_P 112
|
||||
#define SGEMM_DEFAULT_P 32
|
||||
#define DGEMM_DEFAULT_P 32
|
||||
#define CGEMM_DEFAULT_P 108
|
||||
#define ZGEMM_DEFAULT_P 112
|
||||
|
||||
#define SGEMM_DEFAULT_Q 288
|
||||
#define DGEMM_DEFAULT_Q 144
|
||||
#define SGEMM_DEFAULT_Q 116
|
||||
#define DGEMM_DEFAULT_Q 116
|
||||
#define CGEMM_DEFAULT_Q 144
|
||||
#define ZGEMM_DEFAULT_Q 72
|
||||
|
||||
#define SGEMM_DEFAULT_R 2000
|
||||
#define DGEMM_DEFAULT_R 2000
|
||||
#define SGEMM_DEFAULT_R 1000
|
||||
#define DGEMM_DEFAULT_R 1000
|
||||
#define CGEMM_DEFAULT_R 2000
|
||||
#define ZGEMM_DEFAULT_R 2000
|
||||
|
||||
|
|
|
@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system
|
|||
TARGET=openblas_utest
|
||||
CUNIT_LIB=/usr/local/lib/libcunit.a
|
||||
|
||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o
|
||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o
|
||||
|
||||
all : run_test
|
||||
|
||||
$(TARGET): $(OBJS)
|
||||
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
|
||||
$(FC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
|
||||
|
||||
run_test: $(TARGET)
|
||||
./$(TARGET)
|
||||
|
|
|
@ -57,4 +57,8 @@ void test_caxpy_inc_0(void);
|
|||
void test_zdotu_n_1(void);
|
||||
void test_zdotu_offset_1(void);
|
||||
|
||||
void test_drotmg(void);
|
||||
|
||||
void test_dsdot_n_1(void);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -54,7 +54,10 @@ CU_TestInfo test_level1[]={
|
|||
|
||||
{"Testing zdotu with n == 1",test_zdotu_n_1},
|
||||
{"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1},
|
||||
|
||||
|
||||
{"Testing drotmg",test_drotmg},
|
||||
|
||||
{"Testing dsdot with n == 1",test_dsdot_n_1},
|
||||
CU_TEST_INFO_NULL,
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common_utest.h"
|
||||
|
||||
void test_dsdot_n_1()
|
||||
{
|
||||
float x= 0.172555164;
|
||||
float y= -0.0138700781;
|
||||
int incx=1;
|
||||
int incy=1;
|
||||
int n=1;
|
||||
|
||||
double res1=0.0f, res2=0.0f;
|
||||
|
||||
res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy);
|
||||
res2=BLASFUNC_REF(dsdot)(&n, &x, &incx, &y, &incy);
|
||||
|
||||
CU_ASSERT_DOUBLE_EQUAL(res1, res2, CHECK_EPS);
|
||||
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common_utest.h"
|
||||
|
||||
void test_drotmg()
|
||||
{
|
||||
double te_d1, tr_d1;
|
||||
double te_d2, tr_d2;
|
||||
double te_x1, tr_x1;
|
||||
double te_y1, tr_y1;
|
||||
double te_param[5],tr_param[5];
|
||||
int i=0;
|
||||
te_d1= tr_d1=0.21149573940783739;
|
||||
te_d2= tr_d2=0.046892057172954082;
|
||||
te_x1= tr_x1=-0.42272687517106533;
|
||||
te_y1= tr_y1=0.42211309121921659;
|
||||
//OpenBLAS
|
||||
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
|
||||
//reference
|
||||
BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param);
|
||||
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS);
|
||||
|
||||
for(i=0; i<5; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue