diff --git a/.gitignore b/.gitignore index 44af57166..6cfc5b3c1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,13 @@ +*.obj +*.lib +*.dll +*.def *.o lapack-3.1.1 lapack-3.1.1.tgz *.so *.a +.svn *~ config.h Makefile.conf diff --git a/Changelog.txt b/Changelog.txt index ab8fb19c2..b54949ec5 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,13 +1,40 @@ OpenBLAS ChangeLog ==================================================================== -Version 0.1 alpha2(in development) +Version 0.1 alpha2 +23-Jun-2011 common: - * + * Fixed blasint undefined bug in the cblas.h file. Other software + could include this header successfully(Refs issue #13 on github) + * Fixed the SEGFAULT bug on 64 cores. On SMP server, the number + of CPUs or cores should be less than or equal to 64.(Refs issue #14 + on github) + * Support "void goto_set_num_threads(int num_threads)" and "void + openblas_set_num_threads(int num_threads)" when USE_OPENMP=1 + * Added extern "C" to support C++. Thanks to Tasio for the patch(Refs + issue #21 on github) + * Provided an error message when the arch is not supported.(Refs + issue #19 on github) + * Fixed issue #23. Fixed a bug of f_check script about generating link flags. + * Added openblas_set_num_threads for Fortran. + * Fixed #25 a wrong result of rotmg. + * Fixed a bug about detecting underscore prefix in c_check. + * Print the wall time (cycles) when FUNCTION_PROFILE is enabled + * Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1 + * Added install target. You can use "make install". (Refs #20) + + x86/x86_64: - * + * Fixed #28 a wrong result of dsdot on x86_64. + * Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6. + * Fixed #33 ztrmm bug on Nehalem. + * Worked around #27, the low performance axpy issue with small input size & multithreads. + MIPS64: - * + * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. + * Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. 
(Refs #2) + * Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3) + ==================================================================== Version 0.1 alpha1 20-Mar-2011 diff --git a/Makefile b/Makefile index 52f649f77..798c56192 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,10 @@ ifdef SANITY_CHECK BLASDIRS += reference endif +ifndef PREFIX +PREFIX = /opt/OpenBLAS +endif + SUBDIRS = $(BLASDIRS) ifneq ($(NO_LAPACK), 1) SUBDIRS += lapack @@ -22,8 +26,8 @@ endif SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench -.PHONY : all libs netlib test ctest shared -.NOTPARALLEL : all libs prof lapack-test +.PHONY : all libs netlib test ctest shared install +.NOTPARALLEL : all libs prof lapack-test install all :: libs netlib tests shared @echo @@ -70,7 +74,7 @@ ifeq ($(OSNAME), Darwin) endif ifeq ($(OSNAME), WINNT) $(MAKE) -C exports dll -# -ln -fs $(LIBDLLNAME) libopenblas.dll + -ln -fs $(LIBDLLNAME) libopenblas.dll endif ifeq ($(OSNAME), CYGWIN_NT) $(MAKE) -C exports dll @@ -96,18 +100,26 @@ endif endif libs : +ifeq ($(CORE), UNKOWN) + $(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) 
+endif -ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX) for d in $(SUBDIRS) ; \ do if test -d $$d; then \ $(MAKE) -C $$d $(@F) || exit 1 ; \ fi; \ done +#Save the config files for installation + cp Makefile.conf Makefile.conf_last + cp config.h config_last.h ifdef DYNAMIC_ARCH $(MAKE) -C kernel commonlibs || exit 1 for d in $(DYNAMIC_CORE) ; \ do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ done + echo DYNAMIC_ARCH=1 >> Makefile.conf_last endif + touch lib.grd prof : prof_blas prof_lapack @@ -227,19 +239,23 @@ lapack-test : dummy : +install : + $(MAKE) -f Makefile.install install + clean :: @for d in $(SUBDIRS_ALL) ; \ do if test -d $$d; then \ $(MAKE) -C $$d $(@F) || exit 1 ; \ fi; \ done -ifdef DYNAMIC_ARCH +#ifdef DYNAMIC_ARCH @$(MAKE) -C kernel clean -endif +#endif @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h @rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib @if test -d lapack-3.1.1; then \ echo deleting lapack-3.1.1; \ rm -rf lapack-3.1.1 ;\ fi + @rm -f *.grd Makefile.conf_last config_last.h @echo Done. \ No newline at end of file diff --git a/Makefile.install b/Makefile.install new file mode 100644 index 000000000..80dafc9c6 --- /dev/null +++ b/Makefile.install @@ -0,0 +1,65 @@ +TOPDIR = . 
+export GOTOBLAS_MAKEFILE = 1 +-include $(TOPDIR)/Makefile.conf_last +include ./Makefile.system + +.PHONY : install +.NOTPARALLEL : install + +lib.grd : + $(error OpenBLAS: Please run "make" firstly) + +install : lib.grd + @-mkdir -p $(PREFIX) + @echo Generating openblas_config.h in $(PREFIX) +#for inc + @echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h + @echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h + @cat config_last.h >> $(PREFIX)/openblas_config.h + @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h + @cat openblas_config_template.h >> $(PREFIX)/openblas_config.h + @echo \#endif >> $(PREFIX)/openblas_config.h + + @echo Generating f77blas.h in $(PREFIX) + @echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h + @echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h + @echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h + @cat common_interface.h >> $(PREFIX)/f77blas.h + @echo \#endif >> $(PREFIX)/f77blas.h + + @echo Generating cblas.h in $(PREFIX) + @sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h + +#for install static library + @echo Copy the static library to $(PREFIX) + @cp $(LIBNAME) $(PREFIX) + @-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX) +#for install shared library + @echo Copy the shared library to $(PREFIX) +ifeq ($(OSNAME), Linux) + -cp $(LIBSONAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so +endif +ifeq ($(OSNAME), FreeBSD) + -cp $(LIBSONAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so +endif +ifeq ($(OSNAME), NetBSD) + -cp $(LIBSONAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so +endif +ifeq ($(OSNAME), Darwin) + -cp $(LIBDYNNAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib +endif +ifeq ($(OSNAME), WINNT) + -cp $(LIBDLLNAME) $(PREFIX) + -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll +endif +ifeq ($(OSNAME), CYGWIN_NT) + -cp $(LIBDLLNAME) 
$(PREFIX) + -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll +endif + + @echo Install OK! + diff --git a/Makefile.rule b/Makefile.rule index 61f9eb91d..88d552495 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -91,6 +91,9 @@ VERSION = 0.1alpha2 # SANITY_CHECK to compare the result with reference BLAS. # UTEST_CHECK = 1 +# The installation directory. +# PREFIX = /opt/OpenBLAS + # Common Optimization Flag; -O2 is enough. # DEBUG = 1 diff --git a/Makefile.system b/Makefile.system index 5a129732f..6cf65c7bd 100644 --- a/Makefile.system +++ b/Makefile.system @@ -30,6 +30,10 @@ ifdef TARGET GETARCH_FLAGS += -DFORCE_$(TARGET) endif +ifdef INTERFACE64 +GETARCH_FLAGS += -DUSE64BITINT +endif + # This operation is expensive, so execution should be once. ifndef GOTOBLAS_MAKEFILE export GOTOBLAS_MAKEFILE = 1 @@ -185,7 +189,7 @@ ifeq ($(C_COMPILER), INTEL) CCOMMON_OPT += -wd981 endif -ifdef USE_OPENMP +ifeq ($(USE_OPENMP), 1) ifeq ($(C_COMPILER), GCC) CCOMMON_OPT += -fopenmp endif @@ -489,7 +493,8 @@ endif ifdef BINARY64 ifdef INTERFACE64 -CCOMMON_OPT += -DUSE64BITINT +CCOMMON_OPT += +#-DUSE64BITINT endif endif @@ -510,6 +515,10 @@ ifeq ($(DYNAMIC_ARCH), 1) CCOMMON_OPT += -DDYNAMIC_ARCH endif +ifeq ($(NO_LAPACK), 1) +CCOMMON_OPT += -DNO_LAPACK +endif + ifdef SMP CCOMMON_OPT += -DSMP_SERVER diff --git a/README b/README index 9b04f6f99..9a7b16326 100644 --- a/README +++ b/README @@ -8,7 +8,9 @@ Download from project homepage. http://xianyi.github.com/OpenBLAS/ Or, check out codes from git://github.com/xianyi/OpenBLAS.git 1)Normal compile -Please read GotoBLAS_02QuickInstall.txt or type "make" + (a) type "make" to detect the CPU automatically. + or + (b) type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt. 2)Cross compile Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly. 
@@ -20,6 +22,11 @@ make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-g 3)Debug version make DEBUG=1 +4)Install to the directory (Optional) +e.g. +make install PREFIX=your_installation_directory +The default directory is /opt/OpenBLAS + 3.Support CPU & OS Please read GotoBLAS_01Readme.txt @@ -39,13 +46,17 @@ export GOTO_NUM_THREADS=4 or export OMP_NUM_THREADS=4 -The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS. +The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. + +If you compile this lib with USE_OPENMP=1, you should only set the OMP_NUM_THREADS environment variable. 4.2 Set the number of threads with calling functions. for example, void goto_set_num_threads(int num_threads); or void openblas_set_num_threads(int num_threads); +If you compile this lib with USE_OPENMP=1, you should use the above functions, too. + 5.Report Bugs Please add a issue in https://github.com/xianyi/OpenBLAS/issues @@ -56,4 +67,17 @@ Optimization on ICT Loongson 3A CPU OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas 8.ChangeLog -Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. \ No newline at end of file +Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. + +9.Known Issues +* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit + is 64. On 32 bits, it is 32. +* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully. + +10. Specification of Git Branches +We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). +Now, there are 4 branches in github.com. + * The master branch. This is the main branch to reflect a production-ready state. + * The develop branch. 
This is the main branch to reflect a state with the latest delivered development changes for the next release. + * The loongson3a branch. This is a feature branch. We develop Loongson3A code on this branch. We will merge this feature into the develop branch in the future. + * The gh-pages branch. This is for web pages diff --git a/TargetList.txt b/TargetList.txt new file mode 100644 index 000000000..1c3d7c5b9 --- /dev/null +++ b/TargetList.txt @@ -0,0 +1,57 @@ +Force Target Examples: + +make TARGET=NEHALEM +make TARGET=LOONGSON3A BINARY=64 +make TARGET=ISTANBUL + +Supported List: +1.X86/X86_64 +a)Intel CPU: +P2 +COPPERMINE +KATMAI +NORTHWOOD +PRESCOTT +BANIAS +YONAH +CORE2 +PENRYN +DUNNINGTON +NEHALEM +ATOM + +b)AMD CPU: +ATHLON +OPTERON +OPTERON_SSE3 +BARCELONA +SHANGHAI +ISTANBUL + +c)VIA CPU: +SSE_GENERIC +VIAC3 +NANO + +2.Power CPU: +POWER4 +POWER5 +POWER6 +PPCG4 +PPC970 +PPC970MP +PPC440 +PPC440FP2 +CELL + +3.MIPS64 CPU: +SICORTEX +LOONGSON3A + +4.IA64 CPU: +ITANIUM2 + +5.SPARC CPU: +SPARC +SPARCV7 + diff --git a/c_check b/c_check index d8025f9f3..263efeb3d 100644 --- a/c_check +++ b/c_check @@ -149,7 +149,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/); $data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; -$data =~ /globl\ ([_\.]*)(.*)/; +$data =~ /globl\s([_\.]*)(.*)/; $need_fu = $1; diff --git a/cblas.h b/cblas.h index ea0fbb629..34adc5e99 100644 --- a/cblas.h +++ b/cblas.h @@ -1,6 +1,14 @@ #ifndef CBLAS_H #define CBLAS_H +#ifdef __cplusplus +extern "C" { + /* Assume C declarations for C++ */ +#endif /* __cplusplus */ + +#include +#include "common.h" + #define CBLAS_INDEX size_t enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; @@ -270,4 +278,10 @@ void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANS double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); void cblas_xerbla(blasint p, char *rout, char *form, ...); + +#ifdef __cplusplus +} + +#endif /* 
__cplusplus */ + #endif diff --git a/common.h b/common.h index a481b2acb..1a7dd434a 100644 --- a/common.h +++ b/common.h @@ -39,6 +39,11 @@ #ifndef COMMON_H #define COMMON_H +#ifdef __cplusplus +extern "C" { + /* Assume C declarations for C++ */ +#endif /* __cplusplus */ + #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif @@ -607,4 +612,9 @@ extern int gotoblas_profile; #define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME) #endif +#ifdef __cplusplus +} + +#endif /* __cplusplus */ + #endif diff --git a/common_reference.h b/common_reference.h index 04b11f80f..4cc4be4fd 100644 --- a/common_reference.h +++ b/common_reference.h @@ -60,4 +60,8 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *, double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *); double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *); +void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *); + +double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*); + #endif diff --git a/cpuid_x86.c b/cpuid_x86.c index 0d091b37c..6e3e74f82 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1302,24 +1302,25 @@ int get_coretype(void){ case 13: return CORE_DUNNINGTON; } - break; - case 2: - switch (model) { - case 5: - //Intel Core (Clarkdale) / Core (Arrandale) - // Pentium (Clarkdale) / Pentium Mobile (Arrandale) - // Xeon (Clarkdale), 32nm - return CORE_NEHALEM; - case 12: - //Xeon Processor 5600 (Westmere-EP) - return CORE_NEHALEM; - } - break; - + break; + case 2: + switch (model) { + case 5: + //Intel Core (Clarkdale) / Core (Arrandale) + // Pentium (Clarkdale) / Pentium Mobile (Arrandale) + // Xeon (Clarkdale), 32nm + return CORE_NEHALEM; + case 12: + //Xeon Processor 5600 (Westmere-EP) + return CORE_NEHALEM; + } + break; } + break; + case 15: - if (model <= 0x2) return CORE_NORTHWOOD; - return CORE_PRESCOTT; + if (model <= 0x2) return 
CORE_NORTHWOOD; + else return CORE_PRESCOTT; } } diff --git a/driver/others/Makefile b/driver/others/Makefile index bc5de3848..75b552b65 100644 --- a/driver/others/Makefile +++ b/driver/others/Makefile @@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX) ifdef SMP -COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) +COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) ifndef NO_AFFINITY COMMONOBJS += init.$(SUFFIX) endif @@ -100,6 +100,9 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h $(CC) $(CFLAGS) -c $< -o $(@F) +openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c + $(CC) $(CFLAGS) -c $< -o $(@F) + blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h $(CC) $(CFLAGS) -c $< -o $(@F) diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 3e70d8549..4fd4cd440 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -38,7 +38,7 @@ #include #include -#include +//#include #include "common.h" #ifndef USE_OPENMP @@ -49,6 +49,26 @@ int blas_server_avail = 0; +void goto_set_num_threads(int num_threads) { + + if (num_threads < 1) num_threads = blas_num_threads; + + if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; + + if (num_threads > blas_num_threads) { + blas_num_threads = num_threads; + } + + blas_cpu_number = num_threads; + + omp_set_num_threads(blas_cpu_number); + +} +void openblas_set_num_threads(int num_threads) { + + goto_set_num_threads(num_threads); +} + int blas_thread_init(void){ blas_get_cpu_number(); diff --git a/driver/others/init.c b/driver/others/init.c index 7ee7dc45d..4adba661f 100644 --- a/driver/others/init.c +++ b/driver/others/init.c 
@@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) { return count; } +/*** + Known issue: The number of CPUs/cores should less + than sizeof(unsigned long). On 64 bits, the limit + is 64. On 32 bits, it is 32. +***/ static inline unsigned long get_cpumap(int node) { int infile; unsigned long affinity; char name[160]; + char cpumap[160]; char *p, *dummy; - + int i=0; + sprintf(name, CPUMAP_NAME, node); infile = open(name, O_RDONLY); @@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) { if (infile != -1) { - read(infile, name, sizeof(name)); - + read(infile, cpumap, sizeof(cpumap)); + p = cpumap; + while (*p != '\n' && i<160){ + if(*p != ',') { + name[i++]=*p; + } + p++; + } p = name; - while ((*p == '0') || (*p == ',')) p++; + // while ((*p == '0') || (*p == ',')) p++; - affinity = strtol(p, &dummy, 16); + affinity = strtoul(p, &dummy, 16); close(infile); } @@ -347,7 +360,13 @@ static void disable_hyperthread(void) { unsigned long share; int cpu; - common -> avail = (1UL << common -> num_procs) - 1; + if(common->num_procs > 64){ + fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); + exit(1); + }else if(common->num_procs == 64){ + common -> avail = 0xFFFFFFFFFFFFFFFFUL; + }else + common -> avail = (1UL << common -> num_procs) - 1; #ifdef DEBUG fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail); @@ -376,7 +395,13 @@ static void disable_affinity(void) { fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]); #endif - lprocmask = (1UL << common -> final_num_procs) - 1; + if(common->final_num_procs > 64){ + fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). 
Terminated.\n", common->final_num_procs); + exit(1); + }else if(common->final_num_procs == 64){ + lprocmask = 0xFFFFFFFFFFFFFFFFUL; + }else + lprocmask = (1UL << common -> final_num_procs) - 1; #ifndef USE_OPENMP lprocmask &= *(unsigned long *)&cpu_orig_mask[0]; diff --git a/driver/others/openblas_set_num_threads.c b/driver/others/openblas_set_num_threads.c new file mode 100644 index 000000000..7ca3b7114 --- /dev/null +++ b/driver/others/openblas_set_num_threads.c @@ -0,0 +1,45 @@ +/***************************************************************************** +Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the ISCAS nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + +#include "common.h" + +#ifdef SMP_SERVER +#ifdef OS_LINUX + +extern void openblas_set_num_threads(int num_threads) ; + +void NAME(int* num_threads){ + openblas_set_num_threads(*num_threads); +} + +#endif +#endif diff --git a/driver/others/profile.c b/driver/others/profile.c index f65550c9f..f464c0b6a 100644 --- a/driver/others/profile.c +++ b/driver/others/profile.c @@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) { if (cycles > 0) { fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n"); - fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n"); + fprintf(stderr, " Function No. 
of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n"); for (i = 0; i < MAX_PROF_TABLE; i ++) { if (function_profile_table[i].calls) { #ifndef OS_WINDOWS - fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n", + fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n", #else - fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n", + fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n", #endif func_table[i], function_profile_table[i].calls, (double)function_profile_table[i].cycles / (double)cycles * 100., (double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100., - (double)function_profile_table[i].area / (double)function_profile_table[i].cycles + (double)function_profile_table[i].area / (double)function_profile_table[i].cycles, + function_profile_table[i].cycles ); } } diff --git a/exports/Makefile b/exports/Makefile index 24cdc41c8..f4c9314f9 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME) zip : dll zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) -dll : libgoto2.dll +dll : ../$(LIBDLLNAME) +#libgoto2.dll dll2 : libgoto2_shared.dll -libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) +../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) $(RANLIB) ../$(LIBNAME) ifeq ($(BINARY32), 1) - $(DLLWRAP) -o $(@F) --def libgoto2.def \ + $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ --entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) -lib /machine:i386 /def:libgoto2.def else - $(DLLWRAP) -o $(@F) --def libgoto2.def \ + $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) -lib /machine:X64 /def:libgoto2.def endif @@ -84,7 +85,7 @@ libgoto_hpl.def : gensymbol perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F) $(LIBDYNNAME) : ../$(LIBNAME) osx.def - $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o 
$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) + $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) symbol.$(SUFFIX) : symbol.S $(CC) $(CFLAGS) -c -o $(@F) $^ diff --git a/f_check b/f_check index 26c57bcc9..45a946eb6 100644 --- a/f_check +++ b/f_check @@ -274,6 +274,7 @@ if ($link ne "") { && ($flags !~ /kernel32/) && ($flags !~ /advapi32/) && ($flags !~ /shell32/) + && ($flags !~ /^\-l$/) ) { $linker_l .= $flags . " "; } diff --git a/getarch.c b/getarch.c index 1d9bbc175..8864753b7 100644 --- a/getarch.c +++ b/getarch.c @@ -604,30 +604,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef POWER #define POWER #endif +#define OPENBLAS_SUPPORTED #endif #if defined(__i386__) || (__x86_64__) #include "cpuid_x86.c" +#define OPENBLAS_SUPPORTED #endif #ifdef __ia64__ #include "cpuid_ia64.c" +#define OPENBLAS_SUPPORTED #endif #ifdef __alpha #include "cpuid_alpha.c" +#define OPENBLAS_SUPPORTED #endif #ifdef POWER #include "cpuid_power.c" +#define OPENBLAS_SUPPORTED #endif #ifdef sparc #include "cpuid_sparc.c" +#define OPENBLAS_SUPPORTED #endif #ifdef __mips__ #include "cpuid_mips.c" +#define OPENBLAS_SUPPORTED +#endif + +#ifndef OPENBLAS_SUPPORTED +#error "This arch/CPU is not supported by OpenBLAS." 
#endif #else diff --git a/getarch_2nd.c b/getarch_2nd.c index 31babd28a..018f08d31 100644 --- a/getarch_2nd.c +++ b/getarch_2nd.c @@ -30,6 +30,10 @@ int main(int argc, char **argv) { printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double))); printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float))); printf("#define ZLOCAL_BUFFER_SIZE\t%ld\n", (ZGEMM_DEFAULT_Q * ZGEMM_DEFAULT_UNROLL_N * 2 * 2 * sizeof(double))); + +#ifdef USE64BITINT + printf("#define USE64BITINT\n"); +#endif } return 0; diff --git a/interface/axpy.c b/interface/axpy.c index dd75b758c..82b0ee234 100644 --- a/interface/axpy.c +++ b/interface/axpy.c @@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc //In that case, the threads would be dependent. if (incx == 0 || incy == 0) nthreads = 1; - + + //Temporarily walk around the low performance issue with small imput size & multithreads. 
+ if (n <= 10000) + nthreads = 1; + if (nthreads == 1) { #endif diff --git a/interface/create b/interface/create old mode 100644 new mode 100755 diff --git a/interface/dsdot.c b/interface/dsdot.c index 66f7917d5..94237e0c4 100644 --- a/interface/dsdot.c +++ b/interface/dsdot.c @@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ BLASLONG n = *N; BLASLONG incx = *INCX; BLASLONG incy = *INCY; + double ret = 0.0; PRINT_DEBUG_NAME; @@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ if (incx < 0) x -= (n - 1) * incx; if (incy < 0) y -= (n - 1) * incy; - return DSDOT_K(n, x, incx, y, incy); + ret=DSDOT_K(n, x, incx, y, incy); FUNCTION_PROFILE_END(1, n, n); IDEBUG_END; - return 0; + return ret; } #else double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ + + double ret = 0.0; PRINT_DEBUG_CNAME; @@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ if (incx < 0) x -= (n - 1) * incx; if (incy < 0) y -= (n - 1) * incy; - return DSDOT_K(n, x, incx, y, incy); + ret=DSDOT_K(n, x, incx, y, incy); FUNCTION_PROFILE_END(1, n, n); IDEBUG_END; - return 0; + return ret; } diff --git a/interface/rotmg.c b/interface/rotmg.c index c37c09914..3db891714 100644 --- a/interface/rotmg.c +++ b/interface/rotmg.c @@ -7,6 +7,12 @@ #define GAMSQ 16777216.e0 #define RGAMSQ 5.9604645e-8 +#ifdef DOUBLE +#define ABS(x) fabs(x) +#else +#define ABS(x) fabsf(x) +#endif + #ifndef CBLAS void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){ @@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ dq2 = dp2 * dy1; dq1 = dp1 * *dx1; - if (! (abs(dq1) > abs(dq2))) goto L40; + if (! (ABS(dq1) > ABS(dq2))) goto L40; dh21 = -(dy1) / *dx1; dh12 = dp2 / dp1; @@ -140,7 +146,7 @@ L150: goto L130; L160: - if (! (abs(*dd2) <= RGAMSQ)) { + if (! 
(ABS(*dd2) <= RGAMSQ)) { goto L190; } if (*dd2 == ZERO) { @@ -157,7 +163,7 @@ L180: goto L160; L190: - if (! (abs(*dd2) >= GAMSQ)) { + if (! (ABS(*dd2) >= GAMSQ)) { goto L220; } igo = 3; diff --git a/kernel/Makefile b/kernel/Makefile index 6084cbc3f..aed145b60 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -53,6 +53,11 @@ SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX) CCOMMON_OPT += -DTS=$(TSUFFIX) endif +KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h +ifneq ($(NO_LAPACK), 1) +KERNEL_INTERFACE += ../common_lapack.h +endif + ifeq ($(ARCH), x86) COMMONOBJS += cpuid.$(SUFFIX) endif @@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h setparam$(TSUFFIX).c : setparam-ref.c sed 's/TS/$(TSUFFIX)/g' $< > $(@F) -kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h +kernel$(TSUFFIX).h : $(KERNEL_INTERFACE) sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F) + cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S $(CC) -c $(CFLAGS) $< -o $(@F) @@ -112,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S $(CC) -c $(PFLAGS) $< -o $(@F) -ifdef DYNAMIC_ARCH +#ifdef DYNAMIC_ARCH clean :: @rm -f setparam_*.c kernel_*.h setparam.h kernel.h -endif +#endif include $(TOPDIR)/Makefile.tail diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1 index 317f14363..b08664a8e 100644 --- a/kernel/Makefile.L1 +++ b/kernel/Makefile.L1 @@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@ $(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) - $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ + $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@ $(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ diff --git a/kernel/mips64/dot.S 
b/kernel/mips64/dot.S index b1f599172..6220b6ac9 100644 --- a/kernel/mips64/dot.S +++ b/kernel/mips64/dot.S @@ -300,7 +300,11 @@ .align 3 .L999: - j $31 ADD s1, s1, s2 - +#ifdef DSDOT + cvt.d.s s1, s1 +#endif + j $31 + NOP + EPILOGUE diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 0ab57f3b3..d3734bbd9 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -101,7 +101,11 @@ gotoblas_t TABLE_NAME = { #endif ssymm_outcopyTS, ssymm_oltcopyTS, +#ifndef NO_LAPACK sneg_tcopyTS, slaswp_ncopyTS, +#else + NULL,NULL, +#endif 0, 0, 0, DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), @@ -147,7 +151,11 @@ gotoblas_t TABLE_NAME = { #endif dsymm_outcopyTS, dsymm_oltcopyTS, +#ifndef NO_LAPACK dneg_tcopyTS, dlaswp_ncopyTS, +#else + NULL, NULL, +#endif #ifdef EXPRECISION @@ -195,7 +203,11 @@ gotoblas_t TABLE_NAME = { #endif qsymm_outcopyTS, qsymm_oltcopyTS, +#ifndef NO_LAPACK qneg_tcopyTS, qlaswp_ncopyTS, +#else + NULL, NULL, +#endif #endif @@ -286,7 +298,11 @@ gotoblas_t TABLE_NAME = { chemm3m_oucopyrTS, chemm3m_olcopyrTS, chemm3m_oucopyiTS, chemm3m_olcopyiTS, +#ifndef NO_LAPACK cneg_tcopyTS, claswp_ncopyTS, +#else + NULL, NULL, +#endif 0, 0, 0, ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), @@ -375,7 +391,11 @@ gotoblas_t TABLE_NAME = { zhemm3m_oucopyrTS, zhemm3m_olcopyrTS, zhemm3m_oucopyiTS, zhemm3m_olcopyiTS, +#ifndef NO_LAPACK zneg_tcopyTS, zlaswp_ncopyTS, +#else + NULL, NULL, +#endif #ifdef EXPRECISION @@ -466,7 +486,11 @@ gotoblas_t TABLE_NAME = { xhemm3m_oucopyrTS, xhemm3m_olcopyrTS, xhemm3m_oucopyiTS, xhemm3m_olcopyiTS, +#ifndef NO_LAPACK xneg_tcopyTS, xlaswp_ncopyTS, +#else + NULL, NULL, +#endif #endif diff --git a/kernel/x86/zdot_sse2.S b/kernel/x86/zdot_sse2.S index 5aeefde31..2a174fb5d 100644 --- a/kernel/x86/zdot_sse2.S +++ b/kernel/x86/zdot_sse2.S @@ -1541,5 +1541,8 @@ popl %ebx popl %esi popl %edi +/*remove the hidden return value 
address from the stack.*/ + popl %ecx + xchgl %ecx, 0(%esp) ret EPILOGUE diff --git a/kernel/x86_64/dot_sse.S b/kernel/x86_64/dot_sse.S index cc866a9c5..61c481064 100644 --- a/kernel/x86_64/dot_sse.S +++ b/kernel/x86_64/dot_sse.S @@ -1286,6 +1286,10 @@ haddps %xmm0, %xmm0 #endif +#ifdef DSDOT + cvtss2sd %xmm0, %xmm0 +#endif + RESTOREREGISTERS ret diff --git a/kernel/x86_64/zgemm_kernel_1x4_nehalem.S b/kernel/x86_64/zgemm_kernel_1x4_nehalem.S index e72a19c96..4ddfc488b 100644 --- a/kernel/x86_64/zgemm_kernel_1x4_nehalem.S +++ b/kernel/x86_64/zgemm_kernel_1x4_nehalem.S @@ -544,7 +544,7 @@ jg .L11 #if defined(TRMMKERNEL) && !defined(LEFT) - addq $1, KK + addq $4, KK #endif leaq (C, LDC, 4), C @@ -594,7 +594,7 @@ jg .L11 #if defined(TRMMKERNEL) && !defined(LEFT) - addq $1, KK + addq $4, KK #endif leaq (C, LDC, 4), C diff --git a/openblas_config_template.h b/openblas_config_template.h new file mode 100644 index 000000000..9fb80aa4f --- /dev/null +++ b/openblas_config_template.h @@ -0,0 +1,21 @@ +/*This is only for "make install" target.*/ + +#ifdef NEEDBUNDERSCORE +#define BLASFUNC(FUNC) FUNC##_ +#else +#define BLASFUNC(FUNC) FUNC +#endif + +#if defined(OS_WINDOWS) && defined(__64BIT__) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef USE64BITINT +typedef BLASLONG blasint; +#else +typedef int blasint; +#endif diff --git a/reference/Makefile b/reference/Makefile index 034f23244..d6368dcda 100644 --- a/reference/Makefile +++ b/reference/Makefile @@ -128,6 +128,8 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) +ifneq ($(NO_LAPACK), 1) + SBLASOBJS += \ sgetf2f.$(SUFFIX) sgetrff.$(SUFFIX) slauu2f.$(SUFFIX) slauumf.$(SUFFIX) \ spotf2f.$(SUFFIX) spotrff.$(SUFFIX) strti2f.$(SUFFIX) strtrif.$(SUFFIX) \ @@ -160,6 +162,7 @@ XBLASOBJS += xpotf2f.$(SUFFIX) 
xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \ xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \ +endif include $(TOPDIR)/Makefile.tail diff --git a/utest/Makefile b/utest/Makefile index 9d512b877..e7c5f3412 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system TARGET=openblas_utest CUNIT_LIB=/usr/local/lib/libcunit.a -OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o +OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o all : run_test $(TARGET): $(OBJS) - $(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) + $(FC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) run_test: $(TARGET) ./$(TARGET) diff --git a/utest/common_utest.h b/utest/common_utest.h index 3e9ecb422..1332ef6ab 100644 --- a/utest/common_utest.h +++ b/utest/common_utest.h @@ -57,4 +57,8 @@ void test_caxpy_inc_0(void); void test_zdotu_n_1(void); void test_zdotu_offset_1(void); +void test_drotmg(void); + +void test_dsdot_n_1(void); + #endif diff --git a/utest/main.c b/utest/main.c index f6ecf3cc0..135709507 100644 --- a/utest/main.c +++ b/utest/main.c @@ -54,7 +54,10 @@ CU_TestInfo test_level1[]={ {"Testing zdotu with n == 1",test_zdotu_n_1}, {"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1}, - + + {"Testing drotmg",test_drotmg}, + + {"Testing dsdot with n == 1",test_dsdot_n_1}, CU_TEST_INFO_NULL, }; diff --git a/utest/test_dsdot.c b/utest/test_dsdot.c new file mode 100644 index 000000000..8df7380be --- /dev/null +++ b/utest/test_dsdot.c @@ -0,0 +1,50 @@ +/***************************************************************************** +Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the ISCAS nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+**********************************************************************************/
+
+#include "common_utest.h"
+
+/* Calls dsdot through BLASFUNC and BLASFUNC_REF with identical one-element vectors and checks that the results agree. */
+void test_dsdot_n_1()
+{
+	float xval= 0.172555164; /* the single element of the first vector */
+	float yval= -0.0138700781; /* the single element of the second vector */
+	int len=1, stride_x=1, stride_y=1;
+	double res1, res2;
+
+	/* Identical arguments are handed to both implementations. */
+	res1=BLASFUNC(dsdot)(&len, &xval, &stride_x, &yval, &stride_y);
+	res2=BLASFUNC_REF(dsdot)(&len, &xval, &stride_x, &yval, &stride_y);
+
+	/* The two results may differ by at most CHECK_EPS. */
+	CU_ASSERT_DOUBLE_EQUAL(res1, res2, CHECK_EPS);
+
+}
diff --git a/utest/test_rotmg.c b/utest/test_rotmg.c
new file mode 100644
index 000000000..e51e6b299
--- /dev/null
+++ b/utest/test_rotmg.c
@@ -0,0 +1,60 @@
+/*****************************************************************************
+Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+   3. Neither the name of the ISCAS nor the names of its contributors may
+      be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+**********************************************************************************/
+
+#include "common_utest.h"
+
+/* Runs drotmg from OpenBLAS and from the reference BLAS on one fixed input and checks that every output agrees. */
+void test_drotmg()
+{
+	double te_d1, te_d2, te_x1, te_y1; /* te_*: arguments of the OpenBLAS call */
+	double tr_d1, tr_d2, tr_x1, tr_y1; /* tr_*: arguments of the reference call */
+	/* Zero-fill both arrays: drotmg does not store every param entry for every
+	   flag value, and all five entries are compared below regardless. */
+	double te_param[5]={0}, tr_param[5]={0};
+	int i=0;
+	te_d1= tr_d1=0.21149573940783739;
+	te_d2= tr_d2=0.046892057172954082;
+	te_x1= tr_x1=-0.42272687517106533;
+	te_y1= tr_y1=0.42211309121921659;
+	BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); /* OpenBLAS */
+	BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param); /* reference */
+
+	/* The scalar arguments must hold matching values after both calls. */
+	CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS);
+	CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS);
+	CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS);
+	CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS);
+	/* So must every entry of the param array. */
+	for(i=0; i<5; i++){
+		CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS);
+	}
+}