Merge branch 'loongson3a' of github.com:xianyi/OpenBLAS into loongson3a
This commit is contained in:
commit
68532fa9ec
|
@ -1,8 +1,13 @@
|
||||||
|
*.obj
|
||||||
|
*.lib
|
||||||
|
*.dll
|
||||||
|
*.def
|
||||||
*.o
|
*.o
|
||||||
lapack-3.1.1
|
lapack-3.1.1
|
||||||
lapack-3.1.1.tgz
|
lapack-3.1.1.tgz
|
||||||
*.so
|
*.so
|
||||||
*.a
|
*.a
|
||||||
|
.svn
|
||||||
*~
|
*~
|
||||||
config.h
|
config.h
|
||||||
Makefile.conf
|
Makefile.conf
|
||||||
|
|
|
@ -1,13 +1,40 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.1 alpha2(in development)
|
Version 0.1 alpha2
|
||||||
|
23-Jun-2011
|
||||||
|
|
||||||
common:
|
common:
|
||||||
*
|
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||||
|
could include this header successfully(Refs issue #13 on github)
|
||||||
|
* Fixed the SEGFAULT bug on 64 cores. On SMP server, the number
|
||||||
|
of CPUs or cores should be less than or equal to 64.(Refs issue #14
|
||||||
|
on github)
|
||||||
|
* Support "void goto_set_num_threads(int num_threads)" and "void
|
||||||
|
openblas_set_num_threads(int num_threads)" when USE_OPENMP=1
|
||||||
|
* Added extern "C" to support C++. Thanks to Tasio for the patch (Refs
|
||||||
|
issue #21 on github)
|
||||||
|
* Provided an error message when the arch is not supported.(Refs
|
||||||
|
issue #19 on github)
|
||||||
|
* Fixed issue #23. Fixed a bug of f_check script about generating link flags.
|
||||||
|
* Added openblas_set_num_threads for Fortran.
|
||||||
|
* Fixed #25 a wrong result of rotmg.
|
||||||
|
* Fixed a bug about detecting underscore prefix in c_check.
|
||||||
|
* Print the wall time (cycles) with enabling FUNCTION_PROFILE
|
||||||
|
* Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1
|
||||||
|
* Added install target. You can use "make install". (Refs #20)
|
||||||
|
|
||||||
|
|
||||||
x86/x86_64:
|
x86/x86_64:
|
||||||
*
|
* Fixed #28 a wrong result of dsdot on x86_64.
|
||||||
|
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
|
||||||
|
* Fixed #33 ztrmm bug on Nehalem.
|
||||||
|
* Worked around #27, the low-performance axpy issue with small input sizes & multithreading.
|
||||||
|
|
||||||
MIPS64:
|
MIPS64:
|
||||||
*
|
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||||
|
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
|
||||||
|
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.1 alpha1
|
Version 0.1 alpha1
|
||||||
20-Mar-2011
|
20-Mar-2011
|
||||||
|
|
26
Makefile
26
Makefile
|
@ -15,6 +15,10 @@ ifdef SANITY_CHECK
|
||||||
BLASDIRS += reference
|
BLASDIRS += reference
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef PREFIX
|
||||||
|
PREFIX = /opt/OpenBLAS
|
||||||
|
endif
|
||||||
|
|
||||||
SUBDIRS = $(BLASDIRS)
|
SUBDIRS = $(BLASDIRS)
|
||||||
ifneq ($(NO_LAPACK), 1)
|
ifneq ($(NO_LAPACK), 1)
|
||||||
SUBDIRS += lapack
|
SUBDIRS += lapack
|
||||||
|
@ -22,8 +26,8 @@ endif
|
||||||
|
|
||||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||||
|
|
||||||
.PHONY : all libs netlib test ctest shared
|
.PHONY : all libs netlib test ctest shared install
|
||||||
.NOTPARALLEL : all libs prof lapack-test
|
.NOTPARALLEL : all libs prof lapack-test install
|
||||||
|
|
||||||
all :: libs netlib tests shared
|
all :: libs netlib tests shared
|
||||||
@echo
|
@echo
|
||||||
|
@ -70,7 +74,7 @@ ifeq ($(OSNAME), Darwin)
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
$(MAKE) -C exports dll
|
$(MAKE) -C exports dll
|
||||||
# -ln -fs $(LIBDLLNAME) libopenblas.dll
|
-ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
$(MAKE) -C exports dll
|
$(MAKE) -C exports dll
|
||||||
|
@ -96,18 +100,26 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
libs :
|
libs :
|
||||||
|
ifeq ($(CORE), UNKOWN)
|
||||||
|
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||||
|
endif
|
||||||
-ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX)
|
-ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX)
|
||||||
for d in $(SUBDIRS) ; \
|
for d in $(SUBDIRS) ; \
|
||||||
do if test -d $$d; then \
|
do if test -d $$d; then \
|
||||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||||
fi; \
|
fi; \
|
||||||
done
|
done
|
||||||
|
#Save the config files for installation
|
||||||
|
cp Makefile.conf Makefile.conf_last
|
||||||
|
cp config.h config_last.h
|
||||||
ifdef DYNAMIC_ARCH
|
ifdef DYNAMIC_ARCH
|
||||||
$(MAKE) -C kernel commonlibs || exit 1
|
$(MAKE) -C kernel commonlibs || exit 1
|
||||||
for d in $(DYNAMIC_CORE) ; \
|
for d in $(DYNAMIC_CORE) ; \
|
||||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||||
done
|
done
|
||||||
|
echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||||
endif
|
endif
|
||||||
|
touch lib.grd
|
||||||
|
|
||||||
prof : prof_blas prof_lapack
|
prof : prof_blas prof_lapack
|
||||||
|
|
||||||
|
@ -227,19 +239,23 @@ lapack-test :
|
||||||
|
|
||||||
dummy :
|
dummy :
|
||||||
|
|
||||||
|
install :
|
||||||
|
$(MAKE) -f Makefile.install install
|
||||||
|
|
||||||
clean ::
|
clean ::
|
||||||
@for d in $(SUBDIRS_ALL) ; \
|
@for d in $(SUBDIRS_ALL) ; \
|
||||||
do if test -d $$d; then \
|
do if test -d $$d; then \
|
||||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||||
fi; \
|
fi; \
|
||||||
done
|
done
|
||||||
ifdef DYNAMIC_ARCH
|
#ifdef DYNAMIC_ARCH
|
||||||
@$(MAKE) -C kernel clean
|
@$(MAKE) -C kernel clean
|
||||||
endif
|
#endif
|
||||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h
|
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h
|
||||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||||
@if test -d lapack-3.1.1; then \
|
@if test -d lapack-3.1.1; then \
|
||||||
echo deleting lapack-3.1.1; \
|
echo deleting lapack-3.1.1; \
|
||||||
rm -rf lapack-3.1.1 ;\
|
rm -rf lapack-3.1.1 ;\
|
||||||
fi
|
fi
|
||||||
|
@rm -f *.grd Makefile.conf_last config_last.h
|
||||||
@echo Done.
|
@echo Done.
|
|
@ -0,0 +1,65 @@
|
||||||
|
TOPDIR = .
|
||||||
|
export GOTOBLAS_MAKEFILE = 1
|
||||||
|
-include $(TOPDIR)/Makefile.conf_last
|
||||||
|
include ./Makefile.system
|
||||||
|
|
||||||
|
.PHONY : install
|
||||||
|
.NOTPARALLEL : install
|
||||||
|
|
||||||
|
lib.grd :
|
||||||
|
$(error OpenBLAS: Please run "make" firstly)
|
||||||
|
|
||||||
|
install : lib.grd
|
||||||
|
@-mkdir -p $(PREFIX)
|
||||||
|
@echo Generating openblas_config.h in $(PREFIX)
|
||||||
|
#for inc
|
||||||
|
@echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h
|
||||||
|
@echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h
|
||||||
|
@cat config_last.h >> $(PREFIX)/openblas_config.h
|
||||||
|
@echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h
|
||||||
|
@cat openblas_config_template.h >> $(PREFIX)/openblas_config.h
|
||||||
|
@echo \#endif >> $(PREFIX)/openblas_config.h
|
||||||
|
|
||||||
|
@echo Generating f77blas.h in $(PREFIX)
|
||||||
|
@echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h
|
||||||
|
@echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h
|
||||||
|
@echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h
|
||||||
|
@cat common_interface.h >> $(PREFIX)/f77blas.h
|
||||||
|
@echo \#endif >> $(PREFIX)/f77blas.h
|
||||||
|
|
||||||
|
@echo Generating cblas.h in $(PREFIX)
|
||||||
|
@sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h
|
||||||
|
|
||||||
|
#for install static library
|
||||||
|
@echo Copy the static library to $(PREFIX)
|
||||||
|
@cp $(LIBNAME) $(PREFIX)
|
||||||
|
@-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX)
|
||||||
|
#for install shared library
|
||||||
|
@echo Copy the shared library to $(PREFIX)
|
||||||
|
ifeq ($(OSNAME), Linux)
|
||||||
|
-cp $(LIBSONAME) $(PREFIX)
|
||||||
|
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||||
|
endif
|
||||||
|
ifeq ($(OSNAME), FreeBSD)
|
||||||
|
-cp $(LIBSONAME) $(PREFIX)
|
||||||
|
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||||
|
endif
|
||||||
|
ifeq ($(OSNAME), NetBSD)
|
||||||
|
-cp $(LIBSONAME) $(PREFIX)
|
||||||
|
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||||
|
endif
|
||||||
|
ifeq ($(OSNAME), Darwin)
|
||||||
|
-cp $(LIBDYNNAME) $(PREFIX)
|
||||||
|
-ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib
|
||||||
|
endif
|
||||||
|
ifeq ($(OSNAME), WINNT)
|
||||||
|
-cp $(LIBDLLNAME) $(PREFIX)
|
||||||
|
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
|
||||||
|
endif
|
||||||
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
|
-cp $(LIBDLLNAME) $(PREFIX)
|
||||||
|
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
|
||||||
|
endif
|
||||||
|
|
||||||
|
@echo Install OK!
|
||||||
|
|
|
@ -91,6 +91,9 @@ VERSION = 0.1alpha2
|
||||||
# SANITY_CHECK to compare the result with reference BLAS.
|
# SANITY_CHECK to compare the result with reference BLAS.
|
||||||
# UTEST_CHECK = 1
|
# UTEST_CHECK = 1
|
||||||
|
|
||||||
|
# The installation directory.
|
||||||
|
# PREFIX = /opt/OpenBLAS
|
||||||
|
|
||||||
# Common Optimization Flag; -O2 is enough.
|
# Common Optimization Flag; -O2 is enough.
|
||||||
# DEBUG = 1
|
# DEBUG = 1
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,10 @@ ifdef TARGET
|
||||||
GETARCH_FLAGS += -DFORCE_$(TARGET)
|
GETARCH_FLAGS += -DFORCE_$(TARGET)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef INTERFACE64
|
||||||
|
GETARCH_FLAGS += -DUSE64BITINT
|
||||||
|
endif
|
||||||
|
|
||||||
# This operation is expensive, so execution should be once.
|
# This operation is expensive, so execution should be once.
|
||||||
ifndef GOTOBLAS_MAKEFILE
|
ifndef GOTOBLAS_MAKEFILE
|
||||||
export GOTOBLAS_MAKEFILE = 1
|
export GOTOBLAS_MAKEFILE = 1
|
||||||
|
@ -185,7 +189,7 @@ ifeq ($(C_COMPILER), INTEL)
|
||||||
CCOMMON_OPT += -wd981
|
CCOMMON_OPT += -wd981
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef USE_OPENMP
|
ifeq ($(USE_OPENMP), 1)
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
CCOMMON_OPT += -fopenmp
|
CCOMMON_OPT += -fopenmp
|
||||||
endif
|
endif
|
||||||
|
@ -489,7 +493,8 @@ endif
|
||||||
|
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
ifdef INTERFACE64
|
ifdef INTERFACE64
|
||||||
CCOMMON_OPT += -DUSE64BITINT
|
CCOMMON_OPT +=
|
||||||
|
#-DUSE64BITINT
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -510,6 +515,10 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||||
CCOMMON_OPT += -DDYNAMIC_ARCH
|
CCOMMON_OPT += -DDYNAMIC_ARCH
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(NO_LAPACK), 1)
|
||||||
|
CCOMMON_OPT += -DNO_LAPACK
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
CCOMMON_OPT += -DSMP_SERVER
|
CCOMMON_OPT += -DSMP_SERVER
|
||||||
|
|
||||||
|
|
30
README
30
README
|
@ -8,7 +8,9 @@ Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||||
Or,
|
Or,
|
||||||
check out codes from git://github.com/xianyi/OpenBLAS.git
|
check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||||
1)Normal compile
|
1)Normal compile
|
||||||
Please read GotoBLAS_02QuickInstall.txt or type "make"
|
(a) type "make" to detect the CPU automatically.
|
||||||
|
or
|
||||||
|
(b) type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt.
|
||||||
|
|
||||||
2)Cross compile
|
2)Cross compile
|
||||||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
||||||
|
@ -20,6 +22,11 @@ make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-g
|
||||||
3)Debug version
|
3)Debug version
|
||||||
make DEBUG=1
|
make DEBUG=1
|
||||||
|
|
||||||
|
4)Install to the directory (Optional)
|
||||||
|
e.g.
|
||||||
|
make install PREFIX=your_installation_directory
|
||||||
|
The default directory is /opt/OpenBLAS
|
||||||
|
|
||||||
3.Support CPU & OS
|
3.Support CPU & OS
|
||||||
Please read GotoBLAS_01Readme.txt
|
Please read GotoBLAS_01Readme.txt
|
||||||
|
|
||||||
|
@ -39,13 +46,17 @@ export GOTO_NUM_THREADS=4
|
||||||
or
|
or
|
||||||
export OMP_NUM_THREADS=4
|
export OMP_NUM_THREADS=4
|
||||||
|
|
||||||
The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||||
|
|
||||||
|
If you compile this lib with USE_OPENMP=1, you should set only the OMP_NUM_THREADS environment variable.
|
||||||
|
|
||||||
4.2 Set the number of threads with calling functions. for example,
|
4.2 Set the number of threads with calling functions. for example,
|
||||||
void goto_set_num_threads(int num_threads);
|
void goto_set_num_threads(int num_threads);
|
||||||
or
|
or
|
||||||
void openblas_set_num_threads(int num_threads);
|
void openblas_set_num_threads(int num_threads);
|
||||||
|
|
||||||
|
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
|
||||||
|
|
||||||
5.Report Bugs
|
5.Report Bugs
|
||||||
Please add an issue in https://github.com/xianyi/OpenBLAS/issues
|
Please add an issue in https://github.com/xianyi/OpenBLAS/issues
|
||||||
|
|
||||||
|
@ -56,4 +67,17 @@ Optimization on ICT Loongson 3A CPU
|
||||||
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
||||||
|
|
||||||
8.ChangeLog
|
8.ChangeLog
|
||||||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
|
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
|
||||||
|
|
||||||
|
9.Known Issues
|
||||||
|
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||||
|
is 64. On 32 bits, it is 32.
|
||||||
|
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully.
|
||||||
|
|
||||||
|
10. Specification of Git Branches
|
||||||
|
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
|
||||||
|
Now, there are 4 branches in github.com.
|
||||||
|
* The master branch. This is the main branch, reflecting a production-ready state.
|
||||||
|
* The develop branch. This is the main branch reflecting the latest delivered development changes for the next release.
|
||||||
|
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future.
|
||||||
|
* The gh-pages branch. This is for web pages
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
Force Target Examples:
|
||||||
|
|
||||||
|
make TARGET=NEHALEM
|
||||||
|
make TARGET=LOONGSON3A BINARY=64
|
||||||
|
make TARGET=ISTANBUL
|
||||||
|
|
||||||
|
Supported List:
|
||||||
|
1.X86/X86_64
|
||||||
|
a)Intel CPU:
|
||||||
|
P2
|
||||||
|
COPPERMINE
|
||||||
|
KATMAI
|
||||||
|
NORTHWOOD
|
||||||
|
PRESCOTT
|
||||||
|
BANIAS
|
||||||
|
YONAH
|
||||||
|
CORE2
|
||||||
|
PENRYN
|
||||||
|
DUNNINGTON
|
||||||
|
NEHALEM
|
||||||
|
ATOM
|
||||||
|
|
||||||
|
b)AMD CPU:
|
||||||
|
ATHLON
|
||||||
|
OPTERON
|
||||||
|
OPTERON_SSE3
|
||||||
|
BARCELONA
|
||||||
|
SHANGHAI
|
||||||
|
ISTANBUL
|
||||||
|
|
||||||
|
c)VIA CPU:
|
||||||
|
SSE_GENERIC
|
||||||
|
VIAC3
|
||||||
|
NANO
|
||||||
|
|
||||||
|
2.Power CPU:
|
||||||
|
POWER4
|
||||||
|
POWER5
|
||||||
|
POWER6
|
||||||
|
PPCG4
|
||||||
|
PPC970
|
||||||
|
PPC970MP
|
||||||
|
PPC440
|
||||||
|
PPC440FP2
|
||||||
|
CELL
|
||||||
|
|
||||||
|
3.MIPS64 CPU:
|
||||||
|
SICORTEX
|
||||||
|
LOONGSON3A
|
||||||
|
|
||||||
|
4.IA64 CPU:
|
||||||
|
ITANIUM2
|
||||||
|
|
||||||
|
5.SPARC CPU:
|
||||||
|
SPARC
|
||||||
|
SPARCV7
|
||||||
|
|
2
c_check
2
c_check
|
@ -149,7 +149,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/);
|
||||||
|
|
||||||
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
||||||
|
|
||||||
$data =~ /globl\ ([_\.]*)(.*)/;
|
$data =~ /globl\s([_\.]*)(.*)/;
|
||||||
|
|
||||||
$need_fu = $1;
|
$need_fu = $1;
|
||||||
|
|
||||||
|
|
14
cblas.h
14
cblas.h
|
@ -1,6 +1,14 @@
|
||||||
#ifndef CBLAS_H
|
#ifndef CBLAS_H
|
||||||
#define CBLAS_H
|
#define CBLAS_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
/* Assume C declarations for C++ */
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
#define CBLAS_INDEX size_t
|
#define CBLAS_INDEX size_t
|
||||||
|
|
||||||
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
|
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
|
||||||
|
@ -270,4 +278,10 @@ void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANS
|
||||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||||
|
|
||||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
10
common.h
10
common.h
|
@ -39,6 +39,11 @@
|
||||||
#ifndef COMMON_H
|
#ifndef COMMON_H
|
||||||
#define COMMON_H
|
#define COMMON_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
/* Assume C declarations for C++ */
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
#ifndef _GNU_SOURCE
|
#ifndef _GNU_SOURCE
|
||||||
#define _GNU_SOURCE
|
#define _GNU_SOURCE
|
||||||
#endif
|
#endif
|
||||||
|
@ -607,4 +612,9 @@ extern int gotoblas_profile;
|
||||||
#define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
#define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -60,4 +60,8 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *,
|
||||||
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||||
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||||
|
|
||||||
|
void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *);
|
||||||
|
|
||||||
|
double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
33
cpuid_x86.c
33
cpuid_x86.c
|
@ -1302,24 +1302,25 @@ int get_coretype(void){
|
||||||
case 13:
|
case 13:
|
||||||
return CORE_DUNNINGTON;
|
return CORE_DUNNINGTON;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
switch (model) {
|
switch (model) {
|
||||||
case 5:
|
case 5:
|
||||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||||
// Xeon (Clarkdale), 32nm
|
// Xeon (Clarkdale), 32nm
|
||||||
return CORE_NEHALEM;
|
return CORE_NEHALEM;
|
||||||
case 12:
|
case 12:
|
||||||
//Xeon Processor 5600 (Westmere-EP)
|
//Xeon Processor 5600 (Westmere-EP)
|
||||||
return CORE_NEHALEM;
|
return CORE_NEHALEM;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case 15:
|
case 15:
|
||||||
if (model <= 0x2) return CORE_NORTHWOOD;
|
if (model <= 0x2) return CORE_NORTHWOOD;
|
||||||
return CORE_PRESCOTT;
|
else return CORE_PRESCOTT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX)
|
||||||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||||
|
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
|
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX)
|
||||||
ifndef NO_AFFINITY
|
ifndef NO_AFFINITY
|
||||||
COMMONOBJS += init.$(SUFFIX)
|
COMMONOBJS += init.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
@ -100,6 +100,9 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h
|
||||||
blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h
|
blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
|
openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <sys/mman.h>
|
//#include <sys/mman.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#ifndef USE_OPENMP
|
#ifndef USE_OPENMP
|
||||||
|
@ -49,6 +49,26 @@
|
||||||
|
|
||||||
int blas_server_avail = 0;
|
int blas_server_avail = 0;
|
||||||
|
|
||||||
|
void goto_set_num_threads(int num_threads) {
|
||||||
|
|
||||||
|
if (num_threads < 1) num_threads = blas_num_threads;
|
||||||
|
|
||||||
|
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||||
|
|
||||||
|
if (num_threads > blas_num_threads) {
|
||||||
|
blas_num_threads = num_threads;
|
||||||
|
}
|
||||||
|
|
||||||
|
blas_cpu_number = num_threads;
|
||||||
|
|
||||||
|
omp_set_num_threads(blas_cpu_number);
|
||||||
|
|
||||||
|
}
|
||||||
|
void openblas_set_num_threads(int num_threads) {
|
||||||
|
|
||||||
|
goto_set_num_threads(num_threads);
|
||||||
|
}
|
||||||
|
|
||||||
int blas_thread_init(void){
|
int blas_thread_init(void){
|
||||||
|
|
||||||
blas_get_cpu_number();
|
blas_get_cpu_number();
|
||||||
|
|
|
@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) {
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***
|
||||||
|
Known issue: The number of CPUs/cores should be at most
|
||||||
|
8*sizeof(unsigned long). On 64 bits, the limit
|
||||||
|
is 64. On 32 bits, it is 32.
|
||||||
|
***/
|
||||||
static inline unsigned long get_cpumap(int node) {
|
static inline unsigned long get_cpumap(int node) {
|
||||||
|
|
||||||
int infile;
|
int infile;
|
||||||
unsigned long affinity;
|
unsigned long affinity;
|
||||||
char name[160];
|
char name[160];
|
||||||
|
char cpumap[160];
|
||||||
char *p, *dummy;
|
char *p, *dummy;
|
||||||
|
int i=0;
|
||||||
|
|
||||||
sprintf(name, CPUMAP_NAME, node);
|
sprintf(name, CPUMAP_NAME, node);
|
||||||
|
|
||||||
infile = open(name, O_RDONLY);
|
infile = open(name, O_RDONLY);
|
||||||
|
@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) {
|
||||||
|
|
||||||
if (infile != -1) {
|
if (infile != -1) {
|
||||||
|
|
||||||
read(infile, name, sizeof(name));
|
read(infile, cpumap, sizeof(cpumap));
|
||||||
|
p = cpumap;
|
||||||
|
while (*p != '\n' && i<160){
|
||||||
|
if(*p != ',') {
|
||||||
|
name[i++]=*p;
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
}
|
||||||
p = name;
|
p = name;
|
||||||
|
|
||||||
while ((*p == '0') || (*p == ',')) p++;
|
// while ((*p == '0') || (*p == ',')) p++;
|
||||||
|
|
||||||
affinity = strtol(p, &dummy, 16);
|
affinity = strtoul(p, &dummy, 16);
|
||||||
|
|
||||||
close(infile);
|
close(infile);
|
||||||
}
|
}
|
||||||
|
@ -347,7 +360,13 @@ static void disable_hyperthread(void) {
|
||||||
unsigned long share;
|
unsigned long share;
|
||||||
int cpu;
|
int cpu;
|
||||||
|
|
||||||
common -> avail = (1UL << common -> num_procs) - 1;
|
if(common->num_procs > 64){
|
||||||
|
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs);
|
||||||
|
exit(1);
|
||||||
|
}else if(common->num_procs == 64){
|
||||||
|
common -> avail = 0xFFFFFFFFFFFFFFFFUL;
|
||||||
|
}else
|
||||||
|
common -> avail = (1UL << common -> num_procs) - 1;
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail);
|
fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail);
|
||||||
|
@ -376,7 +395,13 @@ static void disable_affinity(void) {
|
||||||
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
|
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
lprocmask = (1UL << common -> final_num_procs) - 1;
|
if(common->final_num_procs > 64){
|
||||||
|
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs);
|
||||||
|
exit(1);
|
||||||
|
}else if(common->final_num_procs == 64){
|
||||||
|
lprocmask = 0xFFFFFFFFFFFFFFFFUL;
|
||||||
|
}else
|
||||||
|
lprocmask = (1UL << common -> final_num_procs) - 1;
|
||||||
|
|
||||||
#ifndef USE_OPENMP
|
#ifndef USE_OPENMP
|
||||||
lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
|
lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#ifdef SMP_SERVER
|
||||||
|
#ifdef OS_LINUX
|
||||||
|
|
||||||
|
extern void openblas_set_num_threads(int num_threads) ;
|
||||||
|
|
||||||
|
void NAME(int* num_threads){
|
||||||
|
openblas_set_num_threads(*num_threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
|
@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) {
|
||||||
if (cycles > 0) {
|
if (cycles > 0) {
|
||||||
|
|
||||||
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n");
|
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n");
|
||||||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n");
|
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n");
|
||||||
|
|
||||||
for (i = 0; i < MAX_PROF_TABLE; i ++) {
|
for (i = 0; i < MAX_PROF_TABLE; i ++) {
|
||||||
if (function_profile_table[i].calls) {
|
if (function_profile_table[i].calls) {
|
||||||
#ifndef OS_WINDOWS
|
#ifndef OS_WINDOWS
|
||||||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n",
|
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n",
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n",
|
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n",
|
||||||
#endif
|
#endif
|
||||||
func_table[i],
|
func_table[i],
|
||||||
function_profile_table[i].calls,
|
function_profile_table[i].calls,
|
||||||
(double)function_profile_table[i].cycles / (double)cycles * 100.,
|
(double)function_profile_table[i].cycles / (double)cycles * 100.,
|
||||||
(double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100.,
|
(double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100.,
|
||||||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles
|
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles,
|
||||||
|
function_profile_table[i].cycles
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME)
|
||||||
zip : dll
|
zip : dll
|
||||||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
||||||
|
|
||||||
dll : libgoto2.dll
|
dll : ../$(LIBDLLNAME)
|
||||||
|
#libgoto2.dll
|
||||||
|
|
||||||
dll2 : libgoto2_shared.dll
|
dll2 : libgoto2_shared.dll
|
||||||
|
|
||||||
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||||
$(RANLIB) ../$(LIBNAME)
|
$(RANLIB) ../$(LIBNAME)
|
||||||
ifeq ($(BINARY32), 1)
|
ifeq ($(BINARY32), 1)
|
||||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||||
-lib /machine:i386 /def:libgoto2.def
|
-lib /machine:i386 /def:libgoto2.def
|
||||||
else
|
else
|
||||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||||
-lib /machine:X64 /def:libgoto2.def
|
-lib /machine:X64 /def:libgoto2.def
|
||||||
endif
|
endif
|
||||||
|
@ -84,7 +85,7 @@ libgoto_hpl.def : gensymbol
|
||||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||||
|
|
||||||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
||||||
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o $(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||||
|
|
||||||
symbol.$(SUFFIX) : symbol.S
|
symbol.$(SUFFIX) : symbol.S
|
||||||
$(CC) $(CFLAGS) -c -o $(@F) $^
|
$(CC) $(CFLAGS) -c -o $(@F) $^
|
||||||
|
|
1
f_check
1
f_check
|
@ -274,6 +274,7 @@ if ($link ne "") {
|
||||||
&& ($flags !~ /kernel32/)
|
&& ($flags !~ /kernel32/)
|
||||||
&& ($flags !~ /advapi32/)
|
&& ($flags !~ /advapi32/)
|
||||||
&& ($flags !~ /shell32/)
|
&& ($flags !~ /shell32/)
|
||||||
|
&& ($flags !~ /^\-l$/)
|
||||||
) {
|
) {
|
||||||
$linker_l .= $flags . " ";
|
$linker_l .= $flags . " ";
|
||||||
}
|
}
|
||||||
|
|
11
getarch.c
11
getarch.c
|
@ -604,30 +604,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#ifndef POWER
|
#ifndef POWER
|
||||||
#define POWER
|
#define POWER
|
||||||
#endif
|
#endif
|
||||||
|
#define OPENBLAS_SUPPORTED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__i386__) || (__x86_64__)
|
#if defined(__i386__) || (__x86_64__)
|
||||||
#include "cpuid_x86.c"
|
#include "cpuid_x86.c"
|
||||||
|
#define OPENBLAS_SUPPORTED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __ia64__
|
#ifdef __ia64__
|
||||||
#include "cpuid_ia64.c"
|
#include "cpuid_ia64.c"
|
||||||
|
#define OPENBLAS_SUPPORTED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __alpha
|
#ifdef __alpha
|
||||||
#include "cpuid_alpha.c"
|
#include "cpuid_alpha.c"
|
||||||
|
#define OPENBLAS_SUPPORTED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef POWER
|
#ifdef POWER
|
||||||
#include "cpuid_power.c"
|
#include "cpuid_power.c"
|
||||||
|
#define OPENBLAS_SUPPORTED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef sparc
|
#ifdef sparc
|
||||||
#include "cpuid_sparc.c"
|
#include "cpuid_sparc.c"
|
||||||
|
#define OPENBLAS_SUPPORTED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __mips__
|
#ifdef __mips__
|
||||||
#include "cpuid_mips.c"
|
#include "cpuid_mips.c"
|
||||||
|
#define OPENBLAS_SUPPORTED
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef OPENBLAS_SUPPORTED
|
||||||
|
#error "This arch/CPU is not supported by OpenBLAS."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -30,6 +30,10 @@ int main(int argc, char **argv) {
|
||||||
printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double)));
|
printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double)));
|
||||||
printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float)));
|
printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float)));
|
||||||
printf("#define ZLOCAL_BUFFER_SIZE\t%ld\n", (ZGEMM_DEFAULT_Q * ZGEMM_DEFAULT_UNROLL_N * 2 * 2 * sizeof(double)));
|
printf("#define ZLOCAL_BUFFER_SIZE\t%ld\n", (ZGEMM_DEFAULT_Q * ZGEMM_DEFAULT_UNROLL_N * 2 * 2 * sizeof(double)));
|
||||||
|
|
||||||
|
#ifdef USE64BITINT
|
||||||
|
printf("#define USE64BITINT\n");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||||
//In that case, the threads would be dependent.
|
//In that case, the threads would be dependent.
|
||||||
if (incx == 0 || incy == 0)
|
if (incx == 0 || incy == 0)
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
|
||||||
|
//Temporarily walk around the low performance issue with small imput size & multithreads.
|
||||||
|
if (n <= 10000)
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
|
||||||
BLASLONG n = *N;
|
BLASLONG n = *N;
|
||||||
BLASLONG incx = *INCX;
|
BLASLONG incx = *INCX;
|
||||||
BLASLONG incy = *INCY;
|
BLASLONG incy = *INCY;
|
||||||
|
double ret = 0.0;
|
||||||
|
|
||||||
PRINT_DEBUG_NAME;
|
PRINT_DEBUG_NAME;
|
||||||
|
|
||||||
|
@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
|
||||||
if (incx < 0) x -= (n - 1) * incx;
|
if (incx < 0) x -= (n - 1) * incx;
|
||||||
if (incy < 0) y -= (n - 1) * incy;
|
if (incy < 0) y -= (n - 1) * incy;
|
||||||
|
|
||||||
return DSDOT_K(n, x, incx, y, incy);
|
ret=DSDOT_K(n, x, incx, y, incy);
|
||||||
|
|
||||||
FUNCTION_PROFILE_END(1, n, n);
|
FUNCTION_PROFILE_END(1, n, n);
|
||||||
|
|
||||||
IDEBUG_END;
|
IDEBUG_END;
|
||||||
|
|
||||||
return 0;
|
return ret;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
|
double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
|
||||||
|
|
||||||
|
double ret = 0.0;
|
||||||
|
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
||||||
|
@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
|
||||||
if (incx < 0) x -= (n - 1) * incx;
|
if (incx < 0) x -= (n - 1) * incx;
|
||||||
if (incy < 0) y -= (n - 1) * incy;
|
if (incy < 0) y -= (n - 1) * incy;
|
||||||
|
|
||||||
return DSDOT_K(n, x, incx, y, incy);
|
ret=DSDOT_K(n, x, incx, y, incy);
|
||||||
|
|
||||||
FUNCTION_PROFILE_END(1, n, n);
|
FUNCTION_PROFILE_END(1, n, n);
|
||||||
|
|
||||||
IDEBUG_END;
|
IDEBUG_END;
|
||||||
|
|
||||||
return 0;
|
return ret;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,12 @@
|
||||||
#define GAMSQ 16777216.e0
|
#define GAMSQ 16777216.e0
|
||||||
#define RGAMSQ 5.9604645e-8
|
#define RGAMSQ 5.9604645e-8
|
||||||
|
|
||||||
|
#ifdef DOUBLE
|
||||||
|
#define ABS(x) fabs(x)
|
||||||
|
#else
|
||||||
|
#define ABS(x) fabsf(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef CBLAS
|
#ifndef CBLAS
|
||||||
|
|
||||||
void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){
|
void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){
|
||||||
|
@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
|
||||||
dq2 = dp2 * dy1;
|
dq2 = dp2 * dy1;
|
||||||
dq1 = dp1 * *dx1;
|
dq1 = dp1 * *dx1;
|
||||||
|
|
||||||
if (! (abs(dq1) > abs(dq2))) goto L40;
|
if (! (ABS(dq1) > ABS(dq2))) goto L40;
|
||||||
|
|
||||||
dh21 = -(dy1) / *dx1;
|
dh21 = -(dy1) / *dx1;
|
||||||
dh12 = dp2 / dp1;
|
dh12 = dp2 / dp1;
|
||||||
|
@ -140,7 +146,7 @@ L150:
|
||||||
goto L130;
|
goto L130;
|
||||||
|
|
||||||
L160:
|
L160:
|
||||||
if (! (abs(*dd2) <= RGAMSQ)) {
|
if (! (ABS(*dd2) <= RGAMSQ)) {
|
||||||
goto L190;
|
goto L190;
|
||||||
}
|
}
|
||||||
if (*dd2 == ZERO) {
|
if (*dd2 == ZERO) {
|
||||||
|
@ -157,7 +163,7 @@ L180:
|
||||||
goto L160;
|
goto L160;
|
||||||
|
|
||||||
L190:
|
L190:
|
||||||
if (! (abs(*dd2) >= GAMSQ)) {
|
if (! (ABS(*dd2) >= GAMSQ)) {
|
||||||
goto L220;
|
goto L220;
|
||||||
}
|
}
|
||||||
igo = 3;
|
igo = 3;
|
||||||
|
|
|
@ -53,6 +53,11 @@ SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX)
|
||||||
CCOMMON_OPT += -DTS=$(TSUFFIX)
|
CCOMMON_OPT += -DTS=$(TSUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h
|
||||||
|
ifneq ($(NO_LAPACK), 1)
|
||||||
|
KERNEL_INTERFACE += ../common_lapack.h
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
COMMONOBJS += cpuid.$(SUFFIX)
|
COMMONOBJS += cpuid.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h
|
||||||
setparam$(TSUFFIX).c : setparam-ref.c
|
setparam$(TSUFFIX).c : setparam-ref.c
|
||||||
sed 's/TS/$(TSUFFIX)/g' $< > $(@F)
|
sed 's/TS/$(TSUFFIX)/g' $< > $(@F)
|
||||||
|
|
||||||
kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h
|
kernel$(TSUFFIX).h : $(KERNEL_INTERFACE)
|
||||||
sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F)
|
sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F)
|
||||||
|
|
||||||
|
|
||||||
cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S
|
cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
|
@ -112,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
|
||||||
cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S
|
cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S
|
||||||
$(CC) -c $(PFLAGS) $< -o $(@F)
|
$(CC) -c $(PFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
ifdef DYNAMIC_ARCH
|
#ifdef DYNAMIC_ARCH
|
||||||
clean ::
|
clean ::
|
||||||
@rm -f setparam_*.c kernel_*.h setparam.h kernel.h
|
@rm -f setparam_*.c kernel_*.h setparam.h kernel.h
|
||||||
|
|
||||||
endif
|
#endif
|
||||||
|
|
||||||
include $(TOPDIR)/Makefile.tail
|
include $(TOPDIR)/Makefile.tail
|
||||||
|
|
|
@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
|
||||||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
|
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
|
||||||
|
|
||||||
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
||||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
|
||||||
|
|
||||||
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
||||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||||
|
|
|
@ -300,7 +300,11 @@
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
j $31
|
|
||||||
ADD s1, s1, s2
|
ADD s1, s1, s2
|
||||||
|
#ifdef DSDOT
|
||||||
|
cvt.d.s s1, s1
|
||||||
|
#endif
|
||||||
|
j $31
|
||||||
|
NOP
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -101,7 +101,11 @@ gotoblas_t TABLE_NAME = {
|
||||||
#endif
|
#endif
|
||||||
ssymm_outcopyTS, ssymm_oltcopyTS,
|
ssymm_outcopyTS, ssymm_oltcopyTS,
|
||||||
|
|
||||||
|
#ifndef NO_LAPACK
|
||||||
sneg_tcopyTS, slaswp_ncopyTS,
|
sneg_tcopyTS, slaswp_ncopyTS,
|
||||||
|
#else
|
||||||
|
NULL,NULL,
|
||||||
|
#endif
|
||||||
|
|
||||||
0, 0, 0,
|
0, 0, 0,
|
||||||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
|
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
|
||||||
|
@ -147,7 +151,11 @@ gotoblas_t TABLE_NAME = {
|
||||||
#endif
|
#endif
|
||||||
dsymm_outcopyTS, dsymm_oltcopyTS,
|
dsymm_outcopyTS, dsymm_oltcopyTS,
|
||||||
|
|
||||||
|
#ifndef NO_LAPACK
|
||||||
dneg_tcopyTS, dlaswp_ncopyTS,
|
dneg_tcopyTS, dlaswp_ncopyTS,
|
||||||
|
#else
|
||||||
|
NULL, NULL,
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef EXPRECISION
|
#ifdef EXPRECISION
|
||||||
|
|
||||||
|
@ -195,7 +203,11 @@ gotoblas_t TABLE_NAME = {
|
||||||
#endif
|
#endif
|
||||||
qsymm_outcopyTS, qsymm_oltcopyTS,
|
qsymm_outcopyTS, qsymm_oltcopyTS,
|
||||||
|
|
||||||
|
#ifndef NO_LAPACK
|
||||||
qneg_tcopyTS, qlaswp_ncopyTS,
|
qneg_tcopyTS, qlaswp_ncopyTS,
|
||||||
|
#else
|
||||||
|
NULL, NULL,
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -286,7 +298,11 @@ gotoblas_t TABLE_NAME = {
|
||||||
chemm3m_oucopyrTS, chemm3m_olcopyrTS,
|
chemm3m_oucopyrTS, chemm3m_olcopyrTS,
|
||||||
chemm3m_oucopyiTS, chemm3m_olcopyiTS,
|
chemm3m_oucopyiTS, chemm3m_olcopyiTS,
|
||||||
|
|
||||||
|
#ifndef NO_LAPACK
|
||||||
cneg_tcopyTS, claswp_ncopyTS,
|
cneg_tcopyTS, claswp_ncopyTS,
|
||||||
|
#else
|
||||||
|
NULL, NULL,
|
||||||
|
#endif
|
||||||
|
|
||||||
0, 0, 0,
|
0, 0, 0,
|
||||||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
|
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
|
||||||
|
@ -375,7 +391,11 @@ gotoblas_t TABLE_NAME = {
|
||||||
zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
|
zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
|
||||||
zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
|
zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
|
||||||
|
|
||||||
|
#ifndef NO_LAPACK
|
||||||
zneg_tcopyTS, zlaswp_ncopyTS,
|
zneg_tcopyTS, zlaswp_ncopyTS,
|
||||||
|
#else
|
||||||
|
NULL, NULL,
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef EXPRECISION
|
#ifdef EXPRECISION
|
||||||
|
|
||||||
|
@ -466,7 +486,11 @@ gotoblas_t TABLE_NAME = {
|
||||||
xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
|
xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
|
||||||
xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
|
xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
|
||||||
|
|
||||||
|
#ifndef NO_LAPACK
|
||||||
xneg_tcopyTS, xlaswp_ncopyTS,
|
xneg_tcopyTS, xlaswp_ncopyTS,
|
||||||
|
#else
|
||||||
|
NULL, NULL,
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1541,5 +1541,8 @@
|
||||||
popl %ebx
|
popl %ebx
|
||||||
popl %esi
|
popl %esi
|
||||||
popl %edi
|
popl %edi
|
||||||
|
/*remove the hidden return value address from the stack.*/
|
||||||
|
popl %ecx
|
||||||
|
xchgl %ecx, 0(%esp)
|
||||||
ret
|
ret
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -1286,6 +1286,10 @@
|
||||||
haddps %xmm0, %xmm0
|
haddps %xmm0, %xmm0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef DSDOT
|
||||||
|
cvtss2sd %xmm0, %xmm0
|
||||||
|
#endif
|
||||||
|
|
||||||
RESTOREREGISTERS
|
RESTOREREGISTERS
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
|
@ -544,7 +544,7 @@
|
||||||
jg .L11
|
jg .L11
|
||||||
|
|
||||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||||
addq $1, KK
|
addq $4, KK
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
leaq (C, LDC, 4), C
|
leaq (C, LDC, 4), C
|
||||||
|
@ -594,7 +594,7 @@
|
||||||
jg .L11
|
jg .L11
|
||||||
|
|
||||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||||
addq $1, KK
|
addq $4, KK
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
leaq (C, LDC, 4), C
|
leaq (C, LDC, 4), C
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
/*This is only for "make install" target.*/
|
||||||
|
|
||||||
|
#ifdef NEEDBUNDERSCORE
|
||||||
|
#define BLASFUNC(FUNC) FUNC##_
|
||||||
|
#else
|
||||||
|
#define BLASFUNC(FUNC) FUNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(OS_WINDOWS) && defined(__64BIT__)
|
||||||
|
typedef long long BLASLONG;
|
||||||
|
typedef unsigned long long BLASULONG;
|
||||||
|
#else
|
||||||
|
typedef long BLASLONG;
|
||||||
|
typedef unsigned long BLASULONG;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE64BITINT
|
||||||
|
typedef BLASLONG blasint;
|
||||||
|
#else
|
||||||
|
typedef int blasint;
|
||||||
|
#endif
|
|
@ -128,6 +128,8 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
||||||
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
||||||
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
||||||
|
|
||||||
|
ifneq ($(NO_LAPACK), 1)
|
||||||
|
|
||||||
SBLASOBJS += \
|
SBLASOBJS += \
|
||||||
sgetf2f.$(SUFFIX) sgetrff.$(SUFFIX) slauu2f.$(SUFFIX) slauumf.$(SUFFIX) \
|
sgetf2f.$(SUFFIX) sgetrff.$(SUFFIX) slauu2f.$(SUFFIX) slauumf.$(SUFFIX) \
|
||||||
spotf2f.$(SUFFIX) spotrff.$(SUFFIX) strti2f.$(SUFFIX) strtrif.$(SUFFIX) \
|
spotf2f.$(SUFFIX) spotrff.$(SUFFIX) strti2f.$(SUFFIX) strtrif.$(SUFFIX) \
|
||||||
|
@ -160,6 +162,7 @@ XBLASOBJS +=
|
||||||
xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \
|
xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \
|
||||||
xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \
|
xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
include $(TOPDIR)/Makefile.tail
|
include $(TOPDIR)/Makefile.tail
|
||||||
|
|
||||||
|
|
|
@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system
|
||||||
TARGET=openblas_utest
|
TARGET=openblas_utest
|
||||||
CUNIT_LIB=/usr/local/lib/libcunit.a
|
CUNIT_LIB=/usr/local/lib/libcunit.a
|
||||||
|
|
||||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o
|
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o
|
||||||
|
|
||||||
all : run_test
|
all : run_test
|
||||||
|
|
||||||
$(TARGET): $(OBJS)
|
$(TARGET): $(OBJS)
|
||||||
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
|
$(FC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
|
||||||
|
|
||||||
run_test: $(TARGET)
|
run_test: $(TARGET)
|
||||||
./$(TARGET)
|
./$(TARGET)
|
||||||
|
|
|
@ -57,4 +57,8 @@ void test_caxpy_inc_0(void);
|
||||||
void test_zdotu_n_1(void);
|
void test_zdotu_n_1(void);
|
||||||
void test_zdotu_offset_1(void);
|
void test_zdotu_offset_1(void);
|
||||||
|
|
||||||
|
void test_drotmg(void);
|
||||||
|
|
||||||
|
void test_dsdot_n_1(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -54,7 +54,10 @@ CU_TestInfo test_level1[]={
|
||||||
|
|
||||||
{"Testing zdotu with n == 1",test_zdotu_n_1},
|
{"Testing zdotu with n == 1",test_zdotu_n_1},
|
||||||
{"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1},
|
{"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1},
|
||||||
|
|
||||||
|
{"Testing drotmg",test_drotmg},
|
||||||
|
|
||||||
|
{"Testing dsdot with n == 1",test_dsdot_n_1},
|
||||||
CU_TEST_INFO_NULL,
|
CU_TEST_INFO_NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "common_utest.h"
|
||||||
|
|
||||||
|
void test_dsdot_n_1()
|
||||||
|
{
|
||||||
|
float x= 0.172555164;
|
||||||
|
float y= -0.0138700781;
|
||||||
|
int incx=1;
|
||||||
|
int incy=1;
|
||||||
|
int n=1;
|
||||||
|
|
||||||
|
double res1=0.0f, res2=0.0f;
|
||||||
|
|
||||||
|
res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy);
|
||||||
|
res2=BLASFUNC_REF(dsdot)(&n, &x, &incx, &y, &incy);
|
||||||
|
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(res1, res2, CHECK_EPS);
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "common_utest.h"
|
||||||
|
|
||||||
|
void test_drotmg()
|
||||||
|
{
|
||||||
|
double te_d1, tr_d1;
|
||||||
|
double te_d2, tr_d2;
|
||||||
|
double te_x1, tr_x1;
|
||||||
|
double te_y1, tr_y1;
|
||||||
|
double te_param[5],tr_param[5];
|
||||||
|
int i=0;
|
||||||
|
te_d1= tr_d1=0.21149573940783739;
|
||||||
|
te_d2= tr_d2=0.046892057172954082;
|
||||||
|
te_x1= tr_x1=-0.42272687517106533;
|
||||||
|
te_y1= tr_y1=0.42211309121921659;
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param);
|
||||||
|
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS);
|
||||||
|
|
||||||
|
for(i=0; i<5; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue