Merge branch 'loongson3a' of github.com:xianyi/OpenBLAS into loongson3a
This commit is contained in:
commit
68532fa9ec
|
@ -1,8 +1,13 @@
|
|||
*.obj
|
||||
*.lib
|
||||
*.dll
|
||||
*.def
|
||||
*.o
|
||||
lapack-3.1.1
|
||||
lapack-3.1.1.tgz
|
||||
*.so
|
||||
*.a
|
||||
.svn
|
||||
*~
|
||||
config.h
|
||||
Makefile.conf
|
||||
|
|
|
@ -1,13 +1,40 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.1 alpha2(in development)
|
||||
Version 0.1 alpha2
|
||||
23-Jun-2011
|
||||
|
||||
common:
|
||||
*
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
could include this header successfully(Refs issue #13 on github)
|
||||
* Fixed the SEGFAULT bug on 64 cores. On SMP server, the number
|
||||
of CPUs or cores should be less than or equal to 64.(Refs issue #14
|
||||
on github)
|
||||
* Support "void goto_set_num_threads(int num_threads)" and "void
|
||||
openblas_set_num_threads(int num_threads)" when USE_OPENMP=1
|
||||
* Added extern "C" to support C++. Thanks to Tasio for the patch (Refs
|
||||
issue #21 on github)
|
||||
* Provided an error message when the arch is not supported.(Refs
|
||||
issue #19 on github)
|
||||
* Fixed issue #23. Fixed a bug of f_check script about generating link flags.
|
||||
* Added openblas_set_num_threads for Fortran.
|
||||
* Fixed #25 a wrong result of rotmg.
|
||||
* Fixed a bug about detecting underscore prefix in c_check.
|
||||
* Print the wall time (cycles) when FUNCTION_PROFILE is enabled
|
||||
* Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1
|
||||
* Added install target. You can use "make install". (Refs #20)
|
||||
|
||||
|
||||
x86/x86_64:
|
||||
*
|
||||
* Fixed #28 a wrong result of dsdot on x86_64.
|
||||
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6.
|
||||
* Fixed #33 ztrmm bug on Nehalem.
|
||||
* Worked around #27, the low-performance axpy issue with small input sizes & multithreading.
|
||||
|
||||
MIPS64:
|
||||
*
|
||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64.
|
||||
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2)
|
||||
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3)
|
||||
|
||||
====================================================================
|
||||
Version 0.1 alpha1
|
||||
20-Mar-2011
|
||||
|
|
26
Makefile
26
Makefile
|
@ -15,6 +15,10 @@ ifdef SANITY_CHECK
|
|||
BLASDIRS += reference
|
||||
endif
|
||||
|
||||
ifndef PREFIX
|
||||
PREFIX = /opt/OpenBLAS
|
||||
endif
|
||||
|
||||
SUBDIRS = $(BLASDIRS)
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
SUBDIRS += lapack
|
||||
|
@ -22,8 +26,8 @@ endif
|
|||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared
|
||||
.NOTPARALLEL : all libs prof lapack-test
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install
|
||||
|
||||
all :: libs netlib tests shared
|
||||
@echo
|
||||
|
@ -70,7 +74,7 @@ ifeq ($(OSNAME), Darwin)
|
|||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
$(MAKE) -C exports dll
|
||||
# -ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
-ln -fs $(LIBDLLNAME) libopenblas.dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
$(MAKE) -C exports dll
|
||||
|
@ -96,18 +100,26 @@ endif
|
|||
endif
|
||||
|
||||
libs :
|
||||
ifeq ($(CORE), UNKOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
-ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
#Save the config files for installation
|
||||
cp Makefile.conf Makefile.conf_last
|
||||
cp config.h config_last.h
|
||||
ifdef DYNAMIC_ARCH
|
||||
$(MAKE) -C kernel commonlibs || exit 1
|
||||
for d in $(DYNAMIC_CORE) ; \
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
done
|
||||
echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
endif
|
||||
touch lib.grd
|
||||
|
||||
prof : prof_blas prof_lapack
|
||||
|
||||
|
@ -227,19 +239,23 @@ lapack-test :
|
|||
|
||||
dummy :
|
||||
|
||||
install :
|
||||
$(MAKE) -f Makefile.install install
|
||||
|
||||
clean ::
|
||||
@for d in $(SUBDIRS_ALL) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
ifdef DYNAMIC_ARCH
|
||||
#ifdef DYNAMIC_ARCH
|
||||
@$(MAKE) -C kernel clean
|
||||
endif
|
||||
#endif
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h
|
||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||
@if test -d lapack-3.1.1; then \
|
||||
echo deleting lapack-3.1.1; \
|
||||
rm -rf lapack-3.1.1 ;\
|
||||
fi
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@echo Done.
|
|
@ -0,0 +1,65 @@
|
|||
TOPDIR = .
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
-include $(TOPDIR)/Makefile.conf_last
|
||||
include ./Makefile.system
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
|
||||
lib.grd :
|
||||
$(error OpenBLAS: Please run "make" firstly)
|
||||
|
||||
install : lib.grd
|
||||
@-mkdir -p $(PREFIX)
|
||||
@echo Generating openblas_config.h in $(PREFIX)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h
|
||||
@cat config_last.h >> $(PREFIX)/openblas_config.h
|
||||
@echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(PREFIX)/openblas_config.h
|
||||
@echo \#endif >> $(PREFIX)/openblas_config.h
|
||||
|
||||
@echo Generating f77blas.h in $(PREFIX)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h
|
||||
@cat common_interface.h >> $(PREFIX)/f77blas.h
|
||||
@echo \#endif >> $(PREFIX)/f77blas.h
|
||||
|
||||
@echo Generating cblas.h in $(PREFIX)
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h
|
||||
|
||||
#for install static library
|
||||
@echo Copy the static library to $(PREFIX)
|
||||
@cp $(LIBNAME) $(PREFIX)
|
||||
@-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX)
|
||||
#for install shared library
|
||||
@echo Copy the shared library to $(PREFIX)
|
||||
ifeq ($(OSNAME), Linux)
|
||||
-cp $(LIBSONAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||
endif
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
-cp $(LIBSONAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
-cp $(LIBSONAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
-cp $(LIBDYNNAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-cp $(LIBDLLNAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
-cp $(LIBDLLNAME) $(PREFIX)
|
||||
-ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll
|
||||
endif
|
||||
|
||||
@echo Install OK!
|
||||
|
|
@ -91,6 +91,9 @@ VERSION = 0.1alpha2
|
|||
# SANITY_CHECK to compare the result with reference BLAS.
|
||||
# UTEST_CHECK = 1
|
||||
|
||||
# The installation directory.
|
||||
# PREFIX = /opt/OpenBLAS
|
||||
|
||||
# Common Optimization Flag; -O2 is enough.
|
||||
# DEBUG = 1
|
||||
|
||||
|
|
|
@ -30,6 +30,10 @@ ifdef TARGET
|
|||
GETARCH_FLAGS += -DFORCE_$(TARGET)
|
||||
endif
|
||||
|
||||
ifdef INTERFACE64
|
||||
GETARCH_FLAGS += -DUSE64BITINT
|
||||
endif
|
||||
|
||||
# This operation is expensive, so execution should be once.
|
||||
ifndef GOTOBLAS_MAKEFILE
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
|
@ -185,7 +189,7 @@ ifeq ($(C_COMPILER), INTEL)
|
|||
CCOMMON_OPT += -wd981
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
@ -489,7 +493,8 @@ endif
|
|||
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
CCOMMON_OPT += -DUSE64BITINT
|
||||
CCOMMON_OPT +=
|
||||
#-DUSE64BITINT
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -510,6 +515,10 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
|||
CCOMMON_OPT += -DDYNAMIC_ARCH
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
CCOMMON_OPT += -DNO_LAPACK
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
|
|
30
README
30
README
|
@ -8,7 +8,9 @@ Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
|||
Or,
|
||||
check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
1)Normal compile
|
||||
Please read GotoBLAS_02QuickInstall.txt or type "make"
|
||||
(a) type "make" to detect the CPU automatically.
|
||||
or
|
||||
(b) type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt.
|
||||
|
||||
2)Cross compile
|
||||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
||||
|
@ -20,6 +22,11 @@ make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-g
|
|||
3)Debug version
|
||||
make DEBUG=1
|
||||
|
||||
4)Install to the directory (Optional)
|
||||
e.g.
|
||||
make install PREFIX=your_installation_directory
|
||||
The default directory is /opt/OpenBLAS
|
||||
|
||||
3.Support CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
|
@ -39,13 +46,17 @@ export GOTO_NUM_THREADS=4
|
|||
or
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should only set OMP_NUM_THREADS environment variable.
|
||||
|
||||
4.2 Set the number of threads with calling functions. for example,
|
||||
void goto_set_num_threads(int num_threads);
|
||||
or
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
|
||||
|
||||
5.Report Bugs
|
||||
Please add an issue at https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
|
@ -56,4 +67,17 @@ Optimization on ICT Loongson 3A CPU
|
|||
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
||||
|
||||
8.ChangeLog
|
||||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
|
||||
Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version.
|
||||
|
||||
9.Known Issues
|
||||
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully.
|
||||
|
||||
10. Specification of Git Branches
|
||||
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/).
|
||||
Now, there are 4 branches in github.com.
|
||||
* The master branch. This is a main branch that reflects a production-ready state.
|
||||
* The develop branch. This is a main branch that reflects the latest delivered development changes for the next release.
|
||||
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future.
|
||||
* The gh-pages branch. This is for web pages
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
Force Target Examples:
|
||||
|
||||
make TARGET=NEHALEM
|
||||
make TARGET=LOONGSON3A BINARY=64
|
||||
make TARGET=ISTANBUL
|
||||
|
||||
Supported List:
|
||||
1.X86/X86_64
|
||||
a)Intel CPU:
|
||||
P2
|
||||
COPPERMINE
|
||||
KATMAI
|
||||
NORTHWOOD
|
||||
PRESCOTT
|
||||
BANIAS
|
||||
YONAH
|
||||
CORE2
|
||||
PENRYN
|
||||
DUNNINGTON
|
||||
NEHALEM
|
||||
ATOM
|
||||
|
||||
b)AMD CPU:
|
||||
ATHLON
|
||||
OPTERON
|
||||
OPTERON_SSE3
|
||||
BARCELONA
|
||||
SHANGHAI
|
||||
ISTANBUL
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
VIAC3
|
||||
NANO
|
||||
|
||||
2.Power CPU:
|
||||
POWER4
|
||||
POWER5
|
||||
POWER6
|
||||
PPCG4
|
||||
PPC970
|
||||
PPC970MP
|
||||
PPC440
|
||||
PPC440FP2
|
||||
CELL
|
||||
|
||||
3.MIPS64 CPU:
|
||||
SICORTEX
|
||||
LOONGSON3A
|
||||
|
||||
4.IA64 CPU:
|
||||
ITANIUM2
|
||||
|
||||
5.SPARC CPU:
|
||||
SPARC
|
||||
SPARCV7
|
||||
|
2
c_check
2
c_check
|
@ -149,7 +149,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/);
|
|||
|
||||
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
||||
|
||||
$data =~ /globl\ ([_\.]*)(.*)/;
|
||||
$data =~ /globl\s([_\.]*)(.*)/;
|
||||
|
||||
$need_fu = $1;
|
||||
|
||||
|
|
14
cblas.h
14
cblas.h
|
@ -1,6 +1,14 @@
|
|||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
|
||||
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
|
||||
|
@ -270,4 +278,10 @@ void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANS
|
|||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
|
10
common.h
10
common.h
|
@ -39,6 +39,11 @@
|
|||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
@ -607,4 +612,9 @@ extern int gotoblas_profile;
|
|||
#define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -60,4 +60,8 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *,
|
|||
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
|
||||
void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *);
|
||||
|
||||
double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*);
|
||||
|
||||
#endif
|
||||
|
|
33
cpuid_x86.c
33
cpuid_x86.c
|
@ -1302,24 +1302,25 @@ int get_coretype(void){
|
|||
case 13:
|
||||
return CORE_DUNNINGTON;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CORE_NEHALEM;
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
|
||||
// Xeon (Clarkdale), 32nm
|
||||
return CORE_NEHALEM;
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case 15:
|
||||
if (model <= 0x2) return CORE_NORTHWOOD;
|
||||
return CORE_PRESCOTT;
|
||||
if (model <= 0x2) return CORE_NORTHWOOD;
|
||||
else return CORE_PRESCOTT;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX)
|
|||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||
|
||||
ifdef SMP
|
||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
|
||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX)
|
||||
ifndef NO_AFFINITY
|
||||
COMMONOBJS += init.$(SUFFIX)
|
||||
endif
|
||||
|
@ -100,6 +100,9 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h
|
|||
blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
//#include <sys/mman.h>
|
||||
#include "common.h"
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
|
@ -49,6 +49,26 @@
|
|||
|
||||
int blas_server_avail = 0;
|
||||
|
||||
void goto_set_num_threads(int num_threads) {
|
||||
|
||||
if (num_threads < 1) num_threads = blas_num_threads;
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
if (num_threads > blas_num_threads) {
|
||||
blas_num_threads = num_threads;
|
||||
}
|
||||
|
||||
blas_cpu_number = num_threads;
|
||||
|
||||
omp_set_num_threads(blas_cpu_number);
|
||||
|
||||
}
|
||||
void openblas_set_num_threads(int num_threads) {
|
||||
|
||||
goto_set_num_threads(num_threads);
|
||||
}
|
||||
|
||||
int blas_thread_init(void){
|
||||
|
||||
blas_get_cpu_number();
|
||||
|
|
|
@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) {
|
|||
return count;
|
||||
}
|
||||
|
||||
/***
|
||||
Known issue: The number of CPUs/cores should be less than or
|
||||
equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
***/
|
||||
static inline unsigned long get_cpumap(int node) {
|
||||
|
||||
int infile;
|
||||
unsigned long affinity;
|
||||
char name[160];
|
||||
char cpumap[160];
|
||||
char *p, *dummy;
|
||||
|
||||
int i=0;
|
||||
|
||||
sprintf(name, CPUMAP_NAME, node);
|
||||
|
||||
infile = open(name, O_RDONLY);
|
||||
|
@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) {
|
|||
|
||||
if (infile != -1) {
|
||||
|
||||
read(infile, name, sizeof(name));
|
||||
|
||||
read(infile, cpumap, sizeof(cpumap));
|
||||
p = cpumap;
|
||||
while (*p != '\n' && i<160){
|
||||
if(*p != ',') {
|
||||
name[i++]=*p;
|
||||
}
|
||||
p++;
|
||||
}
|
||||
p = name;
|
||||
|
||||
while ((*p == '0') || (*p == ',')) p++;
|
||||
// while ((*p == '0') || (*p == ',')) p++;
|
||||
|
||||
affinity = strtol(p, &dummy, 16);
|
||||
affinity = strtoul(p, &dummy, 16);
|
||||
|
||||
close(infile);
|
||||
}
|
||||
|
@ -347,7 +360,13 @@ static void disable_hyperthread(void) {
|
|||
unsigned long share;
|
||||
int cpu;
|
||||
|
||||
common -> avail = (1UL << common -> num_procs) - 1;
|
||||
if(common->num_procs > 64){
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs);
|
||||
exit(1);
|
||||
}else if(common->num_procs == 64){
|
||||
common -> avail = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}else
|
||||
common -> avail = (1UL << common -> num_procs) - 1;
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail);
|
||||
|
@ -376,7 +395,13 @@ static void disable_affinity(void) {
|
|||
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
|
||||
#endif
|
||||
|
||||
lprocmask = (1UL << common -> final_num_procs) - 1;
|
||||
if(common->final_num_procs > 64){
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs);
|
||||
exit(1);
|
||||
}else if(common->final_num_procs == 64){
|
||||
lprocmask = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}else
|
||||
lprocmask = (1UL << common -> final_num_procs) - 1;
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#ifdef SMP_SERVER
|
||||
#ifdef OS_LINUX
|
||||
|
||||
extern void openblas_set_num_threads(int num_threads) ;
|
||||
|
||||
void NAME(int* num_threads){
|
||||
openblas_set_num_threads(*num_threads);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) {
|
|||
if (cycles > 0) {
|
||||
|
||||
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n");
|
||||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n");
|
||||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n");
|
||||
|
||||
for (i = 0; i < MAX_PROF_TABLE; i ++) {
|
||||
if (function_profile_table[i].calls) {
|
||||
#ifndef OS_WINDOWS
|
||||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n",
|
||||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n",
|
||||
#else
|
||||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n",
|
||||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n",
|
||||
#endif
|
||||
func_table[i],
|
||||
function_profile_table[i].calls,
|
||||
(double)function_profile_table[i].cycles / (double)cycles * 100.,
|
||||
(double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100.,
|
||||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles
|
||||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles,
|
||||
function_profile_table[i].cycles
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME)
|
|||
zip : dll
|
||||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME)
|
||||
|
||||
dll : libgoto2.dll
|
||||
dll : ../$(LIBDLLNAME)
|
||||
#libgoto2.dll
|
||||
|
||||
dll2 : libgoto2_shared.dll
|
||||
|
||||
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX)
|
||||
$(RANLIB) ../$(LIBNAME)
|
||||
ifeq ($(BINARY32), 1)
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:i386 /def:libgoto2.def
|
||||
else
|
||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \
|
||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \
|
||||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB)
|
||||
-lib /machine:X64 /def:libgoto2.def
|
||||
endif
|
||||
|
@ -84,7 +85,7 @@ libgoto_hpl.def : gensymbol
|
|||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F)
|
||||
|
||||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
||||
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o $(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
|
||||
symbol.$(SUFFIX) : symbol.S
|
||||
$(CC) $(CFLAGS) -c -o $(@F) $^
|
||||
|
|
1
f_check
1
f_check
|
@ -274,6 +274,7 @@ if ($link ne "") {
|
|||
&& ($flags !~ /kernel32/)
|
||||
&& ($flags !~ /advapi32/)
|
||||
&& ($flags !~ /shell32/)
|
||||
&& ($flags !~ /^\-l$/)
|
||||
) {
|
||||
$linker_l .= $flags . " ";
|
||||
}
|
||||
|
|
11
getarch.c
11
getarch.c
|
@ -604,30 +604,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifndef POWER
|
||||
#define POWER
|
||||
#endif
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || (__x86_64__)
|
||||
#include "cpuid_x86.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __ia64__
|
||||
#include "cpuid_ia64.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __alpha
|
||||
#include "cpuid_alpha.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef POWER
|
||||
#include "cpuid_power.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef sparc
|
||||
#include "cpuid_sparc.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef __mips__
|
||||
#include "cpuid_mips.c"
|
||||
#define OPENBLAS_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifndef OPENBLAS_SUPPORTED
|
||||
#error "This arch/CPU is not supported by OpenBLAS."
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -30,6 +30,10 @@ int main(int argc, char **argv) {
|
|||
printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double)));
|
||||
printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float)));
|
||||
printf("#define ZLOCAL_BUFFER_SIZE\t%ld\n", (ZGEMM_DEFAULT_Q * ZGEMM_DEFAULT_UNROLL_N * 2 * 2 * sizeof(double)));
|
||||
|
||||
#ifdef USE64BITINT
|
||||
printf("#define USE64BITINT\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
|||
//In that case, the threads would be dependent.
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
|
||||
//Temporarily work around the low performance issue with small input size & multithreads.
|
||||
if (n <= 10000)
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
|
|||
BLASLONG n = *N;
|
||||
BLASLONG incx = *INCX;
|
||||
BLASLONG incy = *INCY;
|
||||
double ret = 0.0;
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
|
@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){
|
|||
if (incx < 0) x -= (n - 1) * incx;
|
||||
if (incy < 0) y -= (n - 1) * incy;
|
||||
|
||||
return DSDOT_K(n, x, incx, y, incy);
|
||||
ret=DSDOT_K(n, x, incx, y, incy);
|
||||
|
||||
FUNCTION_PROFILE_END(1, n, n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
|
||||
|
||||
double ret = 0.0;
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
|
@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){
|
|||
if (incx < 0) x -= (n - 1) * incx;
|
||||
if (incy < 0) y -= (n - 1) * incy;
|
||||
|
||||
return DSDOT_K(n, x, incx, y, incy);
|
||||
ret=DSDOT_K(n, x, incx, y, incy);
|
||||
|
||||
FUNCTION_PROFILE_END(1, n, n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,12 @@
|
|||
#define GAMSQ 16777216.e0
|
||||
#define RGAMSQ 5.9604645e-8
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ABS(x) fabs(x)
|
||||
#else
|
||||
#define ABS(x) fabsf(x)
|
||||
#endif
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){
|
||||
|
@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
|
|||
dq2 = dp2 * dy1;
|
||||
dq1 = dp1 * *dx1;
|
||||
|
||||
if (! (abs(dq1) > abs(dq2))) goto L40;
|
||||
if (! (ABS(dq1) > ABS(dq2))) goto L40;
|
||||
|
||||
dh21 = -(dy1) / *dx1;
|
||||
dh12 = dp2 / dp1;
|
||||
|
@ -140,7 +146,7 @@ L150:
|
|||
goto L130;
|
||||
|
||||
L160:
|
||||
if (! (abs(*dd2) <= RGAMSQ)) {
|
||||
if (! (ABS(*dd2) <= RGAMSQ)) {
|
||||
goto L190;
|
||||
}
|
||||
if (*dd2 == ZERO) {
|
||||
|
@ -157,7 +163,7 @@ L180:
|
|||
goto L160;
|
||||
|
||||
L190:
|
||||
if (! (abs(*dd2) >= GAMSQ)) {
|
||||
if (! (ABS(*dd2) >= GAMSQ)) {
|
||||
goto L220;
|
||||
}
|
||||
igo = 3;
|
||||
|
|
|
@ -53,6 +53,11 @@ SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX)
|
|||
CCOMMON_OPT += -DTS=$(TSUFFIX)
|
||||
endif
|
||||
|
||||
KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
KERNEL_INTERFACE += ../common_lapack.h
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
COMMONOBJS += cpuid.$(SUFFIX)
|
||||
endif
|
||||
|
@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h
|
|||
setparam$(TSUFFIX).c : setparam-ref.c
|
||||
sed 's/TS/$(TSUFFIX)/g' $< > $(@F)
|
||||
|
||||
kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h
|
||||
kernel$(TSUFFIX).h : $(KERNEL_INTERFACE)
|
||||
sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F)
|
||||
|
||||
|
||||
cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
@ -112,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
|
|||
cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S
|
||||
$(CC) -c $(PFLAGS) $< -o $(@F)
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
#ifdef DYNAMIC_ARCH
|
||||
clean ::
|
||||
@rm -f setparam_*.c kernel_*.h setparam.h kernel.h
|
||||
|
||||
endif
|
||||
#endif
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
|
|
@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
|
|||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
|
||||
|
||||
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
|
||||
|
||||
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||
|
|
|
@ -300,7 +300,11 @@
|
|||
.align 3
|
||||
|
||||
.L999:
|
||||
j $31
|
||||
ADD s1, s1, s2
|
||||
|
||||
#ifdef DSDOT
|
||||
cvt.d.s s1, s1
|
||||
#endif
|
||||
j $31
|
||||
NOP
|
||||
|
||||
EPILOGUE
|
||||
|
|
|
@ -101,7 +101,11 @@ gotoblas_t TABLE_NAME = {
|
|||
#endif
|
||||
ssymm_outcopyTS, ssymm_oltcopyTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
sneg_tcopyTS, slaswp_ncopyTS,
|
||||
#else
|
||||
NULL,NULL,
|
||||
#endif
|
||||
|
||||
0, 0, 0,
|
||||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
|
||||
|
@ -147,7 +151,11 @@ gotoblas_t TABLE_NAME = {
|
|||
#endif
|
||||
dsymm_outcopyTS, dsymm_oltcopyTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
dneg_tcopyTS, dlaswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#ifdef EXPRECISION
|
||||
|
||||
|
@ -195,7 +203,11 @@ gotoblas_t TABLE_NAME = {
|
|||
#endif
|
||||
qsymm_outcopyTS, qsymm_oltcopyTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
qneg_tcopyTS, qlaswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -286,7 +298,11 @@ gotoblas_t TABLE_NAME = {
|
|||
chemm3m_oucopyrTS, chemm3m_olcopyrTS,
|
||||
chemm3m_oucopyiTS, chemm3m_olcopyiTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
cneg_tcopyTS, claswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
0, 0, 0,
|
||||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
|
||||
|
@ -375,7 +391,11 @@ gotoblas_t TABLE_NAME = {
|
|||
zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
|
||||
zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
zneg_tcopyTS, zlaswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#ifdef EXPRECISION
|
||||
|
||||
|
@ -466,7 +486,11 @@ gotoblas_t TABLE_NAME = {
|
|||
xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
|
||||
xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
|
||||
|
||||
#ifndef NO_LAPACK
|
||||
xneg_tcopyTS, xlaswp_ncopyTS,
|
||||
#else
|
||||
NULL, NULL,
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1541,5 +1541,8 @@
|
|||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
/*remove the hidden return value address from the stack.*/
|
||||
popl %ecx
|
||||
xchgl %ecx, 0(%esp)
|
||||
ret
|
||||
EPILOGUE
|
||||
|
|
|
@ -1286,6 +1286,10 @@
|
|||
haddps %xmm0, %xmm0
|
||||
#endif
|
||||
|
||||
#ifdef DSDOT
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
#endif
|
||||
|
||||
RESTOREREGISTERS
|
||||
|
||||
ret
|
||||
|
|
|
@ -544,7 +544,7 @@
|
|||
jg .L11
|
||||
|
||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||
addq $1, KK
|
||||
addq $4, KK
|
||||
#endif
|
||||
|
||||
leaq (C, LDC, 4), C
|
||||
|
@ -594,7 +594,7 @@
|
|||
jg .L11
|
||||
|
||||
#if defined(TRMMKERNEL) && !defined(LEFT)
|
||||
addq $1, KK
|
||||
addq $4, KK
|
||||
#endif
|
||||
|
||||
leaq (C, LDC, 4), C
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
/*This is only for "make install" target.*/
/*
 * Minimal configuration header: defines the Fortran symbol-name macro
 * (BLASFUNC) and the integer types (BLASLONG, BLASULONG, blasint), all
 * selected by the preprocessor symbols tested below.
 */
|
||||
|
||||
/* BLASFUNC(name) yields the BLAS symbol name: `name_` when
   NEEDBUNDERSCORE is defined, plain `name` otherwise. */
#ifdef NEEDBUNDERSCORE
|
||||
#define BLASFUNC(FUNC) FUNC##_
|
||||
#else
|
||||
#define BLASFUNC(FUNC) FUNC
|
||||
#endif
|
||||
|
||||
/* BLASLONG / BLASULONG: `long long` based when both OS_WINDOWS and
   __64BIT__ are defined, `long` based otherwise.
   NOTE(review): presumably because `long` is 32 bits wide on 64-bit
   Windows - confirm against the main common.h. */
#if defined(OS_WINDOWS) && defined(__64BIT__)
|
||||
typedef long long BLASLONG;
|
||||
typedef unsigned long long BLASULONG;
|
||||
#else
|
||||
typedef long BLASLONG;
|
||||
typedef unsigned long BLASULONG;
|
||||
#endif
|
||||
|
||||
/* blasint is the integer type of the BLAS interface: BLASLONG when
   USE64BITINT is defined, plain int otherwise. */
#ifdef USE64BITINT
|
||||
typedef BLASLONG blasint;
|
||||
#else
|
||||
typedef int blasint;
|
||||
#endif
|
|
@ -128,6 +128,8 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
|||
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
||||
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
||||
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
|
||||
SBLASOBJS += \
|
||||
sgetf2f.$(SUFFIX) sgetrff.$(SUFFIX) slauu2f.$(SUFFIX) slauumf.$(SUFFIX) \
|
||||
spotf2f.$(SUFFIX) spotrff.$(SUFFIX) strti2f.$(SUFFIX) strtrif.$(SUFFIX) \
|
||||
|
@ -160,6 +162,7 @@ XBLASOBJS +=
|
|||
xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \
|
||||
xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \
|
||||
|
||||
endif
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
||||
|
|
|
@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system
|
|||
TARGET=openblas_utest
|
||||
CUNIT_LIB=/usr/local/lib/libcunit.a
|
||||
|
||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o
|
||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o
|
||||
|
||||
all : run_test
|
||||
|
||||
$(TARGET): $(OBJS)
|
||||
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
|
||||
$(FC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
|
||||
|
||||
run_test: $(TARGET)
|
||||
./$(TARGET)
|
||||
|
|
|
@ -57,4 +57,8 @@ void test_caxpy_inc_0(void);
|
|||
void test_zdotu_n_1(void);
|
||||
void test_zdotu_offset_1(void);
|
||||
|
||||
void test_drotmg(void);
|
||||
|
||||
void test_dsdot_n_1(void);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -54,7 +54,10 @@ CU_TestInfo test_level1[]={
|
|||
|
||||
{"Testing zdotu with n == 1",test_zdotu_n_1},
|
||||
{"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1},
|
||||
|
||||
|
||||
{"Testing drotmg",test_drotmg},
|
||||
|
||||
{"Testing dsdot with n == 1",test_dsdot_n_1},
|
||||
CU_TEST_INFO_NULL,
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common_utest.h"
|
||||
|
||||
void test_dsdot_n_1()
|
||||
{
|
||||
float x= 0.172555164;
|
||||
float y= -0.0138700781;
|
||||
int incx=1;
|
||||
int incy=1;
|
||||
int n=1;
|
||||
|
||||
double res1=0.0f, res2=0.0f;
|
||||
|
||||
res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy);
|
||||
res2=BLASFUNC_REF(dsdot)(&n, &x, &incx, &y, &incy);
|
||||
|
||||
CU_ASSERT_DOUBLE_EQUAL(res1, res2, CHECK_EPS);
|
||||
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common_utest.h"
|
||||
|
||||
void test_drotmg()
|
||||
{
|
||||
double te_d1, tr_d1;
|
||||
double te_d2, tr_d2;
|
||||
double te_x1, tr_x1;
|
||||
double te_y1, tr_y1;
|
||||
double te_param[5],tr_param[5];
|
||||
int i=0;
|
||||
te_d1= tr_d1=0.21149573940783739;
|
||||
te_d2= tr_d2=0.046892057172954082;
|
||||
te_x1= tr_x1=-0.42272687517106533;
|
||||
te_y1= tr_y1=0.42211309121921659;
|
||||
//OpenBLAS
|
||||
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
|
||||
//reference
|
||||
BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param);
|
||||
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS);
|
||||
|
||||
for(i=0; i<5; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue