Merge remote branch 'origin/x86' into loongson3a
This commit is contained in:
commit
6eb02bbb9c
|
@ -0,0 +1,27 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.1 (in development)
|
||||
26-Feb-2011
|
||||
|
||||
common:
|
||||
* Fixed random SEGFAULT when nodemask==NULL on Linux kernels above 2.6.34.
|
||||
Thanks to Mr. Ei-ji Nakama for providing this patch. (Refs issue #12 on github)
|
||||
* Added DEBUG=1 rule in Makefile.rule to build debug version.
|
||||
* Disable compiling quad precision in reference BLAS library(netlib BLAS).
|
||||
* Added unit testcases in utest/ subdir. Used CUnit framework.
|
||||
* Supported OPENBLAS_* & GOTO_* environment variables (Please see README)
|
||||
* Imported GotoBLAS2 1.13 BSD version
|
||||
|
||||
x86/x86 64:
|
||||
* Modified ?axpy functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #7 on github)
|
||||
* Modified ?swap functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #6 on github)
|
||||
* Modified ?rot functions to return same netlib BLAS results
|
||||
when incx==0 or incy==0 (Refs issue #4 on github)
|
||||
* Detect Intel Westmere to use Nehalem codes.
|
||||
* Fixed a typo bug about compiling dynamic ARCH library.
|
||||
MIPS64:
|
||||
* Improve daxpy performance on ICT Loongson 3A.
|
||||
* Supported ICT Loongson 3A CPU (Refs issue #1 on github)
|
||||
====================================================================
|
|
@ -70,7 +70,7 @@ VERSION = 0.1
|
|||
# time out to improve performance. This number should be from 4 to 30
|
||||
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
||||
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
||||
# system). Also you can control this mumber by GOTO_THREAD_TIMEOUT
|
||||
# system). You can also control this number with THREAD_TIMEOUT
|
||||
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
||||
|
||||
# Using special device driver for mapping physically contiguous memory
|
||||
|
@ -89,7 +89,13 @@ VERSION = 0.1
|
|||
# UTEST_CHECK = 1
|
||||
|
||||
# Common Optimization Flag; -O2 is enough.
|
||||
# DEBUG = 1
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
COMMON_OPT += -g -DDEBUG
|
||||
else
|
||||
COMMON_OPT += -O2
|
||||
endif
|
||||
|
||||
# Profiling flags
|
||||
COMMON_PROF = -pg
|
||||
|
|
28
README
28
README
|
@ -4,6 +4,8 @@ OpenBLAS Readme
|
|||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an open source project supported by Lab of Parallel Software and Computational Science, ISCAS.(http://www.rdcps.ac.cn)
|
||||
|
||||
2.Installation
|
||||
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||
Or,
|
||||
check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
1)Normal compile
|
||||
Please read GotoBLAS_02QuickInstall.txt or type "make"
|
||||
|
@ -15,23 +17,43 @@ examples:
|
|||
On X86 box, compile this library for loongson3a CPU.
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
|
||||
3)Debug version
|
||||
make DEBUG=1
|
||||
|
||||
3.Supported CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
Additional supported CPUs:
|
||||
x86_64:
|
||||
Intel Xeon 56xx (Westmere) //Used GotoBLAS2 Nehalem codes.
|
||||
MIPS64:
|
||||
ICT Loongson 3A //The initial version used GotoBLAS2 MIPS64 kernels. Thus, the performance is not good.
|
||||
|
||||
4.Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
Set the number of threads. for example,
|
||||
4.1 Set the number of threads with environment variables. For example,
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
or
|
||||
export GOTO_NUM_THREADS=4
|
||||
or
|
||||
export OMP_NUM_THREADS=4
|
||||
OPENBLAS_NUM_THREAD is prior to OMP_NUM_THREADS.
|
||||
|
||||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
4.2 Set the number of threads by calling functions. For example,
|
||||
void goto_set_num_threads(int num_threads);
|
||||
or
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
5.Report Bugs
|
||||
Please add an issue at https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
6.To-Do List:
|
||||
Support ICT Loongson 3A CPU
|
||||
Optimization on ICT Loongson 3A CPU
|
||||
|
||||
7.Contact
|
||||
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
||||
|
||||
8.ChangeLog
|
||||
Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version.
|
|
@ -68,8 +68,9 @@ extern long int syscall (long int __sysno, ...);
|
|||
/*
 * Thin wrapper around the raw mbind system call.
 *
 * addr, len, mode, maxnode and flags are forwarded unchanged to
 * syscall(SYS_mbind, ...).  A NULL nodemask is replaced by a pointer to a
 * zeroed local mask: passing NULL straight to the kernel caused random
 * SEGFAULTs on Linux kernels above 2.6.34.  A non-NULL nodemask supplied by
 * the caller is passed through as given.
 *
 * Returns the value returned by syscall(), narrowed to int.
 */
static inline int my_mbind(void *addr, unsigned long len, int mode,
			   unsigned long *nodemask, unsigned long maxnode,
			   unsigned flags) {
	/* Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34 */
	unsigned long null_nodemask = 0;

	if (!nodemask)
		nodemask = &null_nodemask;

	return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
}
|
||||
|
||||
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
||||
|
|
|
@ -43,4 +43,21 @@ void BLASFUNC_REF(csrot) (blasint *, float *, blasint *, float *, blasint *,
|
|||
void BLASFUNC_REF(zdrot) (blasint *, double *, blasint *, double *, blasint *, double *, double *);
|
||||
void BLASFUNC_REF(xqrot) (blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *);
|
||||
|
||||
void BLASFUNC_REF(sswap) (blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC_REF(dswap) (blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC_REF(qswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC_REF(cswap) (blasint *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC_REF(zswap) (blasint *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC_REF(xswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC_REF(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC_REF(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC_REF(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC_REF(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
|
||||
float _Complex BLASFUNC_REF(cdotu) (blasint *, float *, blasint *, float *, blasint *);
|
||||
float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *, blasint *);
|
||||
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||
|
||||
#endif
|
||||
|
|
19
cpuid_x86.c
19
cpuid_x86.c
|
@ -972,8 +972,15 @@ int get_cpuname(void){
|
|||
return CPUTYPE_ATOM;
|
||||
case 13:
|
||||
return CPUTYPE_DUNNINGTON;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
|
@ -1289,8 +1296,16 @@ int get_coretype(void){
|
|||
return CORE_ATOM;
|
||||
case 13:
|
||||
return CORE_DUNNINGTON;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 12:
|
||||
//Xeon Processor 5600 (Westmere-EP)
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
case 15:
|
||||
if (model <= 0x2) return CORE_NORTHWOOD;
|
||||
|
|
|
@ -297,7 +297,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
printf("GEMM: SA .. %p SB .. %p\n", sa, sb);
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
#ifdef TIMING
|
||||
innercost = 0;
|
||||
outercost = 0;
|
||||
kernelcost = 0;
|
||||
|
|
|
@ -278,7 +278,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
// fprintf(stderr, "A = %p B = %p C = %p\n\tlda = %ld ldb = %ld ldc = %ld\n", a, b, c, lda, ldb, ldc);
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
#ifdef TIMING
|
||||
innercost = 0;
|
||||
outercost = 0;
|
||||
kernelcost = 0;
|
||||
|
|
|
@ -520,12 +520,21 @@ int blas_thread_init(void){
|
|||
|
||||
p = getenv("THREAD_TIMEOUT");
|
||||
|
||||
if (p) {
|
||||
thread_timeout = atoi(p);
|
||||
if (thread_timeout < 4) thread_timeout = 4;
|
||||
if (thread_timeout > 30) thread_timeout = 30;
|
||||
thread_timeout = (1 << thread_timeout);
|
||||
}else{
|
||||
p = getenv("GOTO_THREAD_TIMEOUT");
|
||||
if (p) {
|
||||
thread_timeout = atoi(p);
|
||||
if (thread_timeout < 4) thread_timeout = 4;
|
||||
if (thread_timeout > 30) thread_timeout = 30;
|
||||
thread_timeout = (1 << thread_timeout);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for(i = 0; i < blas_num_threads - 1; i++){
|
||||
|
||||
|
@ -790,6 +799,11 @@ void goto_set_num_threads(int num_threads) {
|
|||
|
||||
}
|
||||
|
||||
/* OpenBLAS-prefixed entry point: forwards num_threads unchanged to
 * goto_set_num_threads(). */
void openblas_set_num_threads(int num_threads) {
|
||||
goto_set_num_threads(num_threads);
|
||||

|
||||
}
|
||||
|
||||
/* Compatible function with pthread_create / join */
|
||||
|
||||
int gotoblas_pthread(int numthreads, void *function, void *args, int stride) {
|
||||
|
|
|
@ -121,6 +121,11 @@ static gotoblas_t *get_coretype(void){
|
|||
if ((model == 10) || (model == 11) || (model == 14) || (model == 15)) return &gotoblas_NEHALEM;
|
||||
if (model == 12) return &gotoblas_ATOM;
|
||||
return NULL;
|
||||
|
||||
case 2:
|
||||
//Intel Xeon Processor 5600 (Westmere-EP)
|
||||
if (model == 12) return &gotoblas_NEHALEM;
|
||||
return NULL;
|
||||
}
|
||||
case 0xf:
|
||||
if (model <= 0x2) return &gotoblas_NORTHWOOD;
|
||||
|
|
|
@ -92,7 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SHARE_NAME "/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_map"
|
||||
#define NODE_DIR "/sys/devices/system/node"
|
||||
|
||||
#undef DEBUG
|
||||
//#undef DEBUG
|
||||
|
||||
/* Private variables */
|
||||
typedef struct {
|
||||
|
@ -581,6 +581,7 @@ void gotoblas_affinity_init(void) {
|
|||
numprocs = 0;
|
||||
#else
|
||||
numprocs = readenv("OPENBLAS_NUM_THREADS");
|
||||
if (numprocs == 0) numprocs = readenv("GOTO_NUM_THREADS");
|
||||
#endif
|
||||
|
||||
if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS");
|
||||
|
@ -666,7 +667,7 @@ void gotoblas_affinity_init(void) {
|
|||
|
||||
setup_mempolicy();
|
||||
|
||||
if (readenv("OPENBLAS_MAIN_FREE")) {
|
||||
if (readenv("OPENBLAS_MAIN_FREE") || readenv("GOTOBLAS_MAIN_FREE")) {
|
||||
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
||||
}
|
||||
|
||||
|
|
|
@ -68,9 +68,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#undef DEBUG
|
||||
//#undef DEBUG
|
||||
|
||||
#include "common.h"
|
||||
#include <errno.h>
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
#define ALLOC_WINDOWS
|
||||
|
@ -231,6 +232,13 @@ int blas_get_cpu_number(void){
|
|||
p = getenv("OPENBLAS_NUM_THREADS");
|
||||
if (p) blas_goto_num = atoi(p);
|
||||
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||
|
||||
if (blas_goto_num == 0) {
|
||||
p = getenv("GOTO_NUM_THREADS");
|
||||
if (p) blas_goto_num = atoi(p);
|
||||
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
blas_omp_num = 0;
|
||||
|
@ -381,8 +389,20 @@ static void *alloc_mmap(void *address){
|
|||
if (map_address != (void *)-1) {
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#ifdef DEBUG
|
||||
int ret;
|
||||
ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
|
||||
if(ret==-1){
|
||||
int errsv=errno;
|
||||
perror("alloc_mmap:");
|
||||
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
|
||||
}
|
||||
|
||||
#else
|
||||
my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
|
||||
|
||||
|
@ -979,7 +999,7 @@ void *blas_memory_alloc(int procpos){
|
|||
memory[position].addr = map_address;
|
||||
|
||||
#ifdef DEBUG
|
||||
printf(" Mapping Succeeded. %p(%d)\n", (void *)alloc_area[position], position);
|
||||
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1010,7 +1030,7 @@ void *blas_memory_alloc(int procpos){
|
|||
|
||||
#ifdef DEBUG
|
||||
printf("Mapped : %p %3d\n\n",
|
||||
(void *)alloc_area[position], position);
|
||||
(void *)memory[position].addr, position);
|
||||
#endif
|
||||
|
||||
return (void *)memory[position].addr;
|
||||
|
@ -1053,7 +1073,7 @@ void blas_memory_free(void *free_area){
|
|||
|
||||
#ifdef DEBUG
|
||||
for (position = 0; position < NUM_BUFFERS; position++)
|
||||
printf("%4ld %p : %d\n", position, alloc_area[position], alloc_used[position]);
|
||||
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
|
||||
#endif
|
||||
|
||||
return;
|
||||
|
|
|
@ -81,6 +81,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
|||
#ifdef SMP
|
||||
nthreads = num_cpu_avail(1);
|
||||
|
||||
//disable multi-thread when incx==0 or incy==0
|
||||
//In that case, the threads would be dependent.
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -79,6 +79,11 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
|||
#ifdef SMP
|
||||
nthreads = num_cpu_avail(1);
|
||||
|
||||
//disable multi-thread when incx==0 or incy==0
|
||||
//In that case, the threads would be dependent.
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -83,6 +83,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
|
|||
#ifdef SMP
|
||||
nthreads = num_cpu_avail(1);
|
||||
|
||||
//disable multi-thread when incx==0 or incy==0
|
||||
//In that case, the threads would be dependent.
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -80,6 +80,11 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
|||
#ifdef SMP
|
||||
nthreads = num_cpu_avail(1);
|
||||
|
||||
//disable multi-thread when incx==0 or incy==0
|
||||
//In that case, the threads would be dependent.
|
||||
if (incx == 0 || incy == 0)
|
||||
nthreads = 1;
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1440,6 +1440,12 @@
|
|||
.L50:
|
||||
movl M, %eax
|
||||
movl Y, YY
|
||||
//If incx==0 || incy==0, avoid unloop.
|
||||
cmpl $0, INCX
|
||||
je .L56
|
||||
cmpl $0, INCY
|
||||
je .L56
|
||||
|
||||
sarl $3, %eax
|
||||
jle .L55
|
||||
ALIGN_3
|
||||
|
|
|
@ -698,6 +698,12 @@
|
|||
.L40:
|
||||
movl Y, YY
|
||||
movl M, %eax
|
||||
//If incx==0 || incy==0, avoid unloop.
|
||||
cmpl $0, INCX
|
||||
je .L46
|
||||
cmpl $0, INCY
|
||||
je .L46
|
||||
|
||||
sarl $3, %eax
|
||||
jle .L45
|
||||
ALIGN_3
|
||||
|
|
|
@ -859,6 +859,10 @@
|
|||
|
||||
.L50:
|
||||
movl N, I
|
||||
cmpl $0, INCX
|
||||
je .L56
|
||||
cmpl $0, INCY
|
||||
je .L56
|
||||
sarl $2, I
|
||||
jle .L55
|
||||
ALIGN_3
|
||||
|
|
|
@ -2857,6 +2857,11 @@
|
|||
unpcklps ALPHA_I, ALPHA_R
|
||||
unpcklps %xmm5, ALPHA_I
|
||||
#endif
|
||||
//If incx==0 || incy==0, avoid unloop and jump to end.
|
||||
cmpl $0, INCX
|
||||
je .L200
|
||||
cmpl $0, INCY
|
||||
je .L200
|
||||
|
||||
movl Y, YY
|
||||
|
||||
|
@ -3090,8 +3095,41 @@
|
|||
addps %xmm1, %xmm4
|
||||
|
||||
movsd %xmm4, (Y)
|
||||
jmp .L999
|
||||
ALIGN_3
|
||||
|
||||
// Element-at-a-time fallback, entered when incx == 0 or incy == 0.
// Each iteration computes y += alpha * x for one complex element and then
// steps X and Y by their increments, as the x86_64 version of this loop does.
// NOTE(review): assumes INCX/INCY already hold byte strides at this point,
// as in the strided paths of this kernel -- confirm against the prologue.
.L200:
	movl	M, %eax
	cmpl	$0, %eax
	jle	.L999
	ALIGN_3

.L201:
	movsd	(X), %xmm0
	addl	INCX, X			// advance to the next x element

#ifdef HAVE_SSE3
	movshdup	%xmm0, %xmm1
	movsldup	%xmm0, %xmm0
#else
	movaps	%xmm0, %xmm1
	shufps	$0xa0, %xmm0, %xmm0
	shufps	$0xf5, %xmm1, %xmm1
#endif

	mulps	ALPHA_R, %xmm0
	mulps	ALPHA_I, %xmm1

	movsd	(Y), %xmm4

	addps	%xmm0, %xmm4
	addps	%xmm1, %xmm4

	movsd	%xmm4, (Y)
	addl	INCY, Y			// advance to the next y element

	// decl comes last so that jg tests the loop counter's flags
	decl	%eax
	jg	.L201

	ALIGN_3
|
||||
.L999:
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
|
|
|
@ -1318,6 +1318,12 @@
|
|||
|
||||
movl Y, YY
|
||||
movl M, %eax
|
||||
//If incx==0 || incy==0, avoid unloop and jump to end.
|
||||
cmpl $0, INCX
|
||||
je .L58
|
||||
cmpl $0, INCY
|
||||
je .L58
|
||||
|
||||
sarl $2, %eax
|
||||
jle .L55
|
||||
|
||||
|
@ -1498,6 +1504,7 @@
|
|||
andl $1, %eax
|
||||
jle .L999
|
||||
|
||||
.L58:
|
||||
MOVDDUP( 0 * SIZE, X, %xmm0)
|
||||
MOVDDUP( 1 * SIZE, X, %xmm1)
|
||||
|
||||
|
@ -1510,6 +1517,10 @@
|
|||
|
||||
movlpd %xmm4, 0 * SIZE(YY)
|
||||
movhpd %xmm4, 1 * SIZE(YY)
|
||||
|
||||
|
||||
decl %eax
|
||||
jg .L58
|
||||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
|
|
|
@ -1285,6 +1285,12 @@
|
|||
|
||||
.L50:
|
||||
movl N, I
|
||||
//if incx ==0 || incy==0 jump to the tail
|
||||
cmpl $0, INCX
|
||||
je .L56
|
||||
cmpl $0, INCY
|
||||
je .L56
|
||||
|
||||
sarl $2, I
|
||||
jle .L55
|
||||
ALIGN_3
|
||||
|
|
|
@ -1463,6 +1463,12 @@
|
|||
.L50:
|
||||
movq M, %rax
|
||||
movq Y, YY
|
||||
//If incx==0 || incy==0, avoid unloop.
|
||||
cmpq $0, INCX
|
||||
je .L56
|
||||
cmpq $0, INCY
|
||||
je .L56
|
||||
|
||||
sarq $3, %rax
|
||||
jle .L55
|
||||
ALIGN_3
|
||||
|
|
|
@ -805,6 +805,12 @@
|
|||
.L40:
|
||||
movq Y, YY
|
||||
movq M, %rax
|
||||
//If incx==0 || incy==0, avoid unloop.
|
||||
cmpq $0, INCX
|
||||
je .L46
|
||||
cmpq $0, INCY
|
||||
je .L46
|
||||
|
||||
sarq $3, %rax
|
||||
jle .L45
|
||||
ALIGN_3
|
||||
|
|
|
@ -887,6 +887,10 @@
|
|||
|
||||
.L50:
|
||||
movq N, %rax
|
||||
cmpq $0, INCX
|
||||
je .L56
|
||||
cmpq $0, INCY
|
||||
je .L56
|
||||
sarq $2, %rax
|
||||
jle .L55
|
||||
ALIGN_3
|
||||
|
|
|
@ -2893,6 +2893,12 @@
|
|||
unpcklps %xmm13, %xmm15
|
||||
#endif
|
||||
|
||||
//If incx==0 || incy==0, avoid unloop and jump to end.
|
||||
cmpq $0, INCX
|
||||
je .L200
|
||||
cmpq $0, INCY
|
||||
je .L200
|
||||
|
||||
movq Y, YY
|
||||
|
||||
movq M, %rax
|
||||
|
@ -3105,6 +3111,40 @@
|
|||
addps %xmm1, %xmm8
|
||||
|
||||
movsd %xmm8, (Y)
|
||||
jmp .L999
|
||||
ALIGN_3
|
||||
|
||||
.L200:
|
||||
movq M, %rax
|
||||
cmpq $0, %rax
|
||||
jle .L999
|
||||
ALIGN_3
|
||||
|
||||
.L201:
|
||||
movsd (X), %xmm0
|
||||
addq INCX, X
|
||||
|
||||
#ifdef HAVE_SSE3
|
||||
movshdup %xmm0, %xmm1
|
||||
movsldup %xmm0, %xmm0
|
||||
#else
|
||||
pshufd $0xf5, %xmm0, %xmm1
|
||||
shufps $0xa0, %xmm0, %xmm0
|
||||
#endif
|
||||
|
||||
mulps %xmm14, %xmm0
|
||||
mulps %xmm15, %xmm1
|
||||
|
||||
movsd (Y), %xmm8
|
||||
|
||||
addps %xmm0, %xmm8
|
||||
addps %xmm1, %xmm8
|
||||
|
||||
movsd %xmm8, (Y)
|
||||
addq INCY, Y
|
||||
|
||||
decq %rax
|
||||
jg .L201
|
||||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
|
|
|
@ -1416,6 +1416,12 @@
|
|||
|
||||
movq Y, YY
|
||||
movq M, %rax
|
||||
//If incx==0 || incy==0, avoid unloop and jump to end.
|
||||
cmpq $0, INCX
|
||||
je .L58
|
||||
cmpq $0, INCY
|
||||
je .L58
|
||||
|
||||
sarq $3, %rax
|
||||
jle .L55
|
||||
|
||||
|
@ -1769,6 +1775,7 @@
|
|||
andq $1, %rax
|
||||
jle .L999
|
||||
|
||||
.L58:
|
||||
MOVDDUP( 0 * SIZE, X, %xmm0)
|
||||
MOVDDUP( 1 * SIZE, X, %xmm1)
|
||||
|
||||
|
@ -1781,6 +1788,9 @@
|
|||
|
||||
movlpd %xmm8, 0 * SIZE(YY)
|
||||
movhpd %xmm8, 1 * SIZE(YY)
|
||||
|
||||
decq %rax
|
||||
jg .L58
|
||||
ALIGN_3
|
||||
|
||||
.L999:
|
||||
|
|
|
@ -1523,6 +1523,10 @@
|
|||
|
||||
.L50:
|
||||
movq N, %rax
|
||||
cmpq $0, INCX
|
||||
je .L56
|
||||
cmpq $0, INCY
|
||||
je .L56
|
||||
sarq $2, %rax
|
||||
jle .L55
|
||||
ALIGN_3
|
||||
|
|
|
@ -138,7 +138,8 @@ DBLASOBJS += \
|
|||
dpotf2f.$(SUFFIX) dpotrff.$(SUFFIX) dtrti2f.$(SUFFIX) dtrtrif.$(SUFFIX) \
|
||||
dlaswpf.$(SUFFIX) dgetrsf.$(SUFFIX) dgesvf.$(SUFFIX) dpotrif.$(SUFFIX) \
|
||||
|
||||
QBLASOBJS += \
|
||||
QBLASOBJS +=
|
||||
# \
|
||||
qgetf2f.$(SUFFIX) qgetrff.$(SUFFIX) qlauu2f.$(SUFFIX) qlauumf.$(SUFFIX) \
|
||||
qpotf2f.$(SUFFIX) qpotrff.$(SUFFIX) qtrti2f.$(SUFFIX) qtrtrif.$(SUFFIX) \
|
||||
qlaswpf.$(SUFFIX) qgetrsf.$(SUFFIX) qgesvf.$(SUFFIX) qpotrif.$(SUFFIX) \
|
||||
|
@ -153,7 +154,8 @@ ZBLASOBJS += \
|
|||
zpotf2f.$(SUFFIX) zpotrff.$(SUFFIX) ztrti2f.$(SUFFIX) ztrtrif.$(SUFFIX) \
|
||||
zlaswpf.$(SUFFIX) zgetrsf.$(SUFFIX) zgesvf.$(SUFFIX) zpotrif.$(SUFFIX) \
|
||||
|
||||
XBLASOBJS += \
|
||||
XBLASOBJS +=
|
||||
# \
|
||||
xgetf2f.$(SUFFIX) xgetrff.$(SUFFIX) xlauu2f.$(SUFFIX) xlauumf.$(SUFFIX) \
|
||||
xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \
|
||||
xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \
|
||||
|
|
|
@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system
|
|||
TARGET=openblas_utest
|
||||
CUNIT_LIB=/usr/local/lib/libcunit.a
|
||||
|
||||
OBJS=main.o test_rot.o
|
||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o
|
||||
|
||||
all : run_test
|
||||
|
||||
$(TARGET): $(OBJS)
|
||||
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB)
|
||||
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
|
||||
|
||||
run_test: $(TARGET)
|
||||
./$(TARGET)
|
||||
|
|
|
@ -36,9 +36,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include <common.h>
|
||||
|
||||
#define CHECK_EPS 0.0002
|
||||
#define CHECK_EPS 0.00002
|
||||
|
||||
//Testcase list
|
||||
void test_drot_incx_0(void);
|
||||
void test_drot_inc_0(void);
|
||||
void test_srot_inc_0(void);
|
||||
void test_zdrot_inc_0(void);
|
||||
void test_csrot_inc_0(void);
|
||||
|
||||
void test_dswap_inc_0(void);
|
||||
void test_zswap_inc_0(void);
|
||||
void test_sswap_inc_0(void);
|
||||
void test_cswap_inc_0(void);
|
||||
|
||||
void test_daxpy_inc_0(void);
|
||||
void test_zaxpy_inc_0(void);
|
||||
void test_saxpy_inc_0(void);
|
||||
void test_caxpy_inc_0(void);
|
||||
|
||||
void test_zdotu_n_1(void);
|
||||
|
||||
#endif
|
||||
|
|
20
utest/main.c
20
utest/main.c
|
@ -33,12 +33,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#include "common_utest.h"
|
||||
#include <CUnit/Basic.h>
|
||||
|
||||
CU_TestInfo test_level1[]={
|
||||
{"Testing drot when incx & incy == 0",test_drot_incx_0},
|
||||
{"Testing srot when incx || incy == 0",test_srot_inc_0},
|
||||
{"Testing drot when incx || incy == 0",test_drot_inc_0},
|
||||
{"Testing csrot when incx || incy == 0",test_csrot_inc_0},
|
||||
{"Testing zdrot when incx || incy == 0",test_zdrot_inc_0},
|
||||
|
||||
{"Testing sswap with incx || incy == 0",test_sswap_inc_0},
|
||||
{"Testing dswap with incx || incy == 0",test_dswap_inc_0},
|
||||
{"Testing cswap with incx || incy == 0",test_cswap_inc_0},
|
||||
{"Testing zswap with incx || incy == 0",test_zswap_inc_0},
|
||||
|
||||
{"Testing saxpy with incx || incy == 0",test_saxpy_inc_0},
|
||||
{"Testing daxpy with incx || incy == 0",test_daxpy_inc_0},
|
||||
{"Testing caxpy with incx || incy == 0",test_caxpy_inc_0},
|
||||
{"Testing zaxpy with incx || incy == 0",test_zaxpy_inc_0},
|
||||
|
||||
{"Testing zdotu with n == 1",test_zdotu_n_1},
|
||||
CU_TEST_INFO_NULL,
|
||||
};
|
||||
|
||||
|
@ -64,6 +78,8 @@ int main()
|
|||
|
||||
|
||||
|
||||
printf("Seting OK\n");
|
||||
fflush(stdout);
|
||||
|
||||
/* Run all tests using the CUnit Basic interface */
|
||||
CU_basic_set_mode(CU_BRM_VERBOSE);
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common_utest.h"
|
||||
|
||||
void test_daxpy_inc_0(void)
|
||||
{
|
||||
int i;
|
||||
int N=8,incX=0,incY=0;
|
||||
double a=0.25;
|
||||
double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(daxpy)(&N,&a,x1,&incX,y1,&incY);
|
||||
//reference
|
||||
BLASFUNC_REF(daxpy)(&N,&a,x2,&incX,y2,&incY);
|
||||
|
||||
for(i=0; i<N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_zaxpy_inc_0(void)
|
||||
{
|
||||
int i;
|
||||
int N=4,incX=0,incY=0;
|
||||
double a[2]={0.25,0.5};
|
||||
double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(zaxpy)(&N,a,x1,&incX,y1,&incY);
|
||||
//reference
|
||||
BLASFUNC_REF(zaxpy)(&N,a,x2,&incX,y2,&incY);
|
||||
|
||||
for(i=0; i<2*N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_saxpy_inc_0(void)
|
||||
{
|
||||
int i;
|
||||
int N=8,incX=0,incY=0;
|
||||
float a=0.25;
|
||||
float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(saxpy)(&N,&a,x1,&incX,y1,&incY);
|
||||
//reference
|
||||
BLASFUNC_REF(saxpy)(&N,&a,x2,&incX,y2,&incY);
|
||||
|
||||
for(i=0; i<N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_caxpy_inc_0(void)
|
||||
{
|
||||
int i;
|
||||
int N=4,incX=0,incY=0;
|
||||
float a[2]={0.25,0.5};
|
||||
float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(caxpy)(&N,a,x1,&incX,y1,&incY);
|
||||
//reference
|
||||
BLASFUNC_REF(caxpy)(&N,a,x2,&incX,y2,&incY);
|
||||
|
||||
for(i=0; i<2*N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common_utest.h"
|
||||
#include <complex.h>
|
||||
|
||||
void test_zdotu_n_1(void)
|
||||
{
|
||||
int N=1,incX=1,incY=1;
|
||||
double x1[]={1.0,1.0};
|
||||
double y1[]={1.0,2.0};
|
||||
double x2[]={1.0,1.0};
|
||||
double y2[]={1.0,2.0};
|
||||
double _Complex result1=0.0;
|
||||
double _Complex result2=0.0;
|
||||
//OpenBLAS
|
||||
result1=BLASFUNC(zdotu)(&N,x1,&incX,y1,&incY);
|
||||
//reference
|
||||
result2=BLASFUNC_REF(zdotu)(&N,x2,&incX,y2,&incY);
|
||||
|
||||
CU_ASSERT_DOUBLE_EQUAL(creal(result1), creal(result2), CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(cimag(result1), cimag(result2), CHECK_EPS);
|
||||
// printf("\%lf,%lf\n",creal(result1),cimag(result1));
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -32,9 +32,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common_utest.h"
|
||||
|
||||
void test_drot_incx_0(void)
|
||||
void test_drot_inc_0(void)
|
||||
{
|
||||
int i;
|
||||
int i=0;
|
||||
int N=4,incX=0,incY=0;
|
||||
double c=0.25,s=0.5;
|
||||
double x1[]={1.0,3.0,5.0,7.0};
|
||||
|
@ -43,12 +43,75 @@ void test_drot_incx_0(void)
|
|||
double y2[]={2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
drot_(&N,x1,&incX,y1,&incY,&c,&s);
|
||||
BLASFUNC(drot)(&N,x1,&incX,y1,&incY,&c,&s);
|
||||
//reference
|
||||
drotf_(&N,x2,&incX,y2,&incY,&c,&s);
|
||||
BLASFUNC_REF(drot)(&N,x2,&incX,y2,&incY,&c,&s);
|
||||
|
||||
for(i=0; i<N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_zdrot_inc_0(void)
|
||||
{
|
||||
int i=0;
|
||||
int N=4,incX=0,incY=0;
|
||||
double c=0.25,s=0.5;
|
||||
double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(zdrot)(&N,x1,&incX,y1,&incY,&c,&s);
|
||||
//reference
|
||||
BLASFUNC_REF(zdrot)(&N,x2,&incX,y2,&incY,&c,&s);
|
||||
|
||||
for(i=0; i<2*N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_srot_inc_0(void)
|
||||
{
|
||||
int i=0;
|
||||
int N=4,incX=0,incY=0;
|
||||
float c=0.25,s=0.5;
|
||||
float x1[]={1.0,3.0,5.0,7.0};
|
||||
float y1[]={2.0,4.0,6.0,8.0};
|
||||
float x2[]={1.0,3.0,5.0,7.0};
|
||||
float y2[]={2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(srot)(&N,x1,&incX,y1,&incY,&c,&s);
|
||||
//reference
|
||||
BLASFUNC_REF(srot)(&N,x2,&incX,y2,&incY,&c,&s);
|
||||
|
||||
for(i=0; i<N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_csrot_inc_0(void)
|
||||
{
|
||||
int i=0;
|
||||
int N=4,incX=0,incY=0;
|
||||
float c=0.25,s=0.5;
|
||||
float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(csrot)(&N,x1,&incX,y1,&incY,&c,&s);
|
||||
//reference
|
||||
BLASFUNC_REF(csrot)(&N,x2,&incX,y2,&incY,&c,&s);
|
||||
|
||||
for(i=0; i<2*N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "common_utest.h"
|
||||
|
||||
void test_dswap_inc_0(void)
|
||||
{
|
||||
int i=0;
|
||||
int N=4,incX=0,incY=0;
|
||||
double x1[]={1.0,3.0,5.0,7.0};
|
||||
double y1[]={2.0,4.0,6.0,8.0};
|
||||
double x2[]={1.0,3.0,5.0,7.0};
|
||||
double y2[]={2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(dswap)(&N,x1,&incX,y1,&incY);
|
||||
//reference
|
||||
BLASFUNC_REF(dswap)(&N,x2,&incX,y2,&incY);
|
||||
|
||||
for(i=0; i<N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_zswap_inc_0(void)
|
||||
{
|
||||
int i=0;
|
||||
int N=4,incX=0,incY=0;
|
||||
double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(zswap)(&N,x1,&incX,y1,&incY);
|
||||
//reference
|
||||
BLASFUNC_REF(zswap)(&N,x2,&incX,y2,&incY);
|
||||
|
||||
for(i=0; i<2*N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_sswap_inc_0(void)
|
||||
{
|
||||
int i=0;
|
||||
int N=4,incX=0,incY=0;
|
||||
float x1[]={1.0,3.0,5.0,7.0};
|
||||
float y1[]={2.0,4.0,6.0,8.0};
|
||||
float x2[]={1.0,3.0,5.0,7.0};
|
||||
float y2[]={2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(sswap)(&N,x1,&incX,y1,&incY);
|
||||
//reference
|
||||
BLASFUNC_REF(sswap)(&N,x2,&incX,y2,&incY);
|
||||
|
||||
for(i=0; i<N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
void test_cswap_inc_0(void)
|
||||
{
|
||||
int i=0;
|
||||
int N=4,incX=0,incY=0;
|
||||
float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||
float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||
|
||||
//OpenBLAS
|
||||
BLASFUNC(cswap)(&N,x1,&incX,y1,&incY);
|
||||
//reference
|
||||
BLASFUNC_REF(cswap)(&N,x2,&incX,y2,&incY);
|
||||
|
||||
for(i=0; i<2*N; i++){
|
||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue