Merge remote branch 'origin/x86' into loongson3a

This commit is contained in:
Xianyi Zhang 2011-03-02 13:52:05 +08:00
commit 6eb02bbb9c
36 changed files with 704 additions and 33 deletions

27
Changelog.txt Normal file
View File

@ -0,0 +1,27 @@
OpenBLAS ChangeLog
====================================================================
Version 0.1 (in development)
26-Feb-2011
common:
* Fixed a random SEGFAULT that occurred when nodemask==NULL on Linux kernels newer than 2.6.34.
Thanks to Mr. Ei-ji Nakama for providing this patch. (Refs issue #12 on github)
* Added DEBUG=1 rule in Makefile.rule to build debug version.
* Disable compiling quad precision in reference BLAS library(netlib BLAS).
* Added unit testcases in utest/ subdir. Used CUnit framework.
* Supported OPENBLAS_* & GOTO_* environment variables (Please see README)
* Imported GotoBLAS2 1.13 BSD version
x86/x86 64:
* Modified ?axpy functions to return same netlib BLAS results
when incx==0 or incy==0 (Refs issue #7 on github)
* Modified ?swap functions to return same netlib BLAS results
when incx==0 or incy==0 (Refs issue #6 on github)
* Modified ?rot functions to return same netlib BLAS results
when incx==0 or incy==0 (Refs issue #4 on github)
* Detect Intel Westmere to use Nehalem codes.
* Fixed a typo bug about compiling dynamic ARCH library.
MIPS64:
* Improve daxpy performance on ICT Loongson 3A.
* Supported ICT Loongson 3A CPU (Refs issue #1 on github)
====================================================================

View File

@ -70,7 +70,7 @@ VERSION = 0.1
# time out to improve performance. This number should be from 4 to 30 # time out to improve performance. This number should be from 4 to 30
# which corresponds to (1 << n) cycles. For example, if you set to 26, # which corresponds to (1 << n) cycles. For example, if you set to 26,
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz # thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
# system). Also you can control this mumber by GOTO_THREAD_TIMEOUT # system). Also you can control this mumber by THREAD_TIMEOUT
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26 # CCOMMON_OPT += -DTHREAD_TIMEOUT=26
# Using special device driver for mapping physically contigous memory # Using special device driver for mapping physically contigous memory
@ -89,7 +89,13 @@ VERSION = 0.1
# UTEST_CHECK = 1 # UTEST_CHECK = 1
# Common Optimization Flag; -O2 is enough. # Common Optimization Flag; -O2 is enough.
# DEBUG = 1
ifeq ($(DEBUG), 1)
COMMON_OPT += -g -DDEBUG
else
COMMON_OPT += -O2 COMMON_OPT += -O2
endif
# Profiling flags # Profiling flags
COMMON_PROF = -pg COMMON_PROF = -pg

28
README
View File

@ -4,6 +4,8 @@ OpenBLAS Readme
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an open source project supported by Lab of Parallel Software and Computational Science, ISCAS.(http://www.rdcps.ac.cn) OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an open source project supported by Lab of Parallel Software and Computational Science, ISCAS.(http://www.rdcps.ac.cn)
2.Intallation 2.Intallation
Download from project homepage. http://xianyi.github.com/OpenBLAS/
Or,
check out codes from git://github.com/xianyi/OpenBLAS.git check out codes from git://github.com/xianyi/OpenBLAS.git
1)Normal compile 1)Normal compile
Please read GotoBLAS_02QuickInstall.txt or type "make" Please read GotoBLAS_02QuickInstall.txt or type "make"
@ -15,23 +17,43 @@ examples:
On X86 box, compile this library for loongson3a CPU. On X86 box, compile this library for loongson3a CPU.
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
3)Debug version
make DEBUG=1
3.Support CPU & OS 3.Support CPU & OS
Please read GotoBLAS_01Readme.txt Please read GotoBLAS_01Readme.txt
Additional support CPU:
x86_64:
Intel Xeon 56xx (Westmere) //Used GotoBLAS2 Nehalem codes.
MIPS64:
ICT Loongson 3A //The initial version used GotoBLAS2 MIPS64 kernels. Thus, the performance is not good.
4.Usages 4.Usages
Link with libopenblas.a or -lopenblas for shared library. Link with libopenblas.a or -lopenblas for shared library.
Set the number of threads. for example, 4.1 Set the number of threads with environment variables. for example,
export OPENBLAS_NUM_THREADS=4 export OPENBLAS_NUM_THREADS=4
or or
export GOTO_NUM_THREADS=4
or
export OMP_NUM_THREADS=4 export OMP_NUM_THREADS=4
OPENBLAS_NUM_THREAD is prior to OMP_NUM_THREADS.
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
4.2 Set the number of threads with calling functions. for example,
void goto_set_num_threads(int num_threads);
or
void openblas_set_num_threads(int num_threads);
5.Report Bugs 5.Report Bugs
Please add a issue in https://github.com/xianyi/OpenBLAS/issues Please add a issue in https://github.com/xianyi/OpenBLAS/issues
6.To-Do List: 6.To-Do List:
Support ICT Loongson 3A CPU Optimization on ICT Loongson 3A CPU
7.Contact 7.Contact
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
8.ChangeLog
Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version.

View File

@ -68,8 +68,9 @@ extern long int syscall (long int __sysno, ...);
static inline int my_mbind(void *addr, unsigned long len, int mode, static inline int my_mbind(void *addr, unsigned long len, int mode,
unsigned long *nodemask, unsigned long maxnode, unsigned long *nodemask, unsigned long maxnode,
unsigned flags) { unsigned flags) {
//Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags); unsigned long null_nodemask=0;
return syscall(SYS_mbind, addr, len, mode, &null_nodemask, maxnode, flags);
} }
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) { static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {

View File

@ -43,4 +43,21 @@ void BLASFUNC_REF(csrot) (blasint *, float *, blasint *, float *, blasint *,
void BLASFUNC_REF(zdrot) (blasint *, double *, blasint *, double *, blasint *, double *, double *); void BLASFUNC_REF(zdrot) (blasint *, double *, blasint *, double *, blasint *, double *, double *);
void BLASFUNC_REF(xqrot) (blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *); void BLASFUNC_REF(xqrot) (blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *);
void BLASFUNC_REF(sswap) (blasint *, float *, blasint *, float *, blasint *);
void BLASFUNC_REF(dswap) (blasint *, double *, blasint *, double *, blasint *);
void BLASFUNC_REF(qswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
void BLASFUNC_REF(cswap) (blasint *, float *, blasint *, float *, blasint *);
void BLASFUNC_REF(zswap) (blasint *, double *, blasint *, double *, blasint *);
void BLASFUNC_REF(xswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
void BLASFUNC_REF(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
void BLASFUNC_REF(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
void BLASFUNC_REF(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
void BLASFUNC_REF(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
float _Complex BLASFUNC_REF(cdotu) (blasint *, float *, blasint *, float *, blasint *);
float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *, blasint *);
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *);
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *);
#endif #endif

View File

@ -972,8 +972,15 @@ int get_cpuname(void){
return CPUTYPE_ATOM; return CPUTYPE_ATOM;
case 13: case 13:
return CPUTYPE_DUNNINGTON; return CPUTYPE_DUNNINGTON;
break;
} }
break;
case 2:
switch (model) {
case 12:
//Xeon Processor 5600 (Westmere-EP)
return CPUTYPE_NEHALEM;
}
break;
} }
break; break;
case 0x7: case 0x7:
@ -1289,8 +1296,16 @@ int get_coretype(void){
return CORE_ATOM; return CORE_ATOM;
case 13: case 13:
return CORE_DUNNINGTON; return CORE_DUNNINGTON;
break;
} }
break;
case 2:
switch (model) {
case 12:
//Xeon Processor 5600 (Westmere-EP)
return CORE_NEHALEM;
}
break;
} }
case 15: case 15:
if (model <= 0x2) return CORE_NORTHWOOD; if (model <= 0x2) return CORE_NORTHWOOD;

View File

@ -297,7 +297,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
printf("GEMM: SA .. %p SB .. %p\n", sa, sb); printf("GEMM: SA .. %p SB .. %p\n", sa, sb);
#endif #endif
#ifdef DEBUG #ifdef TIMING
innercost = 0; innercost = 0;
outercost = 0; outercost = 0;
kernelcost = 0; kernelcost = 0;

View File

@ -278,7 +278,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
// fprintf(stderr, "A = %p B = %p C = %p\n\tlda = %ld ldb = %ld ldc = %ld\n", a, b, c, lda, ldb, ldc); // fprintf(stderr, "A = %p B = %p C = %p\n\tlda = %ld ldb = %ld ldc = %ld\n", a, b, c, lda, ldb, ldc);
#endif #endif
#ifdef DEBUG #ifdef TIMING
innercost = 0; innercost = 0;
outercost = 0; outercost = 0;
kernelcost = 0; kernelcost = 0;

View File

@ -520,12 +520,21 @@ int blas_thread_init(void){
p = getenv("THREAD_TIMEOUT"); p = getenv("THREAD_TIMEOUT");
if (p) {
thread_timeout = atoi(p);
if (thread_timeout < 4) thread_timeout = 4;
if (thread_timeout > 30) thread_timeout = 30;
thread_timeout = (1 << thread_timeout);
}else{
p = getenv("GOTO_THREAD_TIMEOUT");
if (p) { if (p) {
thread_timeout = atoi(p); thread_timeout = atoi(p);
if (thread_timeout < 4) thread_timeout = 4; if (thread_timeout < 4) thread_timeout = 4;
if (thread_timeout > 30) thread_timeout = 30; if (thread_timeout > 30) thread_timeout = 30;
thread_timeout = (1 << thread_timeout); thread_timeout = (1 << thread_timeout);
} }
}
for(i = 0; i < blas_num_threads - 1; i++){ for(i = 0; i < blas_num_threads - 1; i++){
@ -790,6 +799,11 @@ void goto_set_num_threads(int num_threads) {
} }
/* OpenBLAS-named entry point for setting the thread count.
 * It simply forwards num_threads to goto_set_num_threads(), so both
 * names behave the same. */
void openblas_set_num_threads(int num_threads) {
goto_set_num_threads(num_threads);
}
/* Compatible function with pthread_create / join */ /* Compatible function with pthread_create / join */
int gotoblas_pthread(int numthreads, void *function, void *args, int stride) { int gotoblas_pthread(int numthreads, void *function, void *args, int stride) {

View File

@ -121,6 +121,11 @@ static gotoblas_t *get_coretype(void){
if ((model == 10) || (model == 11) || (model == 14) || (model == 15)) return &gotoblas_NEHALEM; if ((model == 10) || (model == 11) || (model == 14) || (model == 15)) return &gotoblas_NEHALEM;
if (model == 12) return &gotoblas_ATOM; if (model == 12) return &gotoblas_ATOM;
return NULL; return NULL;
case 2:
//Intel Xeon Processor 5600 (Westmere-EP)
if (model == 12) return &gotoblas_NEHALEM;
return NULL;
} }
case 0xf: case 0xf:
if (model <= 0x2) return &gotoblas_NORTHWOOD; if (model <= 0x2) return &gotoblas_NORTHWOOD;

View File

@ -92,7 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SHARE_NAME "/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_map" #define SHARE_NAME "/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_map"
#define NODE_DIR "/sys/devices/system/node" #define NODE_DIR "/sys/devices/system/node"
#undef DEBUG //#undef DEBUG
/* Private variables */ /* Private variables */
typedef struct { typedef struct {
@ -581,6 +581,7 @@ void gotoblas_affinity_init(void) {
numprocs = 0; numprocs = 0;
#else #else
numprocs = readenv("OPENBLAS_NUM_THREADS"); numprocs = readenv("OPENBLAS_NUM_THREADS");
if (numprocs == 0) numprocs = readenv("GOTO_NUM_THREADS");
#endif #endif
if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS"); if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS");
@ -666,7 +667,7 @@ void gotoblas_affinity_init(void) {
setup_mempolicy(); setup_mempolicy();
if (readenv("OPENBLAS_MAIN_FREE")) { if (readenv("OPENBLAS_MAIN_FREE") || readenv("GOTOBLAS_MAIN_FREE")) {
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]); sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
} }

View File

@ -68,9 +68,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
#undef DEBUG //#undef DEBUG
#include "common.h" #include "common.h"
#include <errno.h>
#ifdef OS_WINDOWS #ifdef OS_WINDOWS
#define ALLOC_WINDOWS #define ALLOC_WINDOWS
@ -231,6 +232,13 @@ int blas_get_cpu_number(void){
p = getenv("OPENBLAS_NUM_THREADS"); p = getenv("OPENBLAS_NUM_THREADS");
if (p) blas_goto_num = atoi(p); if (p) blas_goto_num = atoi(p);
if (blas_goto_num < 0) blas_goto_num = 0; if (blas_goto_num < 0) blas_goto_num = 0;
if (blas_goto_num == 0) {
p = getenv("GOTO_NUM_THREADS");
if (p) blas_goto_num = atoi(p);
if (blas_goto_num < 0) blas_goto_num = 0;
}
#endif #endif
blas_omp_num = 0; blas_omp_num = 0;
@ -381,8 +389,20 @@ static void *alloc_mmap(void *address){
if (map_address != (void *)-1) { if (map_address != (void *)-1) {
#ifdef OS_LINUX #ifdef OS_LINUX
#ifdef DEBUG
/* Debug build: report a failed my_mbind() together with the errno it left. */
int ret;
ret = my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
if (ret == -1) {
  /* Capture errno before any further library call can overwrite it. */
  int errsv = errno;
  perror("alloc_mmap:");
  /* map_address is a pointer, so it must be printed with %p (as void *);
     passing it to %lx is a conversion/argument mismatch. */
  printf("error code=%d,\tmap_address=%p\n", errsv, (void *)map_address);
}
#else
my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
#endif #endif
#endif
allocsize = DGEMM_P * DGEMM_Q * sizeof(double); allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
@ -979,7 +999,7 @@ void *blas_memory_alloc(int procpos){
memory[position].addr = map_address; memory[position].addr = map_address;
#ifdef DEBUG #ifdef DEBUG
printf(" Mapping Succeeded. %p(%d)\n", (void *)alloc_area[position], position); printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
#endif #endif
} }
@ -1010,7 +1030,7 @@ void *blas_memory_alloc(int procpos){
#ifdef DEBUG #ifdef DEBUG
printf("Mapped : %p %3d\n\n", printf("Mapped : %p %3d\n\n",
(void *)alloc_area[position], position); (void *)memory[position].addr, position);
#endif #endif
return (void *)memory[position].addr; return (void *)memory[position].addr;
@ -1053,7 +1073,7 @@ void blas_memory_free(void *free_area){
#ifdef DEBUG #ifdef DEBUG
for (position = 0; position < NUM_BUFFERS; position++) for (position = 0; position < NUM_BUFFERS; position++)
printf("%4ld %p : %d\n", position, alloc_area[position], alloc_used[position]); printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
#endif #endif
return; return;

View File

@ -81,6 +81,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
#ifdef SMP #ifdef SMP
nthreads = num_cpu_avail(1); nthreads = num_cpu_avail(1);
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
nthreads = 1;
if (nthreads == 1) { if (nthreads == 1) {
#endif #endif

View File

@ -79,6 +79,11 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
#ifdef SMP #ifdef SMP
nthreads = num_cpu_avail(1); nthreads = num_cpu_avail(1);
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
nthreads = 1;
if (nthreads == 1) { if (nthreads == 1) {
#endif #endif

View File

@ -83,6 +83,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
#ifdef SMP #ifdef SMP
nthreads = num_cpu_avail(1); nthreads = num_cpu_avail(1);
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
nthreads = 1;
if (nthreads == 1) { if (nthreads == 1) {
#endif #endif

View File

@ -80,6 +80,11 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
#ifdef SMP #ifdef SMP
nthreads = num_cpu_avail(1); nthreads = num_cpu_avail(1);
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
nthreads = 1;
if (nthreads == 1) { if (nthreads == 1) {
#endif #endif

View File

@ -1440,6 +1440,12 @@
.L50: .L50:
movl M, %eax movl M, %eax
movl Y, YY movl Y, YY
//If incx==0 || incy==0, skip the unrolled loop.
cmpl $0, INCX
je .L56
cmpl $0, INCY
je .L56
sarl $3, %eax sarl $3, %eax
jle .L55 jle .L55
ALIGN_3 ALIGN_3

View File

@ -698,6 +698,12 @@
.L40: .L40:
movl Y, YY movl Y, YY
movl M, %eax movl M, %eax
//If incx==0 || incy==0, skip the unrolled loop.
cmpl $0, INCX
je .L46
cmpl $0, INCY
je .L46
sarl $3, %eax sarl $3, %eax
jle .L45 jle .L45
ALIGN_3 ALIGN_3

View File

@ -859,6 +859,10 @@
.L50: .L50:
movl N, I movl N, I
cmpl $0, INCX
je .L56
cmpl $0, INCY
je .L56
sarl $2, I sarl $2, I
jle .L55 jle .L55
ALIGN_3 ALIGN_3

View File

@ -2857,6 +2857,11 @@
unpcklps ALPHA_I, ALPHA_R unpcklps ALPHA_I, ALPHA_R
unpcklps %xmm5, ALPHA_I unpcklps %xmm5, ALPHA_I
#endif #endif
//If incx==0 || incy==0, skip the unrolled loops and jump to the element-by-element loop at .L200.
cmpl $0, INCX
je .L200
cmpl $0, INCY
je .L200
movl Y, YY movl Y, YY
@ -3090,8 +3095,41 @@
addps %xmm1, %xmm4 addps %xmm1, %xmm4
movsd %xmm4, (Y) movsd %xmm4, (Y)
jmp .L999
ALIGN_3 ALIGN_3
// Element-by-element path, entered when INCX or INCY is zero.
// Processes one 64-bit element per iteration, M iterations in total.
// X and Y are stepped by INCX and INCY on every iteration so that a
// non-zero stride on the other vector is still honoured; with a zero
// stride the add is a no-op and the same element is reused.
// NOTE(review): INCX/INCY are assumed to already be scaled to bytes at
// this point, as in the strided code path of this routine -- confirm.
.L200:
	movl	M, %eax
	cmpl	$0, %eax
	jle	.L999			// nothing to do when M <= 0
	ALIGN_3

.L201:
	movsd	(X), %xmm0		// load one element (two packed singles) from X
	addl	INCX, X			// advance X (no-op when INCX == 0)

#ifdef HAVE_SSE3
	movshdup %xmm0, %xmm1		// xmm1 = odd lanes duplicated
	movsldup %xmm0, %xmm0		// xmm0 = even lanes duplicated
#else
	movaps	%xmm0, %xmm1
	shufps	$0xa0, %xmm0, %xmm0	// lanes 0,0,2,2
	shufps	$0xf5, %xmm1, %xmm1	// lanes 1,1,3,3
#endif

	mulps	ALPHA_R, %xmm0
	mulps	ALPHA_I, %xmm1

	movsd	(Y), %xmm4		// accumulate both products into the Y element
	addps	%xmm0, %xmm4
	addps	%xmm1, %xmm4
	movsd	%xmm4, (Y)
	addl	INCY, Y			// advance Y (no-op when INCY == 0)

	decl	%eax			// decl sets the flags tested by jg
	jg	.L201
ALIGN_3
.L999: .L999:
popl %ebp popl %ebp
popl %ebx popl %ebx

View File

@ -1318,6 +1318,12 @@
movl Y, YY movl Y, YY
movl M, %eax movl M, %eax
//If incx==0 || incy==0, skip the unrolled loop and jump to the single-element code at .L58.
cmpl $0, INCX
je .L58
cmpl $0, INCY
je .L58
sarl $2, %eax sarl $2, %eax
jle .L55 jle .L55
@ -1498,6 +1504,7 @@
andl $1, %eax andl $1, %eax
jle .L999 jle .L999
.L58:
MOVDDUP( 0 * SIZE, X, %xmm0) MOVDDUP( 0 * SIZE, X, %xmm0)
MOVDDUP( 1 * SIZE, X, %xmm1) MOVDDUP( 1 * SIZE, X, %xmm1)
@ -1510,6 +1517,10 @@
movlpd %xmm4, 0 * SIZE(YY) movlpd %xmm4, 0 * SIZE(YY)
movhpd %xmm4, 1 * SIZE(YY) movhpd %xmm4, 1 * SIZE(YY)
decl %eax
jg .L58
ALIGN_3 ALIGN_3
.L999: .L999:

View File

@ -1285,6 +1285,12 @@
.L50: .L50:
movl N, I movl N, I
//if incx ==0 || incy==0 jump to the tail
cmpl $0, INCX
je .L56
cmpl $0, INCY
je .L56
sarl $2, I sarl $2, I
jle .L55 jle .L55
ALIGN_3 ALIGN_3

View File

@ -1463,6 +1463,12 @@
.L50: .L50:
movq M, %rax movq M, %rax
movq Y, YY movq Y, YY
//If incx==0 || incy==0, skip the unrolled loop.
cmpq $0, INCX
je .L56
cmpq $0, INCY
je .L56
sarq $3, %rax sarq $3, %rax
jle .L55 jle .L55
ALIGN_3 ALIGN_3

View File

@ -805,6 +805,12 @@
.L40: .L40:
movq Y, YY movq Y, YY
movq M, %rax movq M, %rax
//If incx==0 || incy==0, skip the unrolled loop.
cmpq $0, INCX
je .L46
cmpq $0, INCY
je .L46
sarq $3, %rax sarq $3, %rax
jle .L45 jle .L45
ALIGN_3 ALIGN_3

View File

@ -887,6 +887,10 @@
.L50: .L50:
movq N, %rax movq N, %rax
cmpq $0, INCX
je .L56
cmpq $0, INCY
je .L56
sarq $2, %rax sarq $2, %rax
jle .L55 jle .L55
ALIGN_3 ALIGN_3

View File

@ -2893,6 +2893,12 @@
unpcklps %xmm13, %xmm15 unpcklps %xmm13, %xmm15
#endif #endif
//If incx==0 || incy==0, skip the unrolled loops and jump to the element-by-element loop at .L200.
cmpq $0, INCX
je .L200
cmpq $0, INCY
je .L200
movq Y, YY movq Y, YY
movq M, %rax movq M, %rax
@ -3105,6 +3111,40 @@
addps %xmm1, %xmm8 addps %xmm1, %xmm8
movsd %xmm8, (Y) movsd %xmm8, (Y)
jmp .L999
ALIGN_3
// Element-by-element path, entered when INCX or INCY is zero.
// Handles one 64-bit element per iteration, M iterations in total.
.L200:
movq M, %rax
cmpq $0, %rax
// Nothing to do when M <= 0.
jle .L999
ALIGN_3
.L201:
// Load 64 bits (two packed singles) from X, then step X by INCX.
movsd (X), %xmm0
addq INCX, X
#ifdef HAVE_SSE3
// xmm1 = odd lanes duplicated, xmm0 = even lanes duplicated.
movshdup %xmm0, %xmm1
movsldup %xmm0, %xmm0
#else
// Same duplication without SSE3: 0xf5 selects lanes 1,1,3,3 and 0xa0 selects lanes 0,0,2,2.
pshufd $0xf5, %xmm0, %xmm1
shufps $0xa0, %xmm0, %xmm0
#endif
// NOTE(review): xmm14/xmm15 presumably hold the alpha vectors prepared before the jump to .L200 -- confirm.
mulps %xmm14, %xmm0
mulps %xmm15, %xmm1
// Add both products to the 64-bit element at Y and store it back, then step Y by INCY.
movsd (Y), %xmm8
addps %xmm0, %xmm8
addps %xmm1, %xmm8
movsd %xmm8, (Y)
addq INCY, Y
// Count down; loop while the remaining count is positive.
decq %rax
jg .L201
ALIGN_3 ALIGN_3
.L999: .L999:

View File

@ -1416,6 +1416,12 @@
movq Y, YY movq Y, YY
movq M, %rax movq M, %rax
//If incx==0 || incy==0, skip the unrolled loop and jump to the single-element code at .L58.
cmpq $0, INCX
je .L58
cmpq $0, INCY
je .L58
sarq $3, %rax sarq $3, %rax
jle .L55 jle .L55
@ -1769,6 +1775,7 @@
andq $1, %rax andq $1, %rax
jle .L999 jle .L999
.L58:
MOVDDUP( 0 * SIZE, X, %xmm0) MOVDDUP( 0 * SIZE, X, %xmm0)
MOVDDUP( 1 * SIZE, X, %xmm1) MOVDDUP( 1 * SIZE, X, %xmm1)
@ -1781,6 +1788,9 @@
movlpd %xmm8, 0 * SIZE(YY) movlpd %xmm8, 0 * SIZE(YY)
movhpd %xmm8, 1 * SIZE(YY) movhpd %xmm8, 1 * SIZE(YY)
decq %rax
jg .L58
ALIGN_3 ALIGN_3
.L999: .L999:

View File

@ -1523,6 +1523,10 @@
.L50: .L50:
movq N, %rax movq N, %rax
cmpq $0, INCX
je .L56
cmpq $0, INCY
je .L56
sarq $2, %rax sarq $2, %rax
jle .L55 jle .L55
ALIGN_3 ALIGN_3

View File

@ -138,7 +138,8 @@ DBLASOBJS += \
dpotf2f.$(SUFFIX) dpotrff.$(SUFFIX) dtrti2f.$(SUFFIX) dtrtrif.$(SUFFIX) \ dpotf2f.$(SUFFIX) dpotrff.$(SUFFIX) dtrti2f.$(SUFFIX) dtrtrif.$(SUFFIX) \
dlaswpf.$(SUFFIX) dgetrsf.$(SUFFIX) dgesvf.$(SUFFIX) dpotrif.$(SUFFIX) \ dlaswpf.$(SUFFIX) dgetrsf.$(SUFFIX) dgesvf.$(SUFFIX) dpotrif.$(SUFFIX) \
QBLASOBJS += \ QBLASOBJS +=
# \
qgetf2f.$(SUFFIX) qgetrff.$(SUFFIX) qlauu2f.$(SUFFIX) qlauumf.$(SUFFIX) \ qgetf2f.$(SUFFIX) qgetrff.$(SUFFIX) qlauu2f.$(SUFFIX) qlauumf.$(SUFFIX) \
qpotf2f.$(SUFFIX) qpotrff.$(SUFFIX) qtrti2f.$(SUFFIX) qtrtrif.$(SUFFIX) \ qpotf2f.$(SUFFIX) qpotrff.$(SUFFIX) qtrti2f.$(SUFFIX) qtrtrif.$(SUFFIX) \
qlaswpf.$(SUFFIX) qgetrsf.$(SUFFIX) qgesvf.$(SUFFIX) qpotrif.$(SUFFIX) \ qlaswpf.$(SUFFIX) qgetrsf.$(SUFFIX) qgesvf.$(SUFFIX) qpotrif.$(SUFFIX) \
@ -153,7 +154,8 @@ ZBLASOBJS += \
zpotf2f.$(SUFFIX) zpotrff.$(SUFFIX) ztrti2f.$(SUFFIX) ztrtrif.$(SUFFIX) \ zpotf2f.$(SUFFIX) zpotrff.$(SUFFIX) ztrti2f.$(SUFFIX) ztrtrif.$(SUFFIX) \
zlaswpf.$(SUFFIX) zgetrsf.$(SUFFIX) zgesvf.$(SUFFIX) zpotrif.$(SUFFIX) \ zlaswpf.$(SUFFIX) zgetrsf.$(SUFFIX) zgesvf.$(SUFFIX) zpotrif.$(SUFFIX) \
XBLASOBJS += \ XBLASOBJS +=
# \
xgetf2f.$(SUFFIX) xgetrff.$(SUFFIX) xlauu2f.$(SUFFIX) xlauumf.$(SUFFIX) \ xgetf2f.$(SUFFIX) xgetrff.$(SUFFIX) xlauu2f.$(SUFFIX) xlauumf.$(SUFFIX) \
xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \ xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \
xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \ xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \

View File

@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system
TARGET=openblas_utest TARGET=openblas_utest
CUNIT_LIB=/usr/local/lib/libcunit.a CUNIT_LIB=/usr/local/lib/libcunit.a
OBJS=main.o test_rot.o OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o
all : run_test all : run_test
$(TARGET): $(OBJS) $(TARGET): $(OBJS)
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
run_test: $(TARGET) run_test: $(TARGET)
./$(TARGET) ./$(TARGET)

View File

@ -36,9 +36,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <common.h> #include <common.h>
#define CHECK_EPS 0.0002 #define CHECK_EPS 0.00002
//Testcase list //Testcase list
void test_drot_incx_0(void); void test_drot_inc_0(void);
void test_srot_inc_0(void);
void test_zdrot_inc_0(void);
void test_csrot_inc_0(void);
void test_dswap_inc_0(void);
void test_zswap_inc_0(void);
void test_sswap_inc_0(void);
void test_cswap_inc_0(void);
void test_daxpy_inc_0(void);
void test_zaxpy_inc_0(void);
void test_saxpy_inc_0(void);
void test_caxpy_inc_0(void);
void test_zdotu_n_1(void);
#endif #endif

View File

@ -33,12 +33,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "common_utest.h" #include "common_utest.h"
#include <CUnit/Basic.h> #include <CUnit/Basic.h>
CU_TestInfo test_level1[]={ CU_TestInfo test_level1[]={
{"Testing drot when incx & incy == 0",test_drot_incx_0}, {"Testing srot when incx || incy == 0",test_srot_inc_0},
{"Testing drot when incx || incy == 0",test_drot_inc_0},
{"Testing csrot when incx || incy == 0",test_csrot_inc_0},
{"Testing zdrot when incx || incy == 0",test_zdrot_inc_0},
{"Testing sswap with incx || incy == 0",test_sswap_inc_0},
{"Testing dswap with incx || incy == 0",test_dswap_inc_0},
{"Testing cswap with incx || incy == 0",test_cswap_inc_0},
{"Testing zswap with incx || incy == 0",test_zswap_inc_0},
{"Testing saxpy with incx || incy == 0",test_saxpy_inc_0},
{"Testing daxpy with incx || incy == 0",test_daxpy_inc_0},
{"Testing caxpy with incx || incy == 0",test_caxpy_inc_0},
{"Testing zaxpy with incx || incy == 0",test_zaxpy_inc_0},
{"Testing zdotu with n == 1",test_zdotu_n_1},
CU_TEST_INFO_NULL, CU_TEST_INFO_NULL,
}; };
@ -64,6 +78,8 @@ int main()
printf("Seting OK\n");
fflush(stdout);
/* Run all tests using the CUnit Basic interface */ /* Run all tests using the CUnit Basic interface */
CU_basic_set_mode(CU_BRM_VERBOSE); CU_basic_set_mode(CU_BRM_VERBOSE);

117
utest/test_axpy.c Normal file
View File

@ -0,0 +1,117 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include "common_utest.h"
// daxpy with incx == 0 and incy == 0: the optimized routine and the
// reference routine are run on identical inputs and both vectors must
// match afterwards (within CHECK_EPS).
// NOTE(review): the BLASFUNC_REF prototypes take blasint *, while plain
// int is passed here -- confirm this holds when blasint is wider than int.
void test_daxpy_inc_0(void)
{
	double x1[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	double y1[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};
	double x2[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	double y2[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};
	double alpha = 0.25;
	int len = 8, stride_x = 0, stride_y = 0;
	int i;

	// Optimized implementation works on x1/y1 ...
	BLASFUNC(daxpy)(&len, &alpha, x1, &stride_x, y1, &stride_y);
	// ... reference implementation works on the untouched copies x2/y2.
	BLASFUNC_REF(daxpy)(&len, &alpha, x2, &stride_x, y2, &stride_y);

	i = 0;
	while (i < len) {
		CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
		CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
		i++;
	}
}
// zaxpy with incx == 0 and incy == 0: compares the optimized routine
// against the reference routine on identical inputs. The arrays hold
// 4 complex values stored as 8 doubles, so 2*len doubles are compared.
// NOTE(review): the BLASFUNC_REF prototypes take blasint *, while plain
// int is passed here -- confirm this holds when blasint is wider than int.
void test_zaxpy_inc_0(void)
{
	double x1[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	double y1[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};
	double x2[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	double y2[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};
	double alpha[2] = {0.25, 0.5};
	int len = 4, stride_x = 0, stride_y = 0;
	int i;

	// Optimized implementation works on x1/y1 ...
	BLASFUNC(zaxpy)(&len, alpha, x1, &stride_x, y1, &stride_y);
	// ... reference implementation works on the untouched copies x2/y2.
	BLASFUNC_REF(zaxpy)(&len, alpha, x2, &stride_x, y2, &stride_y);

	i = 0;
	while (i < 2*len) {
		CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
		CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
		i++;
	}
}
// saxpy with incx == 0 and incy == 0: the optimized routine and the
// reference routine are run on identical inputs and both vectors must
// match afterwards (within CHECK_EPS).
// NOTE(review): the BLASFUNC_REF prototypes take blasint *, while plain
// int is passed here -- confirm this holds when blasint is wider than int.
void test_saxpy_inc_0(void)
{
	float x1[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	float y1[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};
	float x2[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	float y2[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};
	float alpha = 0.25;
	int len = 8, stride_x = 0, stride_y = 0;
	int i;

	// Optimized implementation works on x1/y1 ...
	BLASFUNC(saxpy)(&len, &alpha, x1, &stride_x, y1, &stride_y);
	// ... reference implementation works on the untouched copies x2/y2.
	BLASFUNC_REF(saxpy)(&len, &alpha, x2, &stride_x, y2, &stride_y);

	i = 0;
	while (i < len) {
		CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
		CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
		i++;
	}
}
// caxpy with incx == 0 and incy == 0: compares the optimized routine
// against the reference routine on identical inputs. The arrays hold
// 4 complex values stored as 8 floats, so 2*len floats are compared.
// NOTE(review): the BLASFUNC_REF prototypes take blasint *, while plain
// int is passed here -- confirm this holds when blasint is wider than int.
void test_caxpy_inc_0(void)
{
	float x1[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	float y1[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};
	float x2[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	float y2[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};
	float alpha[2] = {0.25, 0.5};
	int len = 4, stride_x = 0, stride_y = 0;
	int i;

	// Optimized implementation works on x1/y1 ...
	BLASFUNC(caxpy)(&len, alpha, x1, &stride_x, y1, &stride_y);
	// ... reference implementation works on the untouched copies x2/y2.
	BLASFUNC_REF(caxpy)(&len, alpha, x2, &stride_x, y2, &stride_y);

	i = 0;
	while (i < 2*len) {
		CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
		CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
		i++;
	}
}

56
utest/test_dotu.c Normal file
View File

@ -0,0 +1,56 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include "common_utest.h"
#include <complex.h>
// zdotu on a single complex element (n == 1, unit strides): the value
// returned by the optimized routine must match the value returned by the
// reference routine in both real and imaginary parts (within CHECK_EPS).
// NOTE(review): the BLASFUNC_REF prototypes take blasint *, while plain
// int is passed here -- confirm this holds when blasint is wider than int.
void test_zdotu_n_1(void)
{
	double x1[] = {1.0, 1.0};
	double y1[] = {1.0, 2.0};
	double x2[] = {1.0, 1.0};
	double y2[] = {1.0, 2.0};
	int len = 1, stride_x = 1, stride_y = 1;
	double _Complex result1 = 0.0;
	double _Complex result2 = 0.0;

	// Optimized implementation first, then the reference implementation.
	result1 = BLASFUNC(zdotu)(&len, x1, &stride_x, y1, &stride_y);
	result2 = BLASFUNC_REF(zdotu)(&len, x2, &stride_x, y2, &stride_y);

	CU_ASSERT_DOUBLE_EQUAL(creal(result1), creal(result2), CHECK_EPS);
	CU_ASSERT_DOUBLE_EQUAL(cimag(result1), cimag(result2), CHECK_EPS);
}

View File

@ -32,9 +32,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common_utest.h" #include "common_utest.h"
void test_drot_incx_0(void) void test_drot_inc_0(void)
{ {
int i; int i=0;
int N=4,incX=0,incY=0; int N=4,incX=0,incY=0;
double c=0.25,s=0.5; double c=0.25,s=0.5;
double x1[]={1.0,3.0,5.0,7.0}; double x1[]={1.0,3.0,5.0,7.0};
@ -43,12 +43,75 @@ void test_drot_incx_0(void)
double y2[]={2.0,4.0,6.0,8.0}; double y2[]={2.0,4.0,6.0,8.0};
//OpenBLAS //OpenBLAS
drot_(&N,x1,&incX,y1,&incY,&c,&s); BLASFUNC(drot)(&N,x1,&incX,y1,&incY,&c,&s);
//reference //reference
drotf_(&N,x2,&incX,y2,&incY,&c,&s); BLASFUNC_REF(drot)(&N,x2,&incX,y2,&incY,&c,&s);
for(i=0; i<N; i++){ for(i=0; i<N; i++){
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
} }
} }
/*
 * Runs BLASFUNC(zdrot) and BLASFUNC_REF(zdrot) with N = 4 and both
 * increments set to 0 on identical 8-double x/y buffers, using c = 0.25 and
 * s = 0.5, then asserts that the first 2*N entries of the resulting x and y
 * buffers match between the two implementations to within CHECK_EPS.
 */
void test_zdrot_inc_0(void)
{
    int i = 0;
    int n = 4;
    int inc_x = 0;
    int inc_y = 0;
    double c_coef = 0.25;
    double s_coef = 0.5;

    // Buffers handed to the implementation under test.
    double x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0 };
    double y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0 };

    // Identical buffers handed to the reference implementation.
    double x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0 };
    double y2[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0 };

    BLASFUNC(zdrot)(&n, x1, &inc_x, y1, &inc_y, &c_coef, &s_coef);
    BLASFUNC_REF(zdrot)(&n, x2, &inc_x, y2, &inc_y, &c_coef, &s_coef);

    // Two doubles are compared per element, hence the 2*n bound.
    i = 0;
    while (i < 2 * n) {
        CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
        CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
        i++;
    }
}
/*
 * Runs BLASFUNC(srot) and BLASFUNC_REF(srot) with N = 4 and both increments
 * set to 0 on identical 4-float x/y buffers, using c = 0.25 and s = 0.5,
 * then asserts that the first N entries of the resulting x and y buffers
 * match between the two implementations to within CHECK_EPS.
 */
void test_srot_inc_0(void)
{
    int i = 0;
    int n = 4;
    int inc_x = 0;
    int inc_y = 0;
    float c_coef = 0.25f;
    float s_coef = 0.5f;

    // Buffers handed to the implementation under test.
    float x1[] = { 1.0f, 3.0f, 5.0f, 7.0f };
    float y1[] = { 2.0f, 4.0f, 6.0f, 8.0f };

    // Identical buffers handed to the reference implementation.
    float x2[] = { 1.0f, 3.0f, 5.0f, 7.0f };
    float y2[] = { 2.0f, 4.0f, 6.0f, 8.0f };

    BLASFUNC(srot)(&n, x1, &inc_x, y1, &inc_y, &c_coef, &s_coef);
    BLASFUNC_REF(srot)(&n, x2, &inc_x, y2, &inc_y, &c_coef, &s_coef);

    i = 0;
    while (i < n) {
        CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
        CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
        i++;
    }
}
/*
 * Runs BLASFUNC(csrot) and BLASFUNC_REF(csrot) with N = 4 and both
 * increments set to 0 on identical 8-float x/y buffers, using c = 0.25 and
 * s = 0.5, then asserts that the first 2*N entries of the resulting x and y
 * buffers match between the two implementations to within CHECK_EPS.
 */
void test_csrot_inc_0(void)
{
    int i = 0;
    int n = 4;
    int inc_x = 0;
    int inc_y = 0;
    float c_coef = 0.25f;
    float s_coef = 0.5f;

    // Buffers handed to the implementation under test.
    float x1[] = { 1.0f, 3.0f, 5.0f, 7.0f, 1.0f, 3.0f, 5.0f, 7.0f };
    float y1[] = { 2.0f, 4.0f, 6.0f, 8.0f, 2.0f, 4.0f, 6.0f, 8.0f };

    // Identical buffers handed to the reference implementation.
    float x2[] = { 1.0f, 3.0f, 5.0f, 7.0f, 1.0f, 3.0f, 5.0f, 7.0f };
    float y2[] = { 2.0f, 4.0f, 6.0f, 8.0f, 2.0f, 4.0f, 6.0f, 8.0f };

    BLASFUNC(csrot)(&n, x1, &inc_x, y1, &inc_y, &c_coef, &s_coef);
    BLASFUNC_REF(csrot)(&n, x2, &inc_x, y2, &inc_y, &c_coef, &s_coef);

    // Two floats are compared per element, hence the 2*n bound.
    i = 0;
    while (i < 2 * n) {
        CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
        CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
        i++;
    }
}

113
utest/test_swap.c Normal file
View File

@ -0,0 +1,113 @@
/*****************************************************************************
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include "common_utest.h"
/*
 * Runs BLASFUNC(dswap) and BLASFUNC_REF(dswap) with N = 4 and both
 * increments set to 0 on identical 4-double x/y buffers, then asserts that
 * the first N entries of the resulting x and y buffers match between the two
 * implementations to within CHECK_EPS.
 */
void test_dswap_inc_0(void)
{
    int i = 0;
    int n = 4;
    int inc_x = 0;
    int inc_y = 0;

    // Buffers handed to the implementation under test.
    double x1[] = { 1.0, 3.0, 5.0, 7.0 };
    double y1[] = { 2.0, 4.0, 6.0, 8.0 };

    // Identical buffers handed to the reference implementation.
    double x2[] = { 1.0, 3.0, 5.0, 7.0 };
    double y2[] = { 2.0, 4.0, 6.0, 8.0 };

    BLASFUNC(dswap)(&n, x1, &inc_x, y1, &inc_y);
    BLASFUNC_REF(dswap)(&n, x2, &inc_x, y2, &inc_y);

    i = 0;
    while (i < n) {
        CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
        CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
        i++;
    }
}
/*
 * Runs BLASFUNC(zswap) and BLASFUNC_REF(zswap) with N = 4 and both
 * increments set to 0 on identical 8-double x/y buffers, then asserts that
 * the first 2*N entries of the resulting x and y buffers match between the
 * two implementations to within CHECK_EPS.
 */
void test_zswap_inc_0(void)
{
    int i = 0;
    int n = 4;
    int inc_x = 0;
    int inc_y = 0;

    // Buffers handed to the implementation under test.
    double x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0 };
    double y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0 };

    // Identical buffers handed to the reference implementation.
    double x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0 };
    double y2[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0 };

    BLASFUNC(zswap)(&n, x1, &inc_x, y1, &inc_y);
    BLASFUNC_REF(zswap)(&n, x2, &inc_x, y2, &inc_y);

    // Two doubles are compared per element, hence the 2*n bound.
    i = 0;
    while (i < 2 * n) {
        CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
        CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
        i++;
    }
}
/*
 * Runs BLASFUNC(sswap) and BLASFUNC_REF(sswap) with N = 4 and both
 * increments set to 0 on identical 4-float x/y buffers, then asserts that
 * the first N entries of the resulting x and y buffers match between the two
 * implementations to within CHECK_EPS.
 */
void test_sswap_inc_0(void)
{
    int i = 0;
    int n = 4;
    int inc_x = 0;
    int inc_y = 0;

    // Buffers handed to the implementation under test.
    float x1[] = { 1.0f, 3.0f, 5.0f, 7.0f };
    float y1[] = { 2.0f, 4.0f, 6.0f, 8.0f };

    // Identical buffers handed to the reference implementation.
    float x2[] = { 1.0f, 3.0f, 5.0f, 7.0f };
    float y2[] = { 2.0f, 4.0f, 6.0f, 8.0f };

    BLASFUNC(sswap)(&n, x1, &inc_x, y1, &inc_y);
    BLASFUNC_REF(sswap)(&n, x2, &inc_x, y2, &inc_y);

    i = 0;
    while (i < n) {
        CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
        CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
        i++;
    }
}
/*
 * Runs BLASFUNC(cswap) and BLASFUNC_REF(cswap) with N = 4 and both
 * increments set to 0 on identical 8-float x/y buffers, then asserts that
 * the first 2*N entries of the resulting x and y buffers match between the
 * two implementations to within CHECK_EPS.
 */
void test_cswap_inc_0(void)
{
    int i = 0;
    int n = 4;
    int inc_x = 0;
    int inc_y = 0;

    // Buffers handed to the implementation under test.
    float x1[] = { 1.0f, 3.0f, 5.0f, 7.0f, 1.0f, 3.0f, 5.0f, 7.0f };
    float y1[] = { 2.0f, 4.0f, 6.0f, 8.0f, 2.0f, 4.0f, 6.0f, 8.0f };

    // Identical buffers handed to the reference implementation.
    float x2[] = { 1.0f, 3.0f, 5.0f, 7.0f, 1.0f, 3.0f, 5.0f, 7.0f };
    float y2[] = { 2.0f, 4.0f, 6.0f, 8.0f, 2.0f, 4.0f, 6.0f, 8.0f };

    BLASFUNC(cswap)(&n, x1, &inc_x, y1, &inc_y);
    BLASFUNC_REF(cswap)(&n, x2, &inc_x, y2, &inc_y);

    // Two floats are compared per element, hence the 2*n bound.
    i = 0;
    while (i < 2 * n) {
        CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
        CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
        i++;
    }
}