Merge remote branch 'origin/x86' into loongson3a
This commit is contained in:
commit
6eb02bbb9c
|
@ -0,0 +1,27 @@
|
||||||
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.1 (in development)
|
||||||
|
26-Feb-2011
|
||||||
|
|
||||||
|
common:
|
||||||
|
* Fixed random SEGFAULT when nodemask==NULL on Linux kernels above 2.6.34.
|
||||||
|
Thanks to Mr. Ei-ji Nakama for providing this patch. (Refs issue #12 on github)
|
||||||
|
* Added DEBUG=1 rule in Makefile.rule to build debug version.
|
||||||
|
* Disable compiling quad precision in reference BLAS library(netlib BLAS).
|
||||||
|
* Added unit testcases in utest/ subdir. Used CUnit framework.
|
||||||
|
* Supported OPENBLAS_* & GOTO_* environment variables (Please see README)
|
||||||
|
* Imported GotoBLAS2 1.13 BSD version
|
||||||
|
|
||||||
|
x86/x86 64:
|
||||||
|
* Modified ?axpy functions to return same netlib BLAS results
|
||||||
|
when incx==0 or incy==0 (Refs issue #7 on github)
|
||||||
|
* Modified ?swap functions to return same netlib BLAS results
|
||||||
|
when incx==0 or incy==0 (Refs issue #6 on github)
|
||||||
|
* Modified ?rot functions to return same netlib BLAS results
|
||||||
|
when incx==0 or incy==0 (Refs issue #4 on github)
|
||||||
|
* Detect Intel Westmere to use Nehalem codes.
|
||||||
|
* Fixed a typo bug about compiling dynamic ARCH library.
|
||||||
|
MIPS64:
|
||||||
|
* Improve daxpy performance on ICT Loongson 3A.
|
||||||
|
* Supported ICT Loongson 3A CPU (Refs issue #1 on github)
|
||||||
|
====================================================================
|
|
@ -70,7 +70,7 @@ VERSION = 0.1
|
||||||
# time out to improve performance. This number should be from 4 to 30
|
# time out to improve performance. This number should be from 4 to 30
|
||||||
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
||||||
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
||||||
# system). Also you can control this mumber by GOTO_THREAD_TIMEOUT
|
# system). You can also control this number with THREAD_TIMEOUT
|
||||||
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
||||||
|
|
||||||
# Using special device driver for mapping physically contigous memory
|
# Using special device driver for mapping physically contigous memory
|
||||||
|
@ -89,7 +89,13 @@ VERSION = 0.1
|
||||||
# UTEST_CHECK = 1
|
# UTEST_CHECK = 1
|
||||||
|
|
||||||
# Common Optimization Flag; -O2 is enough.
|
# Common Optimization Flag; -O2 is enough.
|
||||||
|
# DEBUG = 1
|
||||||
|
|
||||||
|
ifeq ($(DEBUG), 1)
|
||||||
|
COMMON_OPT += -g -DDEBUG
|
||||||
|
else
|
||||||
COMMON_OPT += -O2
|
COMMON_OPT += -O2
|
||||||
|
endif
|
||||||
|
|
||||||
# Profiling flags
|
# Profiling flags
|
||||||
COMMON_PROF = -pg
|
COMMON_PROF = -pg
|
||||||
|
|
28
README
28
README
|
@ -4,6 +4,8 @@ OpenBLAS Readme
|
||||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an open source project supported by Lab of Parallel Software and Computational Science, ISCAS.(http://www.rdcps.ac.cn)
|
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an open source project supported by Lab of Parallel Software and Computational Science, ISCAS.(http://www.rdcps.ac.cn)
|
||||||
|
|
||||||
2.Intallation
|
2.Intallation
|
||||||
|
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||||
|
Or,
|
||||||
check out codes from git://github.com/xianyi/OpenBLAS.git
|
check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||||
1)Normal compile
|
1)Normal compile
|
||||||
Please read GotoBLAS_02QuickInstall.txt or type "make"
|
Please read GotoBLAS_02QuickInstall.txt or type "make"
|
||||||
|
@ -15,23 +17,43 @@ examples:
|
||||||
On X86 box, compile this library for loongson3a CPU.
|
On X86 box, compile this library for loongson3a CPU.
|
||||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||||
|
|
||||||
|
3)Debug version
|
||||||
|
make DEBUG=1
|
||||||
|
|
||||||
3.Support CPU & OS
|
3.Support CPU & OS
|
||||||
Please read GotoBLAS_01Readme.txt
|
Please read GotoBLAS_01Readme.txt
|
||||||
|
|
||||||
|
Additionally supported CPUs:
|
||||||
|
x86_64:
|
||||||
|
Intel Xeon 56xx (Westmere) //Used GotoBLAS2 Nehalem codes.
|
||||||
|
MIPS64:
|
||||||
|
ICT Loongson 3A //The initial version used GotoBLAS2 MIPS64 kernels. Thus, the performance is not good.
|
||||||
|
|
||||||
4.Usages
|
4.Usages
|
||||||
Link with libopenblas.a or -lopenblas for shared library.
|
Link with libopenblas.a or -lopenblas for shared library.
|
||||||
|
|
||||||
Set the number of threads. for example,
|
4.1 Set the number of threads with environment variables. For example,
|
||||||
export OPENBLAS_NUM_THREADS=4
|
export OPENBLAS_NUM_THREADS=4
|
||||||
or
|
or
|
||||||
|
export GOTO_NUM_THREADS=4
|
||||||
|
or
|
||||||
export OMP_NUM_THREADS=4
|
export OMP_NUM_THREADS=4
|
||||||
OPENBLAS_NUM_THREAD is prior to OMP_NUM_THREADS.
|
|
||||||
|
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||||
|
|
||||||
|
4.2 Set the number of threads by calling functions. For example,
|
||||||
|
void goto_set_num_threads(int num_threads);
|
||||||
|
or
|
||||||
|
void openblas_set_num_threads(int num_threads);
|
||||||
|
|
||||||
5.Report Bugs
|
5.Report Bugs
|
||||||
Please add a issue in https://github.com/xianyi/OpenBLAS/issues
|
Please add a issue in https://github.com/xianyi/OpenBLAS/issues
|
||||||
|
|
||||||
6.To-Do List:
|
6.To-Do List:
|
||||||
Support ICT Loongson 3A CPU
|
Optimization on ICT Loongson 3A CPU
|
||||||
|
|
||||||
7.Contact
|
7.Contact
|
||||||
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
||||||
|
|
||||||
|
8.ChangeLog
|
||||||
|
Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version.
|
|
@ -68,8 +68,9 @@ extern long int syscall (long int __sysno, ...);
|
||||||
static inline int my_mbind(void *addr, unsigned long len, int mode,
|
static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||||
unsigned long *nodemask, unsigned long maxnode,
|
unsigned long *nodemask, unsigned long maxnode,
|
||||||
unsigned flags) {
|
unsigned flags) {
|
||||||
|
//Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34
|
||||||
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
|
unsigned long null_nodemask=0;
|
||||||
|
return syscall(SYS_mbind, addr, len, mode, &null_nodemask, maxnode, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
||||||
|
|
|
@ -43,4 +43,21 @@ void BLASFUNC_REF(csrot) (blasint *, float *, blasint *, float *, blasint *,
|
||||||
void BLASFUNC_REF(zdrot) (blasint *, double *, blasint *, double *, blasint *, double *, double *);
|
void BLASFUNC_REF(zdrot) (blasint *, double *, blasint *, double *, blasint *, double *, double *);
|
||||||
void BLASFUNC_REF(xqrot) (blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *);
|
void BLASFUNC_REF(xqrot) (blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *);
|
||||||
|
|
||||||
|
void BLASFUNC_REF(sswap) (blasint *, float *, blasint *, float *, blasint *);
|
||||||
|
void BLASFUNC_REF(dswap) (blasint *, double *, blasint *, double *, blasint *);
|
||||||
|
void BLASFUNC_REF(qswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||||
|
void BLASFUNC_REF(cswap) (blasint *, float *, blasint *, float *, blasint *);
|
||||||
|
void BLASFUNC_REF(zswap) (blasint *, double *, blasint *, double *, blasint *);
|
||||||
|
void BLASFUNC_REF(xswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||||
|
|
||||||
|
void BLASFUNC_REF(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||||
|
void BLASFUNC_REF(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||||
|
void BLASFUNC_REF(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||||
|
void BLASFUNC_REF(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||||
|
|
||||||
|
float _Complex BLASFUNC_REF(cdotu) (blasint *, float *, blasint *, float *, blasint *);
|
||||||
|
float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *, blasint *);
|
||||||
|
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *);
|
||||||
|
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
19
cpuid_x86.c
19
cpuid_x86.c
|
@ -972,8 +972,15 @@ int get_cpuname(void){
|
||||||
return CPUTYPE_ATOM;
|
return CPUTYPE_ATOM;
|
||||||
case 13:
|
case 13:
|
||||||
return CPUTYPE_DUNNINGTON;
|
return CPUTYPE_DUNNINGTON;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
switch (model) {
|
||||||
|
case 12:
|
||||||
|
//Xeon Processor 5600 (Westmere-EP)
|
||||||
|
return CPUTYPE_NEHALEM;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 0x7:
|
case 0x7:
|
||||||
|
@ -1289,8 +1296,16 @@ int get_coretype(void){
|
||||||
return CORE_ATOM;
|
return CORE_ATOM;
|
||||||
case 13:
|
case 13:
|
||||||
return CORE_DUNNINGTON;
|
return CORE_DUNNINGTON;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
switch (model) {
|
||||||
|
case 12:
|
||||||
|
//Xeon Processor 5600 (Westmere-EP)
|
||||||
|
return CORE_NEHALEM;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
case 15:
|
case 15:
|
||||||
if (model <= 0x2) return CORE_NORTHWOOD;
|
if (model <= 0x2) return CORE_NORTHWOOD;
|
||||||
|
|
|
@ -297,7 +297,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
printf("GEMM: SA .. %p SB .. %p\n", sa, sb);
|
printf("GEMM: SA .. %p SB .. %p\n", sa, sb);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef TIMING
|
||||||
innercost = 0;
|
innercost = 0;
|
||||||
outercost = 0;
|
outercost = 0;
|
||||||
kernelcost = 0;
|
kernelcost = 0;
|
||||||
|
|
|
@ -278,7 +278,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
// fprintf(stderr, "A = %p B = %p C = %p\n\tlda = %ld ldb = %ld ldc = %ld\n", a, b, c, lda, ldb, ldc);
|
// fprintf(stderr, "A = %p B = %p C = %p\n\tlda = %ld ldb = %ld ldc = %ld\n", a, b, c, lda, ldb, ldc);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef TIMING
|
||||||
innercost = 0;
|
innercost = 0;
|
||||||
outercost = 0;
|
outercost = 0;
|
||||||
kernelcost = 0;
|
kernelcost = 0;
|
||||||
|
|
|
@ -520,12 +520,21 @@ int blas_thread_init(void){
|
||||||
|
|
||||||
p = getenv("THREAD_TIMEOUT");
|
p = getenv("THREAD_TIMEOUT");
|
||||||
|
|
||||||
|
if (p) {
|
||||||
|
thread_timeout = atoi(p);
|
||||||
|
if (thread_timeout < 4) thread_timeout = 4;
|
||||||
|
if (thread_timeout > 30) thread_timeout = 30;
|
||||||
|
thread_timeout = (1 << thread_timeout);
|
||||||
|
}else{
|
||||||
|
p = getenv("GOTO_THREAD_TIMEOUT");
|
||||||
if (p) {
|
if (p) {
|
||||||
thread_timeout = atoi(p);
|
thread_timeout = atoi(p);
|
||||||
if (thread_timeout < 4) thread_timeout = 4;
|
if (thread_timeout < 4) thread_timeout = 4;
|
||||||
if (thread_timeout > 30) thread_timeout = 30;
|
if (thread_timeout > 30) thread_timeout = 30;
|
||||||
thread_timeout = (1 << thread_timeout);
|
thread_timeout = (1 << thread_timeout);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
for(i = 0; i < blas_num_threads - 1; i++){
|
for(i = 0; i < blas_num_threads - 1; i++){
|
||||||
|
|
||||||
|
@ -790,6 +799,11 @@ void goto_set_num_threads(int num_threads) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void openblas_set_num_threads(int num_threads) {
|
||||||
|
goto_set_num_threads(num_threads);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* Compatible function with pthread_create / join */
|
/* Compatible function with pthread_create / join */
|
||||||
|
|
||||||
int gotoblas_pthread(int numthreads, void *function, void *args, int stride) {
|
int gotoblas_pthread(int numthreads, void *function, void *args, int stride) {
|
||||||
|
|
|
@ -121,6 +121,11 @@ static gotoblas_t *get_coretype(void){
|
||||||
if ((model == 10) || (model == 11) || (model == 14) || (model == 15)) return &gotoblas_NEHALEM;
|
if ((model == 10) || (model == 11) || (model == 14) || (model == 15)) return &gotoblas_NEHALEM;
|
||||||
if (model == 12) return &gotoblas_ATOM;
|
if (model == 12) return &gotoblas_ATOM;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
//Intel Xeon Processor 5600 (Westmere-EP)
|
||||||
|
if (model == 12) return &gotoblas_NEHALEM;
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
case 0xf:
|
case 0xf:
|
||||||
if (model <= 0x2) return &gotoblas_NORTHWOOD;
|
if (model <= 0x2) return &gotoblas_NORTHWOOD;
|
||||||
|
|
|
@ -92,7 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SHARE_NAME "/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_map"
|
#define SHARE_NAME "/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_map"
|
||||||
#define NODE_DIR "/sys/devices/system/node"
|
#define NODE_DIR "/sys/devices/system/node"
|
||||||
|
|
||||||
#undef DEBUG
|
//#undef DEBUG
|
||||||
|
|
||||||
/* Private variables */
|
/* Private variables */
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -581,6 +581,7 @@ void gotoblas_affinity_init(void) {
|
||||||
numprocs = 0;
|
numprocs = 0;
|
||||||
#else
|
#else
|
||||||
numprocs = readenv("OPENBLAS_NUM_THREADS");
|
numprocs = readenv("OPENBLAS_NUM_THREADS");
|
||||||
|
if (numprocs == 0) numprocs = readenv("GOTO_NUM_THREADS");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS");
|
if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS");
|
||||||
|
@ -666,7 +667,7 @@ void gotoblas_affinity_init(void) {
|
||||||
|
|
||||||
setup_mempolicy();
|
setup_mempolicy();
|
||||||
|
|
||||||
if (readenv("OPENBLAS_MAIN_FREE")) {
|
if (readenv("OPENBLAS_MAIN_FREE") || readenv("GOTOBLAS_MAIN_FREE")) {
|
||||||
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -68,9 +68,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#undef DEBUG
|
//#undef DEBUG
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
#ifdef OS_WINDOWS
|
#ifdef OS_WINDOWS
|
||||||
#define ALLOC_WINDOWS
|
#define ALLOC_WINDOWS
|
||||||
|
@ -231,6 +232,13 @@ int blas_get_cpu_number(void){
|
||||||
p = getenv("OPENBLAS_NUM_THREADS");
|
p = getenv("OPENBLAS_NUM_THREADS");
|
||||||
if (p) blas_goto_num = atoi(p);
|
if (p) blas_goto_num = atoi(p);
|
||||||
if (blas_goto_num < 0) blas_goto_num = 0;
|
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||||
|
|
||||||
|
if (blas_goto_num == 0) {
|
||||||
|
p = getenv("GOTO_NUM_THREADS");
|
||||||
|
if (p) blas_goto_num = atoi(p);
|
||||||
|
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
blas_omp_num = 0;
|
blas_omp_num = 0;
|
||||||
|
@ -381,8 +389,20 @@ static void *alloc_mmap(void *address){
|
||||||
if (map_address != (void *)-1) {
|
if (map_address != (void *)-1) {
|
||||||
|
|
||||||
#ifdef OS_LINUX
|
#ifdef OS_LINUX
|
||||||
|
#ifdef DEBUG
|
||||||
|
int ret;
|
||||||
|
ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
|
||||||
|
if(ret==-1){
|
||||||
|
int errsv=errno;
|
||||||
|
perror("alloc_mmap:");
|
||||||
|
printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
|
my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
|
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
|
||||||
|
|
||||||
|
@ -979,7 +999,7 @@ void *blas_memory_alloc(int procpos){
|
||||||
memory[position].addr = map_address;
|
memory[position].addr = map_address;
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf(" Mapping Succeeded. %p(%d)\n", (void *)alloc_area[position], position);
|
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1010,7 +1030,7 @@ void *blas_memory_alloc(int procpos){
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("Mapped : %p %3d\n\n",
|
printf("Mapped : %p %3d\n\n",
|
||||||
(void *)alloc_area[position], position);
|
(void *)memory[position].addr, position);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return (void *)memory[position].addr;
|
return (void *)memory[position].addr;
|
||||||
|
@ -1053,7 +1073,7 @@ void blas_memory_free(void *free_area){
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
for (position = 0; position < NUM_BUFFERS; position++)
|
for (position = 0; position < NUM_BUFFERS; position++)
|
||||||
printf("%4ld %p : %d\n", position, alloc_area[position], alloc_used[position]);
|
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -81,6 +81,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
nthreads = num_cpu_avail(1);
|
nthreads = num_cpu_avail(1);
|
||||||
|
|
||||||
|
//disable multi-thread when incx==0 or incy==0
|
||||||
|
//In that case, the threads would be dependent.
|
||||||
|
if (incx == 0 || incy == 0)
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -79,6 +79,11 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
nthreads = num_cpu_avail(1);
|
nthreads = num_cpu_avail(1);
|
||||||
|
|
||||||
|
//disable multi-thread when incx==0 or incy==0
|
||||||
|
//In that case, the threads would be dependent.
|
||||||
|
if (incx == 0 || incy == 0)
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -83,6 +83,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
nthreads = num_cpu_avail(1);
|
nthreads = num_cpu_avail(1);
|
||||||
|
|
||||||
|
//disable multi-thread when incx==0 or incy==0
|
||||||
|
//In that case, the threads would be dependent.
|
||||||
|
if (incx == 0 || incy == 0)
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -80,6 +80,11 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
nthreads = num_cpu_avail(1);
|
nthreads = num_cpu_avail(1);
|
||||||
|
|
||||||
|
//disable multi-thread when incx==0 or incy==0
|
||||||
|
//In that case, the threads would be dependent.
|
||||||
|
if (incx == 0 || incy == 0)
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1440,6 +1440,12 @@
|
||||||
.L50:
|
.L50:
|
||||||
movl M, %eax
|
movl M, %eax
|
||||||
movl Y, YY
|
movl Y, YY
|
||||||
|
//If incx==0 || incy==0, avoid unloop.
|
||||||
|
cmpl $0, INCX
|
||||||
|
je .L56
|
||||||
|
cmpl $0, INCY
|
||||||
|
je .L56
|
||||||
|
|
||||||
sarl $3, %eax
|
sarl $3, %eax
|
||||||
jle .L55
|
jle .L55
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -698,6 +698,12 @@
|
||||||
.L40:
|
.L40:
|
||||||
movl Y, YY
|
movl Y, YY
|
||||||
movl M, %eax
|
movl M, %eax
|
||||||
|
//If incx==0 || incy==0, avoid unloop.
|
||||||
|
cmpl $0, INCX
|
||||||
|
je .L46
|
||||||
|
cmpl $0, INCY
|
||||||
|
je .L46
|
||||||
|
|
||||||
sarl $3, %eax
|
sarl $3, %eax
|
||||||
jle .L45
|
jle .L45
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -859,6 +859,10 @@
|
||||||
|
|
||||||
.L50:
|
.L50:
|
||||||
movl N, I
|
movl N, I
|
||||||
|
cmpl $0, INCX
|
||||||
|
je .L56
|
||||||
|
cmpl $0, INCY
|
||||||
|
je .L56
|
||||||
sarl $2, I
|
sarl $2, I
|
||||||
jle .L55
|
jle .L55
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -2857,6 +2857,11 @@
|
||||||
unpcklps ALPHA_I, ALPHA_R
|
unpcklps ALPHA_I, ALPHA_R
|
||||||
unpcklps %xmm5, ALPHA_I
|
unpcklps %xmm5, ALPHA_I
|
||||||
#endif
|
#endif
|
||||||
|
//If incx==0 || incy==0, avoid unloop and jump to end.
|
||||||
|
cmpl $0, INCX
|
||||||
|
je .L200
|
||||||
|
cmpl $0, INCY
|
||||||
|
je .L200
|
||||||
|
|
||||||
movl Y, YY
|
movl Y, YY
|
||||||
|
|
||||||
|
@ -3090,8 +3095,41 @@
|
||||||
addps %xmm1, %xmm4
|
addps %xmm1, %xmm4
|
||||||
|
|
||||||
movsd %xmm4, (Y)
|
movsd %xmm4, (Y)
|
||||||
|
jmp .L999
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
|
.L200:
|
||||||
|
movl M, %eax
|
||||||
|
cmpl $0, %eax
|
||||||
|
jle .L999
|
||||||
|
ALIGN_3
|
||||||
|
|
||||||
|
.L201:
|
||||||
|
movsd (X), %xmm0
|
||||||
|
|
||||||
|
#ifdef HAVE_SSE3
|
||||||
|
movshdup %xmm0, %xmm1
|
||||||
|
movsldup %xmm0, %xmm0
|
||||||
|
#else
|
||||||
|
movaps %xmm0, %xmm1
|
||||||
|
shufps $0xa0, %xmm0, %xmm0
|
||||||
|
shufps $0xf5, %xmm1, %xmm1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
mulps ALPHA_R, %xmm0
|
||||||
|
mulps ALPHA_I, %xmm1
|
||||||
|
|
||||||
|
movsd (Y), %xmm4
|
||||||
|
|
||||||
|
addps %xmm0, %xmm4
|
||||||
|
addps %xmm1, %xmm4
|
||||||
|
|
||||||
|
movsd %xmm4, (Y)
|
||||||
|
|
||||||
|
decl %eax
|
||||||
|
jg .L201
|
||||||
|
|
||||||
|
ALIGN_3
|
||||||
.L999:
|
.L999:
|
||||||
popl %ebp
|
popl %ebp
|
||||||
popl %ebx
|
popl %ebx
|
||||||
|
|
|
@ -1318,6 +1318,12 @@
|
||||||
|
|
||||||
movl Y, YY
|
movl Y, YY
|
||||||
movl M, %eax
|
movl M, %eax
|
||||||
|
//If incx==0 || incy==0, avoid unloop and jump to end.
|
||||||
|
cmpl $0, INCX
|
||||||
|
je .L58
|
||||||
|
cmpl $0, INCY
|
||||||
|
je .L58
|
||||||
|
|
||||||
sarl $2, %eax
|
sarl $2, %eax
|
||||||
jle .L55
|
jle .L55
|
||||||
|
|
||||||
|
@ -1498,6 +1504,7 @@
|
||||||
andl $1, %eax
|
andl $1, %eax
|
||||||
jle .L999
|
jle .L999
|
||||||
|
|
||||||
|
.L58:
|
||||||
MOVDDUP( 0 * SIZE, X, %xmm0)
|
MOVDDUP( 0 * SIZE, X, %xmm0)
|
||||||
MOVDDUP( 1 * SIZE, X, %xmm1)
|
MOVDDUP( 1 * SIZE, X, %xmm1)
|
||||||
|
|
||||||
|
@ -1510,6 +1517,10 @@
|
||||||
|
|
||||||
movlpd %xmm4, 0 * SIZE(YY)
|
movlpd %xmm4, 0 * SIZE(YY)
|
||||||
movhpd %xmm4, 1 * SIZE(YY)
|
movhpd %xmm4, 1 * SIZE(YY)
|
||||||
|
|
||||||
|
|
||||||
|
decl %eax
|
||||||
|
jg .L58
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
|
|
@ -1285,6 +1285,12 @@
|
||||||
|
|
||||||
.L50:
|
.L50:
|
||||||
movl N, I
|
movl N, I
|
||||||
|
//if incx ==0 || incy==0 jump to the tail
|
||||||
|
cmpl $0, INCX
|
||||||
|
je .L56
|
||||||
|
cmpl $0, INCY
|
||||||
|
je .L56
|
||||||
|
|
||||||
sarl $2, I
|
sarl $2, I
|
||||||
jle .L55
|
jle .L55
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -1463,6 +1463,12 @@
|
||||||
.L50:
|
.L50:
|
||||||
movq M, %rax
|
movq M, %rax
|
||||||
movq Y, YY
|
movq Y, YY
|
||||||
|
//If incx==0 || incy==0, avoid unloop.
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L56
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L56
|
||||||
|
|
||||||
sarq $3, %rax
|
sarq $3, %rax
|
||||||
jle .L55
|
jle .L55
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -805,6 +805,12 @@
|
||||||
.L40:
|
.L40:
|
||||||
movq Y, YY
|
movq Y, YY
|
||||||
movq M, %rax
|
movq M, %rax
|
||||||
|
//If incx==0 || incy==0, avoid unloop.
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L46
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L46
|
||||||
|
|
||||||
sarq $3, %rax
|
sarq $3, %rax
|
||||||
jle .L45
|
jle .L45
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -887,6 +887,10 @@
|
||||||
|
|
||||||
.L50:
|
.L50:
|
||||||
movq N, %rax
|
movq N, %rax
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L56
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L56
|
||||||
sarq $2, %rax
|
sarq $2, %rax
|
||||||
jle .L55
|
jle .L55
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -2893,6 +2893,12 @@
|
||||||
unpcklps %xmm13, %xmm15
|
unpcklps %xmm13, %xmm15
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
//If incx==0 || incy==0, avoid unloop and jump to end.
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L200
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L200
|
||||||
|
|
||||||
movq Y, YY
|
movq Y, YY
|
||||||
|
|
||||||
movq M, %rax
|
movq M, %rax
|
||||||
|
@ -3105,6 +3111,40 @@
|
||||||
addps %xmm1, %xmm8
|
addps %xmm1, %xmm8
|
||||||
|
|
||||||
movsd %xmm8, (Y)
|
movsd %xmm8, (Y)
|
||||||
|
jmp .L999
|
||||||
|
ALIGN_3
|
||||||
|
|
||||||
|
.L200:
|
||||||
|
movq M, %rax
|
||||||
|
cmpq $0, %rax
|
||||||
|
jle .L999
|
||||||
|
ALIGN_3
|
||||||
|
|
||||||
|
.L201:
|
||||||
|
movsd (X), %xmm0
|
||||||
|
addq INCX, X
|
||||||
|
|
||||||
|
#ifdef HAVE_SSE3
|
||||||
|
movshdup %xmm0, %xmm1
|
||||||
|
movsldup %xmm0, %xmm0
|
||||||
|
#else
|
||||||
|
pshufd $0xf5, %xmm0, %xmm1
|
||||||
|
shufps $0xa0, %xmm0, %xmm0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
mulps %xmm14, %xmm0
|
||||||
|
mulps %xmm15, %xmm1
|
||||||
|
|
||||||
|
movsd (Y), %xmm8
|
||||||
|
|
||||||
|
addps %xmm0, %xmm8
|
||||||
|
addps %xmm1, %xmm8
|
||||||
|
|
||||||
|
movsd %xmm8, (Y)
|
||||||
|
addq INCY, Y
|
||||||
|
|
||||||
|
decq %rax
|
||||||
|
jg .L201
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
|
|
@ -1416,6 +1416,12 @@
|
||||||
|
|
||||||
movq Y, YY
|
movq Y, YY
|
||||||
movq M, %rax
|
movq M, %rax
|
||||||
|
//If incx==0 || incy==0, avoid unloop and jump to end.
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L58
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L58
|
||||||
|
|
||||||
sarq $3, %rax
|
sarq $3, %rax
|
||||||
jle .L55
|
jle .L55
|
||||||
|
|
||||||
|
@ -1769,6 +1775,7 @@
|
||||||
andq $1, %rax
|
andq $1, %rax
|
||||||
jle .L999
|
jle .L999
|
||||||
|
|
||||||
|
.L58:
|
||||||
MOVDDUP( 0 * SIZE, X, %xmm0)
|
MOVDDUP( 0 * SIZE, X, %xmm0)
|
||||||
MOVDDUP( 1 * SIZE, X, %xmm1)
|
MOVDDUP( 1 * SIZE, X, %xmm1)
|
||||||
|
|
||||||
|
@ -1781,6 +1788,9 @@
|
||||||
|
|
||||||
movlpd %xmm8, 0 * SIZE(YY)
|
movlpd %xmm8, 0 * SIZE(YY)
|
||||||
movhpd %xmm8, 1 * SIZE(YY)
|
movhpd %xmm8, 1 * SIZE(YY)
|
||||||
|
|
||||||
|
decq %rax
|
||||||
|
jg .L58
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
|
|
|
@ -1523,6 +1523,10 @@
|
||||||
|
|
||||||
.L50:
|
.L50:
|
||||||
movq N, %rax
|
movq N, %rax
|
||||||
|
cmpq $0, INCX
|
||||||
|
je .L56
|
||||||
|
cmpq $0, INCY
|
||||||
|
je .L56
|
||||||
sarq $2, %rax
|
sarq $2, %rax
|
||||||
jle .L55
|
jle .L55
|
||||||
ALIGN_3
|
ALIGN_3
|
||||||
|
|
|
@ -138,7 +138,8 @@ DBLASOBJS += \
|
||||||
dpotf2f.$(SUFFIX) dpotrff.$(SUFFIX) dtrti2f.$(SUFFIX) dtrtrif.$(SUFFIX) \
|
dpotf2f.$(SUFFIX) dpotrff.$(SUFFIX) dtrti2f.$(SUFFIX) dtrtrif.$(SUFFIX) \
|
||||||
dlaswpf.$(SUFFIX) dgetrsf.$(SUFFIX) dgesvf.$(SUFFIX) dpotrif.$(SUFFIX) \
|
dlaswpf.$(SUFFIX) dgetrsf.$(SUFFIX) dgesvf.$(SUFFIX) dpotrif.$(SUFFIX) \
|
||||||
|
|
||||||
QBLASOBJS += \
|
QBLASOBJS +=
|
||||||
|
# \
|
||||||
qgetf2f.$(SUFFIX) qgetrff.$(SUFFIX) qlauu2f.$(SUFFIX) qlauumf.$(SUFFIX) \
|
qgetf2f.$(SUFFIX) qgetrff.$(SUFFIX) qlauu2f.$(SUFFIX) qlauumf.$(SUFFIX) \
|
||||||
qpotf2f.$(SUFFIX) qpotrff.$(SUFFIX) qtrti2f.$(SUFFIX) qtrtrif.$(SUFFIX) \
|
qpotf2f.$(SUFFIX) qpotrff.$(SUFFIX) qtrti2f.$(SUFFIX) qtrtrif.$(SUFFIX) \
|
||||||
qlaswpf.$(SUFFIX) qgetrsf.$(SUFFIX) qgesvf.$(SUFFIX) qpotrif.$(SUFFIX) \
|
qlaswpf.$(SUFFIX) qgetrsf.$(SUFFIX) qgesvf.$(SUFFIX) qpotrif.$(SUFFIX) \
|
||||||
|
@ -153,7 +154,8 @@ ZBLASOBJS += \
|
||||||
zpotf2f.$(SUFFIX) zpotrff.$(SUFFIX) ztrti2f.$(SUFFIX) ztrtrif.$(SUFFIX) \
|
zpotf2f.$(SUFFIX) zpotrff.$(SUFFIX) ztrti2f.$(SUFFIX) ztrtrif.$(SUFFIX) \
|
||||||
zlaswpf.$(SUFFIX) zgetrsf.$(SUFFIX) zgesvf.$(SUFFIX) zpotrif.$(SUFFIX) \
|
zlaswpf.$(SUFFIX) zgetrsf.$(SUFFIX) zgesvf.$(SUFFIX) zpotrif.$(SUFFIX) \
|
||||||
|
|
||||||
XBLASOBJS += \
|
XBLASOBJS +=
|
||||||
|
# \
|
||||||
xgetf2f.$(SUFFIX) xgetrff.$(SUFFIX) xlauu2f.$(SUFFIX) xlauumf.$(SUFFIX) \
|
xgetf2f.$(SUFFIX) xgetrff.$(SUFFIX) xlauu2f.$(SUFFIX) xlauumf.$(SUFFIX) \
|
||||||
xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \
|
xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \
|
||||||
xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \
|
xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \
|
||||||
|
|
|
@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system
|
||||||
TARGET=openblas_utest
|
TARGET=openblas_utest
|
||||||
CUNIT_LIB=/usr/local/lib/libcunit.a
|
CUNIT_LIB=/usr/local/lib/libcunit.a
|
||||||
|
|
||||||
OBJS=main.o test_rot.o
|
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o
|
||||||
|
|
||||||
all : run_test
|
all : run_test
|
||||||
|
|
||||||
$(TARGET): $(OBJS)
|
$(TARGET): $(OBJS)
|
||||||
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB)
|
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB)
|
||||||
|
|
||||||
run_test: $(TARGET)
|
run_test: $(TARGET)
|
||||||
./$(TARGET)
|
./$(TARGET)
|
||||||
|
|
|
@ -36,9 +36,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <common.h>
|
#include <common.h>
|
||||||
|
|
||||||
#define CHECK_EPS 0.0002
|
#define CHECK_EPS 0.00002
|
||||||
|
|
||||||
//Testcase list
|
//Testcase list
|
||||||
void test_drot_incx_0(void);
|
void test_drot_inc_0(void);
|
||||||
|
void test_srot_inc_0(void);
|
||||||
|
void test_zdrot_inc_0(void);
|
||||||
|
void test_csrot_inc_0(void);
|
||||||
|
|
||||||
|
void test_dswap_inc_0(void);
|
||||||
|
void test_zswap_inc_0(void);
|
||||||
|
void test_sswap_inc_0(void);
|
||||||
|
void test_cswap_inc_0(void);
|
||||||
|
|
||||||
|
void test_daxpy_inc_0(void);
|
||||||
|
void test_zaxpy_inc_0(void);
|
||||||
|
void test_saxpy_inc_0(void);
|
||||||
|
void test_caxpy_inc_0(void);
|
||||||
|
|
||||||
|
void test_zdotu_n_1(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
20
utest/main.c
20
utest/main.c
|
@ -33,12 +33,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
|
||||||
#include "common_utest.h"
|
#include "common_utest.h"
|
||||||
#include <CUnit/Basic.h>
|
#include <CUnit/Basic.h>
|
||||||
|
|
||||||
CU_TestInfo test_level1[]={
|
CU_TestInfo test_level1[]={
|
||||||
{"Testing drot when incx & incy == 0",test_drot_incx_0},
|
{"Testing srot when incx || incy == 0",test_srot_inc_0},
|
||||||
|
{"Testing drot when incx || incy == 0",test_drot_inc_0},
|
||||||
|
{"Testing csrot when incx || incy == 0",test_csrot_inc_0},
|
||||||
|
{"Testing zdrot when incx || incy == 0",test_zdrot_inc_0},
|
||||||
|
|
||||||
|
{"Testing sswap with incx || incy == 0",test_sswap_inc_0},
|
||||||
|
{"Testing dswap with incx || incy == 0",test_dswap_inc_0},
|
||||||
|
{"Testing cswap with incx || incy == 0",test_cswap_inc_0},
|
||||||
|
{"Testing zswap with incx || incy == 0",test_zswap_inc_0},
|
||||||
|
|
||||||
|
{"Testing saxpy with incx || incy == 0",test_saxpy_inc_0},
|
||||||
|
{"Testing daxpy with incx || incy == 0",test_daxpy_inc_0},
|
||||||
|
{"Testing caxpy with incx || incy == 0",test_caxpy_inc_0},
|
||||||
|
{"Testing zaxpy with incx || incy == 0",test_zaxpy_inc_0},
|
||||||
|
|
||||||
|
{"Testing zdotu with n == 1",test_zdotu_n_1},
|
||||||
CU_TEST_INFO_NULL,
|
CU_TEST_INFO_NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -64,6 +78,8 @@ int main()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
printf("Seting OK\n");
|
||||||
|
fflush(stdout);
|
||||||
|
|
||||||
/* Run all tests using the CUnit Basic interface */
|
/* Run all tests using the CUnit Basic interface */
|
||||||
CU_basic_set_mode(CU_BRM_VERBOSE);
|
CU_basic_set_mode(CU_BRM_VERBOSE);
|
||||||
|
|
|
@ -0,0 +1,117 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "common_utest.h"
|
||||||
|
|
||||||
|
void test_daxpy_inc_0(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int N=8,incX=0,incY=0;
|
||||||
|
double a=0.25;
|
||||||
|
double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(daxpy)(&N,&a,x1,&incX,y1,&incY);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(daxpy)(&N,&a,x2,&incX,y2,&incY);
|
||||||
|
|
||||||
|
for(i=0; i<N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_zaxpy_inc_0(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int N=4,incX=0,incY=0;
|
||||||
|
double a[2]={0.25,0.5};
|
||||||
|
double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(zaxpy)(&N,a,x1,&incX,y1,&incY);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(zaxpy)(&N,a,x2,&incX,y2,&incY);
|
||||||
|
|
||||||
|
for(i=0; i<2*N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_saxpy_inc_0(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int N=8,incX=0,incY=0;
|
||||||
|
float a=0.25;
|
||||||
|
float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(saxpy)(&N,&a,x1,&incX,y1,&incY);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(saxpy)(&N,&a,x2,&incX,y2,&incY);
|
||||||
|
|
||||||
|
for(i=0; i<N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_caxpy_inc_0(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int N=4,incX=0,incY=0;
|
||||||
|
float a[2]={0.25,0.5};
|
||||||
|
float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(caxpy)(&N,a,x1,&incX,y1,&incY);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(caxpy)(&N,a,x2,&incX,y2,&incY);
|
||||||
|
|
||||||
|
for(i=0; i<2*N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,56 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "common_utest.h"
|
||||||
|
#include <complex.h>
|
||||||
|
|
||||||
|
void test_zdotu_n_1(void)
|
||||||
|
{
|
||||||
|
int N=1,incX=1,incY=1;
|
||||||
|
double x1[]={1.0,1.0};
|
||||||
|
double y1[]={1.0,2.0};
|
||||||
|
double x2[]={1.0,1.0};
|
||||||
|
double y2[]={1.0,2.0};
|
||||||
|
double _Complex result1=0.0;
|
||||||
|
double _Complex result2=0.0;
|
||||||
|
//OpenBLAS
|
||||||
|
result1=BLASFUNC(zdotu)(&N,x1,&incX,y1,&incY);
|
||||||
|
//reference
|
||||||
|
result2=BLASFUNC_REF(zdotu)(&N,x2,&incX,y2,&incY);
|
||||||
|
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(creal(result1), creal(result2), CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(cimag(result1), cimag(result2), CHECK_EPS);
|
||||||
|
// printf("\%lf,%lf\n",creal(result1),cimag(result1));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,9 +32,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common_utest.h"
|
#include "common_utest.h"
|
||||||
|
|
||||||
void test_drot_incx_0(void)
|
void test_drot_inc_0(void)
|
||||||
{
|
{
|
||||||
int i;
|
int i=0;
|
||||||
int N=4,incX=0,incY=0;
|
int N=4,incX=0,incY=0;
|
||||||
double c=0.25,s=0.5;
|
double c=0.25,s=0.5;
|
||||||
double x1[]={1.0,3.0,5.0,7.0};
|
double x1[]={1.0,3.0,5.0,7.0};
|
||||||
|
@ -43,12 +43,75 @@ void test_drot_incx_0(void)
|
||||||
double y2[]={2.0,4.0,6.0,8.0};
|
double y2[]={2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
//OpenBLAS
|
//OpenBLAS
|
||||||
drot_(&N,x1,&incX,y1,&incY,&c,&s);
|
BLASFUNC(drot)(&N,x1,&incX,y1,&incY,&c,&s);
|
||||||
//reference
|
//reference
|
||||||
drotf_(&N,x2,&incX,y2,&incY,&c,&s);
|
BLASFUNC_REF(drot)(&N,x2,&incX,y2,&incY,&c,&s);
|
||||||
|
|
||||||
for(i=0; i<N; i++){
|
for(i=0; i<N; i++){
|
||||||
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test_zdrot_inc_0(void)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
int N=4,incX=0,incY=0;
|
||||||
|
double c=0.25,s=0.5;
|
||||||
|
double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(zdrot)(&N,x1,&incX,y1,&incY,&c,&s);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(zdrot)(&N,x2,&incX,y2,&incY,&c,&s);
|
||||||
|
|
||||||
|
for(i=0; i<2*N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_srot_inc_0(void)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
int N=4,incX=0,incY=0;
|
||||||
|
float c=0.25,s=0.5;
|
||||||
|
float x1[]={1.0,3.0,5.0,7.0};
|
||||||
|
float y1[]={2.0,4.0,6.0,8.0};
|
||||||
|
float x2[]={1.0,3.0,5.0,7.0};
|
||||||
|
float y2[]={2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(srot)(&N,x1,&incX,y1,&incY,&c,&s);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(srot)(&N,x2,&incX,y2,&incY,&c,&s);
|
||||||
|
|
||||||
|
for(i=0; i<N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_csrot_inc_0(void)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
int N=4,incX=0,incY=0;
|
||||||
|
float c=0.25,s=0.5;
|
||||||
|
float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(csrot)(&N,x1,&incX,y1,&incY,&c,&s);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(csrot)(&N,x2,&incX,y2,&incY,&c,&s);
|
||||||
|
|
||||||
|
for(i=0; i<2*N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "common_utest.h"
|
||||||
|
|
||||||
|
void test_dswap_inc_0(void)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
int N=4,incX=0,incY=0;
|
||||||
|
double x1[]={1.0,3.0,5.0,7.0};
|
||||||
|
double y1[]={2.0,4.0,6.0,8.0};
|
||||||
|
double x2[]={1.0,3.0,5.0,7.0};
|
||||||
|
double y2[]={2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(dswap)(&N,x1,&incX,y1,&incY);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(dswap)(&N,x2,&incX,y2,&incY);
|
||||||
|
|
||||||
|
for(i=0; i<N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_zswap_inc_0(void)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
int N=4,incX=0,incY=0;
|
||||||
|
double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(zswap)(&N,x1,&incX,y1,&incY);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(zswap)(&N,x2,&incX,y2,&incY);
|
||||||
|
|
||||||
|
for(i=0; i<2*N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_sswap_inc_0(void)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
int N=4,incX=0,incY=0;
|
||||||
|
float x1[]={1.0,3.0,5.0,7.0};
|
||||||
|
float y1[]={2.0,4.0,6.0,8.0};
|
||||||
|
float x2[]={1.0,3.0,5.0,7.0};
|
||||||
|
float y2[]={2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(sswap)(&N,x1,&incX,y1,&incY);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(sswap)(&N,x2,&incX,y2,&incY);
|
||||||
|
|
||||||
|
for(i=0; i<N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_cswap_inc_0(void)
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
int N=4,incX=0,incY=0;
|
||||||
|
float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
|
||||||
|
float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
|
||||||
|
|
||||||
|
//OpenBLAS
|
||||||
|
BLASFUNC(cswap)(&N,x1,&incX,y1,&incY);
|
||||||
|
//reference
|
||||||
|
BLASFUNC_REF(cswap)(&N,x2,&incX,y2,&incY);
|
||||||
|
|
||||||
|
for(i=0; i<2*N; i++){
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS);
|
||||||
|
CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue