commit
9afc561be4
|
@ -1,4 +1,48 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.9
|
||||
1-Mar-2020
|
||||
|
||||
common:
|
||||
* Fixed a miscompilation of the GETRF functions with CMAKE
|
||||
* Imported bugfix 390 from LAPACK (missing NaN propagation in xCOMBSSQ)
|
||||
* The size of the memory buffer used for splitting GEMM tasks across
|
||||
multiple threads can now be configured in the build system.
|
||||
|
||||
POWER:
|
||||
* Fixed several compilation problems related to endianness
|
||||
and ELF version on POWER8 and POWER9
|
||||
* Fixed use of the absolute value IAMIN/IAMAX instead of IMIN/IMAX
|
||||
* Fixed a race condition in the level3 blas code
|
||||
|
||||
MIPS64:
|
||||
* Fixed use of the absoltute value IAMIN/IAMAX instead of IMIN/IMAX
|
||||
|
||||
ARMV7:
|
||||
* Fixed a race condition in the level3 blas code
|
||||
* Fixed compilation on Android
|
||||
ARMV8:
|
||||
* Added support for Ampere EMAG8180
|
||||
* Added support for Neoverse N1
|
||||
* Improved performance of the blas_lock function
|
||||
* Fixed a race condition in the level3 blas code
|
||||
* Fixed a performance regression on TSV110-based servers
|
||||
|
||||
x86_64:
|
||||
* Fixed a long-standing error with undeclared register overwrites
|
||||
in the DSCAL microkernel for HASWELL,SKYLAKEX and ZEN
|
||||
* Fixed a long-standing bug in the SSE implementation of IAMAX
|
||||
* Fixed a CMAKE build failure with DYNAMIC_ARCH
|
||||
* Fixed cpu autodetection of Goldmont+, Cannon Lake and Ice Lake
|
||||
* Fixed a compilation failure on OSX with compiler name containing dash
|
||||
* Fixed compilation with MinGW on SkylakeX
|
||||
* Improved speed of the AVX512 GEMM3M kernel on SkylakeX
|
||||
* Added an AVX512 STRMM kernel for SkylakeX
|
||||
* Improved GEMM performance on Haswell and Zen
|
||||
|
||||
zarch:
|
||||
* fixed compilation of the DYNAMIC_ARCH code
|
||||
|
||||
====================================================================
|
||||
Version 0.3.8
|
||||
9-Feb-2020
|
||||
|
|
|
@ -327,7 +327,6 @@ ifeq ($(C_COMPILER), GCC)
|
|||
#Version tests for supporting specific features (MS_ABI, POWER9 intrinsics)
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
|
||||
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
|
||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
|
@ -575,6 +574,7 @@ ifneq ($(C_COMPILER), GCC)
|
|||
DYNAMIC_CORE += POWER9
|
||||
endif
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
|
||||
ifeq ($(GCCVERSIONGT5), 1)
|
||||
DYNAMIC_CORE += POWER9
|
||||
else
|
||||
|
|
|
@ -89,6 +89,7 @@ CORTEXA57
|
|||
CORTEXA72
|
||||
CORTEXA73
|
||||
NEOVERSEN1
|
||||
EMAG8180
|
||||
FALKOR
|
||||
THUNDERX
|
||||
THUNDERX2T99
|
||||
|
|
|
@ -56,12 +56,16 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
|||
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
||||
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
|
||||
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
|
||||
sspr.goto dspr.goto \
|
||||
sspr2.goto dspr2.goto \
|
||||
ssyr.goto dsyr.goto \
|
||||
ssyr2.goto dsyr2.goto \
|
||||
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
|
||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
||||
sger.goto dger.goto cger.goto zger.goto \
|
||||
sdot.goto ddot.goto \
|
||||
srot.goto drot.goto \
|
||||
srotm.goto drotm.goto \
|
||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
||||
sswap.goto dswap.goto cswap.goto zswap.goto \
|
||||
|
@ -69,10 +73,14 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
|||
sasum.goto dasum.goto casum.goto zasum.goto \
|
||||
ssymv.goto dsymv.goto csymv.goto zsymv.goto \
|
||||
chemv.goto zhemv.goto \
|
||||
chbmv.goto zhbmv.goto \
|
||||
chpmv.goto zhpmv.goto \
|
||||
chemm.goto zhemm.goto \
|
||||
cherk.goto zherk.goto \
|
||||
cher2k.goto zher2k.goto \
|
||||
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
|
||||
strmv.goto dtrmv.goto ctrmv.goto ztrmv.goto \
|
||||
strsv.goto dtrsv.goto ctrsv.goto ztrsv.goto \
|
||||
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
|
||||
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
|
||||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
||||
|
@ -84,11 +92,15 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
|||
sgemm.acml dgemm.acml cgemm.acml zgemm.acml \
|
||||
strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \
|
||||
strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \
|
||||
sspr.acml dspr.acml \
|
||||
sspr2.acml dspr2.acml \
|
||||
ssyr.acml dsyr.acml \
|
||||
ssyr2.acml dsyr2.acml \
|
||||
ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \
|
||||
ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \
|
||||
sger.acml dger.acml cger.acml zger.acml \
|
||||
sdot.acml ddot.acml \
|
||||
srotm.acml drotm.acml \
|
||||
saxpy.acml daxpy.acml caxpy.acml zaxpy.acml \
|
||||
scopy.acml dcopy.acml ccopy.acml zcopy.acml \
|
||||
sswap.acml dswap.acml cswap.acml zswap.acml \
|
||||
|
@ -96,10 +108,14 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
|||
sasum.acml dasum.acml casum.acml zasum.acml \
|
||||
ssymv.acml dsymv.acml csymv.acml zsymv.acml \
|
||||
chemv.acml zhemv.acml \
|
||||
chbmv.acml zhbmv.acml \
|
||||
chpmv.acml zhpmv.acml \
|
||||
chemm.acml zhemm.acml \
|
||||
cherk.acml zherk.acml \
|
||||
cher2k.acml zher2k.acml \
|
||||
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
|
||||
strmv.acml dtrmv.acml ctrmv.acml ztrmv.acml \
|
||||
strsv.acml dtrsv.acml ctrsv.acml ztrsv.acml \
|
||||
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
|
||||
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
|
||||
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
|
||||
|
@ -111,11 +127,15 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
|||
sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \
|
||||
strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \
|
||||
strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \
|
||||
ssyr.goto dsyr.atlas \
|
||||
sspr.atlas dspr.atlas \
|
||||
sspr2.atlas dspr2.atlas \
|
||||
ssyr.atlas dsyr.atlas \
|
||||
ssyr2.atlas dsyr2.atlas \
|
||||
ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \
|
||||
ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \
|
||||
sger.atlas dger.atlas cger.atlas zger.atlas\
|
||||
sdot.atlas ddot.atlas \
|
||||
srotm.atlas drotm.atlas \
|
||||
saxpy.atlas daxpy.atlas caxpy.atlas zaxpy.atlas \
|
||||
scopy.atlas dcopy.atlas ccopy.atlas zcopy.atlas \
|
||||
sswap.atlas dswap.atlas cswap.atlas zswap.atlas \
|
||||
|
@ -123,11 +143,15 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
|||
sasum.atlas dasum.atlas casum.atlas zasum.atlas \
|
||||
ssymv.atlas dsymv.atlas csymv.atlas zsymv.atlas \
|
||||
chemv.atlas zhemv.atlas \
|
||||
chbmv.atlas zhbmv.atlas \
|
||||
chpmv.atlas zhpmv.atlas \
|
||||
chemm.acml zhemm.acml \
|
||||
chemm.atlas zhemm.atlas \
|
||||
cherk.atlas zherk.atlas \
|
||||
cher2k.atlas zher2k.atlas \
|
||||
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
|
||||
strmv.atlas dtrmv.atlas ctrmv.atlas ztrmv.atlas \
|
||||
strsv.atlas dtrsv.atlas ctrsv.atlas ztrsv.atlas \
|
||||
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
|
||||
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
|
||||
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
|
||||
|
@ -139,11 +163,15 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
|||
sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \
|
||||
strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \
|
||||
strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \
|
||||
sspr.mkl dspr.mkl \
|
||||
sspr2.mkl dspr2.mkl \
|
||||
ssyr.mkl dsyr.mkl \
|
||||
ssyr2.mkl dsyr2.mkl \
|
||||
ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \
|
||||
ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \
|
||||
sger.mkl dger.mkl cger.mkl zger.mkl \
|
||||
sdot.mkl ddot.mkl \
|
||||
srotm.mkl drotm.mkl \
|
||||
saxpy.mkl daxpy.mkl caxpy.mkl zaxpy.mkl \
|
||||
scopy.mkl dcopy.mkl ccopy.mkl zcopy.mkl \
|
||||
sswap.mkl dswap.mkl cswap.mkl zswap.mkl \
|
||||
|
@ -151,10 +179,14 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
|||
sasum.mkl dasum.mkl casum.mkl zasum.mkl \
|
||||
ssymv.mkl dsymv.mkl csymv.mkl zsymv.mkl \
|
||||
chemv.mkl zhemv.mkl \
|
||||
chbmv.mkl zhbmv.mkl \
|
||||
chpmv.mkl zhpmv.mkl \
|
||||
chemm.mkl zhemm.mkl \
|
||||
cherk.mkl zherk.mkl \
|
||||
cher2k.mkl zher2k.mkl \
|
||||
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
|
||||
strmv.mkl dtrmv.mkl ctrmv.mkl ztrmv.mkl \
|
||||
strsv.mkl dtrsv.mkl ctrsv.mkl ztrsv.mkl \
|
||||
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
|
||||
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
|
||||
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
|
||||
|
@ -166,12 +198,16 @@ else
|
|||
goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
||||
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
|
||||
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
|
||||
sspr.goto dspr.goto \
|
||||
sspr2.goto dspr2.goto \
|
||||
ssyr.goto dsyr.goto \
|
||||
ssyr2.goto dsyr2.goto \
|
||||
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
|
||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
||||
sger.goto dger.goto cger.goto zger.goto \
|
||||
sdot.goto ddot.goto cdot.goto zdot.goto \
|
||||
srot.goto drot.goto \
|
||||
srotm.goto drotm.goto \
|
||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
||||
sswap.goto dswap.goto cswap.goto zswap.goto \
|
||||
|
@ -179,10 +215,14 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
|||
sasum.goto dasum.goto casum.goto zasum.goto \
|
||||
ssymv.goto dsymv.goto \
|
||||
chemv.goto zhemv.goto \
|
||||
chbmv.goto zhbmv.goto \
|
||||
chpmv.goto zhpmv.goto \
|
||||
chemm.goto zhemm.goto \
|
||||
cherk.goto zherk.goto \
|
||||
cher2k.goto zher2k.goto \
|
||||
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
|
||||
strmv.goto dtrmv.goto ctrmv.goto ztrmv.goto \
|
||||
strsv.goto dtrsv.goto ctrsv.goto ztrsv.goto \
|
||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
|
||||
smallscaling \
|
||||
isamax.goto idamax.goto icamax.goto izamax.goto \
|
||||
|
@ -193,11 +233,15 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
|||
sgemm.acml dgemm.acml cgemm.acml zgemm.acml \
|
||||
strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \
|
||||
strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \
|
||||
sspr.acml dspr.acml \
|
||||
sspr2.acml dspr2.acml \
|
||||
ssyr.acml dsyr.acml \
|
||||
ssyr2.acml dsyr2.acml \
|
||||
ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \
|
||||
ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \
|
||||
sger.acml dger.acml cger.acml zger.acml \
|
||||
sdot.acml ddot.acml \
|
||||
srotm.acml drotm.acml \
|
||||
saxpy.acml daxpy.acml caxpy.acml zaxpy.acml \
|
||||
scopy.acml dcopy.acml ccopy.acml zcopy.acml \
|
||||
sswap.acml dswap.acml cswap.acml zswap.acml \
|
||||
|
@ -205,10 +249,14 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
|||
sasum.acml dasum.acml casum.acml zasum.acml \
|
||||
ssymv.acml dsymv.acml csymv.acml zsymv.acml \
|
||||
chemv.acml zhemv.acml \
|
||||
chbmv.acml zhbmv.acml \
|
||||
chpmv.acml zhpmv.acml \
|
||||
chemm.acml zhemm.acml \
|
||||
cherk.acml zherk.acml \
|
||||
cher2k.acml zher2k.acml \
|
||||
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
|
||||
strmv.acml dtrmv.acml ctrmv.acml ztrmv.acml \
|
||||
strsv.acml dtrsv.acml ctrsv.acml ztrsv.acml \
|
||||
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
|
||||
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
|
||||
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
|
||||
|
@ -220,11 +268,15 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
|||
sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \
|
||||
strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \
|
||||
strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \
|
||||
sspr.atlas dspr.atlas \
|
||||
sspr2.atlas dspr2.atlas \
|
||||
ssyr.atlas dsyr.atlas \
|
||||
ssyr2.atlas dsyr2.atlas \
|
||||
ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \
|
||||
ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \
|
||||
sger.atlas dger.atlas cger.atlas zger.atlas\
|
||||
sdot.atlas ddot.atlas \
|
||||
srotm.atlas drotm.atlas \
|
||||
saxpy.atlas daxpy.atlas caxpy.atlas zaxpy.atlas \
|
||||
scopy.atlas dcopy.atlas ccopy.atlas zcopy.atlas \
|
||||
sswap.atlas dswap.atlas cswap.atlas zswap.atlas \
|
||||
|
@ -232,11 +284,15 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
|||
sasum.atlas dasum.atlas casum.atlas zasum.atlas \
|
||||
ssymv.atlas dsymv.atlas csymv.atlas zsymv.atlas \
|
||||
chemv.atlas zhemv.atlas \
|
||||
chbmv.atlas zhbmv.atlas \
|
||||
chpmv.atlas zhpmv.atlas \
|
||||
chemm.acml zhemm.acml \
|
||||
chemm.atlas zhemm.atlas \
|
||||
cherk.atlas zherk.atlas \
|
||||
cher2k.atlas zher2k.atlas \
|
||||
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
|
||||
strmv.atlas dtrmv.atlas ctrmv.atlas ztrmv.atlas \
|
||||
strsv.atlas dtrsv.atlas ctrsv.atlas ztrsv.atlas \
|
||||
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
|
||||
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
|
||||
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
|
||||
|
@ -250,11 +306,15 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
|||
sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \
|
||||
strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \
|
||||
strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \
|
||||
sspr.mkl dspr.mkl \
|
||||
sspr2.mkl dspr2.mkl \
|
||||
ssyr.mkl dsyr.mkl \
|
||||
ssyr2.mkl dsyr2.mkl \
|
||||
ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \
|
||||
ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \
|
||||
sger.mkl dger.mkl cger.mkl zger.mkl \
|
||||
sdot.mkl ddot.mkl cdot.mkl zdot.mkl \
|
||||
srotm.atlas drotm.atlas \
|
||||
saxpy.mkl daxpy.mkl caxpy.mkl zaxpy.mkl \
|
||||
scopy.mkl dcopy.mkl ccopy.mkl zcopy.mkl \
|
||||
sswap.mkl dswap.mkl cswap.mkl zswap.mkl \
|
||||
|
@ -262,10 +322,14 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
|||
sasum.mkl dasum.mkl casum.mkl zasum.mkl \
|
||||
ssymv.mkl dsymv.mkl csymv.mkl zsymv.mkl \
|
||||
chemv.mkl zhemv.mkl \
|
||||
chbmv.mkl zhbmv.mkl \
|
||||
chpmv.mkl zhpmv.mkl \
|
||||
chemm.mkl zhemm.mkl \
|
||||
cherk.mkl zherk.mkl \
|
||||
cher2k.mkl zher2k.mkl \
|
||||
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
|
||||
strmv.mkl dtrmv.mkl ctrmv.mkl ztrmv.mkl \
|
||||
strsv.mkl dtrsv.mkl ctrsv.mkl ztrsv.mkl \
|
||||
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
|
||||
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
|
||||
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
|
||||
|
@ -288,11 +352,15 @@ veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
|
|||
sgemm.veclib dgemm.veclib cgemm.veclib zgemm.veclib \
|
||||
strmm.veclib dtrmm.veclib ctrmm.veclib ztrmm.veclib \
|
||||
strsm.veclib dtrsm.veclib ctrsm.veclib ztrsm.veclib \
|
||||
sspr.veclib dspr.veclib \
|
||||
sspr2.veclib dspr2.veclib \
|
||||
ssyr.veclib dsyr.veclib \
|
||||
ssyr2.veclib dsyr2.veclib \
|
||||
ssyrk.veclib dsyrk.veclib csyrk.veclib zsyrk.veclib \
|
||||
ssyr2k.veclib dsyr2k.veclib csyr2k.veclib zsyr2k.veclib \
|
||||
sger.veclib dger.veclib cger.veclib zger.veclib \
|
||||
sdot.veclib ddot.veclib cdot.veclib zdot.veclib \
|
||||
srotm.veclib drotm.veclib \
|
||||
saxpy.veclib daxpy.veclib caxpy.veclib zaxpy.veclib \
|
||||
scopy.veclib dcopy.veclib ccopy.veclib zcopy.veclib \
|
||||
sswap.veclib dswap.veclib cswap.veclib zswap.veclib \
|
||||
|
@ -300,10 +368,14 @@ veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
|
|||
sasum.veclib dasum.veclib casum.veclib zasum.veclib \
|
||||
ssymv.veclib dsymv.veclib csymv.veclib zsymv.veclib \
|
||||
chemv.veclib zhemv.veclib \
|
||||
chbmv.veclib zhbmv.veclib \
|
||||
chpmv.veclib zhpmv.veclib \
|
||||
chemm.veclib zhemm.veclib \
|
||||
cherk.veclib zherk.veclib \
|
||||
cher2k.veclib zher2k.veclib \
|
||||
sgemv.veclib dgemv.veclib cgemv.veclib zgemv.veclib \
|
||||
strmv.veclib dtrmv.veclib ctrmv.veclib ztrmv.veclib \
|
||||
strsv.veclib dtrsv.veclib ctrsv.veclib ztrsv.veclib \
|
||||
sgeev.veclib dgeev.veclib cgeev.veclib zgeev.veclib \
|
||||
sgesv.veclib dgesv.veclib cgesv.veclib zgesv.veclib \
|
||||
sgetri.veclib dgetri.veclib cgetri.veclib zgetri.veclib \
|
||||
|
@ -807,6 +879,100 @@ dsyr.mkl : dsyr.$(SUFFIX)
|
|||
|
||||
dsyr.veclib : dsyr.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Sspr ####################################################
|
||||
sspr.goto : sspr.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
sspr.acml : sspr.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
sspr.atlas : sspr.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
sspr.mkl : sspr.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
sspr.veclib : sspr.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Dspr ####################################################
|
||||
dspr.goto : dspr.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
dspr.acml : dspr.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dspr.atlas : dspr.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dspr.mkl : dspr.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dspr.veclib : dspr.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Sspr2 ####################################################
|
||||
sspr2.goto : sspr2.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
sspr2.acml : sspr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
sspr2.atlas : sspr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
sspr2.mkl : sspr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
sspr2.veclib : sspr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Dspr2 ####################################################
|
||||
dspr2.goto : dspr2.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
dspr2.acml : dspr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dspr2.atlas : dspr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dspr2.mkl : dspr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dspr2.veclib : dspr2.$(SUFFIX)
|
||||
|
||||
##################################### Ssyr2 ####################################################
|
||||
ssyr2.goto : ssyr2.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
ssyr2.acml : ssyr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ssyr2.atlas : ssyr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ssyr2.mkl : ssyr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ssyr2.veclib : ssyr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
##################################### Dsyr2 ####################################################
|
||||
dsyr2.goto : dsyr2.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
dsyr2.acml : dsyr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dsyr2.atlas : dsyr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dsyr2.mkl : dsyr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dsyr2.veclib : dsyr2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ssyrk ####################################################
|
||||
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME)
|
||||
|
@ -1108,6 +1274,138 @@ zgemv.mkl : zgemv.$(SUFFIX)
|
|||
zgemv.veclib : zgemv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Strmv ####################################################
|
||||
strmv.goto : strmv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
strmv.acml : strmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
strmv.atlas : strmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
strmv.mkl : strmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
strmv.veclib : strmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Dtrmv ####################################################
|
||||
dtrmv.goto : dtrmv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
dtrmv.acml : dtrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dtrmv.atlas : dtrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dtrmv.mkl : dtrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dtrmv.veclib : dtrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ctrmv ####################################################
|
||||
|
||||
ctrmv.goto : ctrmv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
ctrmv.acml : ctrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ctrmv.atlas : ctrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ctrmv.mkl : ctrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ctrmv.veclib : ctrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ztrmv ####################################################
|
||||
|
||||
ztrmv.goto : ztrmv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
ztrmv.acml : ztrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ztrmv.atlas : ztrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ztrmv.mkl : ztrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ztrmv.veclib : ztrmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Strsv ####################################################
|
||||
strsv.goto : strsv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
strsv.acml : strsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
strsv.atlas : strsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
strsv.mkl : strsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
strsv.veclib : strsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Dtrsv ####################################################
|
||||
dtrsv.goto : dtrsv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
dtrsv.acml : dtrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dtrsv.atlas : dtrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dtrsv.mkl : dtrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dtrsv.veclib : dtrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ctrsv ####################################################
|
||||
|
||||
ctrsv.goto : ctrsv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
ctrsv.acml : ctrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ctrsv.atlas : ctrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ctrsv.mkl : ctrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ctrsv.veclib : ctrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ztrsv ####################################################
|
||||
|
||||
ztrsv.goto : ztrsv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
ztrsv.acml : ztrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ztrsv.atlas : ztrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ztrsv.mkl : ztrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ztrsv.veclib : ztrsv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Sger ####################################################
|
||||
sger.goto : sger.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
@ -1467,7 +1765,70 @@ zhemv.mkl : zhemv.$(SUFFIX)
|
|||
|
||||
zhemv.veclib : zhemv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
##################################### Chbmv ####################################################
|
||||
|
||||
chbmv.goto : chbmv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
chbmv.acml : chbmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
chbmv.atlas : chbmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
chbmv.mkl : chbmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
chbmv.veclib : chbmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
##################################### Zhbmv ####################################################
|
||||
|
||||
zhbmv.goto : zhbmv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
zhbmv.acml : zhbmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
zhbmv.atlas : zhbmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
zhbmv.mkl : zhbmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
zhbmv.veclib : zhbmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
##################################### Chpmv ####################################################
|
||||
|
||||
chpmv.goto : chpmv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
chpmv.acml : chpmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
chpmv.atlas : chpmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
chpmv.mkl : chpmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
chpmv.veclib : chpmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
##################################### Zhpmv ####################################################
|
||||
|
||||
zhpmv.goto : zhpmv.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
zhpmv.acml : zhpmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
zhpmv.atlas : zhpmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
zhpmv.mkl : zhpmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
zhpmv.veclib : zhpmv.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
##################################### Sdot ####################################################
|
||||
sdot.goto : sdot.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
@ -1564,6 +1925,37 @@ drot.mkl : drot.$(SUFFIX)
|
|||
drot.veclib : drot.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### srotm ####################################################
|
||||
srotm.goto : srotm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
srotm.acml : srotm.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
srotm.atlas : srotm.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
srotm.mkl : srotm.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
srotm.veclib : srotm.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### drotm ####################################################
|
||||
drotm.goto : drotm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
drotm.acml : drotm.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
drotm.atlas : drotm.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
drotm.mkl : drotm.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
drotm.veclib : drotm.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Saxpy ####################################################
|
||||
saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME)
|
||||
|
@ -2122,6 +2514,24 @@ ssyr.$(SUFFIX) : syr.c
|
|||
|
||||
dsyr.$(SUFFIX) : syr.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
sspr.$(SUFFIX) : spr.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
dspr.$(SUFFIX) : spr.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
sspr2.$(SUFFIX) : spr2.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
dspr2.$(SUFFIX) : spr2.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
ssyr2.$(SUFFIX) : syr2.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
dsyr2.$(SUFFIX) : syr2.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
ssyrk.$(SUFFIX) : syrk.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
@ -2177,6 +2587,30 @@ cgemv.$(SUFFIX) : gemv.c
|
|||
zgemv.$(SUFFIX) : gemv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
strmv.$(SUFFIX) : trmv.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
dtrmv.$(SUFFIX) : trmv.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
ctrmv.$(SUFFIX) : trmv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
ztrmv.$(SUFFIX) : trmv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
strsv.$(SUFFIX) : trsv.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
dtrsv.$(SUFFIX) : trsv.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
ctrsv.$(SUFFIX) : trsv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
ztrsv.$(SUFFIX) : trsv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
sger.$(SUFFIX) : ger.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
|
@ -2244,6 +2678,18 @@ chemv.$(SUFFIX) : hemv.c
|
|||
zhemv.$(SUFFIX) : hemv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
chbmv.$(SUFFIX) : hbmv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
zhbmv.$(SUFFIX) : hbmv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
chpmv.$(SUFFIX) : hpmv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
zhpmv.$(SUFFIX) : hpmv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
sdot.$(SUFFIX) : dot.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
|
@ -2345,7 +2791,11 @@ srot.$(SUFFIX) : rot.c
|
|||
drot.$(SUFFIX) : rot.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
srotm.$(SUFFIX) : rotm.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
drotm.$(SUFFIX) : rotm.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -128,7 +128,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
struct timespec start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -175,13 +175,13 @@ int main(int argc, char *argv[]){
|
|||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
clock_gettime( CLOCK_REALTIME, &start);
|
||||
|
||||
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
clock_gettime( CLOCK_REALTIME, &stop);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
@ -190,7 +190,7 @@ int main(int argc, char *argv[]){
|
|||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
" %10.2f MFlops %10.9f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
|
|
@ -173,46 +173,46 @@ int main(int argc, char *argv[]){
|
|||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -239,10 +239,13 @@ int main(int argc, char *argv[]){
|
|||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i <= j; i++) {
|
||||
#ifndef COMPLEX
|
||||
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
|
||||
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
|
||||
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
|
||||
#else
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -250,10 +253,13 @@ int main(int argc, char *argv[]){
|
|||
for (j = 0; j < m; j++) {
|
||||
for(i = j; i < m; i++) {
|
||||
#ifndef COMPLEX
|
||||
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
|
||||
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
|
||||
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
|
||||
#else
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
@ -195,7 +195,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -181,9 +181,9 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -197,7 +197,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[j + i * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -234,7 +234,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[j + i * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -182,7 +182,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -177,20 +177,20 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
b[i + j * m * COMPSIZE] = 0.0;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -172,7 +172,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,210 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HBMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HBMV BLASFUNC(zhbmv)
|
||||
#else
|
||||
#define HBMV BLASFUNC(chbmv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz) {
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size) {
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
blasint k = 1;
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1, inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_K"))) k = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n",
|
||||
from, to, step, uplo, k, inc_x, inc_y, loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step) {
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++) {
|
||||
for(i = 0; i < m * COMPSIZE; i++) {
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -164,9 +164,9 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -167,7 +167,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -163,9 +163,9 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -162,8 +162,8 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,207 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HPMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HPMV BLASFUNC(zhpmv)
|
||||
#else
|
||||
#define HPMV BLASFUNC(chpmv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz) {
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size) {
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1, inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step) {
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++) {
|
||||
for(i = 0; i < m * COMPSIZE; i++) {
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -186,7 +186,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -194,7 +194,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -170,46 +170,46 @@ int main(int argc, char *argv[]){
|
|||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,210 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef ROTM
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ROTM BLASFUNC(drotm)
|
||||
#else
|
||||
#define ROTM BLASFUNC(srotm)
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz)
|
||||
{
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv) {
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size)
|
||||
{
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =
|
||||
shmget(IPC_PRIVATE, (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT | 0600)) < 0) {
|
||||
printf("Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1) {
|
||||
printf("Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
FLOAT *x, *y;
|
||||
// FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x = 1, inc_y = 1;
|
||||
FLOAT param[5] = {1, 2.0, 3.0, 4.0, 5.0};
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1, timeg;
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (argc > 0) {
|
||||
from = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0) {
|
||||
to = MAX(atol(*argv), from);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0) {
|
||||
step = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||
loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX")))
|
||||
inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY")))
|
||||
inc_y = atoi(p);
|
||||
|
||||
fprintf(
|
||||
stderr,
|
||||
"From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n",
|
||||
from, to, step, inc_x, inc_y, loops);
|
||||
|
||||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) ==
|
||||
NULL) {
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) ==
|
||||
NULL) {
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for (m = from; m <= to; m += step) {
|
||||
|
||||
timeg = 0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||
y[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
gettimeofday(&start, (struct timezone *)0);
|
||||
|
||||
ROTM(&m, x, &inc_x, y, &inc_y, param);
|
||||
|
||||
gettimeofday(&stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) +
|
||||
(double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,198 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SPR
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPR BLASFUNC(dspr)
|
||||
#else
|
||||
#define SPR BLASFUNC(sspr)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d\n", from, to, step,uplo,inc_x);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SPR (&uplo, &m, alpha, c, &inc_x, a);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -0,0 +1,207 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SPR2
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPR2 BLASFUNC(dspr2)
|
||||
#else
|
||||
#define SPR2 BLASFUNC(sspr2)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*b,*c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -175,9 +175,9 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -177,7 +177,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -173,11 +173,9 @@ int main(int argc, char *argv[]){
|
|||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,194 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYR2
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYR2 BLASFUNC(dsyr2)
|
||||
#else
|
||||
#define SYR2 BLASFUNC(ssyr2)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y, *a;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
blasint inc_x= 1;
|
||||
blasint inc_y= 1;
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -175,9 +175,9 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -172,8 +172,8 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -175,8 +175,8 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef TRMV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRMV BLASFUNC(dtrmv)
|
||||
#else
|
||||
#define TRMV BLASFUNC(strmv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRMV BLASFUNC(ztrmv)
|
||||
#else
|
||||
#define TRMV BLASFUNC(ctrmv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size)
|
||||
{
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1) {
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
FLOAT *a, *x;
|
||||
char *p;
|
||||
|
||||
char uplo ='U';
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
int loops = 1;
|
||||
int l;
|
||||
blasint inc_x=1;
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint n, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||
double time1, timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Diag = %c Loops=%d Inc_x=%d\n", from,
|
||||
to, step, uplo, trans, diag, loops, inc_x);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(n = from; n <= to; n += step) {
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)n);
|
||||
for(j = 0; j < n; j++) {
|
||||
for(i = 0; i < n * COMPSIZE; i++) {
|
||||
a[(long)i + (long)j * (long)n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
clock_gettime(CLOCK_REALTIME, &start);
|
||||
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
|
||||
clock_gettime(CLOCK_REALTIME, &stop);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
fprintf(stderr, " %10.2f MFlops %12.9f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)n * (double)n / timeg / 1.e6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -191,8 +191,8 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,222 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include <time.h>
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMV
|
||||
#undef TRSV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRSV BLASFUNC(dtrsv)
|
||||
#else
|
||||
#define TRSV BLASFUNC(strsv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define TRSV BLASFUNC(ztrsv)
|
||||
#else
|
||||
#define TRSV BLASFUNC(ctrsv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x;
|
||||
blasint n = 0, i, j;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timespec time_start, time_end;
|
||||
time_t seconds = 0;
|
||||
|
||||
double time1,timeg;
|
||||
long long nanos = 0;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
char uplo ='L';
|
||||
char transa = 'N';
|
||||
char diag ='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_TRANSA"))) transa=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Transa = '%c' Inc_x = %d uplo=%c diag=%c loop = %d\n", from, to, step,transa,inc_x,
|
||||
uplo,diag,loops);
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
fprintf(stderr, "============================================\n");
|
||||
|
||||
for(n = from; n <= to; n += step)
|
||||
{
|
||||
timeg=0;
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * n * n * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * n * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
for(j = 0; j < n; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[i + j * n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(l =0;l< loops;l++){
|
||||
|
||||
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start);
|
||||
|
||||
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
|
||||
|
||||
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end);
|
||||
nanos = time_end.tv_nsec - time_start.tv_nsec;
|
||||
seconds = time_end.tv_sec - time_start.tv_sec;
|
||||
|
||||
time1 = seconds + nanos /1.e9;
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
|
||||
timeg /= loops;
|
||||
long long muls = n*(n+1)/2.0;
|
||||
long long adds = (n - 1.0)*n/2.0;
|
||||
|
||||
fprintf(stderr, "%10d %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
|
||||
if(a != NULL){
|
||||
free(a);
|
||||
}
|
||||
|
||||
if( x != NULL){
|
||||
free(x);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -99,7 +99,7 @@ endif ()
|
|||
if (${CORE} STREQUAL "SKYLAKEX")
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
if (NOT NO_AVX512)
|
||||
set (CCOMMON_OPT = "${CCOMMON_OPT} -march=skylake-avx512")
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
|
|
@ -140,6 +140,16 @@ typedef struct {
|
|||
|
||||
} thread_status_t;
|
||||
|
||||
#if (__STDC_VERSION__ >= 201112L)
|
||||
#define atomic_load_queue(p) __atomic_load_n(p, __ATOMIC_RELAXED)
|
||||
#define atomic_store_queue(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
|
||||
#else
|
||||
#define atomic_load_queue(p) (blas_queue_t*)(*(volatile blas_queue_t**)(p))
|
||||
#define atomic_store_queue(p, v) (*(volatile blas_queue_t* volatile*)(p) = (v))
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
static thread_status_t thread_status[MAX_CPU_NUMBER] __attribute__((aligned(ATTRIBUTE_SIZE)));
|
||||
|
||||
#ifndef THREAD_TIMEOUT
|
||||
|
@ -312,20 +322,19 @@ blas_queue_t *tscq;
|
|||
|
||||
last_tick = (unsigned int)rpcc();
|
||||
|
||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||
tscq=thread_status[cpu].queue;
|
||||
pthread_mutex_unlock (&thread_status[cpu].lock);
|
||||
tscq = atomic_load_queue(&thread_status[cpu].queue);
|
||||
|
||||
while(!tscq) {
|
||||
YIELDING;
|
||||
|
||||
if ((unsigned int)rpcc() - last_tick > thread_timeout) {
|
||||
|
||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||
|
||||
if (!thread_status[cpu].queue) {
|
||||
if (!atomic_load_queue(&thread_status[cpu].queue)) {
|
||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||
thread_status[cpu].status = THREAD_STATUS_SLEEP;
|
||||
while (thread_status[cpu].status == THREAD_STATUS_SLEEP) {
|
||||
while (thread_status[cpu].status == THREAD_STATUS_SLEEP &&
|
||||
!atomic_load_queue(&thread_status[cpu].queue)) {
|
||||
|
||||
#ifdef MONITOR
|
||||
main_status[cpu] = MAIN_SLEEPING;
|
||||
|
@ -333,19 +342,18 @@ blas_queue_t *tscq;
|
|||
|
||||
pthread_cond_wait(&thread_status[cpu].wakeup, &thread_status[cpu].lock);
|
||||
}
|
||||
pthread_mutex_unlock(&thread_status[cpu].lock);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&thread_status[cpu].lock);
|
||||
|
||||
last_tick = (unsigned int)rpcc();
|
||||
}
|
||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||
tscq=thread_status[cpu].queue;
|
||||
pthread_mutex_unlock (&thread_status[cpu].lock);
|
||||
|
||||
tscq = atomic_load_queue(&thread_status[cpu].queue);
|
||||
|
||||
}
|
||||
|
||||
queue = thread_status[cpu].queue;
|
||||
queue = atomic_load_queue(&thread_status[cpu].queue);
|
||||
MB;
|
||||
|
||||
if ((long)queue == -1) break;
|
||||
|
||||
|
@ -360,9 +368,7 @@ blas_queue_t *tscq;
|
|||
if (queue) {
|
||||
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
|
||||
|
||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||
thread_status[cpu].queue = (blas_queue_t *)1;
|
||||
pthread_mutex_unlock (&thread_status[cpu].lock);
|
||||
atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)1);
|
||||
|
||||
sa = queue -> sa;
|
||||
sb = queue -> sb;
|
||||
|
@ -442,13 +448,9 @@ blas_queue_t *tscq;
|
|||
|
||||
// arm: make sure all results are written out _before_
|
||||
// thread is marked as done and other threads use them
|
||||
WMB;
|
||||
MB;
|
||||
atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)0);
|
||||
|
||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
|
||||
pthread_mutex_unlock (&thread_status[cpu].lock);
|
||||
|
||||
WMB;
|
||||
|
||||
}
|
||||
|
||||
|
@ -566,7 +568,7 @@ int blas_thread_init(void){
|
|||
|
||||
for(i = 0; i < blas_num_threads - 1; i++){
|
||||
|
||||
thread_status[i].queue = (blas_queue_t *)NULL;
|
||||
atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)0);
|
||||
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
||||
|
||||
pthread_mutex_init(&thread_status[i].lock, NULL);
|
||||
|
@ -655,7 +657,8 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
|||
if (queue -> mode & BLAS_NODE) {
|
||||
|
||||
do {
|
||||
while((thread_status[i].node != node || thread_status[i].queue) && (i < blas_num_threads - 1)) i ++;
|
||||
|
||||
while((thread_status[i].node != node || atomic_load_queue(&thread_status[i].queue)) && (i < blas_num_threads - 1)) i ++;
|
||||
|
||||
if (i < blas_num_threads - 1) break;
|
||||
|
||||
|
@ -669,36 +672,26 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
|||
} while (1);
|
||||
|
||||
} else {
|
||||
pthread_mutex_lock (&thread_status[i].lock);
|
||||
tsiq = thread_status[i].queue;
|
||||
pthread_mutex_unlock (&thread_status[i].lock);
|
||||
tsiq = atomic_load_queue(&thread_status[i].queue);
|
||||
while(tsiq) {
|
||||
i ++;
|
||||
if (i >= blas_num_threads - 1) i = 0;
|
||||
pthread_mutex_lock (&thread_status[i].lock);
|
||||
tsiq = thread_status[i].queue;
|
||||
pthread_mutex_unlock (&thread_status[i].lock);
|
||||
tsiq = atomic_load_queue(&thread_status[i].queue);
|
||||
}
|
||||
}
|
||||
#else
|
||||
pthread_mutex_lock (&thread_status[i].lock);
|
||||
tsiq=thread_status[i].queue ;
|
||||
pthread_mutex_unlock (&thread_status[i].lock);
|
||||
tsiq = atomic_load_queue(&thread_status[i].queue);
|
||||
while(tsiq) {
|
||||
i ++;
|
||||
if (i >= blas_num_threads - 1) i = 0;
|
||||
pthread_mutex_lock (&thread_status[i].lock);
|
||||
tsiq=thread_status[i].queue ;
|
||||
pthread_mutex_unlock (&thread_status[i].lock);
|
||||
tsiq = atomic_load_queue(&thread_status[i].queue);
|
||||
}
|
||||
#endif
|
||||
|
||||
queue -> assigned = i;
|
||||
WMB;
|
||||
pthread_mutex_lock (&thread_status[i].lock);
|
||||
thread_status[i].queue = queue;
|
||||
pthread_mutex_unlock (&thread_status[i].lock);
|
||||
WMB;
|
||||
MB;
|
||||
|
||||
atomic_store_queue(&thread_status[i].queue, queue);
|
||||
|
||||
queue = queue -> next;
|
||||
pos ++;
|
||||
|
@ -718,9 +711,7 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
|||
|
||||
pos = current -> assigned;
|
||||
|
||||
pthread_mutex_lock (&thread_status[pos].lock);
|
||||
tspq=thread_status[pos].queue;
|
||||
pthread_mutex_unlock (&thread_status[pos].lock);
|
||||
tspq = atomic_load_queue(&thread_status[pos].queue);
|
||||
|
||||
if ((BLASULONG)tspq > 1) {
|
||||
pthread_mutex_lock (&thread_status[pos].lock);
|
||||
|
@ -752,24 +743,20 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
|
|||
|
||||
while ((num > 0) && queue) {
|
||||
|
||||
pthread_mutex_lock(&thread_status[queue->assigned].lock);
|
||||
tsqq=thread_status[queue -> assigned].queue;
|
||||
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
|
||||
tsqq = atomic_load_queue(&thread_status[queue->assigned].queue);
|
||||
|
||||
|
||||
while(tsqq) {
|
||||
YIELDING;
|
||||
pthread_mutex_lock(&thread_status[queue->assigned].lock);
|
||||
tsqq=thread_status[queue -> assigned].queue;
|
||||
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
|
||||
|
||||
|
||||
tsqq = atomic_load_queue(&thread_status[queue->assigned].queue);
|
||||
};
|
||||
|
||||
queue = queue -> next;
|
||||
num --;
|
||||
}
|
||||
|
||||
MB;
|
||||
|
||||
#ifdef SMP_DEBUG
|
||||
fprintf(STDERR, "Done.\n\n");
|
||||
#endif
|
||||
|
@ -880,7 +867,7 @@ void goto_set_num_threads(int num_threads) {
|
|||
|
||||
for(i = blas_num_threads - 1; i < num_threads - 1; i++){
|
||||
|
||||
thread_status[i].queue = (blas_queue_t *)NULL;
|
||||
atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)0);
|
||||
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
||||
|
||||
pthread_mutex_init(&thread_status[i].lock, NULL);
|
||||
|
@ -971,12 +958,11 @@ int BLASFUNC(blas_thread_shutdown)(void){
|
|||
|
||||
for (i = 0; i < blas_num_threads - 1; i++) {
|
||||
|
||||
|
||||
pthread_mutex_lock (&thread_status[i].lock);
|
||||
|
||||
thread_status[i].queue = (blas_queue_t *)-1;
|
||||
|
||||
atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1);
|
||||
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
||||
|
||||
pthread_cond_signal (&thread_status[i].wakeup);
|
||||
|
||||
pthread_mutex_unlock(&thread_status[i].lock);
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
extern gotoblas_t gotoblas_POWER6;
|
||||
extern gotoblas_t gotoblas_POWER8;
|
||||
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
extern gotoblas_t gotoblas_POWER9;
|
||||
#endif
|
||||
|
||||
|
@ -21,7 +21,7 @@ static char *corename[] = {
|
|||
char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_POWER6) return corename[1];
|
||||
if (gotoblas == &gotoblas_POWER8) return corename[2];
|
||||
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
if (gotoblas == &gotoblas_POWER9) return corename[3];
|
||||
#endif
|
||||
return corename[0];
|
||||
|
@ -33,7 +33,7 @@ static gotoblas_t *get_coretype(void) {
|
|||
return &gotoblas_POWER6;
|
||||
if (__builtin_cpu_is("power8"))
|
||||
return &gotoblas_POWER8;
|
||||
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
if (__builtin_cpu_is("power9"))
|
||||
return &gotoblas_POWER9;
|
||||
#endif
|
||||
|
@ -59,7 +59,7 @@ static gotoblas_t *force_coretype(char * coretype) {
|
|||
{
|
||||
case 1: return (&gotoblas_POWER6);
|
||||
case 2: return (&gotoblas_POWER8);
|
||||
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||
case 3: return (&gotoblas_POWER9);
|
||||
#endif
|
||||
default: return NULL;
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
ifeq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
|
||||
include $(KERNELDIR)/KERNEL.POWER8
|
||||
else
|
||||
|
||||
#SGEMM_BETA = ../generic/gemm_beta.c
|
||||
#DGEMM_BETA = ../generic/gemm_beta.c
|
||||
#CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
|
@ -206,3 +210,5 @@ QCABS_KERNEL = ../generic/cabs.c
|
|||
#Dump kernel
|
||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||
|
||||
endif
|
||||
|
|
|
@ -13,7 +13,11 @@
|
|||
|
||||
PROLOGUE
|
||||
|
||||
#ifdef CONJ
|
||||
caxpyc_k:
|
||||
#else
|
||||
caxpy_k:
|
||||
#endif
|
||||
.LCF0:
|
||||
0: addis 2,12,.TOC.-.LCF0@ha
|
||||
addi 2,2,.TOC.-.LCF0@l
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
/*
|
||||
.file "icamax.c"
|
||||
.abiversion 2
|
||||
.section ".text"
|
||||
|
@ -5,6 +6,12 @@
|
|||
.p2align 4,,15
|
||||
.globl icamax_k
|
||||
.type icamax_k, @function
|
||||
*/
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
PROLOGUE
|
||||
|
||||
icamax_k:
|
||||
.LCF0:
|
||||
0: addis 2,12,.TOC.-.LCF0@ha
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
/*
|
||||
.file "icamin.c"
|
||||
.abiversion 2
|
||||
.section ".text"
|
||||
|
@ -5,6 +6,12 @@
|
|||
.p2align 4,,15
|
||||
.globl icamin_k
|
||||
.type icamin_k, @function
|
||||
*/
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
PROLOGUE
|
||||
|
||||
icamin_k:
|
||||
.LCF0:
|
||||
0: addis 2,12,.TOC.-.LCF0@ha
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
/*
|
||||
.file "isamax.c"
|
||||
.abiversion 2
|
||||
.section ".text"
|
||||
|
@ -5,6 +6,12 @@
|
|||
.p2align 4,,15
|
||||
.globl isamax_k
|
||||
.type isamax_k, @function
|
||||
*/
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
PROLOGUE
|
||||
|
||||
isamax_k:
|
||||
.LCF0:
|
||||
0: addis 2,12,.TOC.-.LCF0@ha
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
/*
|
||||
.file "isamin.c"
|
||||
.abiversion 2
|
||||
.section ".text"
|
||||
|
@ -5,6 +6,12 @@
|
|||
.p2align 4,,15
|
||||
.globl isamin_k
|
||||
.type isamin_k, @function
|
||||
*/
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
PROLOGUE
|
||||
|
||||
isamin_k:
|
||||
.LCF0:
|
||||
0: addis 2,12,.TOC.-.LCF0@ha
|
||||
|
|
|
@ -68,23 +68,14 @@ double sqrt(double);
|
|||
#define GETRF_FACTOR 1.00
|
||||
|
||||
|
||||
#if defined(USE_PTHREAD_LOCK)
|
||||
static pthread_mutex_t getrf_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
#elif defined(USE_PTHREAD_SPINLOCK)
|
||||
static pthread_spinlock_t getrf_lock = 0;
|
||||
#if (__STDC_VERSION__ >= 201112L)
|
||||
#define atomic_load_long(p) __atomic_load_n(p, __ATOMIC_RELAXED)
|
||||
#define atomic_store_long(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
|
||||
#else
|
||||
static BLASULONG getrf_lock = 0UL;
|
||||
#define atomic_load_long(p) (BLASLONG)(*(volatile BLASLONG*)(p))
|
||||
#define atomic_store_long(p, v) (*(volatile BLASLONG *)(p)) = (v)
|
||||
#endif
|
||||
|
||||
#if defined(USE_PTHREAD_LOCK)
|
||||
static pthread_mutex_t getrf_flag_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
#elif defined(USE_PTHREAD_SPINLOCK)
|
||||
static pthread_spinlock_t getrf_flag_lock = 0;
|
||||
#else
|
||||
static BLASULONG getrf_flag_lock = 0UL;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
static __inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) {
|
||||
|
@ -119,11 +110,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
|
|||
FLOAT *d = (FLOAT *)args -> b + (k + k * lda) * COMPSIZE;
|
||||
FLOAT *sbb = sb;
|
||||
|
||||
#if __STDC_VERSION__ >= 201112L
|
||||
_Atomic BLASLONG *flag = (_Atomic BLASLONG *)args -> d;
|
||||
#else
|
||||
volatile BLASLONG *flag = (volatile BLASLONG *)args -> d;
|
||||
#endif
|
||||
|
||||
blasint *ipiv = (blasint *)args -> c;
|
||||
|
||||
|
@ -180,7 +167,10 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
|
|||
}
|
||||
}
|
||||
|
||||
if ((js + REAL_GEMM_R >= n) && (mypos >= 0)) flag[mypos * CACHE_LINE_SIZE] = 0;
|
||||
if ((js + REAL_GEMM_R >= n) && (mypos >= 0)) {
|
||||
MB;
|
||||
atomic_store_long(&flag[mypos * CACHE_LINE_SIZE], 0);
|
||||
}
|
||||
|
||||
for (is = 0; is < m; is += GEMM_P){
|
||||
min_i = m - is;
|
||||
|
@ -201,14 +191,10 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
|
|||
/* Non blocking implementation */
|
||||
|
||||
typedef struct {
|
||||
#if __STDC_VERSION__ >= 201112L
|
||||
_Atomic
|
||||
#else
|
||||
volatile
|
||||
#endif
|
||||
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
|
||||
volatile BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
|
||||
} job_t;
|
||||
|
||||
|
||||
#define ICOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_ITCOPY(M, N, (FLOAT *)(A) + ((Y) + (X) * (LDA)) * COMPSIZE, LDA, BUFFER);
|
||||
#define OCOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_ONCOPY(M, N, (FLOAT *)(A) + ((X) + (Y) * (LDA)) * COMPSIZE, LDA, BUFFER);
|
||||
|
||||
|
@ -246,11 +232,8 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
|||
|
||||
blasint *ipiv = (blasint *)args -> c;
|
||||
BLASLONG jw;
|
||||
#if __STDC_VERSION__ >= 201112L
|
||||
_Atomic BLASLONG *flag = (_Atomic BLASLONG *)args -> d;
|
||||
#else
|
||||
volatile BLASLONG *flag = (volatile BLASLONG *)args -> d;
|
||||
#endif
|
||||
|
||||
if (args -> a == NULL) {
|
||||
TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb);
|
||||
sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
||||
|
@ -280,10 +263,9 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
|||
#if 1
|
||||
{
|
||||
do {
|
||||
LOCK_COMMAND(&getrf_lock);
|
||||
jw = job[mypos].working[i][CACHE_LINE_SIZE * bufferside];
|
||||
UNLOCK_COMMAND(&getrf_lock);
|
||||
jw = atomic_load_long(&job[mypos].working[i][CACHE_LINE_SIZE * bufferside]);
|
||||
} while (jw);
|
||||
MB;
|
||||
}
|
||||
#else
|
||||
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {};
|
||||
|
@ -326,21 +308,17 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
|||
}
|
||||
MB;
|
||||
for (i = 0; i < args -> nthreads; i++) {
|
||||
LOCK_COMMAND(&getrf_lock);
|
||||
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
|
||||
UNLOCK_COMMAND(&getrf_lock);
|
||||
atomic_store_long(&job[mypos].working[i][CACHE_LINE_SIZE * bufferside], (BLASLONG)buffer[bufferside]);
|
||||
}
|
||||
}
|
||||
|
||||
LOCK_COMMAND(&getrf_flag_lock);
|
||||
flag[mypos * CACHE_LINE_SIZE] = 0;
|
||||
UNLOCK_COMMAND(&getrf_flag_lock);
|
||||
MB;
|
||||
atomic_store_long(&flag[mypos * CACHE_LINE_SIZE], 0);
|
||||
|
||||
if (m == 0) {
|
||||
MB;
|
||||
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
|
||||
LOCK_COMMAND(&getrf_lock);
|
||||
job[mypos].working[mypos][CACHE_LINE_SIZE * xxx] = 0;
|
||||
UNLOCK_COMMAND(&getrf_lock);
|
||||
atomic_store_long(&job[mypos].working[mypos][CACHE_LINE_SIZE * xxx], 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -366,10 +344,9 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
|||
if ((current != mypos) && (!is)) {
|
||||
#if 1
|
||||
do {
|
||||
LOCK_COMMAND(&getrf_lock);
|
||||
jw = job[current].working[mypos][CACHE_LINE_SIZE * bufferside];
|
||||
UNLOCK_COMMAND(&getrf_lock);
|
||||
} while (jw == 0);
|
||||
jw = atomic_load_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside]);
|
||||
} while (jw == 0);
|
||||
MB;
|
||||
#else
|
||||
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {};
|
||||
#endif
|
||||
|
@ -381,9 +358,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
|||
|
||||
MB;
|
||||
if (is + min_i >= m) {
|
||||
LOCK_COMMAND(&getrf_lock);
|
||||
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
|
||||
UNLOCK_COMMAND(&getrf_lock);
|
||||
atomic_store_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside], 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -397,10 +372,9 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
|||
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
|
||||
#if 1
|
||||
do {
|
||||
LOCK_COMMAND(&getrf_lock);
|
||||
jw = job[mypos].working[i][CACHE_LINE_SIZE *xxx];
|
||||
UNLOCK_COMMAND(&getrf_lock);
|
||||
jw = atomic_load_long(&job[mypos].working[i][CACHE_LINE_SIZE *xxx]);
|
||||
} while(jw != 0);
|
||||
MB;
|
||||
#else
|
||||
while (job[mypos].working[i][CACHE_LINE_SIZE * xxx] ) {};
|
||||
#endif
|
||||
|
@ -443,12 +417,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
#ifdef _MSC_VER
|
||||
BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE];
|
||||
#else
|
||||
#if __STDC_VERSION__ >= 201112L
|
||||
_Atomic
|
||||
#else
|
||||
volatile
|
||||
#endif
|
||||
BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
|
||||
volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
|
||||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
@ -543,7 +512,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
if (width > mn - is - bk) width = mn - is - bk;
|
||||
}
|
||||
|
||||
if (num_cpu > 0) exec_blas_async_wait(num_cpu, &queue[0]);
|
||||
|
||||
if (num_cpu > 0) {
|
||||
WMB;
|
||||
exec_blas_async_wait(num_cpu, &queue[0]);
|
||||
}
|
||||
|
||||
mm = m - bk - is;
|
||||
nn = n - bk - is;
|
||||
|
@ -608,7 +581,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
queue[num_cpu].sa = NULL;
|
||||
queue[num_cpu].sb = NULL;
|
||||
queue[num_cpu].next = &queue[num_cpu + 1];
|
||||
flag[num_cpu * CACHE_LINE_SIZE] = 1;
|
||||
atomic_store_long(&flag[num_cpu * CACHE_LINE_SIZE], 1);
|
||||
|
||||
num_cpu ++;
|
||||
|
||||
|
@ -637,6 +610,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
if (num_cpu > 0) {
|
||||
queue[num_cpu - 1].next = NULL;
|
||||
|
||||
WMB;
|
||||
|
||||
exec_blas_async(0, &queue[0]);
|
||||
|
||||
inner_basic_thread(&newarg, NULL, range_n_mine, sa, sbb, -1);
|
||||
|
@ -647,14 +622,10 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
|
||||
for (i = 0; i < num_cpu; i ++) {
|
||||
#if 1
|
||||
LOCK_COMMAND(&getrf_flag_lock);
|
||||
f=flag[i*CACHE_LINE_SIZE];
|
||||
UNLOCK_COMMAND(&getrf_flag_lock);
|
||||
while (f!=0) {
|
||||
LOCK_COMMAND(&getrf_flag_lock);
|
||||
f=flag[i*CACHE_LINE_SIZE];
|
||||
UNLOCK_COMMAND(&getrf_flag_lock);
|
||||
};
|
||||
do {
|
||||
f = atomic_load_long(&flag[i*CACHE_LINE_SIZE]);
|
||||
} while (f != 0);
|
||||
MB;
|
||||
#else
|
||||
while (flag[i*CACHE_LINE_SIZE]) {};
|
||||
#endif
|
||||
|
@ -719,12 +690,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
BLASLONG range[MAX_CPU_NUMBER + 1];
|
||||
|
||||
BLASLONG width, nn, num_cpu;
|
||||
#if __STDC_VERSION__ >= 201112L
|
||||
_Atomic
|
||||
#else
|
||||
volatile
|
||||
#endif
|
||||
BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
|
||||
volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
|
@ -833,6 +799,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
nn = n - bk - is;
|
||||
if (width > nn) width = nn;
|
||||
|
||||
WMB;
|
||||
|
||||
if (num_cpu > 1) exec_blas_async_wait(num_cpu - 1, &queue[1]);
|
||||
|
||||
range[0] = 0;
|
||||
|
@ -867,7 +835,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
queue[num_cpu].sa = NULL;
|
||||
queue[num_cpu].sb = NULL;
|
||||
queue[num_cpu].next = &queue[num_cpu + 1];
|
||||
flag[num_cpu * CACHE_LINE_SIZE] = 1;
|
||||
atomic_store_long(&flag[num_cpu * CACHE_LINE_SIZE], 1);
|
||||
|
||||
num_cpu ++;
|
||||
}
|
||||
|
@ -882,6 +850,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
range_n_new[0] = offset + is;
|
||||
range_n_new[1] = offset + is + bk;
|
||||
|
||||
WMB;
|
||||
if (num_cpu > 1) {
|
||||
|
||||
exec_blas_async(1, &queue[1]);
|
||||
|
@ -917,7 +886,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
|
||||
#endif
|
||||
|
||||
for (i = 1; i < num_cpu; i ++) while (flag[i * CACHE_LINE_SIZE]) {};
|
||||
for (i = 1; i < num_cpu; i ++) while (atomic_load_long(&flag[i * CACHE_LINE_SIZE])) {};
|
||||
|
||||
TRSM_ILTCOPY(bk, bk, a + (is + is * lda) * COMPSIZE, lda, 0, sb);
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ if (MSVC AND "${CMAKE_C_COMPILER_ID}" MATCHES Clang)
|
|||
else ()
|
||||
set(OpenBLAS_utest_src
|
||||
utest_main.c
|
||||
test_min.c
|
||||
test_amax.c
|
||||
test_ismin.c
|
||||
test_rotmg.c
|
||||
|
|
|
@ -11,7 +11,7 @@ UTESTBIN=openblas_utest
|
|||
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
OBJS=utest_main.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o
|
||||
OBJS=utest_main.o test_min.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o
|
||||
#test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o
|
||||
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
|
|
|
@ -40,6 +40,17 @@ CTEST(amax, samax){
|
|||
|
||||
te_max=BLASFUNC(samax)(&N, x, &inc);
|
||||
tr_max=3.3;
|
||||
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(amax, damax){
|
||||
blasint N=3, inc=1;
|
||||
double te_max=0.0, tr_max=0.0;
|
||||
double x[]={-1.1, 2.2, -3.3};
|
||||
|
||||
te_max=BLASFUNC(damax)(&N, x, &inc);
|
||||
tr_max=3.3;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2011-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "openblas_utest.h"
|
||||
|
||||
CTEST(min, smin_negative){
|
||||
blasint N=3, inc=1;
|
||||
float te_min=0.0, tr_min=0.0;
|
||||
float x[]={-1.1, -2.2, -3.3};
|
||||
|
||||
te_min=BLASFUNC(smin)(&N, x, &inc);
|
||||
tr_min=-3.3;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(min, dmin_positive){
|
||||
blasint N=3, inc=1;
|
||||
double te_min=0.0, tr_min=0.0;
|
||||
double x[]={1.1, 0.0, 3.3};
|
||||
|
||||
te_min=BLASFUNC(dmin)(&N, x, &inc);
|
||||
tr_min=0.0;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), DOUBLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(min, smin_zero){
|
||||
blasint N=3, inc=1;
|
||||
float te_min=0.0, tr_min=0.0;
|
||||
float x[]={1.1, 2.2, 0.0};
|
||||
|
||||
te_min=BLASFUNC(smin)(&N, x, &inc);
|
||||
tr_min=0.0;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(max, smax_negative){
|
||||
blasint N=3, inc=1;
|
||||
float te_max=0.0, tr_max=0.0;
|
||||
float x[]={-1.1, -2.2, -3.3};
|
||||
|
||||
te_max=BLASFUNC(smax)(&N, x, &inc);
|
||||
tr_max=-1.1;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(max, dmax_positive){
|
||||
blasint N=3, inc=1;
|
||||
double te_max=0.0, tr_max=0.0;
|
||||
double x[]={1.1, 0.0, 3.3};
|
||||
|
||||
te_max=BLASFUNC(dmax)(&N, x, &inc);
|
||||
tr_max=3.3;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(max, smax_zero){
|
||||
blasint N=3, inc=1;
|
||||
float te_max=0.0, tr_max=0.0;
|
||||
float x[]={-1.1, -2.2, 0.0};
|
||||
|
||||
te_max=BLASFUNC(smax)(&N, x, &inc);
|
||||
tr_max=0.0;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||
}
|
|
@ -50,6 +50,17 @@ CTEST(amax, samax){
|
|||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(amax, damax){
|
||||
blasint N=3, inc=1;
|
||||
double te_max=0.0, tr_max=0.0;
|
||||
double x[]={-1.1, 2.2, -3.3};
|
||||
|
||||
te_max=BLASFUNC(damax)(&N, x, &inc);
|
||||
tr_max=3.3;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||
}
|
||||
|
||||
CTEST (drotmg,rotmg)
|
||||
{
|
||||
double te_d1, tr_d1;
|
||||
|
@ -508,9 +519,82 @@ CTEST(swap,cswap_inc_0)
|
|||
}
|
||||
}
|
||||
|
||||
CTEST(min, smin_negative){
|
||||
blasint N=3, inc=1;
|
||||
float te_min=0.0, tr_min=0.0;
|
||||
float x[]={-1.1, -2.2, -3.3};
|
||||
|
||||
te_min=BLASFUNC(smin)(&N, x, &inc);
|
||||
tr_min=-3.3;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(min, dmin_positive){
|
||||
blasint N=3, inc=1;
|
||||
double te_min=0.0, tr_min=0.0;
|
||||
double x[]={1.1, 0.0, 3.3};
|
||||
|
||||
te_min=BLASFUNC(dmin)(&N, x, &inc);
|
||||
tr_min=0.0;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), DOUBLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(min, smin_zero){
|
||||
blasint N=3, inc=1;
|
||||
float te_min=0.0, tr_min=0.0;
|
||||
float x[]={1.1, 2.2, 0.0};
|
||||
|
||||
te_min=BLASFUNC(smin)(&N, x, &inc);
|
||||
tr_min=0.0;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(max, smax_negative){
|
||||
blasint N=3, inc=1;
|
||||
float te_max=0.0, tr_max=0.0;
|
||||
float x[]={-1.1, -2.2, -3.3};
|
||||
|
||||
te_max=BLASFUNC(smax)(&N, x, &inc);
|
||||
tr_max=-1.1;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(max, dmax_positive){
|
||||
blasint N=3, inc=1;
|
||||
double te_max=0.0, tr_max=0.0;
|
||||
double x[]={1.1, 0.0, 3.3};
|
||||
|
||||
te_max=BLASFUNC(dmax)(&N, x, &inc);
|
||||
tr_max=3.3;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||
}
|
||||
|
||||
CTEST(max, smax_zero){
|
||||
blasint N=3, inc=1;
|
||||
float te_max=0.0, tr_max=0.0;
|
||||
float x[]={-1.1, -2.2, 0.0};
|
||||
|
||||
te_max=BLASFUNC(smax)(&N, x, &inc);
|
||||
tr_max=0.0;
|
||||
|
||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||
}
|
||||
|
||||
int main(int argc, const char ** argv){
|
||||
|
||||
CTEST_ADD(amax, samax);
|
||||
CTEST_ADD (amax, samax);
|
||||
CTEST_ADD (amax, damax);
|
||||
CTEST_ADD (min, smin_negative);
|
||||
CTEST_ADD (min, dmin_positive);
|
||||
CTEST_ADD (min, smin_zero);
|
||||
CTEST_ADD (max, smax_negative);
|
||||
CTEST_ADD (max, dmax_positive);
|
||||
CTEST_ADD (max, smax_zero);
|
||||
CTEST_ADD (drotmg,rotmg);
|
||||
CTEST_ADD (drotmg,rotmg_issue1452);
|
||||
CTEST_ADD (drotmg,rotmg_D1eqD2_X1eqX2);
|
||||
|
|
Loading…
Reference in New Issue