commit
9afc561be4
|
@ -1,4 +1,48 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.9
|
||||||
|
1-Mar-2020
|
||||||
|
|
||||||
|
common:
|
||||||
|
* Fixed a miscompilation of the GETRF functions with CMAKE
|
||||||
|
* Imported bugfix 390 from LAPACK (missing NaN propagation in xCOMBSSQ)
|
||||||
|
* The size of the memory buffer used for splitting GEMM tasks across
|
||||||
|
multiple threads can now be configured in the build system.
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Fixed several compilation problems related to endianness
|
||||||
|
and ELF version on POWER8 and POWER9
|
||||||
|
* Fixed use of the absolute value IAMIN/IAMAX instead of IMIN/IMAX
|
||||||
|
* Fixed a race condition in the level3 blas code
|
||||||
|
|
||||||
|
MIPS64:
|
||||||
|
* Fixed use of the absolute value IAMIN/IAMAX instead of IMIN/IMAX
|
||||||
|
|
||||||
|
ARMV7:
|
||||||
|
* Fixed a race condition in the level3 blas code
|
||||||
|
* Fixed compilation on Android
|
||||||
|
ARMV8:
|
||||||
|
* Added support for Ampere EMAG8180
|
||||||
|
* Added support for Neoverse N1
|
||||||
|
* Improved performance of the blas_lock function
|
||||||
|
* Fixed a race condition in the level3 blas code
|
||||||
|
* Fixed a performance regression on TSV110-based servers
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* Fixed a long-standing error with undeclared register overwrites
|
||||||
|
in the DSCAL microkernel for HASWELL, SKYLAKEX and ZEN
|
||||||
|
* Fixed a long-standing bug in the SSE implementation of IAMAX
|
||||||
|
* Fixed a CMAKE build failure with DYNAMIC_ARCH
|
||||||
|
* Fixed cpu autodetection of Goldmont+, Cannon Lake and Ice Lake
|
||||||
|
* Fixed a compilation failure on OSX when the compiler name contains a dash
|
||||||
|
* Fixed compilation with MinGW on SkylakeX
|
||||||
|
* Improved speed of the AVX512 GEMM3M kernel on SkylakeX
|
||||||
|
* Added an AVX512 STRMM kernel for SkylakeX
|
||||||
|
* Improved GEMM performance on Haswell and Zen
|
||||||
|
|
||||||
|
zarch:
|
||||||
|
* Fixed compilation of the DYNAMIC_ARCH code
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.3.8
|
Version 0.3.8
|
||||||
9-Feb-2020
|
9-Feb-2020
|
||||||
|
|
|
@ -327,7 +327,6 @@ ifeq ($(C_COMPILER), GCC)
|
||||||
#Version tests for supporting specific features (MS_ABI, POWER9 intrinsics)
|
#Version tests for supporting specific features (MS_ABI, POWER9 intrinsics)
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
|
||||||
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
|
|
||||||
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
|
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
|
||||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||||
|
@ -575,6 +574,7 @@ ifneq ($(C_COMPILER), GCC)
|
||||||
DYNAMIC_CORE += POWER9
|
DYNAMIC_CORE += POWER9
|
||||||
endif
|
endif
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
|
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
|
||||||
ifeq ($(GCCVERSIONGT5), 1)
|
ifeq ($(GCCVERSIONGT5), 1)
|
||||||
DYNAMIC_CORE += POWER9
|
DYNAMIC_CORE += POWER9
|
||||||
else
|
else
|
||||||
|
|
|
@ -89,6 +89,7 @@ CORTEXA57
|
||||||
CORTEXA72
|
CORTEXA72
|
||||||
CORTEXA73
|
CORTEXA73
|
||||||
NEOVERSEN1
|
NEOVERSEN1
|
||||||
|
EMAG8180
|
||||||
FALKOR
|
FALKOR
|
||||||
THUNDERX
|
THUNDERX
|
||||||
THUNDERX2T99
|
THUNDERX2T99
|
||||||
|
|
|
@ -56,12 +56,16 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||||
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
||||||
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
|
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
|
||||||
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
|
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
|
||||||
|
sspr.goto dspr.goto \
|
||||||
|
sspr2.goto dspr2.goto \
|
||||||
ssyr.goto dsyr.goto \
|
ssyr.goto dsyr.goto \
|
||||||
|
ssyr2.goto dsyr2.goto \
|
||||||
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
|
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
|
||||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
||||||
sger.goto dger.goto cger.goto zger.goto \
|
sger.goto dger.goto cger.goto zger.goto \
|
||||||
sdot.goto ddot.goto \
|
sdot.goto ddot.goto \
|
||||||
srot.goto drot.goto \
|
srot.goto drot.goto \
|
||||||
|
srotm.goto drotm.goto \
|
||||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
||||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
||||||
sswap.goto dswap.goto cswap.goto zswap.goto \
|
sswap.goto dswap.goto cswap.goto zswap.goto \
|
||||||
|
@ -69,10 +73,14 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||||
sasum.goto dasum.goto casum.goto zasum.goto \
|
sasum.goto dasum.goto casum.goto zasum.goto \
|
||||||
ssymv.goto dsymv.goto csymv.goto zsymv.goto \
|
ssymv.goto dsymv.goto csymv.goto zsymv.goto \
|
||||||
chemv.goto zhemv.goto \
|
chemv.goto zhemv.goto \
|
||||||
|
chbmv.goto zhbmv.goto \
|
||||||
|
chpmv.goto zhpmv.goto \
|
||||||
chemm.goto zhemm.goto \
|
chemm.goto zhemm.goto \
|
||||||
cherk.goto zherk.goto \
|
cherk.goto zherk.goto \
|
||||||
cher2k.goto zher2k.goto \
|
cher2k.goto zher2k.goto \
|
||||||
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
|
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
|
||||||
|
strmv.goto dtrmv.goto ctrmv.goto ztrmv.goto \
|
||||||
|
strsv.goto dtrsv.goto ctrsv.goto ztrsv.goto \
|
||||||
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
|
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
|
||||||
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
|
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
|
||||||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
||||||
|
@ -84,11 +92,15 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||||
sgemm.acml dgemm.acml cgemm.acml zgemm.acml \
|
sgemm.acml dgemm.acml cgemm.acml zgemm.acml \
|
||||||
strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \
|
strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \
|
||||||
strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \
|
strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \
|
||||||
|
sspr.acml dspr.acml \
|
||||||
|
sspr2.acml dspr2.acml \
|
||||||
ssyr.acml dsyr.acml \
|
ssyr.acml dsyr.acml \
|
||||||
|
ssyr2.acml dsyr2.acml \
|
||||||
ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \
|
ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \
|
||||||
ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \
|
ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \
|
||||||
sger.acml dger.acml cger.acml zger.acml \
|
sger.acml dger.acml cger.acml zger.acml \
|
||||||
sdot.acml ddot.acml \
|
sdot.acml ddot.acml \
|
||||||
|
srotm.acml drotm.acml \
|
||||||
saxpy.acml daxpy.acml caxpy.acml zaxpy.acml \
|
saxpy.acml daxpy.acml caxpy.acml zaxpy.acml \
|
||||||
scopy.acml dcopy.acml ccopy.acml zcopy.acml \
|
scopy.acml dcopy.acml ccopy.acml zcopy.acml \
|
||||||
sswap.acml dswap.acml cswap.acml zswap.acml \
|
sswap.acml dswap.acml cswap.acml zswap.acml \
|
||||||
|
@ -96,10 +108,14 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||||
sasum.acml dasum.acml casum.acml zasum.acml \
|
sasum.acml dasum.acml casum.acml zasum.acml \
|
||||||
ssymv.acml dsymv.acml csymv.acml zsymv.acml \
|
ssymv.acml dsymv.acml csymv.acml zsymv.acml \
|
||||||
chemv.acml zhemv.acml \
|
chemv.acml zhemv.acml \
|
||||||
|
chbmv.acml zhbmv.acml \
|
||||||
|
chpmv.acml zhpmv.acml \
|
||||||
chemm.acml zhemm.acml \
|
chemm.acml zhemm.acml \
|
||||||
cherk.acml zherk.acml \
|
cherk.acml zherk.acml \
|
||||||
cher2k.acml zher2k.acml \
|
cher2k.acml zher2k.acml \
|
||||||
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
|
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
|
||||||
|
strmv.acml dtrmv.acml ctrmv.acml ztrmv.acml \
|
||||||
|
strsv.acml dtrsv.acml ctrsv.acml ztrsv.acml \
|
||||||
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
|
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
|
||||||
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
|
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
|
||||||
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
|
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
|
||||||
|
@ -111,11 +127,15 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
||||||
sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \
|
sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \
|
||||||
strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \
|
strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \
|
||||||
strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \
|
strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \
|
||||||
ssyr.goto dsyr.atlas \
|
sspr.atlas dspr.atlas \
|
||||||
|
sspr2.atlas dspr2.atlas \
|
||||||
|
ssyr.atlas dsyr.atlas \
|
||||||
|
ssyr2.atlas dsyr2.atlas \
|
||||||
ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \
|
ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \
|
||||||
ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \
|
ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \
|
||||||
sger.atlas dger.atlas cger.atlas zger.atlas\
|
sger.atlas dger.atlas cger.atlas zger.atlas\
|
||||||
sdot.atlas ddot.atlas \
|
sdot.atlas ddot.atlas \
|
||||||
|
srotm.atlas drotm.atlas \
|
||||||
saxpy.atlas daxpy.atlas caxpy.atlas zaxpy.atlas \
|
saxpy.atlas daxpy.atlas caxpy.atlas zaxpy.atlas \
|
||||||
scopy.atlas dcopy.atlas ccopy.atlas zcopy.atlas \
|
scopy.atlas dcopy.atlas ccopy.atlas zcopy.atlas \
|
||||||
sswap.atlas dswap.atlas cswap.atlas zswap.atlas \
|
sswap.atlas dswap.atlas cswap.atlas zswap.atlas \
|
||||||
|
@ -123,11 +143,15 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
||||||
sasum.atlas dasum.atlas casum.atlas zasum.atlas \
|
sasum.atlas dasum.atlas casum.atlas zasum.atlas \
|
||||||
ssymv.atlas dsymv.atlas csymv.atlas zsymv.atlas \
|
ssymv.atlas dsymv.atlas csymv.atlas zsymv.atlas \
|
||||||
chemv.atlas zhemv.atlas \
|
chemv.atlas zhemv.atlas \
|
||||||
|
chbmv.atlas zhbmv.atlas \
|
||||||
|
chpmv.atlas zhpmv.atlas \
|
||||||
chemm.acml zhemm.acml \
|
chemm.acml zhemm.acml \
|
||||||
chemm.atlas zhemm.atlas \
|
chemm.atlas zhemm.atlas \
|
||||||
cherk.atlas zherk.atlas \
|
cherk.atlas zherk.atlas \
|
||||||
cher2k.atlas zher2k.atlas \
|
cher2k.atlas zher2k.atlas \
|
||||||
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
|
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
|
||||||
|
strmv.atlas dtrmv.atlas ctrmv.atlas ztrmv.atlas \
|
||||||
|
strsv.atlas dtrsv.atlas ctrsv.atlas ztrsv.atlas \
|
||||||
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
|
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
|
||||||
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
|
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
|
||||||
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
|
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
|
||||||
|
@ -139,11 +163,15 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||||
sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \
|
sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \
|
||||||
strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \
|
strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \
|
||||||
strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \
|
strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \
|
||||||
|
sspr.mkl dspr.mkl \
|
||||||
|
sspr2.mkl dspr2.mkl \
|
||||||
ssyr.mkl dsyr.mkl \
|
ssyr.mkl dsyr.mkl \
|
||||||
|
ssyr2.mkl dsyr2.mkl \
|
||||||
ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \
|
ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \
|
||||||
ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \
|
ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \
|
||||||
sger.mkl dger.mkl cger.mkl zger.mkl \
|
sger.mkl dger.mkl cger.mkl zger.mkl \
|
||||||
sdot.mkl ddot.mkl \
|
sdot.mkl ddot.mkl \
|
||||||
|
srotm.mkl drotm.mkl \
|
||||||
saxpy.mkl daxpy.mkl caxpy.mkl zaxpy.mkl \
|
saxpy.mkl daxpy.mkl caxpy.mkl zaxpy.mkl \
|
||||||
scopy.mkl dcopy.mkl ccopy.mkl zcopy.mkl \
|
scopy.mkl dcopy.mkl ccopy.mkl zcopy.mkl \
|
||||||
sswap.mkl dswap.mkl cswap.mkl zswap.mkl \
|
sswap.mkl dswap.mkl cswap.mkl zswap.mkl \
|
||||||
|
@ -151,10 +179,14 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||||
sasum.mkl dasum.mkl casum.mkl zasum.mkl \
|
sasum.mkl dasum.mkl casum.mkl zasum.mkl \
|
||||||
ssymv.mkl dsymv.mkl csymv.mkl zsymv.mkl \
|
ssymv.mkl dsymv.mkl csymv.mkl zsymv.mkl \
|
||||||
chemv.mkl zhemv.mkl \
|
chemv.mkl zhemv.mkl \
|
||||||
|
chbmv.mkl zhbmv.mkl \
|
||||||
|
chpmv.mkl zhpmv.mkl \
|
||||||
chemm.mkl zhemm.mkl \
|
chemm.mkl zhemm.mkl \
|
||||||
cherk.mkl zherk.mkl \
|
cherk.mkl zherk.mkl \
|
||||||
cher2k.mkl zher2k.mkl \
|
cher2k.mkl zher2k.mkl \
|
||||||
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
|
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
|
||||||
|
strmv.mkl dtrmv.mkl ctrmv.mkl ztrmv.mkl \
|
||||||
|
strsv.mkl dtrsv.mkl ctrsv.mkl ztrsv.mkl \
|
||||||
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
|
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
|
||||||
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
|
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
|
||||||
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
|
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
|
||||||
|
@ -166,12 +198,16 @@ else
|
||||||
goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
||||||
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
|
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
|
||||||
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
|
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
|
||||||
|
sspr.goto dspr.goto \
|
||||||
|
sspr2.goto dspr2.goto \
|
||||||
ssyr.goto dsyr.goto \
|
ssyr.goto dsyr.goto \
|
||||||
|
ssyr2.goto dsyr2.goto \
|
||||||
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
|
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
|
||||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
||||||
sger.goto dger.goto cger.goto zger.goto \
|
sger.goto dger.goto cger.goto zger.goto \
|
||||||
sdot.goto ddot.goto cdot.goto zdot.goto \
|
sdot.goto ddot.goto cdot.goto zdot.goto \
|
||||||
srot.goto drot.goto \
|
srot.goto drot.goto \
|
||||||
|
srotm.goto drotm.goto \
|
||||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
||||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
||||||
sswap.goto dswap.goto cswap.goto zswap.goto \
|
sswap.goto dswap.goto cswap.goto zswap.goto \
|
||||||
|
@ -179,10 +215,14 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
||||||
sasum.goto dasum.goto casum.goto zasum.goto \
|
sasum.goto dasum.goto casum.goto zasum.goto \
|
||||||
ssymv.goto dsymv.goto \
|
ssymv.goto dsymv.goto \
|
||||||
chemv.goto zhemv.goto \
|
chemv.goto zhemv.goto \
|
||||||
|
chbmv.goto zhbmv.goto \
|
||||||
|
chpmv.goto zhpmv.goto \
|
||||||
chemm.goto zhemm.goto \
|
chemm.goto zhemm.goto \
|
||||||
cherk.goto zherk.goto \
|
cherk.goto zherk.goto \
|
||||||
cher2k.goto zher2k.goto \
|
cher2k.goto zher2k.goto \
|
||||||
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
|
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
|
||||||
|
strmv.goto dtrmv.goto ctrmv.goto ztrmv.goto \
|
||||||
|
strsv.goto dtrsv.goto ctrsv.goto ztrsv.goto \
|
||||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
|
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
|
||||||
smallscaling \
|
smallscaling \
|
||||||
isamax.goto idamax.goto icamax.goto izamax.goto \
|
isamax.goto idamax.goto icamax.goto izamax.goto \
|
||||||
|
@ -193,11 +233,15 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||||
sgemm.acml dgemm.acml cgemm.acml zgemm.acml \
|
sgemm.acml dgemm.acml cgemm.acml zgemm.acml \
|
||||||
strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \
|
strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \
|
||||||
strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \
|
strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \
|
||||||
|
sspr.acml dspr.acml \
|
||||||
|
sspr2.acml dspr2.acml \
|
||||||
ssyr.acml dsyr.acml \
|
ssyr.acml dsyr.acml \
|
||||||
|
ssyr2.acml dsyr2.acml \
|
||||||
ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \
|
ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \
|
||||||
ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \
|
ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \
|
||||||
sger.acml dger.acml cger.acml zger.acml \
|
sger.acml dger.acml cger.acml zger.acml \
|
||||||
sdot.acml ddot.acml \
|
sdot.acml ddot.acml \
|
||||||
|
srotm.acml drotm.acml \
|
||||||
saxpy.acml daxpy.acml caxpy.acml zaxpy.acml \
|
saxpy.acml daxpy.acml caxpy.acml zaxpy.acml \
|
||||||
scopy.acml dcopy.acml ccopy.acml zcopy.acml \
|
scopy.acml dcopy.acml ccopy.acml zcopy.acml \
|
||||||
sswap.acml dswap.acml cswap.acml zswap.acml \
|
sswap.acml dswap.acml cswap.acml zswap.acml \
|
||||||
|
@ -205,10 +249,14 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||||
sasum.acml dasum.acml casum.acml zasum.acml \
|
sasum.acml dasum.acml casum.acml zasum.acml \
|
||||||
ssymv.acml dsymv.acml csymv.acml zsymv.acml \
|
ssymv.acml dsymv.acml csymv.acml zsymv.acml \
|
||||||
chemv.acml zhemv.acml \
|
chemv.acml zhemv.acml \
|
||||||
|
chbmv.acml zhbmv.acml \
|
||||||
|
chpmv.acml zhpmv.acml \
|
||||||
chemm.acml zhemm.acml \
|
chemm.acml zhemm.acml \
|
||||||
cherk.acml zherk.acml \
|
cherk.acml zherk.acml \
|
||||||
cher2k.acml zher2k.acml \
|
cher2k.acml zher2k.acml \
|
||||||
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
|
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
|
||||||
|
strmv.acml dtrmv.acml ctrmv.acml ztrmv.acml \
|
||||||
|
strsv.acml dtrsv.acml ctrsv.acml ztrsv.acml \
|
||||||
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
|
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
|
||||||
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
|
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
|
||||||
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
|
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
|
||||||
|
@ -220,11 +268,15 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
||||||
sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \
|
sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \
|
||||||
strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \
|
strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \
|
||||||
strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \
|
strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \
|
||||||
|
sspr.atlas dspr.atlas \
|
||||||
|
sspr2.atlas dspr2.atlas \
|
||||||
ssyr.atlas dsyr.atlas \
|
ssyr.atlas dsyr.atlas \
|
||||||
|
ssyr2.atlas dsyr2.atlas \
|
||||||
ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \
|
ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \
|
||||||
ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \
|
ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \
|
||||||
sger.atlas dger.atlas cger.atlas zger.atlas\
|
sger.atlas dger.atlas cger.atlas zger.atlas\
|
||||||
sdot.atlas ddot.atlas \
|
sdot.atlas ddot.atlas \
|
||||||
|
srotm.atlas drotm.atlas \
|
||||||
saxpy.atlas daxpy.atlas caxpy.atlas zaxpy.atlas \
|
saxpy.atlas daxpy.atlas caxpy.atlas zaxpy.atlas \
|
||||||
scopy.atlas dcopy.atlas ccopy.atlas zcopy.atlas \
|
scopy.atlas dcopy.atlas ccopy.atlas zcopy.atlas \
|
||||||
sswap.atlas dswap.atlas cswap.atlas zswap.atlas \
|
sswap.atlas dswap.atlas cswap.atlas zswap.atlas \
|
||||||
|
@ -232,11 +284,15 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
||||||
sasum.atlas dasum.atlas casum.atlas zasum.atlas \
|
sasum.atlas dasum.atlas casum.atlas zasum.atlas \
|
||||||
ssymv.atlas dsymv.atlas csymv.atlas zsymv.atlas \
|
ssymv.atlas dsymv.atlas csymv.atlas zsymv.atlas \
|
||||||
chemv.atlas zhemv.atlas \
|
chemv.atlas zhemv.atlas \
|
||||||
|
chbmv.atlas zhbmv.atlas \
|
||||||
|
chpmv.atlas zhpmv.atlas \
|
||||||
chemm.acml zhemm.acml \
|
chemm.acml zhemm.acml \
|
||||||
chemm.atlas zhemm.atlas \
|
chemm.atlas zhemm.atlas \
|
||||||
cherk.atlas zherk.atlas \
|
cherk.atlas zherk.atlas \
|
||||||
cher2k.atlas zher2k.atlas \
|
cher2k.atlas zher2k.atlas \
|
||||||
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
|
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
|
||||||
|
strmv.atlas dtrmv.atlas ctrmv.atlas ztrmv.atlas \
|
||||||
|
strsv.atlas dtrsv.atlas ctrsv.atlas ztrsv.atlas \
|
||||||
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
|
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
|
||||||
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
|
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
|
||||||
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
|
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
|
||||||
|
@ -250,11 +306,15 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||||
sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \
|
sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \
|
||||||
strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \
|
strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \
|
||||||
strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \
|
strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \
|
||||||
|
sspr.mkl dspr.mkl \
|
||||||
|
sspr2.mkl dspr2.mkl \
|
||||||
ssyr.mkl dsyr.mkl \
|
ssyr.mkl dsyr.mkl \
|
||||||
|
ssyr2.mkl dsyr2.mkl \
|
||||||
ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \
|
ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \
|
||||||
ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \
|
ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \
|
||||||
sger.mkl dger.mkl cger.mkl zger.mkl \
|
sger.mkl dger.mkl cger.mkl zger.mkl \
|
||||||
sdot.mkl ddot.mkl cdot.mkl zdot.mkl \
|
sdot.mkl ddot.mkl cdot.mkl zdot.mkl \
|
||||||
|
srotm.atlas drotm.atlas \
|
||||||
saxpy.mkl daxpy.mkl caxpy.mkl zaxpy.mkl \
|
saxpy.mkl daxpy.mkl caxpy.mkl zaxpy.mkl \
|
||||||
scopy.mkl dcopy.mkl ccopy.mkl zcopy.mkl \
|
scopy.mkl dcopy.mkl ccopy.mkl zcopy.mkl \
|
||||||
sswap.mkl dswap.mkl cswap.mkl zswap.mkl \
|
sswap.mkl dswap.mkl cswap.mkl zswap.mkl \
|
||||||
|
@ -262,10 +322,14 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||||
sasum.mkl dasum.mkl casum.mkl zasum.mkl \
|
sasum.mkl dasum.mkl casum.mkl zasum.mkl \
|
||||||
ssymv.mkl dsymv.mkl csymv.mkl zsymv.mkl \
|
ssymv.mkl dsymv.mkl csymv.mkl zsymv.mkl \
|
||||||
chemv.mkl zhemv.mkl \
|
chemv.mkl zhemv.mkl \
|
||||||
|
chbmv.mkl zhbmv.mkl \
|
||||||
|
chpmv.mkl zhpmv.mkl \
|
||||||
chemm.mkl zhemm.mkl \
|
chemm.mkl zhemm.mkl \
|
||||||
cherk.mkl zherk.mkl \
|
cherk.mkl zherk.mkl \
|
||||||
cher2k.mkl zher2k.mkl \
|
cher2k.mkl zher2k.mkl \
|
||||||
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
|
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
|
||||||
|
strmv.mkl dtrmv.mkl ctrmv.mkl ztrmv.mkl \
|
||||||
|
strsv.mkl dtrsv.mkl ctrsv.mkl ztrsv.mkl \
|
||||||
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
|
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
|
||||||
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
|
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
|
||||||
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
|
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
|
||||||
|
@ -288,11 +352,15 @@ veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
|
||||||
sgemm.veclib dgemm.veclib cgemm.veclib zgemm.veclib \
|
sgemm.veclib dgemm.veclib cgemm.veclib zgemm.veclib \
|
||||||
strmm.veclib dtrmm.veclib ctrmm.veclib ztrmm.veclib \
|
strmm.veclib dtrmm.veclib ctrmm.veclib ztrmm.veclib \
|
||||||
strsm.veclib dtrsm.veclib ctrsm.veclib ztrsm.veclib \
|
strsm.veclib dtrsm.veclib ctrsm.veclib ztrsm.veclib \
|
||||||
|
sspr.veclib dspr.veclib \
|
||||||
|
sspr2.veclib dspr2.veclib \
|
||||||
ssyr.veclib dsyr.veclib \
|
ssyr.veclib dsyr.veclib \
|
||||||
|
ssyr2.veclib dsyr2.veclib \
|
||||||
ssyrk.veclib dsyrk.veclib csyrk.veclib zsyrk.veclib \
|
ssyrk.veclib dsyrk.veclib csyrk.veclib zsyrk.veclib \
|
||||||
ssyr2k.veclib dsyr2k.veclib csyr2k.veclib zsyr2k.veclib \
|
ssyr2k.veclib dsyr2k.veclib csyr2k.veclib zsyr2k.veclib \
|
||||||
sger.veclib dger.veclib cger.veclib zger.veclib \
|
sger.veclib dger.veclib cger.veclib zger.veclib \
|
||||||
sdot.veclib ddot.veclib cdot.veclib zdot.veclib \
|
sdot.veclib ddot.veclib cdot.veclib zdot.veclib \
|
||||||
|
srotm.veclib drotm.veclib \
|
||||||
saxpy.veclib daxpy.veclib caxpy.veclib zaxpy.veclib \
|
saxpy.veclib daxpy.veclib caxpy.veclib zaxpy.veclib \
|
||||||
scopy.veclib dcopy.veclib ccopy.veclib zcopy.veclib \
|
scopy.veclib dcopy.veclib ccopy.veclib zcopy.veclib \
|
||||||
sswap.veclib dswap.veclib cswap.veclib zswap.veclib \
|
sswap.veclib dswap.veclib cswap.veclib zswap.veclib \
|
||||||
|
@ -300,10 +368,14 @@ veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
|
||||||
sasum.veclib dasum.veclib casum.veclib zasum.veclib \
|
sasum.veclib dasum.veclib casum.veclib zasum.veclib \
|
||||||
ssymv.veclib dsymv.veclib csymv.veclib zsymv.veclib \
|
ssymv.veclib dsymv.veclib csymv.veclib zsymv.veclib \
|
||||||
chemv.veclib zhemv.veclib \
|
chemv.veclib zhemv.veclib \
|
||||||
|
chbmv.veclib zhbmv.veclib \
|
||||||
|
chpmv.veclib zhpmv.veclib \
|
||||||
chemm.veclib zhemm.veclib \
|
chemm.veclib zhemm.veclib \
|
||||||
cherk.veclib zherk.veclib \
|
cherk.veclib zherk.veclib \
|
||||||
cher2k.veclib zher2k.veclib \
|
cher2k.veclib zher2k.veclib \
|
||||||
sgemv.veclib dgemv.veclib cgemv.veclib zgemv.veclib \
|
sgemv.veclib dgemv.veclib cgemv.veclib zgemv.veclib \
|
||||||
|
strmv.veclib dtrmv.veclib ctrmv.veclib ztrmv.veclib \
|
||||||
|
strsv.veclib dtrsv.veclib ctrsv.veclib ztrsv.veclib \
|
||||||
sgeev.veclib dgeev.veclib cgeev.veclib zgeev.veclib \
|
sgeev.veclib dgeev.veclib cgeev.veclib zgeev.veclib \
|
||||||
sgesv.veclib dgesv.veclib cgesv.veclib zgesv.veclib \
|
sgesv.veclib dgesv.veclib cgesv.veclib zgesv.veclib \
|
||||||
sgetri.veclib dgetri.veclib cgetri.veclib zgetri.veclib \
|
sgetri.veclib dgetri.veclib cgetri.veclib zgetri.veclib \
|
||||||
|
@ -808,6 +880,100 @@ dsyr.mkl : dsyr.$(SUFFIX)
|
||||||
dsyr.veclib : dsyr.$(SUFFIX)
|
dsyr.veclib : dsyr.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Sspr ####################################################
|
||||||
|
sspr.goto : sspr.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
sspr.acml : sspr.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
sspr.atlas : sspr.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
sspr.mkl : sspr.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
sspr.veclib : sspr.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Dspr ####################################################
|
||||||
|
dspr.goto : dspr.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
dspr.acml : dspr.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dspr.atlas : dspr.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dspr.mkl : dspr.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dspr.veclib : dspr.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Sspr2 ####################################################
|
||||||
|
sspr2.goto : sspr2.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
sspr2.acml : sspr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
sspr2.atlas : sspr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
sspr2.mkl : sspr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
sspr2.veclib : sspr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Dspr2 ####################################################
|
||||||
|
dspr2.goto : dspr2.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
dspr2.acml : dspr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dspr2.atlas : dspr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dspr2.mkl : dspr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dspr2.veclib : dspr2.$(SUFFIX)
|
||||||
|
|
||||||
|
##################################### Ssyr2 ####################################################
|
||||||
|
ssyr2.goto : ssyr2.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
ssyr2.acml : ssyr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ssyr2.atlas : ssyr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ssyr2.mkl : ssyr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ssyr2.veclib : ssyr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
##################################### Dsyr2 ####################################################
|
||||||
|
dsyr2.goto : dsyr2.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
dsyr2.acml : dsyr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dsyr2.atlas : dsyr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dsyr2.mkl : dsyr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dsyr2.veclib : dsyr2.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Ssyrk ####################################################
|
##################################### Ssyrk ####################################################
|
||||||
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME)
|
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -1108,6 +1274,138 @@ zgemv.mkl : zgemv.$(SUFFIX)
|
||||||
zgemv.veclib : zgemv.$(SUFFIX)
|
zgemv.veclib : zgemv.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Strmv ####################################################
|
||||||
|
strmv.goto : strmv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
strmv.acml : strmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
strmv.atlas : strmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
strmv.mkl : strmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
strmv.veclib : strmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Dtrmv ####################################################
|
||||||
|
dtrmv.goto : dtrmv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
dtrmv.acml : dtrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dtrmv.atlas : dtrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dtrmv.mkl : dtrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dtrmv.veclib : dtrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Ctrmv ####################################################
|
||||||
|
|
||||||
|
ctrmv.goto : ctrmv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
ctrmv.acml : ctrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ctrmv.atlas : ctrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ctrmv.mkl : ctrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ctrmv.veclib : ctrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Ztrmv ####################################################
|
||||||
|
|
||||||
|
ztrmv.goto : ztrmv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
ztrmv.acml : ztrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ztrmv.atlas : ztrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ztrmv.mkl : ztrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ztrmv.veclib : ztrmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Strsv ####################################################
|
||||||
|
strsv.goto : strsv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
strsv.acml : strsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
strsv.atlas : strsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
strsv.mkl : strsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
strsv.veclib : strsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Dtrsv ####################################################
|
||||||
|
dtrsv.goto : dtrsv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
dtrsv.acml : dtrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dtrsv.atlas : dtrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dtrsv.mkl : dtrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
dtrsv.veclib : dtrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Ctrsv ####################################################
|
||||||
|
|
||||||
|
ctrsv.goto : ctrsv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
ctrsv.acml : ctrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ctrsv.atlas : ctrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ctrsv.mkl : ctrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ctrsv.veclib : ctrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### Ztrsv ####################################################
|
||||||
|
|
||||||
|
ztrsv.goto : ztrsv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
ztrsv.acml : ztrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ztrsv.atlas : ztrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ztrsv.mkl : ztrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
ztrsv.veclib : ztrsv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Sger ####################################################
|
##################################### Sger ####################################################
|
||||||
sger.goto : sger.$(SUFFIX) ../$(LIBNAME)
|
sger.goto : sger.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -1467,7 +1765,70 @@ zhemv.mkl : zhemv.$(SUFFIX)
|
||||||
|
|
||||||
zhemv.veclib : zhemv.$(SUFFIX)
|
zhemv.veclib : zhemv.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
##################################### Chbmv ####################################################
|
||||||
|
|
||||||
|
chbmv.goto : chbmv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
chbmv.acml : chbmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
chbmv.atlas : chbmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
chbmv.mkl : chbmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
chbmv.veclib : chbmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
##################################### Zhbmv ####################################################
|
||||||
|
|
||||||
|
zhbmv.goto : zhbmv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
zhbmv.acml : zhbmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
zhbmv.atlas : zhbmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
zhbmv.mkl : zhbmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
zhbmv.veclib : zhbmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
##################################### Chpmv ####################################################
|
||||||
|
|
||||||
|
chpmv.goto : chpmv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
chpmv.acml : chpmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
chpmv.atlas : chpmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
chpmv.mkl : chpmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
chpmv.veclib : chpmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
##################################### Zhpmv ####################################################
|
||||||
|
|
||||||
|
zhpmv.goto : zhpmv.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
zhpmv.acml : zhpmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
zhpmv.atlas : zhpmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
zhpmv.mkl : zhpmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
zhpmv.veclib : zhpmv.$(SUFFIX)
|
||||||
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
##################################### Sdot ####################################################
|
##################################### Sdot ####################################################
|
||||||
sdot.goto : sdot.$(SUFFIX) ../$(LIBNAME)
|
sdot.goto : sdot.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
@ -1564,6 +1925,37 @@ drot.mkl : drot.$(SUFFIX)
|
||||||
drot.veclib : drot.$(SUFFIX)
|
drot.veclib : drot.$(SUFFIX)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### srotm ####################################################
|
||||||
|
srotm.goto : srotm.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
srotm.acml : srotm.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
srotm.atlas : srotm.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
srotm.mkl : srotm.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
srotm.veclib : srotm.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
##################################### drotm ####################################################
|
||||||
|
drotm.goto : drotm.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
|
|
||||||
|
drotm.acml : drotm.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
drotm.atlas : drotm.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
drotm.mkl : drotm.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
|
drotm.veclib : drotm.$(SUFFIX)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Saxpy ####################################################
|
##################################### Saxpy ####################################################
|
||||||
saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME)
|
saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -2123,6 +2515,24 @@ ssyr.$(SUFFIX) : syr.c
|
||||||
dsyr.$(SUFFIX) : syr.c
|
dsyr.$(SUFFIX) : syr.c
|
||||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
sspr.$(SUFFIX) : spr.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
dspr.$(SUFFIX) : spr.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
sspr2.$(SUFFIX) : spr2.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
dspr2.$(SUFFIX) : spr2.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
ssyr2.$(SUFFIX) : syr2.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
dsyr2.$(SUFFIX) : syr2.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
ssyrk.$(SUFFIX) : syrk.c
|
ssyrk.$(SUFFIX) : syrk.c
|
||||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
@ -2177,6 +2587,30 @@ cgemv.$(SUFFIX) : gemv.c
|
||||||
zgemv.$(SUFFIX) : gemv.c
|
zgemv.$(SUFFIX) : gemv.c
|
||||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
strmv.$(SUFFIX) : trmv.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
dtrmv.$(SUFFIX) : trmv.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
ctrmv.$(SUFFIX) : trmv.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
ztrmv.$(SUFFIX) : trmv.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
strsv.$(SUFFIX) : trsv.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
dtrsv.$(SUFFIX) : trsv.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
ctrsv.$(SUFFIX) : trsv.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
ztrsv.$(SUFFIX) : trsv.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
sger.$(SUFFIX) : ger.c
|
sger.$(SUFFIX) : ger.c
|
||||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
@ -2244,6 +2678,18 @@ chemv.$(SUFFIX) : hemv.c
|
||||||
zhemv.$(SUFFIX) : hemv.c
|
zhemv.$(SUFFIX) : hemv.c
|
||||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
chbmv.$(SUFFIX) : hbmv.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
zhbmv.$(SUFFIX) : hbmv.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
chpmv.$(SUFFIX) : hpmv.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
zhpmv.$(SUFFIX) : hpmv.c
|
||||||
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
sdot.$(SUFFIX) : dot.c
|
sdot.$(SUFFIX) : dot.c
|
||||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
@ -2345,7 +2791,11 @@ srot.$(SUFFIX) : rot.c
|
||||||
drot.$(SUFFIX) : rot.c
|
drot.$(SUFFIX) : rot.c
|
||||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
srotm.$(SUFFIX) : rotm.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
drotm.$(SUFFIX) : rotm.c
|
||||||
|
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -128,7 +128,7 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
struct timespec start, stop;
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -175,13 +175,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
clock_gettime( CLOCK_REALTIME, &start);
|
||||||
|
|
||||||
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
clock_gettime( CLOCK_REALTIME, &stop);
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
@ -190,7 +190,7 @@ int main(int argc, char *argv[]){
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops %10.6f sec\n",
|
" %10.2f MFlops %10.9f sec\n",
|
||||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -173,46 +173,46 @@ int main(int argc, char *argv[]){
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
if (uplos & 1) {
|
if (uplos & 1) {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (uplos & 1) {
|
if (uplos & 1) {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = 0; i < j; i++) {
|
for(i = 0; i < j; i++) {
|
||||||
a[(i + j * m) * 2 + 0] = 0.;
|
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||||
a[(i + j * m) * 2 + 1] = 0.;
|
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||||
}
|
}
|
||||||
|
|
||||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||||
a[(j + j * m) * 2 + 1] = 0.;
|
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||||
|
|
||||||
for(i = j + 1; i < m; i++) {
|
for(i = j + 1; i < m; i++) {
|
||||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = 0; i < j; i++) {
|
for(i = 0; i < j; i++) {
|
||||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||||
a[(j + j * m) * 2 + 1] = 0.;
|
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||||
|
|
||||||
for(i = j + 1; i < m; i++) {
|
for(i = j + 1; i < m; i++) {
|
||||||
a[(i + j * m) * 2 + 0] = 0.;
|
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||||
a[(i + j * m) * 2 + 1] = 0.;
|
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -239,10 +239,13 @@ int main(int argc, char *argv[]){
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = 0; i <= j; i++) {
|
for(i = 0; i <= j; i++) {
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
|
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
|
||||||
|
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
|
||||||
#else
|
#else
|
||||||
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
|
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
|
||||||
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
|
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
|
||||||
|
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
|
||||||
|
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -250,10 +253,13 @@ int main(int argc, char *argv[]){
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = j; i < m; i++) {
|
for(i = j; i < m; i++) {
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
|
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
|
||||||
|
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
|
||||||
#else
|
#else
|
||||||
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
|
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
|
||||||
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
|
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
|
||||||
|
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
|
||||||
|
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -195,7 +195,7 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < to; j++){
|
for(j = 0; j < to; j++){
|
||||||
for(i = 0; i < to * COMPSIZE; i++){
|
for(i = 0; i < to * COMPSIZE; i++){
|
||||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -181,9 +181,9 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -197,7 +197,7 @@ int main(int argc, char *argv[]){
|
||||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < n * COMPSIZE; i++){
|
for(i = 0; i < n * COMPSIZE; i++){
|
||||||
a[j + i * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -234,7 +234,7 @@ int main(int argc, char *argv[]){
|
||||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < n * COMPSIZE; i++){
|
for(i = 0; i < n * COMPSIZE; i++){
|
||||||
a[j + i * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -182,7 +182,7 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < n * COMPSIZE; i++){
|
for(i = 0; i < n * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -177,20 +177,20 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
b[i + j * m * COMPSIZE] = 0.0;
|
b[(long)i + (long)j * (long)m * COMPSIZE] = 0.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (j = 0; j < m; ++j) {
|
for (j = 0; j < m; ++j) {
|
||||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||||
b[i] += a[i + j * m * COMPSIZE];
|
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -172,7 +172,7 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < to; j++){
|
for(j = 0; j < to; j++){
|
||||||
for(i = 0; i < to * COMPSIZE; i++){
|
for(i = 0; i < to * COMPSIZE; i++){
|
||||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,210 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
#undef HBMV
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef DOUBLE
|
||||||
|
#define HBMV BLASFUNC(zhbmv)
|
||||||
|
#else
|
||||||
|
#define HBMV BLASFUNC(chbmv)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted from the converted file time below
   to rebase it onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv: receives the current time as seconds + microseconds; may be NULL,
 *     in which case nothing is written.
 * tz: ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz) {

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz; /* time zone information is not provided by this shim */

  if (NULL != tv) {
    GetSystemTimeAsFileTime(&ft);

    /* Assemble the 64-bit file time from its two 32-bit halves. */
    tmpres |= ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres |= ft.dwLowDateTime;

    /* converting file time to unix epoch */
    tmpres /= 10; /* convert into microseconds */
    tmpres -= DELTA_EPOCH_IN_MICROSECS;
    tv->tv_sec = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}

#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/* NOTE: the trailing "&& 0" above keeps this whole section compiled out. */

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Allocate a buffer backed by a SysV shared-memory segment that is
 * requested with SHM_HUGETLB.
 *
 * size: number of bytes wanted. The request passed to shmget() is
 *       (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1)
 *       (assumes HUGE_PAGESIZE is a power of two -- TODO confirm).
 *
 * Returns the address returned by shmat(). On any failure a message is
 * printed to stdout and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size) {
  void *mapping;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
                      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                      SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment_id < 0) {
    printf("Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment_id, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf("Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching so the segment id is not left
     behind by this process. */
  shmctl(segment_id, IPC_RMID, 0);

  return mapping;
}

/* Route the malloc() calls that follow in this file through huge_malloc(). */
#define malloc huge_malloc

#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
FLOAT *a, *x, *y;
|
||||||
|
FLOAT alpha[] = {1.0, 1.0};
|
||||||
|
FLOAT beta [] = {0.0, 0.0};
|
||||||
|
blasint k = 1;
|
||||||
|
char uplo='L';
|
||||||
|
blasint m, i, j;
|
||||||
|
blasint inc_x=1, inc_y=1;
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timeval start, stop;
|
||||||
|
double time1,timeg;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||||
|
if ((p = getenv("OPENBLAS_K"))) k = atoi(p);
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n",
|
||||||
|
from, to, step, uplo, k, inc_x, inc_y, loops);
|
||||||
|
|
||||||
|
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
|
for(m = from; m <= to; m += step) {
|
||||||
|
|
||||||
|
timeg=0;
|
||||||
|
|
||||||
|
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
|
||||||
|
|
||||||
|
for(j = 0; j < m; j++) {
|
||||||
|
for(i = 0; i < m * COMPSIZE; i++) {
|
||||||
|
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (l = 0; l < loops; l++) {
|
||||||
|
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||||
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||||
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
|
||||||
|
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
|
timeg += time1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
|
||||||
|
fprintf(stderr, " %10.2f MFlops\n",
|
||||||
|
COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -164,9 +164,9 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -167,7 +167,7 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -163,9 +163,9 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -162,8 +162,8 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,207 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
#undef HPMV
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef DOUBLE
|
||||||
|
#define HPMV BLASFUNC(zhpmv)
|
||||||
|
#else
|
||||||
|
#define HPMV BLASFUNC(chpmv)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted from the converted file time below
   to rebase it onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv: receives the current time as seconds + microseconds; may be NULL,
 *     in which case nothing is written.
 * tz: ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz) {

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz; /* time zone information is not provided by this shim */

  if (NULL != tv) {
    GetSystemTimeAsFileTime(&ft);

    /* Assemble the 64-bit file time from its two 32-bit halves. */
    tmpres |= ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres |= ft.dwLowDateTime;

    /* converting file time to unix epoch */
    tmpres /= 10; /* convert into microseconds */
    tmpres -= DELTA_EPOCH_IN_MICROSECS;
    tv->tv_sec = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}

#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/* NOTE: the trailing "&& 0" above keeps this whole section compiled out. */

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Allocate a buffer backed by a SysV shared-memory segment that is
 * requested with SHM_HUGETLB.
 *
 * size: number of bytes wanted. The request passed to shmget() is
 *       (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1)
 *       (assumes HUGE_PAGESIZE is a power of two -- TODO confirm).
 *
 * Returns the address returned by shmat(). On any failure a message is
 * printed to stdout and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size) {
  void *mapping;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
                      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                      SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment_id < 0) {
    printf("Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment_id, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf("Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching so the segment id is not left
     behind by this process. */
  shmctl(segment_id, IPC_RMID, 0);

  return mapping;
}

/* Route the malloc() calls that follow in this file through huge_malloc(). */
#define malloc huge_malloc

#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
FLOAT *a, *x, *y;
|
||||||
|
FLOAT alpha[] = {1.0, 1.0};
|
||||||
|
FLOAT beta [] = {1.0, 1.0};
|
||||||
|
char uplo='L';
|
||||||
|
blasint m, i, j;
|
||||||
|
blasint inc_x=1, inc_y=1;
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timeval start, stop;
|
||||||
|
double time1,timeg;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||||
|
|
||||||
|
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
|
for(m = from; m <= to; m += step) {
|
||||||
|
|
||||||
|
timeg=0;
|
||||||
|
|
||||||
|
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
|
||||||
|
|
||||||
|
for(j = 0; j < m; j++) {
|
||||||
|
for(i = 0; i < m * COMPSIZE; i++) {
|
||||||
|
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (l = 0; l < loops; l++) {
|
||||||
|
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||||
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||||
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
|
||||||
|
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
|
timeg += time1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
|
||||||
|
fprintf(stderr, " %10.2f MFlops\n",
|
||||||
|
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -186,7 +186,7 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -194,7 +194,7 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for (j = 0; j < m; ++j) {
|
for (j = 0; j < m; ++j) {
|
||||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||||
b[i] += a[i + j * m * COMPSIZE];
|
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -170,46 +170,46 @@ int main(int argc, char *argv[]){
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
if (uplos & 1) {
|
if (uplos & 1) {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (uplos & 1) {
|
if (uplos & 1) {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = 0; i < j; i++) {
|
for(i = 0; i < j; i++) {
|
||||||
a[(i + j * m) * 2 + 0] = 0.;
|
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||||
a[(i + j * m) * 2 + 1] = 0.;
|
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||||
}
|
}
|
||||||
|
|
||||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||||
a[(j + j * m) * 2 + 1] = 0.;
|
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||||
|
|
||||||
for(i = j + 1; i < m; i++) {
|
for(i = j + 1; i < m; i++) {
|
||||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
for(i = 0; i < j; i++) {
|
for(i = 0; i < j; i++) {
|
||||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||||
a[(j + j * m) * 2 + 1] = 0.;
|
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||||
|
|
||||||
for(i = j + 1; i < m; i++) {
|
for(i = j + 1; i < m; i++) {
|
||||||
a[(i + j * m) * 2 + 0] = 0.;
|
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||||
a[(i + j * m) * 2 + 1] = 0.;
|
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,210 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||||
|
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||||
|
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#undef ROTM
|
||||||
|
|
||||||
|
#ifdef DOUBLE
|
||||||
|
#define ROTM BLASFUNC(drotm)
|
||||||
|
#else
|
||||||
|
#define ROTM BLASFUNC(srotm)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted from the converted file time below
   to rebase it onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv: receives the current time as seconds + microseconds; may be NULL,
 *     in which case nothing is written.
 * tz: ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz)
{

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz; /* time zone information is not provided by this shim */

  if (NULL != tv) {
    GetSystemTimeAsFileTime(&ft);

    /* Assemble the 64-bit file time from its two 32-bit halves. */
    tmpres |= ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres |= ft.dwLowDateTime;

    /* converting file time to unix epoch */
    tmpres /= 10; /* convert into microseconds */
    tmpres -= DELTA_EPOCH_IN_MICROSECS;
    tv->tv_sec = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}

#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/* NOTE: the trailing "&& 0" above keeps this whole section compiled out. */

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Allocate a buffer backed by a SysV shared-memory segment that is
 * requested with SHM_HUGETLB.
 *
 * size: number of bytes wanted. The request passed to shmget() is
 *       (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1)
 *       (assumes HUGE_PAGESIZE is a power of two -- TODO confirm).
 *
 * Returns the address returned by shmat(). On any failure a message is
 * printed to stdout and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size)
{
  void *mapping;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
                      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                      SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment_id < 0) {
    printf("Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment_id, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf("Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching so the segment id is not left
     behind by this process. */
  shmctl(segment_id, IPC_RMID, 0);

  return mapping;
}

/* Route the malloc() calls that follow in this file through huge_malloc(). */
#define malloc huge_malloc

#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
|
||||||
|
FLOAT *x, *y;
|
||||||
|
// FLOAT result;
|
||||||
|
blasint m, i;
|
||||||
|
blasint inc_x = 1, inc_y = 1;
|
||||||
|
FLOAT param[5] = {1, 2.0, 3.0, 4.0, 5.0};
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timeval start, stop;
|
||||||
|
double time1, timeg;
|
||||||
|
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
|
||||||
|
if (argc > 0) {
|
||||||
|
from = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0) {
|
||||||
|
to = MAX(atol(*argv), from);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0) {
|
||||||
|
step = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||||
|
loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX")))
|
||||||
|
inc_x = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCY")))
|
||||||
|
inc_y = atoi(p);
|
||||||
|
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n",
|
||||||
|
from, to, step, inc_x, inc_y, loops);
|
||||||
|
|
||||||
|
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) ==
|
||||||
|
NULL) {
|
||||||
|
fprintf(stderr, "Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) ==
|
||||||
|
NULL) {
|
||||||
|
fprintf(stderr, "Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
|
for (m = from; m <= to; m += step) {
|
||||||
|
|
||||||
|
timeg = 0;
|
||||||
|
|
||||||
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||||
|
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||||
|
y[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (l = 0; l < loops; l++) {
|
||||||
|
gettimeofday(&start, (struct timezone *)0);
|
||||||
|
|
||||||
|
ROTM(&m, x, &inc_x, y, &inc_y, param);
|
||||||
|
|
||||||
|
gettimeofday(&stop, (struct timezone *)0);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) +
|
||||||
|
(double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
|
timeg += time1;
|
||||||
|
}
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
|
||||||
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n",
|
||||||
|
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -0,0 +1,198 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
#undef SPR
|
||||||
|
|
||||||
|
#ifdef DOUBLE
|
||||||
|
#define SPR BLASFUNC(dspr)
|
||||||
|
#else
|
||||||
|
#define SPR BLASFUNC(sspr)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted from the converted file time below
   to rebase it onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv: receives the current time as seconds + microseconds; may be NULL,
 *     in which case nothing is written.
 * tz: ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz; /* time zone information is not provided by this shim */

  if (NULL != tv) {
    GetSystemTimeAsFileTime(&ft);

    /* Assemble the 64-bit file time from its two 32-bit halves. */
    tmpres |= ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres |= ft.dwLowDateTime;

    /* converting file time to unix epoch */
    tmpres /= 10; /* convert into microseconds */
    tmpres -= DELTA_EPOCH_IN_MICROSECS;
    tv->tv_sec = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}

#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/* NOTE: the trailing "&& 0" above keeps this whole section compiled out. */

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Allocate a buffer backed by a SysV shared-memory segment that is
 * requested with SHM_HUGETLB.
 *
 * size: number of bytes wanted. The request passed to shmget() is
 *       (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1)
 *       (assumes HUGE_PAGESIZE is a power of two -- TODO confirm).
 *
 * Returns the address returned by shmat(). On any failure a message is
 * printed to stdout and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  void *mapping;
  int segment_id;

  segment_id = shmget(IPC_PRIVATE,
                      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                      SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment_id < 0) {
    printf("Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment_id, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf("Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching so the segment id is not left
     behind by this process. */
  shmctl(segment_id, IPC_RMID, 0);

  return mapping;
}

/* Route the malloc() calls that follow in this file through huge_malloc(). */
#define malloc huge_malloc

#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
FLOAT *a,*c;
|
||||||
|
FLOAT alpha[] = {1.0, 1.0};
|
||||||
|
blasint inc_x=1;
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
char uplo='U';
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||||
|
|
||||||
|
blasint m, i, j;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timeval start, stop;
|
||||||
|
double time1,timeg;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d\n", from, to, step,uplo,inc_x);
|
||||||
|
|
||||||
|
|
||||||
|
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops Time\n");
|
||||||
|
|
||||||
|
for(m = from; m <= to; m += step)
|
||||||
|
{
|
||||||
|
timeg=0;
|
||||||
|
|
||||||
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
|
for (l=0; l<loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
|
for(j = 0; j < m; j++){
|
||||||
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||||
|
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
|
||||||
|
SPR (&uplo, &m, alpha, c, &inc_x, a);
|
||||||
|
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
|
timeg += time1;
|
||||||
|
}
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
" %10.2f MBytes %10.6f sec\n",
|
||||||
|
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -0,0 +1,207 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Bind SPR2 to the BLAS entry point for the precision selected at build time:
   dspr2 when DOUBLE is defined, sspr2 otherwise. */
#undef SPR2

#ifndef DOUBLE
#  define SPR2 BLASFUNC(sspr2)
#else
#  define SPR2 BLASFUNC(dspr2)
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)
|
||||||
|
|
||||||
|
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int gettimeofday(struct timeval *tv, void *tz){
|
||||||
|
|
||||||
|
FILETIME ft;
|
||||||
|
unsigned __int64 tmpres = 0;
|
||||||
|
static int tzflag;
|
||||||
|
|
||||||
|
if (NULL != tv)
|
||||||
|
{
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
|
||||||
|
tmpres |= ft.dwHighDateTime;
|
||||||
|
tmpres <<= 32;
|
||||||
|
tmpres |= ft.dwLowDateTime;
|
||||||
|
|
||||||
|
/*converting file time to unix epoch*/
|
||||||
|
tmpres /= 10; /*convert into microseconds*/
|
||||||
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||||
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||||
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||||
|
|
||||||
|
/*
 * Allocate a buffer from a System V shared-memory segment created with
 * SHM_HUGETLB and return the address at which it was attached.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked down to a
 * HUGE_PAGESIZE boundary, so a size that is already aligned still gains one
 * extra page (assumes HUGE_PAGESIZE is a power of two - TODO confirm).
 *
 * Never returns on failure: prints a message and exits with status 1 if
 * either shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare as a pointer rather than
     through an integer cast whose width depends on BLASLONG. */
  if (address == (void *)-1){
    /* The segment already exists at this point; mark it for removal so it
       does not outlive the process. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; the attachment made above
     stays usable until it is detached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||||
|
|
||||||
|
#define malloc huge_malloc
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
FLOAT *a,*b,*c;
|
||||||
|
FLOAT alpha[] = {1.0, 1.0};
|
||||||
|
blasint inc_x=1,inc_y=1;
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
char uplo='U';
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||||
|
|
||||||
|
blasint m, i, j;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timeval start, stop;
|
||||||
|
double time1,timeg;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
|
||||||
|
|
||||||
|
|
||||||
|
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops Time\n");
|
||||||
|
|
||||||
|
for(m = from; m <= to; m += step)
|
||||||
|
{
|
||||||
|
timeg=0;
|
||||||
|
|
||||||
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
|
for (l=0; l<loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
|
for(j = 0; j < m; j++){
|
||||||
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
|
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||||
|
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
|
||||||
|
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
|
||||||
|
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
|
timeg += time1;
|
||||||
|
}
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
" %10.2f MBytes %10.6f sec\n",
|
||||||
|
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -175,9 +175,9 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -177,7 +177,7 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -173,11 +173,9 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,194 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Bind SYR2 to the BLAS entry point for the precision selected at build time:
   dsyr2 when DOUBLE is defined, ssyr2 otherwise. */
#undef SYR2

#ifndef DOUBLE
#  define SYR2 BLASFUNC(ssyr2)
#else
#  define SYR2 BLASFUNC(dsyr2)
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)
|
||||||
|
|
||||||
|
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int gettimeofday(struct timeval *tv, void *tz){
|
||||||
|
|
||||||
|
FILETIME ft;
|
||||||
|
unsigned __int64 tmpres = 0;
|
||||||
|
static int tzflag;
|
||||||
|
|
||||||
|
if (NULL != tv)
|
||||||
|
{
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
|
||||||
|
tmpres |= ft.dwHighDateTime;
|
||||||
|
tmpres <<= 32;
|
||||||
|
tmpres |= ft.dwLowDateTime;
|
||||||
|
|
||||||
|
/*converting file time to unix epoch*/
|
||||||
|
tmpres /= 10; /*convert into microseconds*/
|
||||||
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||||
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||||
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||||
|
|
||||||
|
/*
 * Allocate a buffer from a System V shared-memory segment created with
 * SHM_HUGETLB and return the address at which it was attached.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked down to a
 * HUGE_PAGESIZE boundary, so a size that is already aligned still gains one
 * extra page (assumes HUGE_PAGESIZE is a power of two - TODO confirm).
 *
 * Never returns on failure: prints a message and exits with status 1 if
 * either shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare as a pointer rather than
     through an integer cast whose width depends on BLASLONG. */
  if (address == (void *)-1){
    /* The segment already exists at this point; mark it for removal so it
       does not outlive the process. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; the attachment made above
     stays usable until it is detached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||||
|
|
||||||
|
#define malloc huge_malloc
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
FLOAT *x, *y, *a;
|
||||||
|
FLOAT alpha[] = {1.0, 1.0};
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
char uplo='U';
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||||
|
|
||||||
|
blasint m, i, j;
|
||||||
|
blasint inc_x= 1;
|
||||||
|
blasint inc_y= 1;
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timeval start, stop;
|
||||||
|
double time1;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
|
||||||
|
|
||||||
|
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
|
for(m = from; m <= to; m += step)
|
||||||
|
{
|
||||||
|
|
||||||
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||||
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(j = 0; j < m; j++){
|
||||||
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
|
||||||
|
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
|
||||||
|
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
" %10.2f MFlops\n",
|
||||||
|
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -175,9 +175,9 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -172,8 +172,8 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -175,8 +175,8 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,172 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
/* Bind TRMV to the BLAS entry point matching the build's COMPLEX / DOUBLE
   settings: strmv, dtrmv, ctrmv or ztrmv. */
#undef TRMV

#if defined(COMPLEX) && defined(DOUBLE)
#  define TRMV BLASFUNC(ztrmv)
#elif defined(COMPLEX)
#  define TRMV BLASFUNC(ctrmv)
#elif defined(DOUBLE)
#  define TRMV BLASFUNC(dtrmv)
#else
#  define TRMV BLASFUNC(strmv)
#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||||
|
|
||||||
|
/*
 * Allocate a buffer from a System V shared-memory segment created with
 * SHM_HUGETLB and return the address at which it was attached.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked down to a
 * HUGE_PAGESIZE boundary, so a size that is already aligned still gains one
 * extra page (assumes HUGE_PAGESIZE is a power of two - TODO confirm).
 *
 * Never returns on failure: prints a message and exits with status 1 if
 * either shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size)
{
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare as a pointer rather than
     through an integer cast whose width depends on BLASLONG. */
  if (address == (void *)-1) {
    /* The segment already exists at this point; mark it for removal so it
       does not outlive the process. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; the attachment made above
     stays usable until it is detached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||||
|
|
||||||
|
#define malloc huge_malloc
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
|
||||||
|
FLOAT *a, *x;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
char uplo ='U';
|
||||||
|
char trans='N';
|
||||||
|
char diag ='U';
|
||||||
|
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
blasint inc_x=1;
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||||
|
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||||
|
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||||
|
|
||||||
|
blasint n, i, j;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||||
|
double time1, timeg;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Diag = %c Loops=%d Inc_x=%d\n", from,
|
||||||
|
to, step, uplo, trans, diag, loops, inc_x);
|
||||||
|
|
||||||
|
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
|
for(n = from; n <= to; n += step) {
|
||||||
|
timeg=0;
|
||||||
|
|
||||||
|
fprintf(stderr, " %6d : ", (int)n);
|
||||||
|
for(j = 0; j < n; j++) {
|
||||||
|
for(i = 0; i < n * COMPSIZE; i++) {
|
||||||
|
a[(long)i + (long)j * (long)n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < n * COMPSIZE * abs(inc_x); i++) {
|
||||||
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (l = 0; l < loops; l++) {
|
||||||
|
clock_gettime(CLOCK_REALTIME, &start);
|
||||||
|
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
|
||||||
|
clock_gettime(CLOCK_REALTIME, &stop);
|
||||||
|
|
||||||
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
||||||
|
timeg += time1;
|
||||||
|
}
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
fprintf(stderr, " %10.2f MFlops %12.9f sec\n",
|
||||||
|
COMPSIZE * COMPSIZE * 1. * (double)n * (double)n / timeg / 1.e6, timeg);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
|
@ -191,8 +191,8 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,222 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include <time.h>
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Bind TRSV to the BLAS entry point matching the build's COMPLEX / DOUBLE
   settings: strsv, dtrsv, ctrsv or ztrsv.  GEMV is undefined here as well
   but not redefined. */
#undef GEMV
#undef TRSV

#if defined(COMPLEX) && defined(DOUBLE)
#  define TRSV BLASFUNC(ztrsv)
#elif defined(COMPLEX)
#  define TRSV BLASFUNC(ctrsv)
#elif defined(DOUBLE)
#  define TRSV BLASFUNC(dtrsv)
#else
#  define TRSV BLASFUNC(strsv)
#endif
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)
|
||||||
|
|
||||||
|
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int gettimeofday(struct timeval *tv, void *tz){
|
||||||
|
|
||||||
|
FILETIME ft;
|
||||||
|
unsigned __int64 tmpres = 0;
|
||||||
|
static int tzflag;
|
||||||
|
|
||||||
|
if (NULL != tv)
|
||||||
|
{
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
|
||||||
|
tmpres |= ft.dwHighDateTime;
|
||||||
|
tmpres <<= 32;
|
||||||
|
tmpres |= ft.dwLowDateTime;
|
||||||
|
|
||||||
|
/*converting file time to unix epoch*/
|
||||||
|
tmpres /= 10; /*convert into microseconds*/
|
||||||
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||||
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||||
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||||
|
|
||||||
|
/*
 * Allocate a buffer from a System V shared-memory segment created with
 * SHM_HUGETLB and return the address at which it was attached.
 *
 * The requested size has HUGE_PAGESIZE added and is then masked down to a
 * HUGE_PAGESIZE boundary, so a size that is already aligned still gains one
 * extra page (assumes HUGE_PAGESIZE is a power of two - TODO confirm).
 *
 * Never returns on failure: prints a message and exits with status 1 if
 * either shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1; compare as a pointer rather than
     through an integer cast whose width depends on BLASLONG. */
  if (address == (void *)-1){
    /* The segment already exists at this point; mark it for removal so it
       does not outlive the process. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; the attachment made above
     stays usable until it is detached. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||||
|
|
||||||
|
#define malloc huge_malloc
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
FLOAT *a, *x;
|
||||||
|
blasint n = 0, i, j;
|
||||||
|
blasint inc_x=1;
|
||||||
|
int loops = 1;
|
||||||
|
int l;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
|
struct timespec time_start, time_end;
|
||||||
|
time_t seconds = 0;
|
||||||
|
|
||||||
|
double time1,timeg;
|
||||||
|
long long nanos = 0;
|
||||||
|
|
||||||
|
argc--;argv++;
|
||||||
|
|
||||||
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
char uplo ='L';
|
||||||
|
char transa = 'N';
|
||||||
|
char diag ='U';
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_TRANSA"))) transa=*p;
|
||||||
|
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||||
|
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Transa = '%c' Inc_x = %d uplo=%c diag=%c loop = %d\n", from, to, step,transa,inc_x,
|
||||||
|
uplo,diag,loops);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef linux
|
||||||
|
srandom(getpid());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
fprintf(stderr, "============================================\n");
|
||||||
|
|
||||||
|
for(n = from; n <= to; n += step)
|
||||||
|
{
|
||||||
|
timeg=0;
|
||||||
|
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * n * n * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * n * abs(inc_x) * COMPSIZE)) == NULL){
|
||||||
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(j = 0; j < n; j++){
|
||||||
|
for(i = 0; i < n * COMPSIZE; i++){
|
||||||
|
a[i + j * n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
|
||||||
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(l =0;l< loops;l++){
|
||||||
|
|
||||||
|
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start);
|
||||||
|
|
||||||
|
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
|
||||||
|
|
||||||
|
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end);
|
||||||
|
nanos = time_end.tv_nsec - time_start.tv_nsec;
|
||||||
|
seconds = time_end.tv_sec - time_start.tv_sec;
|
||||||
|
|
||||||
|
time1 = seconds + nanos /1.e9;
|
||||||
|
timeg += time1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
timeg /= loops;
|
||||||
|
long long muls = n*(n+1)/2.0;
|
||||||
|
long long adds = (n - 1.0)*n/2.0;
|
||||||
|
|
||||||
|
fprintf(stderr, "%10d %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
|
||||||
|
if(a != NULL){
|
||||||
|
free(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( x != NULL){
|
||||||
|
free(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
|
@ -99,7 +99,7 @@ endif ()
|
||||||
if (${CORE} STREQUAL "SKYLAKEX")
|
if (${CORE} STREQUAL "SKYLAKEX")
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
if (NOT NO_AVX512)
|
if (NOT NO_AVX512)
|
||||||
set (CCOMMON_OPT = "${CCOMMON_OPT} -march=skylake-avx512")
|
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -140,6 +140,16 @@ typedef struct {
|
||||||
|
|
||||||
} thread_status_t;
|
} thread_status_t;
|
||||||
|
|
||||||
|
#if (__STDC_VERSION__ >= 201112L)
|
||||||
|
#define atomic_load_queue(p) __atomic_load_n(p, __ATOMIC_RELAXED)
|
||||||
|
#define atomic_store_queue(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
|
||||||
|
#else
|
||||||
|
#define atomic_load_queue(p) (blas_queue_t*)(*(volatile blas_queue_t**)(p))
|
||||||
|
#define atomic_store_queue(p, v) (*(volatile blas_queue_t* volatile*)(p) = (v))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static thread_status_t thread_status[MAX_CPU_NUMBER] __attribute__((aligned(ATTRIBUTE_SIZE)));
|
static thread_status_t thread_status[MAX_CPU_NUMBER] __attribute__((aligned(ATTRIBUTE_SIZE)));
|
||||||
|
|
||||||
#ifndef THREAD_TIMEOUT
|
#ifndef THREAD_TIMEOUT
|
||||||
|
@ -312,20 +322,19 @@ blas_queue_t *tscq;
|
||||||
|
|
||||||
last_tick = (unsigned int)rpcc();
|
last_tick = (unsigned int)rpcc();
|
||||||
|
|
||||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
tscq = atomic_load_queue(&thread_status[cpu].queue);
|
||||||
tscq=thread_status[cpu].queue;
|
|
||||||
pthread_mutex_unlock (&thread_status[cpu].lock);
|
|
||||||
|
|
||||||
while(!tscq) {
|
while(!tscq) {
|
||||||
YIELDING;
|
YIELDING;
|
||||||
|
|
||||||
if ((unsigned int)rpcc() - last_tick > thread_timeout) {
|
if ((unsigned int)rpcc() - last_tick > thread_timeout) {
|
||||||
|
|
||||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
|
||||||
|
|
||||||
if (!thread_status[cpu].queue) {
|
if (!atomic_load_queue(&thread_status[cpu].queue)) {
|
||||||
|
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||||
thread_status[cpu].status = THREAD_STATUS_SLEEP;
|
thread_status[cpu].status = THREAD_STATUS_SLEEP;
|
||||||
while (thread_status[cpu].status == THREAD_STATUS_SLEEP) {
|
while (thread_status[cpu].status == THREAD_STATUS_SLEEP &&
|
||||||
|
!atomic_load_queue(&thread_status[cpu].queue)) {
|
||||||
|
|
||||||
#ifdef MONITOR
|
#ifdef MONITOR
|
||||||
main_status[cpu] = MAIN_SLEEPING;
|
main_status[cpu] = MAIN_SLEEPING;
|
||||||
|
@ -333,19 +342,18 @@ blas_queue_t *tscq;
|
||||||
|
|
||||||
pthread_cond_wait(&thread_status[cpu].wakeup, &thread_status[cpu].lock);
|
pthread_cond_wait(&thread_status[cpu].wakeup, &thread_status[cpu].lock);
|
||||||
}
|
}
|
||||||
|
pthread_mutex_unlock(&thread_status[cpu].lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_unlock(&thread_status[cpu].lock);
|
|
||||||
|
|
||||||
last_tick = (unsigned int)rpcc();
|
last_tick = (unsigned int)rpcc();
|
||||||
}
|
}
|
||||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
|
||||||
tscq=thread_status[cpu].queue;
|
tscq = atomic_load_queue(&thread_status[cpu].queue);
|
||||||
pthread_mutex_unlock (&thread_status[cpu].lock);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
queue = thread_status[cpu].queue;
|
queue = atomic_load_queue(&thread_status[cpu].queue);
|
||||||
|
MB;
|
||||||
|
|
||||||
if ((long)queue == -1) break;
|
if ((long)queue == -1) break;
|
||||||
|
|
||||||
|
@ -360,9 +368,7 @@ blas_queue_t *tscq;
|
||||||
if (queue) {
|
if (queue) {
|
||||||
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
|
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
|
||||||
|
|
||||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)1);
|
||||||
thread_status[cpu].queue = (blas_queue_t *)1;
|
|
||||||
pthread_mutex_unlock (&thread_status[cpu].lock);
|
|
||||||
|
|
||||||
sa = queue -> sa;
|
sa = queue -> sa;
|
||||||
sb = queue -> sb;
|
sb = queue -> sb;
|
||||||
|
@ -442,13 +448,9 @@ blas_queue_t *tscq;
|
||||||
|
|
||||||
// arm: make sure all results are written out _before_
|
// arm: make sure all results are written out _before_
|
||||||
// thread is marked as done and other threads use them
|
// thread is marked as done and other threads use them
|
||||||
WMB;
|
MB;
|
||||||
|
atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)0);
|
||||||
|
|
||||||
pthread_mutex_lock (&thread_status[cpu].lock);
|
|
||||||
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
|
|
||||||
pthread_mutex_unlock (&thread_status[cpu].lock);
|
|
||||||
|
|
||||||
WMB;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -566,7 +568,7 @@ int blas_thread_init(void){
|
||||||
|
|
||||||
for(i = 0; i < blas_num_threads - 1; i++){
|
for(i = 0; i < blas_num_threads - 1; i++){
|
||||||
|
|
||||||
thread_status[i].queue = (blas_queue_t *)NULL;
|
atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)0);
|
||||||
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
||||||
|
|
||||||
pthread_mutex_init(&thread_status[i].lock, NULL);
|
pthread_mutex_init(&thread_status[i].lock, NULL);
|
||||||
|
@ -655,7 +657,8 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
||||||
if (queue -> mode & BLAS_NODE) {
|
if (queue -> mode & BLAS_NODE) {
|
||||||
|
|
||||||
do {
|
do {
|
||||||
while((thread_status[i].node != node || thread_status[i].queue) && (i < blas_num_threads - 1)) i ++;
|
|
||||||
|
while((thread_status[i].node != node || atomic_load_queue(&thread_status[i].queue)) && (i < blas_num_threads - 1)) i ++;
|
||||||
|
|
||||||
if (i < blas_num_threads - 1) break;
|
if (i < blas_num_threads - 1) break;
|
||||||
|
|
||||||
|
@ -669,36 +672,26 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
||||||
} while (1);
|
} while (1);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
pthread_mutex_lock (&thread_status[i].lock);
|
tsiq = atomic_load_queue(&thread_status[i].queue);
|
||||||
tsiq = thread_status[i].queue;
|
|
||||||
pthread_mutex_unlock (&thread_status[i].lock);
|
|
||||||
while(tsiq) {
|
while(tsiq) {
|
||||||
i ++;
|
i ++;
|
||||||
if (i >= blas_num_threads - 1) i = 0;
|
if (i >= blas_num_threads - 1) i = 0;
|
||||||
pthread_mutex_lock (&thread_status[i].lock);
|
tsiq = atomic_load_queue(&thread_status[i].queue);
|
||||||
tsiq = thread_status[i].queue;
|
|
||||||
pthread_mutex_unlock (&thread_status[i].lock);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
pthread_mutex_lock (&thread_status[i].lock);
|
tsiq = atomic_load_queue(&thread_status[i].queue);
|
||||||
tsiq=thread_status[i].queue ;
|
|
||||||
pthread_mutex_unlock (&thread_status[i].lock);
|
|
||||||
while(tsiq) {
|
while(tsiq) {
|
||||||
i ++;
|
i ++;
|
||||||
if (i >= blas_num_threads - 1) i = 0;
|
if (i >= blas_num_threads - 1) i = 0;
|
||||||
pthread_mutex_lock (&thread_status[i].lock);
|
tsiq = atomic_load_queue(&thread_status[i].queue);
|
||||||
tsiq=thread_status[i].queue ;
|
|
||||||
pthread_mutex_unlock (&thread_status[i].lock);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
queue -> assigned = i;
|
queue -> assigned = i;
|
||||||
WMB;
|
MB;
|
||||||
pthread_mutex_lock (&thread_status[i].lock);
|
|
||||||
thread_status[i].queue = queue;
|
atomic_store_queue(&thread_status[i].queue, queue);
|
||||||
pthread_mutex_unlock (&thread_status[i].lock);
|
|
||||||
WMB;
|
|
||||||
|
|
||||||
queue = queue -> next;
|
queue = queue -> next;
|
||||||
pos ++;
|
pos ++;
|
||||||
|
@ -718,9 +711,7 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
||||||
|
|
||||||
pos = current -> assigned;
|
pos = current -> assigned;
|
||||||
|
|
||||||
pthread_mutex_lock (&thread_status[pos].lock);
|
tspq = atomic_load_queue(&thread_status[pos].queue);
|
||||||
tspq=thread_status[pos].queue;
|
|
||||||
pthread_mutex_unlock (&thread_status[pos].lock);
|
|
||||||
|
|
||||||
if ((BLASULONG)tspq > 1) {
|
if ((BLASULONG)tspq > 1) {
|
||||||
pthread_mutex_lock (&thread_status[pos].lock);
|
pthread_mutex_lock (&thread_status[pos].lock);
|
||||||
|
@ -752,24 +743,20 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
|
||||||
|
|
||||||
while ((num > 0) && queue) {
|
while ((num > 0) && queue) {
|
||||||
|
|
||||||
pthread_mutex_lock(&thread_status[queue->assigned].lock);
|
tsqq = atomic_load_queue(&thread_status[queue->assigned].queue);
|
||||||
tsqq=thread_status[queue -> assigned].queue;
|
|
||||||
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
|
|
||||||
|
|
||||||
|
|
||||||
while(tsqq) {
|
while(tsqq) {
|
||||||
YIELDING;
|
YIELDING;
|
||||||
pthread_mutex_lock(&thread_status[queue->assigned].lock);
|
tsqq = atomic_load_queue(&thread_status[queue->assigned].queue);
|
||||||
tsqq=thread_status[queue -> assigned].queue;
|
|
||||||
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
|
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
queue = queue -> next;
|
queue = queue -> next;
|
||||||
num --;
|
num --;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MB;
|
||||||
|
|
||||||
#ifdef SMP_DEBUG
|
#ifdef SMP_DEBUG
|
||||||
fprintf(STDERR, "Done.\n\n");
|
fprintf(STDERR, "Done.\n\n");
|
||||||
#endif
|
#endif
|
||||||
|
@ -880,7 +867,7 @@ void goto_set_num_threads(int num_threads) {
|
||||||
|
|
||||||
for(i = blas_num_threads - 1; i < num_threads - 1; i++){
|
for(i = blas_num_threads - 1; i < num_threads - 1; i++){
|
||||||
|
|
||||||
thread_status[i].queue = (blas_queue_t *)NULL;
|
atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)0);
|
||||||
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
||||||
|
|
||||||
pthread_mutex_init(&thread_status[i].lock, NULL);
|
pthread_mutex_init(&thread_status[i].lock, NULL);
|
||||||
|
@ -971,12 +958,11 @@ int BLASFUNC(blas_thread_shutdown)(void){
|
||||||
|
|
||||||
for (i = 0; i < blas_num_threads - 1; i++) {
|
for (i = 0; i < blas_num_threads - 1; i++) {
|
||||||
|
|
||||||
|
|
||||||
pthread_mutex_lock (&thread_status[i].lock);
|
pthread_mutex_lock (&thread_status[i].lock);
|
||||||
|
|
||||||
thread_status[i].queue = (blas_queue_t *)-1;
|
atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1);
|
||||||
|
|
||||||
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
||||||
|
|
||||||
pthread_cond_signal (&thread_status[i].wakeup);
|
pthread_cond_signal (&thread_status[i].wakeup);
|
||||||
|
|
||||||
pthread_mutex_unlock(&thread_status[i].lock);
|
pthread_mutex_unlock(&thread_status[i].lock);
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
extern gotoblas_t gotoblas_POWER6;
|
extern gotoblas_t gotoblas_POWER6;
|
||||||
extern gotoblas_t gotoblas_POWER8;
|
extern gotoblas_t gotoblas_POWER8;
|
||||||
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||||
extern gotoblas_t gotoblas_POWER9;
|
extern gotoblas_t gotoblas_POWER9;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ static char *corename[] = {
|
||||||
char *gotoblas_corename(void) {
|
char *gotoblas_corename(void) {
|
||||||
if (gotoblas == &gotoblas_POWER6) return corename[1];
|
if (gotoblas == &gotoblas_POWER6) return corename[1];
|
||||||
if (gotoblas == &gotoblas_POWER8) return corename[2];
|
if (gotoblas == &gotoblas_POWER8) return corename[2];
|
||||||
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||||
if (gotoblas == &gotoblas_POWER9) return corename[3];
|
if (gotoblas == &gotoblas_POWER9) return corename[3];
|
||||||
#endif
|
#endif
|
||||||
return corename[0];
|
return corename[0];
|
||||||
|
@ -33,7 +33,7 @@ static gotoblas_t *get_coretype(void) {
|
||||||
return &gotoblas_POWER6;
|
return &gotoblas_POWER6;
|
||||||
if (__builtin_cpu_is("power8"))
|
if (__builtin_cpu_is("power8"))
|
||||||
return &gotoblas_POWER8;
|
return &gotoblas_POWER8;
|
||||||
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||||
if (__builtin_cpu_is("power9"))
|
if (__builtin_cpu_is("power9"))
|
||||||
return &gotoblas_POWER9;
|
return &gotoblas_POWER9;
|
||||||
#endif
|
#endif
|
||||||
|
@ -59,7 +59,7 @@ static gotoblas_t *force_coretype(char * coretype) {
|
||||||
{
|
{
|
||||||
case 1: return (&gotoblas_POWER6);
|
case 1: return (&gotoblas_POWER6);
|
||||||
case 2: return (&gotoblas_POWER8);
|
case 2: return (&gotoblas_POWER8);
|
||||||
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||||
case 3: return (&gotoblas_POWER9);
|
case 3: return (&gotoblas_POWER9);
|
||||||
#endif
|
#endif
|
||||||
default: return NULL;
|
default: return NULL;
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
ifeq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
|
||||||
|
include $(KERNELDIR)/KERNEL.POWER8
|
||||||
|
else
|
||||||
|
|
||||||
#SGEMM_BETA = ../generic/gemm_beta.c
|
#SGEMM_BETA = ../generic/gemm_beta.c
|
||||||
#DGEMM_BETA = ../generic/gemm_beta.c
|
#DGEMM_BETA = ../generic/gemm_beta.c
|
||||||
#CGEMM_BETA = ../generic/zgemm_beta.c
|
#CGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
@ -206,3 +210,5 @@ QCABS_KERNEL = ../generic/cabs.c
|
||||||
#Dump kernel
|
#Dump kernel
|
||||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
|
@ -13,7 +13,11 @@
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
#ifdef CONJ
|
||||||
|
caxpyc_k:
|
||||||
|
#else
|
||||||
caxpy_k:
|
caxpy_k:
|
||||||
|
#endif
|
||||||
.LCF0:
|
.LCF0:
|
||||||
0: addis 2,12,.TOC.-.LCF0@ha
|
0: addis 2,12,.TOC.-.LCF0@ha
|
||||||
addi 2,2,.TOC.-.LCF0@l
|
addi 2,2,.TOC.-.LCF0@l
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
/*
|
||||||
.file "icamax.c"
|
.file "icamax.c"
|
||||||
.abiversion 2
|
.abiversion 2
|
||||||
.section ".text"
|
.section ".text"
|
||||||
|
@ -5,6 +6,12 @@
|
||||||
.p2align 4,,15
|
.p2align 4,,15
|
||||||
.globl icamax_k
|
.globl icamax_k
|
||||||
.type icamax_k, @function
|
.type icamax_k, @function
|
||||||
|
*/
|
||||||
|
#define ASSEMBLER
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
PROLOGUE
|
||||||
|
|
||||||
icamax_k:
|
icamax_k:
|
||||||
.LCF0:
|
.LCF0:
|
||||||
0: addis 2,12,.TOC.-.LCF0@ha
|
0: addis 2,12,.TOC.-.LCF0@ha
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
/*
|
||||||
.file "icamin.c"
|
.file "icamin.c"
|
||||||
.abiversion 2
|
.abiversion 2
|
||||||
.section ".text"
|
.section ".text"
|
||||||
|
@ -5,6 +6,12 @@
|
||||||
.p2align 4,,15
|
.p2align 4,,15
|
||||||
.globl icamin_k
|
.globl icamin_k
|
||||||
.type icamin_k, @function
|
.type icamin_k, @function
|
||||||
|
*/
|
||||||
|
#define ASSEMBLER
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
PROLOGUE
|
||||||
|
|
||||||
icamin_k:
|
icamin_k:
|
||||||
.LCF0:
|
.LCF0:
|
||||||
0: addis 2,12,.TOC.-.LCF0@ha
|
0: addis 2,12,.TOC.-.LCF0@ha
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
/*
|
||||||
.file "isamax.c"
|
.file "isamax.c"
|
||||||
.abiversion 2
|
.abiversion 2
|
||||||
.section ".text"
|
.section ".text"
|
||||||
|
@ -5,6 +6,12 @@
|
||||||
.p2align 4,,15
|
.p2align 4,,15
|
||||||
.globl isamax_k
|
.globl isamax_k
|
||||||
.type isamax_k, @function
|
.type isamax_k, @function
|
||||||
|
*/
|
||||||
|
#define ASSEMBLER
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
PROLOGUE
|
||||||
|
|
||||||
isamax_k:
|
isamax_k:
|
||||||
.LCF0:
|
.LCF0:
|
||||||
0: addis 2,12,.TOC.-.LCF0@ha
|
0: addis 2,12,.TOC.-.LCF0@ha
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
/*
|
||||||
.file "isamin.c"
|
.file "isamin.c"
|
||||||
.abiversion 2
|
.abiversion 2
|
||||||
.section ".text"
|
.section ".text"
|
||||||
|
@ -5,6 +6,12 @@
|
||||||
.p2align 4,,15
|
.p2align 4,,15
|
||||||
.globl isamin_k
|
.globl isamin_k
|
||||||
.type isamin_k, @function
|
.type isamin_k, @function
|
||||||
|
*/
|
||||||
|
#define ASSEMBLER
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
PROLOGUE
|
||||||
|
|
||||||
isamin_k:
|
isamin_k:
|
||||||
.LCF0:
|
.LCF0:
|
||||||
0: addis 2,12,.TOC.-.LCF0@ha
|
0: addis 2,12,.TOC.-.LCF0@ha
|
||||||
|
|
|
@ -68,23 +68,14 @@ double sqrt(double);
|
||||||
#define GETRF_FACTOR 1.00
|
#define GETRF_FACTOR 1.00
|
||||||
|
|
||||||
|
|
||||||
#if defined(USE_PTHREAD_LOCK)
|
#if (__STDC_VERSION__ >= 201112L)
|
||||||
static pthread_mutex_t getrf_lock = PTHREAD_MUTEX_INITIALIZER;
|
#define atomic_load_long(p) __atomic_load_n(p, __ATOMIC_RELAXED)
|
||||||
#elif defined(USE_PTHREAD_SPINLOCK)
|
#define atomic_store_long(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
|
||||||
static pthread_spinlock_t getrf_lock = 0;
|
|
||||||
#else
|
#else
|
||||||
static BLASULONG getrf_lock = 0UL;
|
#define atomic_load_long(p) (BLASLONG)(*(volatile BLASLONG*)(p))
|
||||||
|
#define atomic_store_long(p, v) (*(volatile BLASLONG *)(p)) = (v)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(USE_PTHREAD_LOCK)
|
|
||||||
static pthread_mutex_t getrf_flag_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
||||||
#elif defined(USE_PTHREAD_SPINLOCK)
|
|
||||||
static pthread_spinlock_t getrf_flag_lock = 0;
|
|
||||||
#else
|
|
||||||
static BLASULONG getrf_flag_lock = 0UL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static __inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) {
|
static __inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) {
|
||||||
|
@ -119,11 +110,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
|
||||||
FLOAT *d = (FLOAT *)args -> b + (k + k * lda) * COMPSIZE;
|
FLOAT *d = (FLOAT *)args -> b + (k + k * lda) * COMPSIZE;
|
||||||
FLOAT *sbb = sb;
|
FLOAT *sbb = sb;
|
||||||
|
|
||||||
#if __STDC_VERSION__ >= 201112L
|
|
||||||
_Atomic BLASLONG *flag = (_Atomic BLASLONG *)args -> d;
|
|
||||||
#else
|
|
||||||
volatile BLASLONG *flag = (volatile BLASLONG *)args -> d;
|
volatile BLASLONG *flag = (volatile BLASLONG *)args -> d;
|
||||||
#endif
|
|
||||||
|
|
||||||
blasint *ipiv = (blasint *)args -> c;
|
blasint *ipiv = (blasint *)args -> c;
|
||||||
|
|
||||||
|
@ -180,7 +167,10 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((js + REAL_GEMM_R >= n) && (mypos >= 0)) flag[mypos * CACHE_LINE_SIZE] = 0;
|
if ((js + REAL_GEMM_R >= n) && (mypos >= 0)) {
|
||||||
|
MB;
|
||||||
|
atomic_store_long(&flag[mypos * CACHE_LINE_SIZE], 0);
|
||||||
|
}
|
||||||
|
|
||||||
for (is = 0; is < m; is += GEMM_P){
|
for (is = 0; is < m; is += GEMM_P){
|
||||||
min_i = m - is;
|
min_i = m - is;
|
||||||
|
@ -201,14 +191,10 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
|
||||||
/* Non blocking implementation */
|
/* Non blocking implementation */
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
#if __STDC_VERSION__ >= 201112L
|
volatile BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
|
||||||
_Atomic
|
|
||||||
#else
|
|
||||||
volatile
|
|
||||||
#endif
|
|
||||||
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
|
|
||||||
} job_t;
|
} job_t;
|
||||||
|
|
||||||
|
|
||||||
#define ICOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_ITCOPY(M, N, (FLOAT *)(A) + ((Y) + (X) * (LDA)) * COMPSIZE, LDA, BUFFER);
|
#define ICOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_ITCOPY(M, N, (FLOAT *)(A) + ((Y) + (X) * (LDA)) * COMPSIZE, LDA, BUFFER);
|
||||||
#define OCOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_ONCOPY(M, N, (FLOAT *)(A) + ((X) + (Y) * (LDA)) * COMPSIZE, LDA, BUFFER);
|
#define OCOPY_OPERATION(M, N, A, LDA, X, Y, BUFFER) GEMM_ONCOPY(M, N, (FLOAT *)(A) + ((X) + (Y) * (LDA)) * COMPSIZE, LDA, BUFFER);
|
||||||
|
|
||||||
|
@ -246,11 +232,8 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
||||||
|
|
||||||
blasint *ipiv = (blasint *)args -> c;
|
blasint *ipiv = (blasint *)args -> c;
|
||||||
BLASLONG jw;
|
BLASLONG jw;
|
||||||
#if __STDC_VERSION__ >= 201112L
|
|
||||||
_Atomic BLASLONG *flag = (_Atomic BLASLONG *)args -> d;
|
|
||||||
#else
|
|
||||||
volatile BLASLONG *flag = (volatile BLASLONG *)args -> d;
|
volatile BLASLONG *flag = (volatile BLASLONG *)args -> d;
|
||||||
#endif
|
|
||||||
if (args -> a == NULL) {
|
if (args -> a == NULL) {
|
||||||
TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb);
|
TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb);
|
||||||
sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
|
||||||
|
@ -280,10 +263,9 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
||||||
#if 1
|
#if 1
|
||||||
{
|
{
|
||||||
do {
|
do {
|
||||||
LOCK_COMMAND(&getrf_lock);
|
jw = atomic_load_long(&job[mypos].working[i][CACHE_LINE_SIZE * bufferside]);
|
||||||
jw = job[mypos].working[i][CACHE_LINE_SIZE * bufferside];
|
|
||||||
UNLOCK_COMMAND(&getrf_lock);
|
|
||||||
} while (jw);
|
} while (jw);
|
||||||
|
MB;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {};
|
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {};
|
||||||
|
@ -326,21 +308,17 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
||||||
}
|
}
|
||||||
MB;
|
MB;
|
||||||
for (i = 0; i < args -> nthreads; i++) {
|
for (i = 0; i < args -> nthreads; i++) {
|
||||||
LOCK_COMMAND(&getrf_lock);
|
atomic_store_long(&job[mypos].working[i][CACHE_LINE_SIZE * bufferside], (BLASLONG)buffer[bufferside]);
|
||||||
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
|
|
||||||
UNLOCK_COMMAND(&getrf_lock);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOCK_COMMAND(&getrf_flag_lock);
|
MB;
|
||||||
flag[mypos * CACHE_LINE_SIZE] = 0;
|
atomic_store_long(&flag[mypos * CACHE_LINE_SIZE], 0);
|
||||||
UNLOCK_COMMAND(&getrf_flag_lock);
|
|
||||||
|
|
||||||
if (m == 0) {
|
if (m == 0) {
|
||||||
|
MB;
|
||||||
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
|
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
|
||||||
LOCK_COMMAND(&getrf_lock);
|
atomic_store_long(&job[mypos].working[mypos][CACHE_LINE_SIZE * xxx], 0);
|
||||||
job[mypos].working[mypos][CACHE_LINE_SIZE * xxx] = 0;
|
|
||||||
UNLOCK_COMMAND(&getrf_lock);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -366,10 +344,9 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
||||||
if ((current != mypos) && (!is)) {
|
if ((current != mypos) && (!is)) {
|
||||||
#if 1
|
#if 1
|
||||||
do {
|
do {
|
||||||
LOCK_COMMAND(&getrf_lock);
|
jw = atomic_load_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside]);
|
||||||
jw = job[current].working[mypos][CACHE_LINE_SIZE * bufferside];
|
} while (jw == 0);
|
||||||
UNLOCK_COMMAND(&getrf_lock);
|
MB;
|
||||||
} while (jw == 0);
|
|
||||||
#else
|
#else
|
||||||
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {};
|
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {};
|
||||||
#endif
|
#endif
|
||||||
|
@ -381,9 +358,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
||||||
|
|
||||||
MB;
|
MB;
|
||||||
if (is + min_i >= m) {
|
if (is + min_i >= m) {
|
||||||
LOCK_COMMAND(&getrf_lock);
|
atomic_store_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside], 0);
|
||||||
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
|
|
||||||
UNLOCK_COMMAND(&getrf_lock);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -397,10 +372,9 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
|
||||||
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
|
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
|
||||||
#if 1
|
#if 1
|
||||||
do {
|
do {
|
||||||
LOCK_COMMAND(&getrf_lock);
|
jw = atomic_load_long(&job[mypos].working[i][CACHE_LINE_SIZE *xxx]);
|
||||||
jw = job[mypos].working[i][CACHE_LINE_SIZE *xxx];
|
|
||||||
UNLOCK_COMMAND(&getrf_lock);
|
|
||||||
} while(jw != 0);
|
} while(jw != 0);
|
||||||
|
MB;
|
||||||
#else
|
#else
|
||||||
while (job[mypos].working[i][CACHE_LINE_SIZE * xxx] ) {};
|
while (job[mypos].working[i][CACHE_LINE_SIZE * xxx] ) {};
|
||||||
#endif
|
#endif
|
||||||
|
@ -443,12 +417,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE];
|
BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE];
|
||||||
#else
|
#else
|
||||||
#if __STDC_VERSION__ >= 201112L
|
volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
|
||||||
_Atomic
|
|
||||||
#else
|
|
||||||
volatile
|
|
||||||
#endif
|
|
||||||
BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
|
@ -543,7 +512,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
if (width > mn - is - bk) width = mn - is - bk;
|
if (width > mn - is - bk) width = mn - is - bk;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (num_cpu > 0) exec_blas_async_wait(num_cpu, &queue[0]);
|
|
||||||
|
if (num_cpu > 0) {
|
||||||
|
WMB;
|
||||||
|
exec_blas_async_wait(num_cpu, &queue[0]);
|
||||||
|
}
|
||||||
|
|
||||||
mm = m - bk - is;
|
mm = m - bk - is;
|
||||||
nn = n - bk - is;
|
nn = n - bk - is;
|
||||||
|
@ -608,7 +581,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
queue[num_cpu].sa = NULL;
|
queue[num_cpu].sa = NULL;
|
||||||
queue[num_cpu].sb = NULL;
|
queue[num_cpu].sb = NULL;
|
||||||
queue[num_cpu].next = &queue[num_cpu + 1];
|
queue[num_cpu].next = &queue[num_cpu + 1];
|
||||||
flag[num_cpu * CACHE_LINE_SIZE] = 1;
|
atomic_store_long(&flag[num_cpu * CACHE_LINE_SIZE], 1);
|
||||||
|
|
||||||
num_cpu ++;
|
num_cpu ++;
|
||||||
|
|
||||||
|
@ -637,6 +610,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
if (num_cpu > 0) {
|
if (num_cpu > 0) {
|
||||||
queue[num_cpu - 1].next = NULL;
|
queue[num_cpu - 1].next = NULL;
|
||||||
|
|
||||||
|
WMB;
|
||||||
|
|
||||||
exec_blas_async(0, &queue[0]);
|
exec_blas_async(0, &queue[0]);
|
||||||
|
|
||||||
inner_basic_thread(&newarg, NULL, range_n_mine, sa, sbb, -1);
|
inner_basic_thread(&newarg, NULL, range_n_mine, sa, sbb, -1);
|
||||||
|
@ -647,14 +622,10 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
|
|
||||||
for (i = 0; i < num_cpu; i ++) {
|
for (i = 0; i < num_cpu; i ++) {
|
||||||
#if 1
|
#if 1
|
||||||
LOCK_COMMAND(&getrf_flag_lock);
|
do {
|
||||||
f=flag[i*CACHE_LINE_SIZE];
|
f = atomic_load_long(&flag[i*CACHE_LINE_SIZE]);
|
||||||
UNLOCK_COMMAND(&getrf_flag_lock);
|
} while (f != 0);
|
||||||
while (f!=0) {
|
MB;
|
||||||
LOCK_COMMAND(&getrf_flag_lock);
|
|
||||||
f=flag[i*CACHE_LINE_SIZE];
|
|
||||||
UNLOCK_COMMAND(&getrf_flag_lock);
|
|
||||||
};
|
|
||||||
#else
|
#else
|
||||||
while (flag[i*CACHE_LINE_SIZE]) {};
|
while (flag[i*CACHE_LINE_SIZE]) {};
|
||||||
#endif
|
#endif
|
||||||
|
@ -719,12 +690,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
BLASLONG range[MAX_CPU_NUMBER + 1];
|
BLASLONG range[MAX_CPU_NUMBER + 1];
|
||||||
|
|
||||||
BLASLONG width, nn, num_cpu;
|
BLASLONG width, nn, num_cpu;
|
||||||
#if __STDC_VERSION__ >= 201112L
|
volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
|
||||||
_Atomic
|
|
||||||
#else
|
|
||||||
volatile
|
|
||||||
#endif
|
|
||||||
BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
|
|
||||||
|
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
|
@ -833,6 +799,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
nn = n - bk - is;
|
nn = n - bk - is;
|
||||||
if (width > nn) width = nn;
|
if (width > nn) width = nn;
|
||||||
|
|
||||||
|
WMB;
|
||||||
|
|
||||||
if (num_cpu > 1) exec_blas_async_wait(num_cpu - 1, &queue[1]);
|
if (num_cpu > 1) exec_blas_async_wait(num_cpu - 1, &queue[1]);
|
||||||
|
|
||||||
range[0] = 0;
|
range[0] = 0;
|
||||||
|
@ -867,7 +835,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
queue[num_cpu].sa = NULL;
|
queue[num_cpu].sa = NULL;
|
||||||
queue[num_cpu].sb = NULL;
|
queue[num_cpu].sb = NULL;
|
||||||
queue[num_cpu].next = &queue[num_cpu + 1];
|
queue[num_cpu].next = &queue[num_cpu + 1];
|
||||||
flag[num_cpu * CACHE_LINE_SIZE] = 1;
|
atomic_store_long(&flag[num_cpu * CACHE_LINE_SIZE], 1);
|
||||||
|
|
||||||
num_cpu ++;
|
num_cpu ++;
|
||||||
}
|
}
|
||||||
|
@ -882,6 +850,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
range_n_new[0] = offset + is;
|
range_n_new[0] = offset + is;
|
||||||
range_n_new[1] = offset + is + bk;
|
range_n_new[1] = offset + is + bk;
|
||||||
|
|
||||||
|
WMB;
|
||||||
if (num_cpu > 1) {
|
if (num_cpu > 1) {
|
||||||
|
|
||||||
exec_blas_async(1, &queue[1]);
|
exec_blas_async(1, &queue[1]);
|
||||||
|
@ -917,7 +886,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (i = 1; i < num_cpu; i ++) while (flag[i * CACHE_LINE_SIZE]) {};
|
for (i = 1; i < num_cpu; i ++) while (atomic_load_long(&flag[i * CACHE_LINE_SIZE])) {};
|
||||||
|
|
||||||
TRSM_ILTCOPY(bk, bk, a + (is + is * lda) * COMPSIZE, lda, 0, sb);
|
TRSM_ILTCOPY(bk, bk, a + (is + is * lda) * COMPSIZE, lda, 0, sb);
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ if (MSVC AND "${CMAKE_C_COMPILER_ID}" MATCHES Clang)
|
||||||
else ()
|
else ()
|
||||||
set(OpenBLAS_utest_src
|
set(OpenBLAS_utest_src
|
||||||
utest_main.c
|
utest_main.c
|
||||||
|
test_min.c
|
||||||
test_amax.c
|
test_amax.c
|
||||||
test_ismin.c
|
test_ismin.c
|
||||||
test_rotmg.c
|
test_rotmg.c
|
||||||
|
|
|
@ -11,7 +11,7 @@ UTESTBIN=openblas_utest
|
||||||
|
|
||||||
include $(TOPDIR)/Makefile.system
|
include $(TOPDIR)/Makefile.system
|
||||||
|
|
||||||
OBJS=utest_main.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o
|
OBJS=utest_main.o test_min.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o
|
||||||
#test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o
|
#test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o
|
||||||
|
|
||||||
ifneq ($(NO_LAPACK), 1)
|
ifneq ($(NO_LAPACK), 1)
|
||||||
|
|
|
@ -43,3 +43,14 @@ CTEST(amax, samax){
|
||||||
|
|
||||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CTEST(amax, damax){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
double te_max=0.0, tr_max=0.0;
|
||||||
|
double x[]={-1.1, 2.2, -3.3};
|
||||||
|
|
||||||
|
te_max=BLASFUNC(damax)(&N, x, &inc);
|
||||||
|
tr_max=3.3;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "openblas_utest.h"
|
||||||
|
|
||||||
|
CTEST(min, smin_negative){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
float te_min=0.0, tr_min=0.0;
|
||||||
|
float x[]={-1.1, -2.2, -3.3};
|
||||||
|
|
||||||
|
te_min=BLASFUNC(smin)(&N, x, &inc);
|
||||||
|
tr_min=-3.3;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(min, dmin_positive){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
double te_min=0.0, tr_min=0.0;
|
||||||
|
double x[]={1.1, 0.0, 3.3};
|
||||||
|
|
||||||
|
te_min=BLASFUNC(dmin)(&N, x, &inc);
|
||||||
|
tr_min=0.0;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), DOUBLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(min, smin_zero){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
float te_min=0.0, tr_min=0.0;
|
||||||
|
float x[]={1.1, 2.2, 0.0};
|
||||||
|
|
||||||
|
te_min=BLASFUNC(smin)(&N, x, &inc);
|
||||||
|
tr_min=0.0;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(max, smax_negative){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
float te_max=0.0, tr_max=0.0;
|
||||||
|
float x[]={-1.1, -2.2, -3.3};
|
||||||
|
|
||||||
|
te_max=BLASFUNC(smax)(&N, x, &inc);
|
||||||
|
tr_max=-1.1;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(max, dmax_positive){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
double te_max=0.0, tr_max=0.0;
|
||||||
|
double x[]={1.1, 0.0, 3.3};
|
||||||
|
|
||||||
|
te_max=BLASFUNC(dmax)(&N, x, &inc);
|
||||||
|
tr_max=3.3;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(max, smax_zero){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
float te_max=0.0, tr_max=0.0;
|
||||||
|
float x[]={-1.1, -2.2, 0.0};
|
||||||
|
|
||||||
|
te_max=BLASFUNC(smax)(&N, x, &inc);
|
||||||
|
tr_max=0.0;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||||
|
}
|
|
@ -50,6 +50,17 @@ CTEST(amax, samax){
|
||||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CTEST(amax, damax){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
double te_max=0.0, tr_max=0.0;
|
||||||
|
double x[]={-1.1, 2.2, -3.3};
|
||||||
|
|
||||||
|
te_max=BLASFUNC(damax)(&N, x, &inc);
|
||||||
|
tr_max=3.3;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
CTEST (drotmg,rotmg)
|
CTEST (drotmg,rotmg)
|
||||||
{
|
{
|
||||||
double te_d1, tr_d1;
|
double te_d1, tr_d1;
|
||||||
|
@ -508,9 +519,82 @@ CTEST(swap,cswap_inc_0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CTEST(min, smin_negative){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
float te_min=0.0, tr_min=0.0;
|
||||||
|
float x[]={-1.1, -2.2, -3.3};
|
||||||
|
|
||||||
|
te_min=BLASFUNC(smin)(&N, x, &inc);
|
||||||
|
tr_min=-3.3;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(min, dmin_positive){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
double te_min=0.0, tr_min=0.0;
|
||||||
|
double x[]={1.1, 0.0, 3.3};
|
||||||
|
|
||||||
|
te_min=BLASFUNC(dmin)(&N, x, &inc);
|
||||||
|
tr_min=0.0;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), DOUBLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(min, smin_zero){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
float te_min=0.0, tr_min=0.0;
|
||||||
|
float x[]={1.1, 2.2, 0.0};
|
||||||
|
|
||||||
|
te_min=BLASFUNC(smin)(&N, x, &inc);
|
||||||
|
tr_min=0.0;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(max, smax_negative){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
float te_max=0.0, tr_max=0.0;
|
||||||
|
float x[]={-1.1, -2.2, -3.3};
|
||||||
|
|
||||||
|
te_max=BLASFUNC(smax)(&N, x, &inc);
|
||||||
|
tr_max=-1.1;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(max, dmax_positive){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
double te_max=0.0, tr_max=0.0;
|
||||||
|
double x[]={1.1, 0.0, 3.3};
|
||||||
|
|
||||||
|
te_max=BLASFUNC(dmax)(&N, x, &inc);
|
||||||
|
tr_max=3.3;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(max, smax_zero){
|
||||||
|
blasint N=3, inc=1;
|
||||||
|
float te_max=0.0, tr_max=0.0;
|
||||||
|
float x[]={-1.1, -2.2, 0.0};
|
||||||
|
|
||||||
|
te_max=BLASFUNC(smax)(&N, x, &inc);
|
||||||
|
tr_max=0.0;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, const char ** argv){
|
int main(int argc, const char ** argv){
|
||||||
|
|
||||||
CTEST_ADD(amax, samax);
|
CTEST_ADD (amax, samax);
|
||||||
|
CTEST_ADD (amax, damax);
|
||||||
|
CTEST_ADD (min, smin_negative);
|
||||||
|
CTEST_ADD (min, dmin_positive);
|
||||||
|
CTEST_ADD (min, smin_zero);
|
||||||
|
CTEST_ADD (max, smax_negative);
|
||||||
|
CTEST_ADD (max, dmax_positive);
|
||||||
|
CTEST_ADD (max, smax_zero);
|
||||||
CTEST_ADD (drotmg,rotmg);
|
CTEST_ADD (drotmg,rotmg);
|
||||||
CTEST_ADD (drotmg,rotmg_issue1452);
|
CTEST_ADD (drotmg,rotmg_issue1452);
|
||||||
CTEST_ADD (drotmg,rotmg_D1eqD2_X1eqX2);
|
CTEST_ADD (drotmg,rotmg_D1eqD2_X1eqX2);
|
||||||
|
|
Loading…
Reference in New Issue