Merge branch 'develop' into risc-v

This commit is contained in:
Zhang Xianyi
2020-10-16 23:27:38 +08:00
581 changed files with 47657 additions and 6952 deletions

306
param.h
View File

@@ -72,6 +72,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef PARAM_H
#define PARAM_H
#define SBGEMM_DEFAULT_UNROLL_N 4
#define SBGEMM_DEFAULT_UNROLL_M 8
#define SBGEMM_DEFAULT_UNROLL_MN 32
#define SBGEMM_DEFAULT_P 256
#define SBGEMM_DEFAULT_R 256
#define SBGEMM_DEFAULT_Q 256
#ifdef OPTERON
#define SNUMOPT 4
@@ -1700,6 +1706,127 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#define SGEMM_DEFAULT_P 448
#define DGEMM_DEFAULT_P 192
#define CGEMM_DEFAULT_P 384
#define ZGEMM_DEFAULT_P 256
#define SGEMM_DEFAULT_Q 448
#define DGEMM_DEFAULT_Q 384
#define CGEMM_DEFAULT_Q 192
#define ZGEMM_DEFAULT_Q 128
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R 8640
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define QGEMM_DEFAULT_Q 128
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
#define XGEMM_DEFAULT_Q 128
#define CGEMM3M_DEFAULT_UNROLL_N 4
#define CGEMM3M_DEFAULT_UNROLL_M 8
#define ZGEMM3M_DEFAULT_UNROLL_N 4
#define ZGEMM3M_DEFAULT_UNROLL_M 4
#define CGEMM3M_DEFAULT_P 320
#define ZGEMM3M_DEFAULT_P 256
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 320
#define ZGEMM3M_DEFAULT_Q 256
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288
#endif
#endif
#ifdef COOPERLAKE
#define SNUMOPT 16
#define DNUMOPT 8
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SYMV_P 8
#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#define GEMM_PREFERED_SIZE 8
#else
#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 16
#endif
#define USE_SGEMM_KERNEL_DIRECT 1
#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1
#else
#define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 16
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 2
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1
#define SGEMM_DEFAULT_UNROLL_MN 32
#define DGEMM_DEFAULT_UNROLL_MN 32
#endif
#ifdef ARCH_X86
#define SGEMM_DEFAULT_P 512
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_R 1024
#define ZGEMM_DEFAULT_P 512
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
#define SGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_Q 192
#define XGEMM_DEFAULT_Q 128
#else
#define SGEMM_DEFAULT_P 640
#define DGEMM_DEFAULT_P 192
#define CGEMM_DEFAULT_P 384
@@ -1722,28 +1849,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_R xgemm_r
#define XGEMM_DEFAULT_Q 128
#define CGEMM3M_DEFAULT_UNROLL_N 8
#define CGEMM3M_DEFAULT_UNROLL_M 4
#define ZGEMM3M_DEFAULT_UNROLL_N 8
#define ZGEMM3M_DEFAULT_UNROLL_M 2
#define CGEMM3M_DEFAULT_UNROLL_N 4
#define CGEMM3M_DEFAULT_UNROLL_M 8
#define ZGEMM3M_DEFAULT_UNROLL_N 4
#define ZGEMM3M_DEFAULT_UNROLL_M 4
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define CGEMM3M_DEFAULT_P 320
#define ZGEMM3M_DEFAULT_P 256
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_Q 320
#define ZGEMM3M_DEFAULT_Q 256
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288
#endif
#endif
#ifdef ATOM
#define SNUMOPT 2
@@ -1968,7 +2092,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2
@@ -2219,7 +2343,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 65536
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
#if defined(__32BIT__)
#warning using BINARY32==POWER6
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 4
#else
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_M 16
@@ -2228,22 +2362,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_N 2
#endif
#define SGEMM_DEFAULT_P 1280UL
#define DGEMM_DEFAULT_P 640UL
#define CGEMM_DEFAULT_P 640UL
#define ZGEMM_DEFAULT_P 320UL
#define SGEMM_DEFAULT_P 1280
#define DGEMM_DEFAULT_P 640
#define CGEMM_DEFAULT_P 640
#define ZGEMM_DEFAULT_P 320
#define SGEMM_DEFAULT_Q 640UL
#define DGEMM_DEFAULT_Q 720UL
#define CGEMM_DEFAULT_Q 640UL
#define ZGEMM_DEFAULT_Q 640UL
#define SGEMM_DEFAULT_Q 640
#define DGEMM_DEFAULT_Q 720
#define CGEMM_DEFAULT_Q 640
#define ZGEMM_DEFAULT_Q 640
#if 0
#define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
#define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
#define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
#define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
#endif
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#define SYMV_P 8
#endif
#if defined(POWER9)
#if defined(POWER9) || defined(POWER10)
#define SNUMOPT 16
#define DNUMOPT 8
@@ -2271,10 +2416,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM_DEFAULT_Q 1026
#define ZGEMM_DEFAULT_Q 1026
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#define SYMV_P 8
#endif
#if defined(POWER10)
#undef SBGEMM_DEFAULT_UNROLL_N
#undef SBGEMM_DEFAULT_UNROLL_M
#undef SBGEMM_DEFAULT_P
#undef SBGEMM_DEFAULT_R
#undef SBGEMM_DEFAULT_Q
#define SBGEMM_DEFAULT_UNROLL_M 16
#define SBGEMM_DEFAULT_UNROLL_N 8
#define SBGEMM_DEFAULT_P 832
#define SBGEMM_DEFAULT_Q 1026
#define SBGEMM_DEFAULT_R 4096
#endif
#if defined(SPARC) && defined(V7)
#define SNUMOPT 4
@@ -2457,7 +2620,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16
#endif
#if defined(P5600) || defined(MIPS1004K) || defined(I6400) || defined(P6600) || defined(I6500)
#if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
#define SNUMOPT 2
#define DNUMOPT 2
@@ -2640,7 +2803,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 16
#if defined(CORTEXA53) || defined(CORTEXA57) || \
#if defined(CORTEXA57) || \
defined(CORTEXA72) || defined(CORTEXA73) || \
defined(FALKOR) || defined(TSV110) || defined(EMAG8180)
@@ -2659,7 +2822,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*FIXME: this should be using the cache size, but there is currently no easy way to
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
is a big desktop or server with abundant cache rather than a phone or embedded device */
#if NUM_CORES > 8
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180)
#define SGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_P 256
#define CGEMM_DEFAULT_P 256
@@ -2686,6 +2849,35 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 2048
#elif defined(CORTEXA53)
#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 256
#define DGEMM_DEFAULT_P 160
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 112
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 2048
#elif defined(THUNDERX)
#define SGEMM_DEFAULT_UNROLL_M 4
@@ -2744,6 +2936,64 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#elif defined(THUNDERX3T110)
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 320
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 352
#define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 112
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#elif defined(NEOVERSEN1)
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 160
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 352
#define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 112
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#else // Other/undetected ARMv8 cores
#define SGEMM_DEFAULT_UNROLL_M 16
@@ -2987,7 +3237,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 8
@@ -2999,12 +3249,12 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 456
#define SGEMM_DEFAULT_P 480
#define DGEMM_DEFAULT_P 320
#define CGEMM_DEFAULT_P 480
#define ZGEMM_DEFAULT_P 224
#define SGEMM_DEFAULT_Q 488
#define SGEMM_DEFAULT_Q 512
#define DGEMM_DEFAULT_Q 384
#define CGEMM_DEFAULT_Q 128
#define ZGEMM_DEFAULT_Q 352