ARM64: Use THUNDERX2T99 Neon Kernels for ARMV8

Currently the generic ARMV8 target uses C implementations
for many routines. Replace these with the neon implementations
written for THUNDERX2T99 target which are upto 6x faster for
certain routines.
This commit is contained in:
Ashwin Sekhar T K
2018-10-17 08:11:27 -07:00
parent caf339412f
commit 21f46a1cf2
4 changed files with 224 additions and 105 deletions

47
param.h
View File

@@ -2583,6 +2583,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#if defined(ARMV8)
#if defined(OS_DARWIN) && defined(CROSS)
#define SNUMOPT 2
#define DNUMOPT 2
@@ -2590,13 +2592,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#if defined(OS_DARWIN) && defined(CROSS)
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL N 2
#else
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#endif
#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 2
@@ -2622,10 +2619,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#define SYMV_P 16
#else
#define SNUMOPT 2
#define DNUMOPT 2
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P sgemm_p
#define DGEMM_DEFAULT_P dgemm_p
#define CGEMM_DEFAULT_P cgemm_p
#define ZGEMM_DEFAULT_P zgemm_p
#define SGEMM_DEFAULT_Q sgemm_q
#define DGEMM_DEFAULT_Q dgemm_q
#define CGEMM_DEFAULT_Q cgemm_q
#define ZGEMM_DEFAULT_Q zgemm_q
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define SYMV_P 16
#endif
#endif
#if defined(THUNDERX)
#define SNUMOPT 2
#define DNUMOPT 2