ARM64: Use THUNDERX2T99 Neon Kernels for ARMV8

Currently the generic ARMV8 target uses C implementations
for many routines. Replace these with the NEON implementations
written for the THUNDERX2T99 target, which are up to 6x faster for
certain routines.
This commit is contained in:
Ashwin Sekhar T K
2018-10-17 08:11:27 -07:00
parent caf339412f
commit 21f46a1cf2
4 changed files with 224 additions and 105 deletions

View File

@@ -730,7 +730,7 @@ void blas_set_parameter(void){
#if defined(ARCH_ARM64)
#if defined(VULCAN) || defined(THUNDERX2T99)
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8)
unsigned long dgemm_prefetch_size_a;
unsigned long dgemm_prefetch_size_b;
unsigned long dgemm_prefetch_size_c;
@@ -738,7 +738,7 @@ unsigned long dgemm_prefetch_size_c;
void blas_set_parameter(void)
{
#if defined(VULCAN) || defined(THUNDERX2T99)
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8)
dgemm_p = 160;
dgemm_q = 128;
dgemm_r = 4096;