From 19f3a4091c41ec50b1f956e916680241cf202c91 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Sat, 22 Feb 2020 05:07:55 +0000 Subject: [PATCH] Make rpcc() on arm64 get closer to what x86 returns The Arm implementation of rpcc() uses the architected timer which is defined by the SBSA to be between 10-400MHz. These numbers are much smaller than the cycle counter frequency used by x86. Make the numbers closer by shifting the cycle counter up by the number of leading zeros in the cntfrq_el0 register which gets us closer to a normal cpu clock cycle range. --- common_arm64.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/common_arm64.h b/common_arm64.h index 5951e1ee5..66a1d1dc4 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -81,10 +81,12 @@ static void __inline blas_lock(volatile BLASULONG *address){ #if !defined(OS_DARWIN) && !defined (OS_ANDROID) static __inline BLASULONG rpcc(void){ BLASULONG ret = 0; + blasint shift; __asm__ __volatile__ ("isb; mrs %0,cntvct_el0":"=r"(ret)); + __asm__ __volatile__ ("mrs %0,cntfrq_el0; clz %w0, %w0":"=&r"(shift)); - return ret; + return ret << shift; } #define RPCC_DEFINED