Make rpcc() on arm64 get closer to what x86 returns
The Arm implementation of rpcc() uses the architected timer which is defined by the SBSA to be between 10-400MHz. These numbers are much smaller than the cycle counter frequency used by x86. Make the numbers closer by shifting the cycle counter up by the number of leading zeros in the cntfrq_el0 register which gets us closer to a noraml cpu clock cycle range.
This commit is contained in:
parent
430ee31e66
commit
19f3a4091c
|
@ -81,10 +81,12 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
|||
#if !defined(OS_DARWIN) && !defined (OS_ANDROID)
|
||||
static __inline BLASULONG rpcc(void){
|
||||
BLASULONG ret = 0;
|
||||
blasint shift;
|
||||
|
||||
__asm__ __volatile__ ("isb; mrs %0,cntvct_el0":"=r"(ret));
|
||||
__asm__ __volatile__ ("mrs %0,cntfrq_el0; clz %w0, %w0":"=&r"(shift));
|
||||
|
||||
return ret;
|
||||
return ret << shift;
|
||||
}
|
||||
|
||||
#define RPCC_DEFINED
|
||||
|
|
Loading…
Reference in New Issue