From d1baf14a64d93062763f9899fa9c2d7e4bad62a3 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Fri, 11 Nov 2011 17:49:41 +0000 Subject: [PATCH] Enable thread affinity on Loongson 3B. Fixed a bug in reading the cycle counter. On Loongson 3A and 3B, the CPU core increments the counter once every 2 cycles by default. --- Makefile.system | 2 ++ common_mips64.h | 28 +++++++++++++++++++++------- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/Makefile.system b/Makefile.system index 84f41a78f..985f95084 100644 --- a/Makefile.system +++ b/Makefile.system @@ -591,9 +591,11 @@ endif ifneq ($(ARCH), x86_64) ifneq ($(ARCH), x86) +ifneq ($(CORE), LOONGSON3B) NO_AFFINITY = 1 endif endif +endif ifdef NO_AFFINITY CCOMMON_OPT += -DNO_AFFINITY diff --git a/common_mips64.h b/common_mips64.h index 15f947eb8..5db96c4aa 100644 --- a/common_mips64.h +++ b/common_mips64.h @@ -101,13 +101,15 @@ static void INLINE blas_lock(volatile unsigned long *address){ static inline unsigned int rpcc(void){ unsigned long ret; -#if defined(LOONGSON3A) - unsigned long long tmp; - __asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); - ret=tmp; -#elif defined(LOONGSON3B) - //Temp Implementation. - return 1; +#if defined(LOONGSON3A) || defined(LOONGSON3B) + // unsigned long long tmp; + //__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); + //ret=tmp; + __asm__ __volatile__(".set push \n" + ".set mips32r2\n" + "rdhwr %0, $2\n" + ".set pop": "=r"(ret):: "memory"); + #else __asm__ __volatile__(".set push \n" ".set mips32r2\n" @@ -117,6 +119,18 @@ static inline unsigned int rpcc(void){ return ret; } +//#if defined(LOONGSON3A) || defined(LOONGSON3B) +static inline int WhereAmI(void){ + int ret=0; + __asm__ __volatile__(".set push \n" + ".set mips32r2\n" + "rdhwr %0, $0\n" + ".set pop": "=r"(ret):: "memory"); + return ret; + +} +//#endif + static inline int blas_quickdivide(blasint x, blasint y){ return x / y; }