commit
d1349e7a11
|
@ -26,8 +26,8 @@ endif
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(CORE), ARMV5)
|
ifeq ($(CORE), ARMV5)
|
||||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
CCOMMON_OPT += -marm -march=armv5
|
||||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
FCOMMON_OPT += -marm -march=armv5
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
42
common.h
42
common.h
|
@ -410,7 +410,49 @@ typedef char env_var_t[MAX_PATH];
|
||||||
typedef char* env_var_t;
|
typedef char* env_var_t;
|
||||||
#define readenv(p, n) ((p)=getenv(n))
|
#define readenv(p, n) ((p)=getenv(n))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
|
||||||
|
#ifdef _POSIX_MONOTONIC_CLOCK
|
||||||
|
#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 17) // don't require -lrt
|
||||||
|
#define USE_MONOTONIC
|
||||||
|
#elif defined(OS_ANDROID)
|
||||||
|
#define USE_MONOTONIC
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
/* use similar scale as x86 rdtsc for timeouts to work correctly */
|
||||||
|
static inline unsigned long long rpcc(void){
|
||||||
|
#ifdef USE_MONOTONIC
|
||||||
|
struct timespec ts;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
|
||||||
|
#else
|
||||||
|
struct timeval tv;
|
||||||
|
gettimeofday(&tv,NULL);
|
||||||
|
return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
#define RPCC64BIT
|
||||||
|
#endif // !RPCC_DEFINED
|
||||||
|
|
||||||
|
#if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
|
||||||
|
static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
|
||||||
|
do {
|
||||||
|
while (*address) {YIELDING;};
|
||||||
|
|
||||||
|
} while (!__sync_bool_compare_and_swap(address, 0, 1));
|
||||||
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef RPCC_DEFINED
|
||||||
|
#error "rpcc() implementation is missing for your platform"
|
||||||
|
#endif
|
||||||
|
#ifndef BLAS_LOCK_DEFINED
|
||||||
|
#error "blas_lock() implementation is missing for your platform"
|
||||||
|
#endif
|
||||||
|
#endif // !ASSEMBLER
|
||||||
|
|
||||||
#ifdef OS_LINUX
|
#ifdef OS_LINUX
|
||||||
#include "common_linux.h"
|
#include "common_linux.h"
|
||||||
|
|
|
@ -76,6 +76,7 @@ static void __inline blas_lock(unsigned long *address){
|
||||||
"30:", address);
|
"30:", address);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
static __inline unsigned int rpcc(void){
|
static __inline unsigned int rpcc(void){
|
||||||
|
|
||||||
|
@ -89,6 +90,7 @@ static __inline unsigned int rpcc(void){
|
||||||
|
|
||||||
return r0;
|
return r0;
|
||||||
}
|
}
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
|
|
||||||
#define HALT ldq $0, 0($0)
|
#define HALT ldq $0, 0($0)
|
||||||
|
|
43
common_arm.h
43
common_arm.h
|
@ -51,6 +51,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8)
|
||||||
|
|
||||||
static void __inline blas_lock(volatile BLASULONG *address){
|
static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
|
||||||
int register ret;
|
int register ret;
|
||||||
|
@ -59,40 +61,29 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
while (*address) {YIELDING;};
|
while (*address) {YIELDING;};
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
"1: \n\t"
|
"ldrex r2, [%1] \n\t"
|
||||||
"ldrex r2, [%1] \n\t"
|
"strex %0, %2, [%1] \n\t"
|
||||||
"mov r2, #0 \n\t"
|
"orr %0, r2 \n\t"
|
||||||
"strex r3, r2, [%1] \n\t"
|
: "=&r"(ret)
|
||||||
"cmp r3, #0 \n\t"
|
: "r"(address), "r"(1)
|
||||||
"bne 1b \n\t"
|
: "memory", "r2"
|
||||||
"mov %0 , r3 \n\t"
|
|
||||||
: "=r"(ret), "=r"(address)
|
|
||||||
: "1"(address)
|
|
||||||
: "memory", "r2" , "r3"
|
|
||||||
|
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
} while (ret);
|
} while (ret);
|
||||||
|
MB;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
static inline unsigned long long rpcc(void){
|
#endif
|
||||||
unsigned long long ret=0;
|
|
||||||
double v;
|
|
||||||
struct timeval tv;
|
|
||||||
gettimeofday(&tv,NULL);
|
|
||||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
|
|
||||||
ret = (unsigned long long) ( v * 1000.0d );
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int blas_quickdivide(blasint x, blasint y){
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
return x / y;
|
return x / y;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(DOUBLE)
|
#if !defined(HAVE_VFP)
|
||||||
|
/* no FPU, soft float */
|
||||||
|
#define GET_IMAGE(res)
|
||||||
|
#elif defined(DOUBLE)
|
||||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||||
#else
|
#else
|
||||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||||
|
@ -140,4 +131,8 @@ REALNAME:
|
||||||
#define MAP_ANONYMOUS MAP_ANON
|
#define MAP_ANONYMOUS MAP_ANON
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(ARMV5) && !defined(ARMV6) && !defined(ARMV7) && !defined(ARMV8)
|
||||||
|
#error "you must define ARMV5, ARMV6, ARMV7 or ARMV8"
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -69,18 +69,9 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
} while (ret);
|
} while (ret);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
|
|
||||||
static inline unsigned long long rpcc(void){
|
|
||||||
unsigned long long ret=0;
|
|
||||||
double v;
|
|
||||||
struct timeval tv;
|
|
||||||
gettimeofday(&tv,NULL);
|
|
||||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6;
|
|
||||||
ret = (unsigned long long) ( v * 1000.0d );
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int blas_quickdivide(blasint x, blasint y){
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
return x / y;
|
return x / y;
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,6 +68,7 @@ static __inline void blas_lock(volatile unsigned long *address){
|
||||||
: "ar.ccv", "memory");
|
: "ar.ccv", "memory");
|
||||||
} while (ret);
|
} while (ret);
|
||||||
}
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
static __inline unsigned long rpcc(void) {
|
static __inline unsigned long rpcc(void) {
|
||||||
unsigned long clocks;
|
unsigned long clocks;
|
||||||
|
@ -75,6 +76,7 @@ static __inline unsigned long rpcc(void) {
|
||||||
__asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks));
|
__asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks));
|
||||||
return clocks;
|
return clocks;
|
||||||
}
|
}
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
|
|
||||||
static __inline unsigned long stmxcsr(void){
|
static __inline unsigned long stmxcsr(void){
|
||||||
|
@ -99,10 +101,12 @@ static __inline void blas_lock(volatile unsigned long *address){
|
||||||
while (*address || _InterlockedCompareExchange((volatile int *) address,1,0))
|
while (*address || _InterlockedCompareExchange((volatile int *) address,1,0))
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
static __inline unsigned int rpcc(void) {
|
static __inline unsigned int rpcc(void) {
|
||||||
return __getReg(_IA64_REG_AR_ITC);
|
return __getReg(_IA64_REG_AR_ITC);
|
||||||
}
|
}
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
static __inline unsigned int stmxcsr(void) {
|
static __inline unsigned int stmxcsr(void) {
|
||||||
return __getReg(_IA64_REG_AR_FPSR);
|
return __getReg(_IA64_REG_AR_FPSR);
|
||||||
|
|
|
@ -98,6 +98,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
|
||||||
|
|
||||||
} while (ret);
|
} while (ret);
|
||||||
}
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
static inline unsigned int rpcc(void){
|
static inline unsigned int rpcc(void){
|
||||||
unsigned long ret;
|
unsigned long ret;
|
||||||
|
@ -118,6 +119,7 @@ static inline unsigned int rpcc(void){
|
||||||
#endif
|
#endif
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||||
#ifndef NO_AFFINITY
|
#ifndef NO_AFFINITY
|
||||||
|
|
|
@ -87,6 +87,7 @@ static void INLINE blas_lock(volatile unsigned long *address){
|
||||||
#endif
|
#endif
|
||||||
} while (ret);
|
} while (ret);
|
||||||
}
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
static inline unsigned long rpcc(void){
|
static inline unsigned long rpcc(void){
|
||||||
unsigned long ret;
|
unsigned long ret;
|
||||||
|
@ -103,6 +104,7 @@ static inline unsigned long rpcc(void){
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define RPCC64BIT
|
#define RPCC64BIT
|
||||||
|
|
|
@ -58,6 +58,7 @@ static void __inline blas_lock(volatile unsigned long *address){
|
||||||
: "memory");
|
: "memory");
|
||||||
} while (ret);
|
} while (ret);
|
||||||
}
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
static __inline unsigned long rpcc(void){
|
static __inline unsigned long rpcc(void){
|
||||||
unsigned long clocks;
|
unsigned long clocks;
|
||||||
|
@ -66,6 +67,7 @@ static __inline unsigned long rpcc(void){
|
||||||
|
|
||||||
return clocks;
|
return clocks;
|
||||||
};
|
};
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
#ifdef __64BIT__
|
#ifdef __64BIT__
|
||||||
#define RPCC64BIT
|
#define RPCC64BIT
|
||||||
|
|
|
@ -65,6 +65,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
} while (ret);
|
} while (ret);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
static __inline unsigned long long rpcc(void){
|
static __inline unsigned long long rpcc(void){
|
||||||
unsigned int a, d;
|
unsigned int a, d;
|
||||||
|
@ -73,6 +74,7 @@ static __inline unsigned long long rpcc(void){
|
||||||
|
|
||||||
return ((unsigned long long)a + ((unsigned long long)d << 32));
|
return ((unsigned long long)a + ((unsigned long long)d << 32));
|
||||||
};
|
};
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
static __inline unsigned long getstackaddr(void){
|
static __inline unsigned long getstackaddr(void){
|
||||||
unsigned long addr;
|
unsigned long addr;
|
||||||
|
|
|
@ -74,6 +74,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
|
||||||
} while (ret);
|
} while (ret);
|
||||||
}
|
}
|
||||||
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
static __inline BLASULONG rpcc(void){
|
static __inline BLASULONG rpcc(void){
|
||||||
BLASULONG a, d;
|
BLASULONG a, d;
|
||||||
|
@ -82,6 +83,7 @@ static __inline BLASULONG rpcc(void){
|
||||||
|
|
||||||
return ((BLASULONG)a + ((BLASULONG)d << 32));
|
return ((BLASULONG)a + ((BLASULONG)d << 32));
|
||||||
}
|
}
|
||||||
|
#define RPCC_DEFINED
|
||||||
|
|
||||||
#define RPCC64BIT
|
#define RPCC64BIT
|
||||||
|
|
||||||
|
|
|
@ -192,6 +192,7 @@ void get_cpuconfig(void)
|
||||||
{
|
{
|
||||||
case CPU_CORTEXA9:
|
case CPU_CORTEXA9:
|
||||||
printf("#define CORTEXA9\n");
|
printf("#define CORTEXA9\n");
|
||||||
|
printf("#define ARMV7\n");
|
||||||
printf("#define HAVE_VFP\n");
|
printf("#define HAVE_VFP\n");
|
||||||
printf("#define HAVE_VFPV3\n");
|
printf("#define HAVE_VFPV3\n");
|
||||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||||
|
@ -207,6 +208,7 @@ void get_cpuconfig(void)
|
||||||
|
|
||||||
case CPU_CORTEXA15:
|
case CPU_CORTEXA15:
|
||||||
printf("#define CORTEXA15\n");
|
printf("#define CORTEXA15\n");
|
||||||
|
printf("#define ARMV7\n");
|
||||||
printf("#define HAVE_VFP\n");
|
printf("#define HAVE_VFP\n");
|
||||||
printf("#define HAVE_VFPV3\n");
|
printf("#define HAVE_VFPV3\n");
|
||||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||||
|
|
|
@ -425,6 +425,10 @@ static int blas_thread_server(void *arg){
|
||||||
main_status[cpu] = MAIN_FINISH;
|
main_status[cpu] = MAIN_FINISH;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// arm: make sure all results are written out _before_
|
||||||
|
// thread is marked as done and other threads use them
|
||||||
|
WMB;
|
||||||
|
|
||||||
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
|
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
|
||||||
WMB;
|
WMB;
|
||||||
|
|
||||||
|
@ -775,7 +779,12 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
|
||||||
stop = rpcc();
|
stop = rpcc();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next);
|
if ((num > 1) && queue -> next) {
|
||||||
|
exec_blas_async_wait(num - 1, queue -> next);
|
||||||
|
|
||||||
|
// arm: make sure results from other threads are visible
|
||||||
|
MB;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef TIMING_DEBUG
|
#ifdef TIMING_DEBUG
|
||||||
fprintf(STDERR, "Thread[0] : %16lu %16lu (%8lu cycles)\n",
|
fprintf(STDERR, "Thread[0] : %16lu %16lu (%8lu cycles)\n",
|
||||||
|
|
|
@ -1153,6 +1153,9 @@ void blas_memory_free(void *free_area){
|
||||||
printf(" Position : %d\n", position);
|
printf(" Position : %d\n", position);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// arm: ensure all writes are finished before other thread takes this memory
|
||||||
|
WMB;
|
||||||
|
|
||||||
memory[position].used = 0;
|
memory[position].used = 0;
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
|
|
@ -798,8 +798,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ARCHCONFIG "-DARMV5 " \
|
#define ARCHCONFIG "-DARMV5 " \
|
||||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||||
"-DHAVE_VFP"
|
|
||||||
#define LIBNAME "armv5"
|
#define LIBNAME "armv5"
|
||||||
#define CORENAME "ARMV5"
|
#define CORENAME "ARMV5"
|
||||||
#else
|
#else
|
||||||
|
|
Loading…
Reference in New Issue