Fix thread data races detected by helgrind 3.12

Ref. #995; may also help resolve the issues seen in #660 and #883.
This commit is contained in:
Martin Kroeker 2017-01-08 23:33:51 +01:00 committed by GitHub
parent c61a7cd293
commit 87c7d10b34
2 changed files with 79 additions and 34 deletions

View File

@ -276,6 +276,9 @@ static void* blas_thread_server(void *arg){
unsigned int last_tick; unsigned int last_tick;
void *buffer, *sa, *sb; void *buffer, *sa, *sb;
blas_queue_t *queue; blas_queue_t *queue;
blas_queue_t *tscq;
#ifdef TIMING_DEBUG #ifdef TIMING_DEBUG
unsigned long start, stop; unsigned long start, stop;
#endif #endif
@ -309,8 +312,11 @@ static void* blas_thread_server(void *arg){
last_tick = (unsigned int)rpcc(); last_tick = (unsigned int)rpcc();
while (!thread_status[cpu].queue) { pthread_mutex_lock (&thread_status[cpu].lock);
tscq=thread_status[cpu].queue;
pthread_mutex_unlock (&thread_status[cpu].lock);
while(!tscq) {
YIELDING; YIELDING;
if ((unsigned int)rpcc() - last_tick > thread_timeout) { if ((unsigned int)rpcc() - last_tick > thread_timeout) {
@ -333,6 +339,9 @@ static void* blas_thread_server(void *arg){
last_tick = (unsigned int)rpcc(); last_tick = (unsigned int)rpcc();
} }
pthread_mutex_lock (&thread_status[cpu].lock);
tscq=thread_status[cpu].queue;
pthread_mutex_unlock (&thread_status[cpu].lock);
} }
@ -351,7 +360,9 @@ static void* blas_thread_server(void *arg){
if (queue) { if (queue) {
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine; int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
pthread_mutex_lock (&thread_status[cpu].lock);
thread_status[cpu].queue = (blas_queue_t *)1; thread_status[cpu].queue = (blas_queue_t *)1;
pthread_mutex_unlock (&thread_status[cpu].lock);
sa = queue -> sa; sa = queue -> sa;
sb = queue -> sb; sb = queue -> sb;
@ -433,7 +444,10 @@ static void* blas_thread_server(void *arg){
// thread is marked as done and other threads use them // thread is marked as done and other threads use them
WMB; WMB;
pthread_mutex_lock (&thread_status[cpu].lock);
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */ thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
pthread_mutex_unlock (&thread_status[cpu].lock);
WMB; WMB;
} }
@ -613,6 +627,7 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
#endif #endif
BLASLONG i = 0; BLASLONG i = 0;
blas_queue_t *current = queue; blas_queue_t *current = queue;
blas_queue_t *tsiq,*tspq;
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST) #if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
int node = get_node(); int node = get_node();
int nodes = get_num_nodes(); int nodes = get_num_nodes();
@ -660,15 +675,23 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
} }
} }
#else #else
while(thread_status[i].queue) { pthread_mutex_lock (&thread_status[i].lock);
tsiq=thread_status[i].queue ;
pthread_mutex_unlock (&thread_status[i].lock);
while(tsiq) {
i ++; i ++;
if (i >= blas_num_threads - 1) i = 0; if (i >= blas_num_threads - 1) i = 0;
pthread_mutex_lock (&thread_status[i].lock);
tsiq=thread_status[i].queue ;
pthread_mutex_unlock (&thread_status[i].lock);
} }
#endif #endif
queue -> assigned = i; queue -> assigned = i;
WMB; WMB;
pthread_mutex_lock (&thread_status[i].lock);
thread_status[i].queue = queue; thread_status[i].queue = queue;
pthread_mutex_unlock (&thread_status[i].lock);
WMB; WMB;
queue = queue -> next; queue = queue -> next;
@ -689,11 +712,15 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
pos = current -> assigned; pos = current -> assigned;
if ((BLASULONG)thread_status[pos].queue > 1) { pthread_mutex_lock (&thread_status[pos].lock);
tspq=thread_status[pos].queue;
pthread_mutex_unlock (&thread_status[pos].lock);
if ((BLASULONG)tspq > 1) {
pthread_mutex_lock (&thread_status[pos].lock);
if (thread_status[pos].status == THREAD_STATUS_SLEEP) { if (thread_status[pos].status == THREAD_STATUS_SLEEP) {
pthread_mutex_lock (&thread_status[pos].lock);
#ifdef MONITOR #ifdef MONITOR
num_suspend ++; num_suspend ++;
@ -703,8 +730,9 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
thread_status[pos].status = THREAD_STATUS_WAKEUP; thread_status[pos].status = THREAD_STATUS_WAKEUP;
pthread_cond_signal(&thread_status[pos].wakeup); pthread_cond_signal(&thread_status[pos].wakeup);
} }
pthread_mutex_unlock(&thread_status[pos].lock);
} }
pthread_mutex_unlock(&thread_status[pos].lock);
} }
current = current -> next; current = current -> next;
@ -714,11 +742,22 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
} }
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
blas_queue_t * tsqq;
while ((num > 0) && queue) { while ((num > 0) && queue) {
while(thread_status[queue -> assigned].queue) { pthread_mutex_lock(&thread_status[queue->assigned].lock);
tsqq=thread_status[queue -> assigned].queue;
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
while(tsqq) {
YIELDING; YIELDING;
pthread_mutex_lock(&thread_status[queue->assigned].lock);
tsqq=thread_status[queue -> assigned].queue;
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
}; };
queue = queue -> next; queue = queue -> next;

View File

@ -390,6 +390,16 @@ static void alloc_mmap_free(struct release_t *release){
} }
} }
/* Global lock for memory allocation.
 *
 * Declared ahead of alloc_mmap() so that the release_info bookkeeping there
 * can be wrapped in LOCK_COMMAND(&alloc_lock) / UNLOCK_COMMAND(&alloc_lock).
 * The lock's type is chosen at compile time:
 *   - USE_PTHREAD_LOCK:     a statically initialized pthread mutex
 *   - USE_PTHREAD_SPINLOCK: a pthread spinlock
 *   - otherwise:            a plain BLASULONG word, starting at 0
 */
#if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
#elif defined(USE_PTHREAD_SPINLOCK)
/* NOTE(review): POSIX only specifies pthread_spin_init() for initializing a
 * spinlock; the literal 0 here presumably relies on the platform's
 * representation of an unlocked spinlock -- confirm for each target. */
static pthread_spinlock_t alloc_lock = 0;
#else
static BLASULONG alloc_lock = 0UL;
#endif
#ifdef NO_WARMUP #ifdef NO_WARMUP
static void *alloc_mmap(void *address){ static void *alloc_mmap(void *address){
@ -406,9 +416,11 @@ static void *alloc_mmap(void *address){
} }
if (map_address != (void *)-1) { if (map_address != (void *)-1) {
LOCK_COMMAND(&alloc_lock);
release_info[release_pos].address = map_address; release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free; release_info[release_pos].func = alloc_mmap_free;
release_pos ++; release_pos ++;
UNLOCK_COMMAND(&alloc_lock);
} }
#ifdef OS_LINUX #ifdef OS_LINUX
@ -550,12 +562,14 @@ static void *alloc_mmap(void *address){
#if defined(OS_LINUX) && !defined(NO_WARMUP) #if defined(OS_LINUX) && !defined(NO_WARMUP)
} }
#endif #endif
LOCK_COMMAND(&alloc_lock);
if (map_address != (void *)-1) { if (map_address != (void *)-1) {
release_info[release_pos].address = map_address; release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free; release_info[release_pos].func = alloc_mmap_free;
release_pos ++; release_pos ++;
} }
UNLOCK_COMMAND(&alloc_lock);
return map_address; return map_address;
} }
@ -889,15 +903,6 @@ static void *alloc_hugetlbfile(void *address){
} }
#endif #endif
/* Global lock for memory allocation */
#if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
#elif defined(USE_PTHREAD_SPINLOCK)
static pthread_spinlock_t alloc_lock = 0;
#else
static BLASULONG alloc_lock = 0UL;
#endif
#ifdef SEEK_ADDRESS #ifdef SEEK_ADDRESS
static BLASULONG base_address = 0UL; static BLASULONG base_address = 0UL;
@ -963,45 +968,41 @@ void *blas_memory_alloc(int procpos){
NULL, NULL,
}; };
void *(**func)(void *address); void *(**func)(void *address);
LOCK_COMMAND(&alloc_lock);
if (!memory_initialized) { if (!memory_initialized) {
LOCK_COMMAND(&alloc_lock);
if (!memory_initialized) {
#if defined(WHEREAMI) && !defined(USE_OPENMP) #if defined(WHEREAMI) && !defined(USE_OPENMP)
for (position = 0; position < NUM_BUFFERS; position ++){ for (position = 0; position < NUM_BUFFERS; position ++){
memory[position].addr = (void *)0; memory[position].addr = (void *)0;
memory[position].pos = -1; memory[position].pos = -1;
memory[position].used = 0; memory[position].used = 0;
memory[position].lock = 0; memory[position].lock = 0;
} }
#endif #endif
#ifdef DYNAMIC_ARCH #ifdef DYNAMIC_ARCH
gotoblas_dynamic_init(); gotoblas_dynamic_init();
#endif #endif
#if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY) #if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
gotoblas_affinity_init(); gotoblas_affinity_init();
#endif #endif
#ifdef SMP #ifdef SMP
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number(); if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
#endif #endif
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
#ifndef DYNAMIC_ARCH #ifndef DYNAMIC_ARCH
blas_set_parameter(); blas_set_parameter();
#endif #endif
#endif #endif
memory_initialized = 1; memory_initialized = 1;
}
UNLOCK_COMMAND(&alloc_lock);
} }
UNLOCK_COMMAND(&alloc_lock);
#ifdef DEBUG #ifdef DEBUG
printf("Alloc Start ...\n"); printf("Alloc Start ...\n");
@ -1034,14 +1035,14 @@ void *blas_memory_alloc(int procpos){
position = 0; position = 0;
do { do {
if (!memory[position].used) { /* if (!memory[position].used) { */
blas_lock(&memory[position].lock); blas_lock(&memory[position].lock);
if (!memory[position].used) goto allocation; if (!memory[position].used) goto allocation;
blas_unlock(&memory[position].lock); blas_unlock(&memory[position].lock);
} /* } */
position ++; position ++;
@ -1103,7 +1104,9 @@ void *blas_memory_alloc(int procpos){
} while ((BLASLONG)map_address == -1); } while ((BLASLONG)map_address == -1);
LOCK_COMMAND(&alloc_lock);
memory[position].addr = map_address; memory[position].addr = map_address;
UNLOCK_COMMAND(&alloc_lock);
#ifdef DEBUG #ifdef DEBUG
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position); printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
@ -1157,6 +1160,7 @@ void blas_memory_free(void *free_area){
#endif #endif
position = 0; position = 0;
LOCK_COMMAND(&alloc_lock);
while ((memory[position].addr != free_area) while ((memory[position].addr != free_area)
&& (position < NUM_BUFFERS)) position++; && (position < NUM_BUFFERS)) position++;
@ -1171,6 +1175,7 @@ void blas_memory_free(void *free_area){
WMB; WMB;
memory[position].used = 0; memory[position].used = 0;
UNLOCK_COMMAND(&alloc_lock);
#ifdef DEBUG #ifdef DEBUG
printf("Unmap Succeeded.\n\n"); printf("Unmap Succeeded.\n\n");
@ -1185,6 +1190,7 @@ void blas_memory_free(void *free_area){
for (position = 0; position < NUM_BUFFERS; position++) for (position = 0; position < NUM_BUFFERS; position++)
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
#endif #endif
UNLOCK_COMMAND(&alloc_lock);
return; return;
} }