Fix thread data races detected by helgrind 3.12
Ref. #995; may also help resolve the issues seen in #660 and #883.
This commit is contained in:
parent
c61a7cd293
commit
87c7d10b34
|
@ -276,6 +276,9 @@ static void* blas_thread_server(void *arg){
|
||||||
unsigned int last_tick;
|
unsigned int last_tick;
|
||||||
void *buffer, *sa, *sb;
|
void *buffer, *sa, *sb;
|
||||||
blas_queue_t *queue;
|
blas_queue_t *queue;
|
||||||
|
|
||||||
|
blas_queue_t *tscq;
|
||||||
|
|
||||||
#ifdef TIMING_DEBUG
|
#ifdef TIMING_DEBUG
|
||||||
unsigned long start, stop;
|
unsigned long start, stop;
|
||||||
#endif
|
#endif
|
||||||
|
@ -309,8 +312,11 @@ static void* blas_thread_server(void *arg){
|
||||||
|
|
||||||
last_tick = (unsigned int)rpcc();
|
last_tick = (unsigned int)rpcc();
|
||||||
|
|
||||||
while (!thread_status[cpu].queue) {
|
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||||
|
tscq=thread_status[cpu].queue;
|
||||||
|
pthread_mutex_unlock (&thread_status[cpu].lock);
|
||||||
|
|
||||||
|
while(!tscq) {
|
||||||
YIELDING;
|
YIELDING;
|
||||||
|
|
||||||
if ((unsigned int)rpcc() - last_tick > thread_timeout) {
|
if ((unsigned int)rpcc() - last_tick > thread_timeout) {
|
||||||
|
@ -333,6 +339,9 @@ static void* blas_thread_server(void *arg){
|
||||||
|
|
||||||
last_tick = (unsigned int)rpcc();
|
last_tick = (unsigned int)rpcc();
|
||||||
}
|
}
|
||||||
|
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||||
|
tscq=thread_status[cpu].queue;
|
||||||
|
pthread_mutex_unlock (&thread_status[cpu].lock);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -351,7 +360,9 @@ static void* blas_thread_server(void *arg){
|
||||||
if (queue) {
|
if (queue) {
|
||||||
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
|
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
|
||||||
|
|
||||||
|
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||||
thread_status[cpu].queue = (blas_queue_t *)1;
|
thread_status[cpu].queue = (blas_queue_t *)1;
|
||||||
|
pthread_mutex_unlock (&thread_status[cpu].lock);
|
||||||
|
|
||||||
sa = queue -> sa;
|
sa = queue -> sa;
|
||||||
sb = queue -> sb;
|
sb = queue -> sb;
|
||||||
|
@ -433,7 +444,10 @@ static void* blas_thread_server(void *arg){
|
||||||
// thread is marked as done and other threads use them
|
// thread is marked as done and other threads use them
|
||||||
WMB;
|
WMB;
|
||||||
|
|
||||||
|
pthread_mutex_lock (&thread_status[cpu].lock);
|
||||||
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
|
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
|
||||||
|
pthread_mutex_unlock (&thread_status[cpu].lock);
|
||||||
|
|
||||||
WMB;
|
WMB;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -613,6 +627,7 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
||||||
#endif
|
#endif
|
||||||
BLASLONG i = 0;
|
BLASLONG i = 0;
|
||||||
blas_queue_t *current = queue;
|
blas_queue_t *current = queue;
|
||||||
|
blas_queue_t *tsiq,*tspq;
|
||||||
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
|
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
|
||||||
int node = get_node();
|
int node = get_node();
|
||||||
int nodes = get_num_nodes();
|
int nodes = get_num_nodes();
|
||||||
|
@ -660,15 +675,23 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
while(thread_status[i].queue) {
|
pthread_mutex_lock (&thread_status[i].lock);
|
||||||
|
tsiq=thread_status[i].queue ;
|
||||||
|
pthread_mutex_unlock (&thread_status[i].lock);
|
||||||
|
while(tsiq) {
|
||||||
i ++;
|
i ++;
|
||||||
if (i >= blas_num_threads - 1) i = 0;
|
if (i >= blas_num_threads - 1) i = 0;
|
||||||
|
pthread_mutex_lock (&thread_status[i].lock);
|
||||||
|
tsiq=thread_status[i].queue ;
|
||||||
|
pthread_mutex_unlock (&thread_status[i].lock);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
queue -> assigned = i;
|
queue -> assigned = i;
|
||||||
WMB;
|
WMB;
|
||||||
|
pthread_mutex_lock (&thread_status[i].lock);
|
||||||
thread_status[i].queue = queue;
|
thread_status[i].queue = queue;
|
||||||
|
pthread_mutex_unlock (&thread_status[i].lock);
|
||||||
WMB;
|
WMB;
|
||||||
|
|
||||||
queue = queue -> next;
|
queue = queue -> next;
|
||||||
|
@ -689,11 +712,15 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
||||||
|
|
||||||
pos = current -> assigned;
|
pos = current -> assigned;
|
||||||
|
|
||||||
if ((BLASULONG)thread_status[pos].queue > 1) {
|
pthread_mutex_lock (&thread_status[pos].lock);
|
||||||
|
tspq=thread_status[pos].queue;
|
||||||
|
pthread_mutex_unlock (&thread_status[pos].lock);
|
||||||
|
|
||||||
|
if ((BLASULONG)tspq > 1) {
|
||||||
|
pthread_mutex_lock (&thread_status[pos].lock);
|
||||||
|
|
||||||
if (thread_status[pos].status == THREAD_STATUS_SLEEP) {
|
if (thread_status[pos].status == THREAD_STATUS_SLEEP) {
|
||||||
|
|
||||||
pthread_mutex_lock (&thread_status[pos].lock);
|
|
||||||
|
|
||||||
#ifdef MONITOR
|
#ifdef MONITOR
|
||||||
num_suspend ++;
|
num_suspend ++;
|
||||||
|
@ -703,8 +730,9 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
||||||
thread_status[pos].status = THREAD_STATUS_WAKEUP;
|
thread_status[pos].status = THREAD_STATUS_WAKEUP;
|
||||||
pthread_cond_signal(&thread_status[pos].wakeup);
|
pthread_cond_signal(&thread_status[pos].wakeup);
|
||||||
}
|
}
|
||||||
pthread_mutex_unlock(&thread_status[pos].lock);
|
|
||||||
}
|
}
|
||||||
|
pthread_mutex_unlock(&thread_status[pos].lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
current = current -> next;
|
current = current -> next;
|
||||||
|
@ -714,11 +742,22 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
|
||||||
}
|
}
|
||||||
|
|
||||||
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
|
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
|
||||||
|
blas_queue_t * tsqq;
|
||||||
|
|
||||||
while ((num > 0) && queue) {
|
while ((num > 0) && queue) {
|
||||||
|
|
||||||
while(thread_status[queue -> assigned].queue) {
|
pthread_mutex_lock(&thread_status[queue->assigned].lock);
|
||||||
|
tsqq=thread_status[queue -> assigned].queue;
|
||||||
|
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
|
||||||
|
|
||||||
|
|
||||||
|
while(tsqq) {
|
||||||
YIELDING;
|
YIELDING;
|
||||||
|
pthread_mutex_lock(&thread_status[queue->assigned].lock);
|
||||||
|
tsqq=thread_status[queue -> assigned].queue;
|
||||||
|
pthread_mutex_unlock(&thread_status[queue->assigned].lock);
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
queue = queue -> next;
|
queue = queue -> next;
|
||||||
|
|
|
@ -390,6 +390,16 @@ static void alloc_mmap_free(struct release_t *release){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Global lock for memory allocation */
|
||||||
|
|
||||||
|
#if defined(USE_PTHREAD_LOCK)
|
||||||
|
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
#elif defined(USE_PTHREAD_SPINLOCK)
|
||||||
|
static pthread_spinlock_t alloc_lock = 0;
|
||||||
|
#else
|
||||||
|
static BLASULONG alloc_lock = 0UL;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef NO_WARMUP
|
#ifdef NO_WARMUP
|
||||||
|
|
||||||
static void *alloc_mmap(void *address){
|
static void *alloc_mmap(void *address){
|
||||||
|
@ -406,9 +416,11 @@ static void *alloc_mmap(void *address){
|
||||||
}
|
}
|
||||||
|
|
||||||
if (map_address != (void *)-1) {
|
if (map_address != (void *)-1) {
|
||||||
|
LOCK_COMMAND(&alloc_lock);
|
||||||
release_info[release_pos].address = map_address;
|
release_info[release_pos].address = map_address;
|
||||||
release_info[release_pos].func = alloc_mmap_free;
|
release_info[release_pos].func = alloc_mmap_free;
|
||||||
release_pos ++;
|
release_pos ++;
|
||||||
|
UNLOCK_COMMAND(&alloc_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef OS_LINUX
|
#ifdef OS_LINUX
|
||||||
|
@ -550,12 +562,14 @@ static void *alloc_mmap(void *address){
|
||||||
#if defined(OS_LINUX) && !defined(NO_WARMUP)
|
#if defined(OS_LINUX) && !defined(NO_WARMUP)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
LOCK_COMMAND(&alloc_lock);
|
||||||
|
|
||||||
if (map_address != (void *)-1) {
|
if (map_address != (void *)-1) {
|
||||||
release_info[release_pos].address = map_address;
|
release_info[release_pos].address = map_address;
|
||||||
release_info[release_pos].func = alloc_mmap_free;
|
release_info[release_pos].func = alloc_mmap_free;
|
||||||
release_pos ++;
|
release_pos ++;
|
||||||
}
|
}
|
||||||
|
UNLOCK_COMMAND(&alloc_lock);
|
||||||
|
|
||||||
return map_address;
|
return map_address;
|
||||||
}
|
}
|
||||||
|
@ -889,15 +903,6 @@ static void *alloc_hugetlbfile(void *address){
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Global lock for memory allocation */
|
|
||||||
|
|
||||||
#if defined(USE_PTHREAD_LOCK)
|
|
||||||
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
||||||
#elif defined(USE_PTHREAD_SPINLOCK)
|
|
||||||
static pthread_spinlock_t alloc_lock = 0;
|
|
||||||
#else
|
|
||||||
static BLASULONG alloc_lock = 0UL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SEEK_ADDRESS
|
#ifdef SEEK_ADDRESS
|
||||||
static BLASULONG base_address = 0UL;
|
static BLASULONG base_address = 0UL;
|
||||||
|
@ -963,9 +968,6 @@ void *blas_memory_alloc(int procpos){
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
void *(**func)(void *address);
|
void *(**func)(void *address);
|
||||||
|
|
||||||
if (!memory_initialized) {
|
|
||||||
|
|
||||||
LOCK_COMMAND(&alloc_lock);
|
LOCK_COMMAND(&alloc_lock);
|
||||||
|
|
||||||
if (!memory_initialized) {
|
if (!memory_initialized) {
|
||||||
|
@ -998,10 +1000,9 @@ void *blas_memory_alloc(int procpos){
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
memory_initialized = 1;
|
memory_initialized = 1;
|
||||||
}
|
|
||||||
|
|
||||||
UNLOCK_COMMAND(&alloc_lock);
|
|
||||||
}
|
}
|
||||||
|
UNLOCK_COMMAND(&alloc_lock);
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("Alloc Start ...\n");
|
printf("Alloc Start ...\n");
|
||||||
|
@ -1034,14 +1035,14 @@ void *blas_memory_alloc(int procpos){
|
||||||
position = 0;
|
position = 0;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
if (!memory[position].used) {
|
/* if (!memory[position].used) { */
|
||||||
|
|
||||||
blas_lock(&memory[position].lock);
|
blas_lock(&memory[position].lock);
|
||||||
|
|
||||||
if (!memory[position].used) goto allocation;
|
if (!memory[position].used) goto allocation;
|
||||||
|
|
||||||
blas_unlock(&memory[position].lock);
|
blas_unlock(&memory[position].lock);
|
||||||
}
|
/* } */
|
||||||
|
|
||||||
position ++;
|
position ++;
|
||||||
|
|
||||||
|
@ -1103,7 +1104,9 @@ void *blas_memory_alloc(int procpos){
|
||||||
|
|
||||||
} while ((BLASLONG)map_address == -1);
|
} while ((BLASLONG)map_address == -1);
|
||||||
|
|
||||||
|
LOCK_COMMAND(&alloc_lock);
|
||||||
memory[position].addr = map_address;
|
memory[position].addr = map_address;
|
||||||
|
UNLOCK_COMMAND(&alloc_lock);
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
|
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
|
||||||
|
@ -1157,6 +1160,7 @@ void blas_memory_free(void *free_area){
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
position = 0;
|
position = 0;
|
||||||
|
LOCK_COMMAND(&alloc_lock);
|
||||||
|
|
||||||
while ((memory[position].addr != free_area)
|
while ((memory[position].addr != free_area)
|
||||||
&& (position < NUM_BUFFERS)) position++;
|
&& (position < NUM_BUFFERS)) position++;
|
||||||
|
@ -1171,6 +1175,7 @@ void blas_memory_free(void *free_area){
|
||||||
WMB;
|
WMB;
|
||||||
|
|
||||||
memory[position].used = 0;
|
memory[position].used = 0;
|
||||||
|
UNLOCK_COMMAND(&alloc_lock);
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("Unmap Succeeded.\n\n");
|
printf("Unmap Succeeded.\n\n");
|
||||||
|
@ -1185,6 +1190,7 @@ void blas_memory_free(void *free_area){
|
||||||
for (position = 0; position < NUM_BUFFERS; position++)
|
for (position = 0; position < NUM_BUFFERS; position++)
|
||||||
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
|
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
|
||||||
#endif
|
#endif
|
||||||
|
UNLOCK_COMMAND(&alloc_lock);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue