Add missing barriers in gemm scheduler
A few places in the gemm scheduler code were missing memory barriers. The code most likely worked anyway because of its heavy use of volatile / _Atomic, but there is no reason to leave the synchronization incorrect.
This commit is contained in:
parent
6eb4b9ae7c
commit
73de17664d
|
@ -347,7 +347,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
/* Make sure if no one is using workspace */
|
/* Make sure if no one is using workspace */
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
for (i = 0; i < args -> nthreads; i++)
|
for (i = 0; i < args -> nthreads; i++)
|
||||||
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
|
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
|
||||||
STOP_RPCC(waiting1);
|
STOP_RPCC(waiting1);
|
||||||
|
|
||||||
#if defined(FUSED_GEMM) && !defined(TIMING)
|
#if defined(FUSED_GEMM) && !defined(TIMING)
|
||||||
|
@ -409,7 +409,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
/* Wait until other region of B is initialized */
|
/* Wait until other region of B is initialized */
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
|
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;MB;};
|
||||||
STOP_RPCC(waiting2);
|
STOP_RPCC(waiting2);
|
||||||
|
|
||||||
/* Apply kernel with local region of A and part of other region of B */
|
/* Apply kernel with local region of A and part of other region of B */
|
||||||
|
@ -427,6 +427,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
/* Clear synchronization flag if this thread is done with other region of B */
|
/* Clear synchronization flag if this thread is done with other region of B */
|
||||||
if (m_to - m_from == min_i) {
|
if (m_to - m_from == min_i) {
|
||||||
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
|
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
|
||||||
|
WMB;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (current != mypos);
|
} while (current != mypos);
|
||||||
|
@ -488,7 +489,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
for (i = 0; i < args -> nthreads; i++) {
|
for (i = 0; i < args -> nthreads; i++) {
|
||||||
for (js = 0; js < DIVIDE_RATE; js++) {
|
for (js = 0; js < DIVIDE_RATE; js++) {
|
||||||
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;};
|
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;MB;};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
STOP_RPCC(waiting3);
|
STOP_RPCC(waiting3);
|
||||||
|
|
Loading…
Reference in New Issue