Revert "Rewrite &= -> = and simplify the initial blocking phase."
This commit is contained in:
parent
a83f01e0ee
commit
5f2a3c05cd
|
@@ -344,6 +344,12 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
|
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
|
||||||
for (js = n_from, bufferside = 0; js < n_to; js += div_n, bufferside ++) {
|
for (js = n_from, bufferside = 0; js < n_to; js += div_n, bufferside ++) {
|
||||||
|
|
||||||
|
/* Make sure if no one is using workspace */
|
||||||
|
START_RPCC();
|
||||||
|
for (i = 0; i < args -> nthreads; i++)
|
||||||
|
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
|
||||||
|
STOP_RPCC(waiting1);
|
||||||
|
|
||||||
#if defined(FUSED_GEMM) && !defined(TIMING)
|
#if defined(FUSED_GEMM) && !defined(TIMING)
|
||||||
|
|
||||||
/* Fused operation to copy region of B into workspace and apply kernel */
|
/* Fused operation to copy region of B into workspace and apply kernel */
|
||||||
|
@@ -381,16 +387,11 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (i = mypos_n * nthreads_m; i < (mypos_n + 1) * nthreads_m; i++) {
|
|
||||||
/* Make sure if no one is using workspace */
|
|
||||||
START_RPCC();
|
|
||||||
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
|
|
||||||
STOP_RPCC(waiting1);
|
|
||||||
/* Set flag so other threads can access local region of B */
|
/* Set flag so other threads can access local region of B */
|
||||||
|
for (i = mypos_n * nthreads_m; i < (mypos_n + 1) * nthreads_m; i++)
|
||||||
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
|
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
|
||||||
WMB;
|
WMB;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* Get regions of B from other threads and apply kernel */
|
/* Get regions of B from other threads and apply kernel */
|
||||||
current = mypos;
|
current = mypos;
|
||||||
|
@@ -425,7 +426,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
/* Clear synchronization flag if this thread is done with other region of B */
|
/* Clear synchronization flag if this thread is done with other region of B */
|
||||||
if (m_to - m_from == min_i) {
|
if (m_to - m_from == min_i) {
|
||||||
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
|
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
|
||||||
WMB;
|
WMB;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -468,7 +469,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
/* Clear synchronization flag if this thread is done with region of B */
|
/* Clear synchronization flag if this thread is done with region of B */
|
||||||
if (is + min_i >= m_to) {
|
if (is + min_i >= m_to) {
|
||||||
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
|
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
|
||||||
WMB;
|
WMB;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue