Use atomic operations for the shared job[].working slots, as in the corresponding getrf implementation
This commit is contained in:
parent
9af2a9dc3b
commit
2dda40d280
|
@ -105,6 +105,14 @@ typedef struct {
|
||||||
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
|
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
|
||||||
} job_t;
|
} job_t;
|
||||||
|
|
||||||
|
/*
 * Accessors for BLASLONG slots that are read and written by several threads.
 *
 *   atomic_load_long(p)      - read the BLASLONG that p points to.
 *   atomic_store_long(p, v)  - write v to the BLASLONG that p points to.
 *
 * With HAVE_C11 defined, the compiler's __atomic builtins are used with
 * relaxed ordering: the access itself is atomic, but it imposes no ordering
 * on surrounding memory operations.  Without HAVE_C11, the access goes
 * through a volatile-qualified lvalue, which only stops the compiler from
 * caching or eliding it.  In both cases any required ordering has to come
 * from explicit barriers at the call site.
 *
 * Every expansion is fully parenthesized so the macros behave like function
 * calls in any expression context.  atomic_store_long() yields no value in
 * either configuration, so code that compiles with one setting of HAVE_C11
 * also compiles with the other.
 */
#ifdef HAVE_C11
#define atomic_load_long(p)     __atomic_load_n((p), __ATOMIC_RELAXED)
#define atomic_store_long(p, v) __atomic_store_n((p), (v), __ATOMIC_RELAXED)
#else
#define atomic_load_long(p)     ((BLASLONG)(*(volatile BLASLONG *)(p)))
#define atomic_store_long(p, v) ((void)(*(volatile BLASLONG *)(p) = (v)))
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifndef KERNEL_OPERATION
|
#ifndef KERNEL_OPERATION
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
|
@ -233,14 +241,18 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef LOWER
|
#ifndef LOWER
|
||||||
|
MB;
|
||||||
for (i = 0; i <= mypos; i++)
|
for (i = 0; i <= mypos; i++)
|
||||||
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
|
atomic_store_long(&job[mypos].working[i][CACHE_LINE_SIZE * bufferside], (BLASLONG)buffer[bufferside]);
|
||||||
|
// job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
|
||||||
#else
|
#else
|
||||||
|
MB
|
||||||
for (i = mypos; i < args -> nthreads; i++)
|
for (i = mypos; i < args -> nthreads; i++)
|
||||||
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
|
atomic_store_long(&job[mypos].working[i][CACHE_LINE_SIZE * bufferside], (BLASLONG)buffer[bufferside]);
|
||||||
|
// job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
WMB;
|
// WMB;
|
||||||
}
|
}
|
||||||
|
|
||||||
min_i = m_to - m_from;
|
min_i = m_to - m_from;
|
||||||
|
@ -271,14 +283,21 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
|
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
|
||||||
|
|
||||||
/* thread has to wait */
|
/* thread has to wait */
|
||||||
if (current != mypos) while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
|
if (current != mypos)
|
||||||
|
do {
|
||||||
|
jw = atomic_load_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside]);
|
||||||
|
} while (jw == 0);
|
||||||
|
MB;
|
||||||
|
|
||||||
|
//while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
|
||||||
|
|
||||||
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), k, alpha,
|
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), k, alpha,
|
||||||
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
|
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
|
||||||
c, lda, m_from, xxx);
|
c, lda, m_from, xxx);
|
||||||
|
|
||||||
if (m_from + min_i >= m_to) {
|
if (m_from + min_i >= m_to) {
|
||||||
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
|
atomic_store_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside], job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0);
|
||||||
|
// job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
|
||||||
WMB;
|
WMB;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -323,7 +342,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
c, lda, is, xxx);
|
c, lda, is, xxx);
|
||||||
|
|
||||||
if (is + min_i >= m_to) {
|
if (is + min_i >= m_to) {
|
||||||
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
|
atomic_store_long(&job[current].working[mypos][CACHE_LINE_SIZE * bufferside], job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0);
|
||||||
|
// job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
|
||||||
WMB;
|
WMB;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -337,9 +357,18 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
for (i = 0; i < args -> nthreads; i++) {
|
for (i = 0; i < args -> nthreads; i++) {
|
||||||
if (i != mypos) {
|
if (i != mypos) {
|
||||||
for (xxx = 0; xxx < DIVIDE_RATE; xxx++) {
|
for (xxx = 0; xxx < DIVIDE_RATE; xxx++)
|
||||||
while (job[mypos].working[i][CACHE_LINE_SIZE * xxx] ) {YIELDING;};
|
#if 1
|
||||||
|
{
|
||||||
|
do {
|
||||||
|
jw = atomic_load_long(&job[mypos].working[i][CACHE_LINE_SIZE * xxx]);
|
||||||
|
} while (jw);
|
||||||
|
MB;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
while (job[mypos].working[i][CACHE_LINE_SIZE * xxx] ) {YIELDING;};
|
||||||
|
#endif
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue