Merge pull request #1618 from oon3m0oo/less_locking

Remove the need for most locking in memory.c.
This commit is contained in:
Martin Kroeker 2018-06-15 00:10:29 +02:00 committed by GitHub
commit 12603b7dbb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 43 additions and 156 deletions

View File

@ -139,6 +139,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define FIXED_PAGESIZE 4096
#endif
#ifndef BUFFERS_PER_THREAD
#ifdef USE_OPENMP
#define BUFFERS_PER_THREAD (MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)
#else
#define BUFFERS_PER_THREAD NUM_BUFFERS
#endif
#endif
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
#if defined(_MSC_VER) && !defined(__clang__)
@ -415,8 +423,15 @@ struct release_t {
int hugetlb_allocated = 0;
static struct release_t release_info[NUM_BUFFERS];
static int release_pos = 0;
#if defined(OS_WINDOWS)
#define THREAD_LOCAL __declspec(thread)
#define UNLIKELY_TO_BE_ZERO(x) (x)
#else
#define THREAD_LOCAL __thread
#define UNLIKELY_TO_BE_ZERO(x) (__builtin_expect(x, 0))
#endif
static struct release_t THREAD_LOCAL release_info[BUFFERS_PER_THREAD];
static int THREAD_LOCAL release_pos = 0;
#if defined(OS_LINUX) && !defined(NO_WARMUP)
static int hot_alloc = 0;
@ -459,15 +474,9 @@ static void *alloc_mmap(void *address){
}
if (map_address != (void *)-1) {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free;
release_pos ++;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
}
#ifdef OS_LINUX
@ -611,15 +620,9 @@ static void *alloc_mmap(void *address){
#endif
if (map_address != (void *)-1) {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free;
release_pos ++;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
}
return map_address;
@ -961,20 +964,17 @@ static BLASULONG base_address = 0UL;
static BLASULONG base_address = BASE_ADDRESS;
#endif
static volatile struct {
BLASULONG lock;
struct memory_t {
void *addr;
#if defined(WHEREAMI) && !defined(USE_OPENMP)
int pos;
#endif
int used;
#ifndef __64BIT__
char dummy[48];
#else
char dummy[40];
#endif
};
} memory[NUM_BUFFERS];
static struct memory_t THREAD_LOCAL memory[BUFFERS_PER_THREAD];
static int memory_initialized = 0;
@ -987,9 +987,6 @@ static int memory_initialized = 0;
void *blas_memory_alloc(int procpos){
int position;
#if defined(WHEREAMI) && !defined(USE_OPENMP)
int mypos;
#endif
void *map_address;
@ -1020,23 +1017,13 @@ void *blas_memory_alloc(int procpos){
};
void *(**func)(void *address);
#if defined(USE_OPENMP)
if (!memory_initialized) {
#endif
if (UNLIKELY_TO_BE_ZERO(memory_initialized)) {
/* Only allow a single thread to initialize memory system */
LOCK_COMMAND(&alloc_lock);
if (!memory_initialized) {
#if defined(WHEREAMI) && !defined(USE_OPENMP)
for (position = 0; position < NUM_BUFFERS; position ++){
memory[position].addr = (void *)0;
memory[position].pos = -1;
memory[position].used = 0;
memory[position].lock = 0;
}
#endif
#ifdef DYNAMIC_ARCH
gotoblas_dynamic_init();
#endif
@ -1059,63 +1046,19 @@ void *blas_memory_alloc(int procpos){
}
UNLOCK_COMMAND(&alloc_lock);
#if defined(USE_OPENMP)
}
#endif
#ifdef DEBUG
printf("Alloc Start ...\n");
#endif
#if defined(WHEREAMI) && !defined(USE_OPENMP)
mypos = WhereAmI();
position = mypos;
while (position >= NUM_BUFFERS) position >>= 1;
do {
if (!memory[position].used && (memory[position].pos == mypos)) {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#else
blas_lock(&memory[position].lock);
#endif
if (!memory[position].used) goto allocation;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#else
blas_unlock(&memory[position].lock);
#endif
}
position ++;
} while (position < NUM_BUFFERS);
#endif
position = 0;
do {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#else
if (!memory[position].used) {
blas_lock(&memory[position].lock);
#endif
if (!memory[position].used) goto allocation;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#else
blas_unlock(&memory[position].lock);
}
#endif
position ++;
} while (position < NUM_BUFFERS);
} while (position < BUFFERS_PER_THREAD);
goto error;
@ -1126,11 +1069,6 @@ void *blas_memory_alloc(int procpos){
#endif
memory[position].used = 1;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#else
blas_unlock(&memory[position].lock);
#endif
if (!memory[position].addr) {
do {
@ -1148,14 +1086,14 @@ void *blas_memory_alloc(int procpos){
#ifdef ALLOC_DEVICEDRIVER
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n");
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n");
}
#endif
#ifdef ALLOC_HUGETLBFILE
if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
#ifndef OS_WINDOWS
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n");
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation failed.\n");
#endif
}
#endif
@ -1176,44 +1114,13 @@ void *blas_memory_alloc(int procpos){
} while ((BLASLONG)map_address == -1);
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
memory[position].addr = map_address;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
#ifdef DEBUG
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
#endif
}
#if defined(WHEREAMI) && !defined(USE_OPENMP)
if (memory[position].pos == -1) memory[position].pos = mypos;
#endif
#ifdef DYNAMIC_ARCH
if (memory_initialized == 1) {
LOCK_COMMAND(&alloc_lock);
if (memory_initialized == 1) {
if (!gotoblas) gotoblas_dynamic_init();
memory_initialized = 2;
}
UNLOCK_COMMAND(&alloc_lock);
}
#endif
#ifdef DEBUG
printf("Mapped : %p %3d\n\n",
(void *)memory[position].addr, position);
@ -1222,7 +1129,7 @@ void *blas_memory_alloc(int procpos){
return (void *)memory[position].addr;
error:
printf("BLAS : Program is Terminated. Because you tried to allocate too many memory regions.\n");
printf("OpenBLAS : Program will terminate because you tried to allocate too many memory regions.\n");
return NULL;
}
@ -1236,10 +1143,7 @@ void blas_memory_free(void *free_area){
#endif
position = 0;
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
while ((position < NUM_BUFFERS) && (memory[position].addr != free_area))
while ((position < BUFFERS_PER_THREAD) && (memory[position].addr != free_area))
position++;
if (memory[position].addr != free_area) goto error;
@ -1248,13 +1152,7 @@ void blas_memory_free(void *free_area){
printf(" Position : %d\n", position);
#endif
// arm: ensure all writes are finished before other thread takes this memory
WMB;
memory[position].used = 0;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
#ifdef DEBUG
printf("Unmap Succeeded.\n\n");
@ -1266,11 +1164,8 @@ void blas_memory_free(void *free_area){
printf("BLAS : Bad memory unallocation! : %4d %p\n", position, free_area);
#ifdef DEBUG
for (position = 0; position < NUM_BUFFERS; position++)
for (position = 0; position < BUFFERS_PER_THREAD; position++)
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
#endif
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
return;
}
@ -1293,8 +1188,6 @@ void blas_shutdown(void){
BLASFUNC(blas_thread_shutdown)();
#endif
LOCK_COMMAND(&alloc_lock);
for (pos = 0; pos < release_pos; pos ++) {
release_info[pos].func(&release_info[pos]);
}
@ -1305,17 +1198,11 @@ void blas_shutdown(void){
base_address = BASE_ADDRESS;
#endif
for (pos = 0; pos < NUM_BUFFERS; pos ++){
for (pos = 0; pos < BUFFERS_PER_THREAD; pos ++){
memory[pos].addr = (void *)0;
memory[pos].used = 0;
#if defined(WHEREAMI) && !defined(USE_OPENMP)
memory[pos].pos = -1;
#endif
memory[pos].lock = 0;
}
UNLOCK_COMMAND(&alloc_lock);
return;
}