Merge pull request #4662 from martin-frbg/hugetlb-doc

Fix and document the two HUGETLB options for buffer allocation in Makefile.rule
This commit is contained in:
Martin Kroeker 2024-05-07 13:32:07 +02:00 committed by GitHub
commit 8735b54fa8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 64 additions and 20 deletions

View File

@ -219,6 +219,16 @@ NO_AFFINITY = 1
# to the user space. If bigphysarea is enabled, it will use it. # to the user space. If bigphysarea is enabled, it will use it.
# DEVICEDRIVER_ALLOCATION = 1 # DEVICEDRIVER_ALLOCATION = 1
# Use large page allocation (called hugepage support in Linux context)
# for the thread buffers (with access by shared memory operations)
# HUGETLB_ALLOCATION = 1
# Use large page allocation (called hugepages in Linux) based on mmap accessing
# a memory-backed pseudofile. This requires hugetlbfs to be mounted in the system;
# the example below has it mounted on /hugepages. OpenBLAS will create the backing
# file as gotoblas.processid in that path.
# HUGETLBFILE_ALLOCATION = /hugepages
# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only). # If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only).
# CONSISTENT_FPCSR = 1 # CONSISTENT_FPCSR = 1

View File

@ -962,18 +962,12 @@ endif
ifeq ($(ARCH), loongarch64) ifeq ($(ARCH), loongarch64)
LA64_ABI=$(shell $(CC) -mabi=lp64d -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo lp64d) LA64_ABI=$(shell $(CC) -mabi=lp64d -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo lp64d)
LA64_ARCH=$(shell $(CC) -march=loongarch64 -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo loongarch64)
ifneq ($(LA64_ABI), lp64d) ifneq ($(LA64_ABI), lp64d)
LA64_ABI=lp64 LA64_ABI=lp64
endif endif
ifneq ($(LA64_ARCH), loongarch64)
CCOMMON_OPT += -mabi=$(LA64_ABI)
FCOMMON_OPT += -mabi=$(LA64_ABI)
else
CCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI) CCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI)
FCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI) FCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI)
endif endif
endif
endif endif
@ -1592,13 +1586,23 @@ ifdef FUNCTION_PROFILE
CCOMMON_OPT += -DFUNCTION_PROFILE CCOMMON_OPT += -DFUNCTION_PROFILE
endif endif
ifdef SHMEM_ALLOCATION
ifneq ($(SHMEM_ALLOCATION), 0)
CCOMMON_OPT += -DALLOC_SHM
endif
endif
ifdef HUGETLB_ALLOCATION ifdef HUGETLB_ALLOCATION
ifneq ($(HUGETLB_ALLOCATION), 0)
CCOMMON_OPT += -DALLOC_HUGETLB CCOMMON_OPT += -DALLOC_HUGETLB
endif endif
endif
ifdef HUGETLBFILE_ALLOCATION ifdef HUGETLBFILE_ALLOCATION
ifneq ($(HUGETLBFILE_ALLOCATION), 0)
CCOMMON_OPT += -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=$(HUGETLBFILE_ALLOCATION) CCOMMON_OPT += -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=$(HUGETLBFILE_ALLOCATION)
endif endif
endif
ifdef STATIC_ALLOCATION ifdef STATIC_ALLOCATION
CCOMMON_OPT += -DALLOC_STATIC CCOMMON_OPT += -DALLOC_STATIC

View File

@ -1165,11 +1165,10 @@ void *blas_memory_alloc(int procpos){
#ifdef ALLOC_DEVICEDRIVER #ifdef ALLOC_DEVICEDRIVER
alloc_devicedirver, alloc_devicedirver,
#endif #endif
/* Hugetlb implicitly assumes ALLOC_SHM */ #ifdef ALLOC_SHM && !defined(ALLOC_HUGETLB)
#ifdef ALLOC_SHM
alloc_shm, alloc_shm,
#endif #endif
#if ((defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)) #if ((defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS))
alloc_hugetlb, alloc_hugetlb,
#endif #endif
#ifdef ALLOC_MMAP #ifdef ALLOC_MMAP
@ -1190,7 +1189,6 @@ void *blas_memory_alloc(int procpos){
struct alloc_t * alloc_info; struct alloc_t * alloc_info;
struct alloc_t ** alloc_table; struct alloc_t ** alloc_table;
#if defined(SMP) && !defined(USE_OPENMP) #if defined(SMP) && !defined(USE_OPENMP)
int mi; int mi;
LOCK_COMMAND(&alloc_lock); LOCK_COMMAND(&alloc_lock);
@ -1282,7 +1280,7 @@ UNLOCK_COMMAND(&alloc_lock);
} }
#endif #endif
#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) #if (defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
#endif #endif
@ -2494,7 +2492,7 @@ static void *alloc_devicedirver(void *address){
#endif #endif
#ifdef ALLOC_SHM #if defined(ALLOC_SHM) && !defined(ALLOC_HUGETLB)
static void alloc_shm_free(struct release_t *release){ static void alloc_shm_free(struct release_t *release){
@ -2506,7 +2504,9 @@ static void alloc_shm_free(struct release_t *release){
static void *alloc_shm(void *address){ static void *alloc_shm(void *address){
void *map_address; void *map_address;
int shmid; int shmid;
#ifdef DEBUG
fprintf(stderr,"alloc_shm got called\n");
#endif
shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,IPC_CREAT | 0600); shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,IPC_CREAT | 0600);
map_address = (void *)shmat(shmid, address, 0); map_address = (void *)shmat(shmid, address, 0);
@ -2533,6 +2533,7 @@ static void *alloc_shm(void *address){
return map_address; return map_address;
} }
#endif
#if defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS #if defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS
@ -2562,6 +2563,10 @@ static void *alloc_hugetlb(void *address){
void *map_address = (void *)-1; void *map_address = (void *)-1;
#ifdef DEBUG
fprintf(stderr,"alloc_hugetlb got called\n");
#endif
#if defined(OS_LINUX) || defined(OS_AIX) #if defined(OS_LINUX) || defined(OS_AIX)
int shmid; int shmid;
@ -2583,7 +2588,7 @@ static void *alloc_hugetlb(void *address){
if (map_address != (void *)-1){ if (map_address != (void *)-1){
shmctl(shmid, IPC_RMID, 0); shmctl(shmid, IPC_RMID, 0);
} }else printf("alloc_hugetlb failed\n");
} }
#endif #endif
@ -2645,7 +2650,6 @@ static void *alloc_hugetlb(void *address){
} }
#endif #endif
#endif
#ifdef ALLOC_HUGETLBFILE #ifdef ALLOC_HUGETLBFILE
@ -2762,11 +2766,10 @@ void *blas_memory_alloc(int procpos){
#ifdef ALLOC_DEVICEDRIVER #ifdef ALLOC_DEVICEDRIVER
alloc_devicedirver, alloc_devicedirver,
#endif #endif
/* Hugetlb implicitly assumes ALLOC_SHM */ #ifdef ALLOC_SHM && !defined(ALLOC_HUGETLB)
#ifdef ALLOC_SHM
alloc_shm, alloc_shm,
#endif #endif
#if ((defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)) #if ((defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS))
alloc_hugetlb, alloc_hugetlb,
#endif #endif
#ifdef ALLOC_MMAP #ifdef ALLOC_MMAP
@ -2945,8 +2948,22 @@ void *blas_memory_alloc(int procpos){
} }
#endif #endif
#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) #if (defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
#ifdef DEBUG
if (hugetlb_allocated) printf("allocating via shared memory with large page support (hugetlb)\n");
#endif
#endif
#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
#ifdef DEBUG
printf("allocating via shared memory\n");
#endif
if ((*func == alloc_shm) && (map_address == (void *)-1)) {
#ifndef OS_WINDOWS
fprintf(stderr, "OpenBLAS Warning ... shared memory allocation was failed.\n");
#endif
}
#endif #endif
func ++; func ++;
@ -3061,10 +3078,23 @@ allocation2:
} }
#endif #endif
#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) #if (defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
#ifdef DEBUG
fprintf(stderr,"OpenBLAS: allocating via shared memory with large page support (hugetlb)\n");
#endif
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
#endif #endif
#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
#ifdef DEBUG
fprintf(stderr,"allocating via shared memory\n");
#endif
if ((*func == alloc_shm) && (map_address == (void *)-1)) {
#ifndef OS_WINDOWS
fprintf(stderr, "OpenBLAS Warning ... shared memory allocation was failed.\n");
#endif
}
#endif
func ++; func ++;
} }