From 29fc429d9a34a513e737df287c8a8785c9f7a332 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 11 Jul 2017 18:27:33 +0200 Subject: [PATCH 1/5] Honor cgroup/cpuset constraints when enumerating cpus --- driver/others/init.c | 59 ++++++++++++++++++++++++++++++++++++-- driver/others/memory.c | 65 +++++++++++++++++++++++++++--------------- 2 files changed, 98 insertions(+), 26 deletions(-) diff --git a/driver/others/init.c b/driver/others/init.c index 9be6f52b0..4093776db 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -354,6 +354,24 @@ static int numa_check(void) { return common -> num_nodes; } +#if defined(__GLIBC_PREREQ) +#if !__GLIBC_PREREQ(2, 6) +int sched_getcpu(void) +{ +int cpu; +FILE *fp = NULL; +if ( (fp = fopen("/proc/self/stat", "r")) == NULL) + return -1; +if ( fscanf( fp, "%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%d", &cpu) != 1) { + fclose (fp); + return -1; + } + fclose (fp); + return(cpu); +} +#endif +#endif + static void numa_mapping(void) { int node, cpu, core; @@ -808,16 +826,51 @@ void gotoblas_affinity_init(void) { common -> shmid = pshmid; if (common -> magic != SH_MAGIC) { - + cpu_set_t *cpusetp; + int nums; + int ret; #ifdef DEBUG fprintf(stderr, "Shared Memory Initialization.\n"); #endif //returns the number of processors which are currently online - common -> num_procs = sysconf(_SC_NPROCESSORS_CONF);; + nums = sysconf(_SC_NPROCESSORS_CONF); + +#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 3) + common->num_procs = nums; +#elif __GLIBC_PREREQ(2, 7) + cpusetp = CPU_ALLOC(nums); + if (cpusetp == NULL) { + common->num_procs = nums; + } else { + size_t size; + size = CPU_ALLOC_SIZE(nums); + ret = sched_getaffinity(0,size,cpusetp); + if (ret!=0) + common->num_procs = nums; + else + common->num_procs = CPU_COUNT_S(size,cpusetp); + } + CPU_FREE(cpusetp); +#else + ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); + if (ret!=0) { + 
common->num_procs = nums; + } else { +#if !__GLIBC_PREREQ(2, 6) + int i; + int n = 0; + for (i=0;i<nums;i++) + if (CPU_ISSET(i,cpusetp)) n++; + common->num_procs = n; +#else + common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp); +#endif + +#endif if(common -> num_procs > MAX_CPUS) { - fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS); + fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS); exit(1); } diff --git a/driver/others/memory.c b/driver/others/memory.c index 916950315..103c97077 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -175,7 +175,44 @@ int get_num_procs(void); #else int get_num_procs(void) { static int nums = 0; +cpu_set_t *cpusetp; +size_t size; +int ret; +int i,n; + if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); +#if !defined(OS_LINUX) + return nums; +#endif + +#if !defined(__GLIBC_PREREQ) + return nums; +#endif +#if !__GLIBC_PREREQ(2, 3) + return nums; +#endif + +#if !__GLIBC_PREREQ(2, 7) + ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); + if (ret!=0) return nums; + n=0; +#if !__GLIBC_PREREQ(2, 6) + for (i=0;i<nums;i++) + if (CPU_ISSET(i,cpusetp)) n++; + nums=n; +#else + nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp); +#endif + return nums; +#endif + + cpusetp = CPU_ALLOC(nums); + if (cpusetp == NULL) return nums; + size = CPU_ALLOC_SIZE(nums); + ret = sched_getaffinity(0,size,cpusetp); + if (ret!=0) return nums; + nums = CPU_COUNT_S(size,cpusetp); + CPU_FREE(cpusetp); return nums; } #endif @@ -1015,7 +1052,7 @@ void *blas_memory_alloc(int procpos){ mypos = WhereAmI(); position = mypos; - while (position >= NUM_BUFFERS) position >>= 1; + while (position > NUM_BUFFERS) position >>= 1; do { if (!memory[position].used && (memory[position].pos == mypos)) { @@ -1164,8 +1201,8 @@ void blas_memory_free(void *free_area){ position = 0; LOCK_COMMAND(&alloc_lock); - while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) - position++; + while ((memory[position].addr != free_area) + && (position < NUM_BUFFERS)) position++; if (memory[position].addr != free_area) goto error; @@ -1479,30 +1516,12 @@ static int on_process_term(void) #else #pragma comment(linker, "/INCLUDE:__tls_used") #endif - -#ifdef _WIN64 -#pragma const_seg(".CRT$XLB") -#else +#pragma data_seg(push, old_seg) #pragma data_seg(".CRT$XLB") -#endif static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, 
PVOID pv) = DllMain; -#ifdef _WIN64 -#pragma const_seg() -#else -#pragma data_seg() -#endif - -#ifdef _WIN64 -#pragma const_seg(".CRT$XTU") -#else #pragma data_seg(".CRT$XTU") -#endif static int(*p_process_term)(void) = on_process_term; -#ifdef _WIN64 -#pragma const_seg() -#else -#pragma data_seg() -#endif +#pragma data_seg(pop, old_seg) #endif #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) From 731c518cffedbbedb17d55850dd5506672e1f6a4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 11 Jul 2017 18:42:39 +0200 Subject: [PATCH 2/5] Add files via upload --- driver/others/memory.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 103c97077..38d063715 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1052,7 +1052,7 @@ void *blas_memory_alloc(int procpos){ mypos = WhereAmI(); position = mypos; - while (position > NUM_BUFFERS) position >>= 1; + while (position >= NUM_BUFFERS) position >>= 1; do { if (!memory[position].used && (memory[position].pos == mypos)) { @@ -1201,8 +1201,8 @@ void blas_memory_free(void *free_area){ position = 0; LOCK_COMMAND(&alloc_lock); - while ((memory[position].addr != free_area) - && (position < NUM_BUFFERS)) position++; + while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) + position++; if (memory[position].addr != free_area) goto error; @@ -1516,12 +1516,30 @@ static int on_process_term(void) #else #pragma comment(linker, "/INCLUDE:__tls_used") #endif -#pragma data_seg(push, old_seg) + +#ifdef _WIN64 +#pragma const_seg(".CRT$XLB") +#else #pragma data_seg(".CRT$XLB") +#endif static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain; +#ifdef _WIN64 +#pragma const_seg() +#else +#pragma data_seg() +#endif + +#ifdef _WIN64 +#pragma const_seg(".CRT$XTU") +#else #pragma data_seg(".CRT$XTU") 
+#endif static int(*p_process_term)(void) = on_process_term; -#pragma data_seg(pop, old_seg) +#ifdef _WIN64 +#pragma const_seg() +#else +#pragma data_seg() +#endif #endif #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) From 88249ca5f793f9d18584e5388a88054651b9bb7b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 11 Jul 2017 18:48:13 +0200 Subject: [PATCH 3/5] Add files via upload From d12b75a6c480020c8b3375d98559f5e3e6b53c3d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 15 Jul 2017 11:53:28 +0200 Subject: [PATCH 4/5] Fixup braces lost in previous edit --- driver/others/init.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/driver/others/init.c b/driver/others/init.c index 1f07a24ac..221404ee1 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -778,11 +778,8 @@ static int initialized = 0; void gotoblas_affinity_init(void) { int cpu, num_avail; -#ifndef USE_OPENMP cpu_set_t cpu_mask; -#endif - int i; - + if (initialized) return; initialized = 1; @@ -825,16 +822,18 @@ void gotoblas_affinity_init(void) { common -> shmid = pshmid; - if (common -> magic != SH_MAGIC) + if (common -> magic != SH_MAGIC) { cpu_set_t *cpusetp; int nums; int ret; + int i; #ifdef DEBUG fprintf(stderr, "Shared Memory Initialization.\n"); #endif //returns the number of processors which are currently online + nums = sysconf(_SC_NPROCESSORS_CONF); #if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 3) @@ -855,7 +854,7 @@ void gotoblas_affinity_init(void) { CPU_FREE(cpusetp); #else ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); - if (ret!=0) { + if (ret!=0) { common->num_procs = nums; } else { #if !__GLIBC_PREREQ(2, 6) @@ -863,7 +862,8 @@ void gotoblas_affinity_init(void) { int n = 0; for (i=0;i<nums;i++) if (CPU_ISSET(i,cpusetp)) n++; - common->num_procs = n; + common->num_procs = n; + } #else common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp); #endif @@ -884,7 +884,7 @@ void 
gotoblas_affinity_init(void) { if (common -> num_nodes > 1) numa_mapping(); common -> final_num_procs = 0; - for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number. + for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number. for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0; From 80373ea03944574c5006812afc0ff2827716ba44 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 15 Jul 2017 12:48:42 +0200 Subject: [PATCH 5/5] More fixes for silly misedits --- driver/others/init.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/driver/others/init.c b/driver/others/init.c index 221404ee1..4c75d72e4 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -778,8 +778,11 @@ static int initialized = 0; void gotoblas_affinity_init(void) { int cpu, num_avail; +#ifndef USE_OPENMP cpu_set_t cpu_mask; - +#endif + int i; + if (initialized) return; initialized = 1; @@ -826,7 +829,6 @@ void gotoblas_affinity_init(void) { cpu_set_t *cpusetp; int nums; int ret; - int i; #ifdef DEBUG fprintf(stderr, "Shared Memory Initialization.\n");