From 82b75f97e509aaef4bc5d95c4b54b29b77c50ede Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 17 Nov 2019 19:22:04 +0100 Subject: [PATCH] Disable the old QCDOC qalloc by default and copy utility functions from memory.c 1. qalloc() appears to have been a special routine written for the PPC440-based QCDOC supercomputer(s) from around 2005, its source does not seem to be readily available. So switch the #if 1 in the code to rely on standard malloc() by default. 2. Utility functions like get_num_procs, get_num_threads that were added to the "normally" used memory.c in the meantime were still missing here. --- driver/others/memory_qalloc.c | 325 ++++++++++++++++++++++++++++++++-- 1 file changed, 313 insertions(+), 12 deletions(-) diff --git a/driver/others/memory_qalloc.c b/driver/others/memory_qalloc.c index 17b7f5d60..6174d9b75 100644 --- a/driver/others/memory_qalloc.c +++ b/driver/others/memory_qalloc.c @@ -38,21 +38,29 @@ #include #include "common.h" - -#ifndef SMP -#define blas_cpu_number 1 -#else - -int blas_cpu_number = 1; - -int blas_get_cpu_number(void){ - - return blas_cpu_number; -} +#ifdef OS_LINUX +#include +#include +#include +#include +#include +#include +#include #endif +#ifdef OS_HAIKU +#include +#endif + +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) +#include +#include +#endif + + #define FIXED_PAGESIZE 4096 + void *sa = NULL; void *sb = NULL; static double static_buffer[BUFFER_SIZE/sizeof(double)]; @@ -60,7 +68,7 @@ static double static_buffer[BUFFER_SIZE/sizeof(double)]; void *blas_memory_alloc(int numproc){ if (sa == NULL){ -#if 1 +#if 0 sa = (void *)qalloc(QFAST, BUFFER_SIZE); #else sa = (void *)malloc(BUFFER_SIZE); @@ -75,3 +83,296 @@ void blas_memory_free(void *free_area){ return; } + + +extern void openblas_warning(int verbose, const char * msg); + +#ifndef SMP + +#define blas_cpu_number 1 +#define blas_num_threads 1 + +/* Dummy Function */ +int goto_get_num_procs (void) { return 1;}; +void goto_set_num_threads(int num_threads) {}; + +#else + +#if defined(OS_LINUX) || defined(OS_SUNOS) +#ifndef NO_AFFINITY +int get_num_procs(void); +#else +int get_num_procs(void) { + + static int nums = 0; + cpu_set_t cpuset,*cpusetp; + size_t size; + int ret; + +#if defined(__GLIBC_PREREQ) +#if !__GLIBC_PREREQ(2, 7) + int i; +#if !__GLIBC_PREREQ(2, 6) + int n; +#endif +#endif +#endif + + if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); +#if !defined(OS_LINUX) + return nums; +#endif + +/* +#if !defined(__GLIBC_PREREQ) + return nums; +#else + #if !__GLIBC_PREREQ(2, 3) + return nums; + #endif + + #if !__GLIBC_PREREQ(2, 7) + ret = sched_getaffinity(0,sizeof(cpuset), &cpuset); + if (ret!=0) return nums; + n=0; + #if !__GLIBC_PREREQ(2, 6) + for (i=0;i= CPU_SETSIZE) { + cpusetp = CPU_ALLOC(nums); + if (cpusetp == NULL) { + return nums; + } + size = CPU_ALLOC_SIZE(nums); + ret = sched_getaffinity(0,size,cpusetp); + if (ret!=0) { + CPU_FREE(cpusetp); + return nums; + } + ret = CPU_COUNT_S(size,cpusetp); + if (ret > 0 && ret < nums) nums = ret; + CPU_FREE(cpusetp); + return nums; + } else { + ret = sched_getaffinity(0,sizeof(cpuset),&cpuset); + if (ret!=0) { + return nums; + } + ret = CPU_COUNT(&cpuset); + if (ret > 0 && ret < nums) nums = ret; + return nums; + } + #endif +#endif +*/ + return 1; +} +#endif +#endif + +#ifdef OS_ANDROID +int get_num_procs(void) { + static int nums = 0; + if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); + return nums; +} +#endif + +#ifdef OS_HAIKU +int get_num_procs(void) { + static int nums = 0; + if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); + return nums; +} +#endif + +#ifdef OS_AIX +int get_num_procs(void) { + static int nums = 0; + if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); + return nums; +} +#endif + +#ifdef OS_WINDOWS + +int get_num_procs(void) { + + static int nums = 0; + + if (nums == 0) { + + SYSTEM_INFO sysinfo; + + GetSystemInfo(&sysinfo); + + nums = sysinfo.dwNumberOfProcessors; + } + + return nums; +} + +#endif + +#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) + +int get_num_procs(void) { + + static int nums = 0; + + int m[2]; + size_t len; + + if (nums == 0) { + m[0] = CTL_HW; + m[1] = HW_NCPU; + len = sizeof(int); + sysctl(m, 2, &nums, &len, NULL, 0); + } + + return nums; +} + +#endif + +#if defined(OS_DARWIN) +int get_num_procs(void) { + static int nums = 0; + size_t len; + if (nums == 0){ + len = sizeof(int); + sysctlbyname("hw.physicalcpu", &nums, &len, NULL, 0); + } + return nums; +} +/* +void set_stack_limit(int limitMB){ + int result=0; + struct rlimit rl; + rlim_t StackSize; + + StackSize=limitMB*1024*1024; + result=getrlimit(RLIMIT_STACK, &rl); + if(result==0){ + if(rl.rlim_cur < StackSize){ + rl.rlim_cur=StackSize; + result=setrlimit(RLIMIT_STACK, &rl); + if(result !=0){ + fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result); + } + } + } +} +*/ +#endif + + +/* +OpenBLAS uses the numbers of CPU cores in multithreading. +It can be set by openblas_set_num_threads(int num_threads); +*/ +int blas_cpu_number = 0; +/* +The numbers of threads in the thread pool. +This value is equal or large than blas_cpu_number. This means some threads are sleep. +*/ +int blas_num_threads = 0; + +int goto_get_num_procs (void) { + return blas_cpu_number; +} + +void openblas_fork_handler() +{ + // This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is + // built with "make USE_OPENMP=0". + // Hanging can still happen when OpenBLAS is built against the libgomp + // implementation of OpenMP. The problem is tracked at: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035 + // In the mean time build with USE_OPENMP=0 or link against another + // implementation of OpenMP. +#if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER) + int err; + err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL); + if(err != 0) + openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n"); +#endif +} + +extern int openblas_num_threads_env(); +extern int openblas_goto_num_threads_env(); +extern int openblas_omp_num_threads_env(); + +int blas_get_cpu_number(void){ +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) + int max_num; +#endif + int blas_goto_num = 0; + int blas_omp_num = 0; + + if (blas_num_threads) return blas_num_threads; + +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) + max_num = get_num_procs(); +#endif + + // blas_goto_num = 0; +#ifndef USE_OPENMP + blas_goto_num=openblas_num_threads_env(); + if (blas_goto_num < 0) blas_goto_num = 0; + + if (blas_goto_num == 0) { + blas_goto_num=openblas_goto_num_threads_env(); + if (blas_goto_num < 0) blas_goto_num = 0; + } + +#endif + + // blas_omp_num = 0; + blas_omp_num=openblas_omp_num_threads_env(); + if (blas_omp_num < 0) blas_omp_num = 0; + + if (blas_goto_num > 0) blas_num_threads = blas_goto_num; + else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; + else blas_num_threads = MAX_CPU_NUMBER; + +#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) + if (blas_num_threads > max_num) blas_num_threads = max_num; +#endif + + if (blas_num_threads > MAX_CPU_NUMBER) blas_num_threads = MAX_CPU_NUMBER; + +#ifdef DEBUG + printf( "Adjusted number of threads : %3d\n", blas_num_threads); +#endif + + blas_cpu_number = blas_num_threads; + + return blas_num_threads; +} +#endif + + +int openblas_get_num_procs(void) { +#ifndef SMP + return 1; +#else + return get_num_procs(); +#endif +} + +int openblas_get_num_threads(void) { +#ifndef SMP + return 1; +#else + // init blas_cpu_number if needed + blas_get_cpu_number(); + return blas_cpu_number; +#endif +}