380 lines
		
	
	
		
			9.7 KiB
		
	
	
	
		
			C
		
	
	
	
			
		
		
	
	
			380 lines
		
	
	
		
			9.7 KiB
		
	
	
	
		
			C
		
	
	
	
| /*********************************************************************/
 | |
| /* Copyright 2009, 2010 The University of Texas at Austin.           */
 | |
| /* All rights reserved.                                              */
 | |
| /*                                                                   */
 | |
| /* Redistribution and use in source and binary forms, with or        */
 | |
| /* without modification, are permitted provided that the following   */
 | |
| /* conditions are met:                                               */
 | |
| /*                                                                   */
 | |
| /*   1. Redistributions of source code must retain the above         */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer.                                                  */
 | |
| /*                                                                   */
 | |
| /*   2. Redistributions in binary form must reproduce the above      */
 | |
| /*      copyright notice, this list of conditions and the following  */
 | |
| /*      disclaimer in the documentation and/or other materials       */
 | |
| /*      provided with the distribution.                              */
 | |
| /*                                                                   */
 | |
| /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
 | |
| /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
 | |
| /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
 | |
| /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
 | |
| /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
 | |
| /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
 | |
| /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
 | |
| /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
 | |
| /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
 | |
| /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
 | |
| /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
 | |
| /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
 | |
| /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
 | |
| /*    POSSIBILITY OF SUCH DAMAGE.                                    */
 | |
| /*                                                                   */
 | |
| /* The views and conclusions contained in the software and           */
 | |
| /* documentation are those of the authors and should not be          */
 | |
| /* interpreted as representing official policies, either expressed   */
 | |
| /* or implied, of The University of Texas at Austin.                 */
 | |
| /*********************************************************************/
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include "common.h"
 | |
| #ifdef OS_LINUX
 | |
| #include <sys/sysinfo.h>
 | |
| #include <sched.h>
 | |
| #include <errno.h>
 | |
| #include <linux/unistd.h>
 | |
| #include <sys/syscall.h>
 | |
| #include <sys/time.h>
 | |
| #include <sys/resource.h>
 | |
| #endif
 | |
| 
 | |
| #ifdef OS_HAIKU
 | |
| #include <unistd.h>
 | |
| #endif
 | |
| 
 | |
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN)
 | |
| #include <sys/sysctl.h>
 | |
| #include <sys/resource.h>
 | |
| #endif
 | |
| 
 | |
| 
 | |
| #define FIXED_PAGESIZE 4096
 | |
| 
 | |
| 
 | |
| void *sa = NULL;
 | |
| void *sb = NULL;
 | |
| static double static_buffer[BUFFER_SIZE/sizeof(double)];
 | |
| 
 | |
| void *blas_memory_alloc(int numproc){
 | |
| 
 | |
|   if (sa == NULL){
 | |
| #if 0
 | |
|     sa = (void *)qalloc(QFAST, BUFFER_SIZE);
 | |
| #else
 | |
|     sa = (void *)malloc(BUFFER_SIZE);
 | |
| #endif
 | |
|     sb = (void *)&static_buffer[0];
 | |
|   }
 | |
| 
 | |
|   return sa;
 | |
| }
 | |
| 
 | |
/*
 * Counterpart of blas_memory_alloc().  Does nothing: the argument is
 * ignored and no memory is released.
 */
void blas_memory_free(void *free_area){
  (void)free_area;
}
 | |
| 
 | |
| 
 | |
| 
 | |
| extern void openblas_warning(int verbose, const char * msg);
 | |
| 
 | |
| #ifndef SMP
 | |
| 
 | |
| #define blas_cpu_number 1
 | |
| #define blas_num_threads 1
 | |
| 
 | |
/*
 * Dummy for builds without SMP: the processor count is always 1.
 * (The stray ';' that followed the body was an empty file-scope
 * declaration and has been removed.)
 */
int goto_get_num_procs(void)
{
  return 1;
}
 | |
/*
 * Dummy for builds without SMP: the requested thread count is ignored.
 * (The stray ';' that followed the body was an empty file-scope
 * declaration and has been removed.)
 */
void goto_set_num_threads(int num_threads)
{
  (void)num_threads;
}
 | |
| 
 | |
| #else
 | |
| 
 | |
| #if defined(OS_LINUX) || defined(OS_SUNOS)
 | |
| #ifndef NO_AFFINITY
 | |
| int get_num_procs(void);
 | |
| #else
 | |
/*
 * Processor count for Linux/SunOS builds compiled with NO_AFFINITY.
 *
 * The value of sysconf(_SC_NPROCESSORS_CONF) is cached in a function-local
 * static on the first call.
 *
 *   - Non-Linux builds return that cached value.
 *   - Linux builds return 1.  The affinity-aware counting that used to
 *     follow here (sched_getaffinity() with CPU_COUNT / CPU_COUNT_S) was
 *     commented out in this file, leaving only the constant return; the
 *     dead code and the locals it needed have been removed.
 *
 * NOTE(review): the constant 1 on Linux is kept exactly as it was; confirm
 * it is intended for this allocator variant before changing it.
 */
int get_num_procs(void) {

  static int nums = 0;

  /* Queried on every platform, as before, even though the Linux branch
     below does not return it. */
  if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);

#if !defined(OS_LINUX)
  return nums;
#else
  return 1;
#endif
}
 | |
| #endif
 | |
| #endif
 | |
| 
 | |
| #ifdef OS_ANDROID
 | |
/*
 * Android: report sysconf(_SC_NPROCESSORS_CONF), looked up once and then
 * served from a function-local static.
 */
int get_num_procs(void) {
  static int cached_count = 0;

  if (cached_count == 0) {
    cached_count = sysconf(_SC_NPROCESSORS_CONF);
  }

  return cached_count;
}
 | |
| #endif
 | |
| 
 | |
| #ifdef OS_HAIKU
 | |
/*
 * Haiku: report sysconf(_SC_NPROCESSORS_CONF).  The result is kept in a
 * function-local static, so sysconf() is only consulted while that cache
 * is still zero.
 */
int get_num_procs(void) {
  static int ncpu = 0;

  /* Fast path: value already cached. */
  if (ncpu != 0) return ncpu;

  ncpu = sysconf(_SC_NPROCESSORS_CONF);
  return ncpu;
}
 | |
| #endif
 | |
| 
 | |
| #ifdef OS_AIX
 | |
/*
 * AIX: report sysconf(_SC_NPROCESSORS_CONF), caching the answer in a
 * function-local static after the first lookup.
 */
int get_num_procs(void) {
  static int configured = 0;

  if (!configured)
    configured = sysconf(_SC_NPROCESSORS_CONF);

  return configured;
}
 | |
| #endif
 | |
| 
 | |
| #ifdef OS_WINDOWS
 | |
| 
 | |
| int get_num_procs(void) {
 | |
| 
 | |
|   static int nums = 0;
 | |
| 
 | |
|   if (nums == 0) {
 | |
| 
 | |
|     SYSTEM_INFO sysinfo;
 | |
| 
 | |
|     GetSystemInfo(&sysinfo);
 | |
| 
 | |
|     nums = sysinfo.dwNumberOfProcessors;
 | |
|   }
 | |
| 
 | |
|   return nums;
 | |
| }
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY)
 | |
| 
 | |
| int get_num_procs(void) {
 | |
| 
 | |
|   static int nums = 0;
 | |
| 
 | |
|   int m[2];
 | |
|   size_t len;
 | |
| 
 | |
|   if (nums == 0) {
 | |
|     m[0] = CTL_HW;
 | |
|     m[1] = HW_NCPU;
 | |
|     len = sizeof(int);
 | |
|     sysctl(m, 2, &nums, &len, NULL, 0);
 | |
|   }
 | |
| 
 | |
|   return nums;
 | |
| }
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #if defined(OS_DARWIN)
 | |
/*
 * Darwin: report the "hw.physicalcpu" sysctl, cached in a function-local
 * static after the first successful lookup.
 *
 * The sysctlbyname() result used to be ignored, so a failed query made
 * this function return 0 processors.  Now a failed query (or a count
 * below 1) returns 1 without caching, so the next call queries again.
 */
int get_num_procs(void) {
  static int nums = 0;

  if (nums == 0) {
    int count = 0;
    size_t len = sizeof(count);

    if (sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0) != 0 || count < 1) {
      /* Never report zero or negative processors to callers. */
      return 1;
    }

    nums = count;
  }

  return nums;
}
 | |
| /*
 | |
| void set_stack_limit(int limitMB){
 | |
|   int result=0;
 | |
|   struct rlimit rl;
 | |
|   rlim_t StackSize;
 | |
| 
 | |
|   StackSize=limitMB*1024*1024;
 | |
|   result=getrlimit(RLIMIT_STACK, &rl);
 | |
|   if(result==0){
 | |
|     if(rl.rlim_cur < StackSize){
 | |
|       rl.rlim_cur=StackSize;
 | |
|       result=setrlimit(RLIMIT_STACK, &rl);
 | |
|       if(result !=0){
 | |
|         fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| */
 | |
| #endif
 | |
| 
 | |
| 
 | |
/*
 * Number of CPU cores OpenBLAS uses for multithreading.
 * It can be set with openblas_set_num_threads(int num_threads).
 */
int blas_cpu_number  = 0;

/*
 * Number of threads in the thread pool.
 * This value is equal to or larger than blas_cpu_number; when it is
 * larger, some of the pool's threads are sleeping.
 */
int blas_num_threads = 0;

/* NOTE(review): not referenced in the visible part of this file; its
   meaning should be confirmed against the code that sets it. */
int blas_num_threads_set = 0;
 | |
| 
 | |
| int  goto_get_num_procs  (void) {
 | |
|   return blas_cpu_number;
 | |
| }
 | |
| 
 | |
/*
 * Register blas_thread_shutdown as the pthread_atfork() "prepare" hook.
 *
 * This shuts down the OpenBLAS-managed pthread pool when OpenBLAS is built
 * with "make USE_OPENMP=0".  Hangs can still happen when OpenBLAS is built
 * against the libgomp implementation of OpenMP; that problem is tracked at
 *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035
 * In the meantime, build with USE_OPENMP=0 or link against another OpenMP
 * implementation.
 *
 * The registration is compiled only when SMP_SERVER is defined and the
 * target is neither Android nor native (non-Cygwin) Windows; otherwise
 * this function is empty.  A registration failure is reported through
 * openblas_warning() and is not fatal.
 */
void openblas_fork_handler()
{
#if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER)
  if (pthread_atfork((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL) != 0) {
    openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n");
  }
#endif
}
 | |
| 
 | |
| extern int openblas_num_threads_env();
 | |
| extern int openblas_goto_num_threads_env();
 | |
| extern int openblas_omp_num_threads_env();
 | |
| 
 | |
| int blas_get_cpu_number(void){
 | |
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
 | |
|   int max_num;
 | |
| #endif
 | |
|   int blas_goto_num   = 0;
 | |
|   int blas_omp_num    = 0;
 | |
| 
 | |
|   if (blas_num_threads) return blas_num_threads;
 | |
| 
 | |
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
 | |
|   max_num = get_num_procs();
 | |
| #endif
 | |
| 
 | |
|   // blas_goto_num = 0;
 | |
| #ifndef USE_OPENMP
 | |
|   blas_goto_num=openblas_num_threads_env();
 | |
|   if (blas_goto_num < 0) blas_goto_num = 0;
 | |
| 
 | |
|   if (blas_goto_num == 0) {
 | |
|     blas_goto_num=openblas_goto_num_threads_env();
 | |
|     if (blas_goto_num < 0) blas_goto_num = 0;
 | |
|   }
 | |
| 
 | |
| #endif
 | |
| 
 | |
|   // blas_omp_num = 0;
 | |
|   blas_omp_num=openblas_omp_num_threads_env();
 | |
|   if (blas_omp_num < 0) blas_omp_num = 0;
 | |
| 
 | |
|   if (blas_goto_num > 0) blas_num_threads = blas_goto_num;
 | |
|   else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
 | |
|   else blas_num_threads = MAX_CPU_NUMBER;
 | |
| 
 | |
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
 | |
|   if (blas_num_threads > max_num) blas_num_threads = max_num;
 | |
| #endif
 | |
| 
 | |
|   if (blas_num_threads > MAX_CPU_NUMBER) blas_num_threads = MAX_CPU_NUMBER;
 | |
| 
 | |
| #ifdef DEBUG
 | |
|   printf( "Adjusted number of threads : %3d\n", blas_num_threads);
 | |
| #endif
 | |
| 
 | |
|   blas_cpu_number = blas_num_threads;
 | |
| 
 | |
|   return blas_num_threads;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| 
 | |
/*
 * Public query for the processor count: get_num_procs() in SMP builds,
 * the constant 1 otherwise.
 */
int openblas_get_num_procs(void) {
#ifdef SMP
  return get_num_procs();
#else
  return 1;
#endif
}
 | |
| 
 | |
/*
 * Public query for the thread count.  SMP builds call
 * blas_get_cpu_number() first and then return blas_cpu_number; non-SMP
 * builds return the constant 1.
 */
int openblas_get_num_threads(void) {
#ifdef SMP
  /* Called so blas_cpu_number is initialised if needed; the return
     value itself is not used. */
  blas_get_cpu_number();
  return blas_cpu_number;
#else
  return 1;
#endif
}
 |