Refs #112. Improved the setting of thread affinity on Linux. Removed the limit (64) on the number of CPU cores.
This commit is contained in:
		
							parent
							
								
									5199809bba
								
							
						
					
					
						commit
						a6adbb299d
					
				| 
						 | 
					@ -1,5 +1,5 @@
 | 
				
			||||||
/*****************************************************************************
 | 
					/*****************************************************************************
 | 
				
			||||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
 | 
					Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
 | 
				
			||||||
All rights reserved.
 | 
					All rights reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Redistribution and use in source and binary forms, with or without
 | 
					Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
| 
						 | 
					@ -85,6 +85,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MAX_NODES	16
 | 
					#define MAX_NODES	16
 | 
				
			||||||
#define MAX_CPUS	256
 | 
					#define MAX_CPUS	256
 | 
				
			||||||
 | 
					#define NCPUBITS        (8*sizeof(unsigned long))
 | 
				
			||||||
 | 
					#define MAX_BITMASK_LEN (MAX_CPUS/NCPUBITS)
 | 
				
			||||||
 | 
					#define CPUELT(cpu)	((cpu) / NCPUBITS)
 | 
				
			||||||
 | 
					#define CPUMASK(cpu)	((unsigned long) 1UL << ((cpu) % NCPUBITS))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SH_MAGIC	0x510510
 | 
					#define SH_MAGIC	0x510510
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -103,10 +108,10 @@ typedef struct {
 | 
				
			||||||
  int num_nodes;
 | 
					  int num_nodes;
 | 
				
			||||||
  int num_procs;
 | 
					  int num_procs;
 | 
				
			||||||
  int final_num_procs;
 | 
					  int final_num_procs;
 | 
				
			||||||
  unsigned long avail;
 | 
					  unsigned long avail [MAX_BITMASK_LEN];
 | 
				
			||||||
  
 | 
					  int avail_count;
 | 
				
			||||||
  unsigned long cpu_info   [MAX_CPUS];
 | 
					  unsigned long cpu_info   [MAX_CPUS];
 | 
				
			||||||
  unsigned long node_info  [MAX_NODES];
 | 
					  unsigned long node_info  [MAX_NODES][MAX_BITMASK_LEN];
 | 
				
			||||||
  int cpu_use[MAX_CPUS];
 | 
					  int cpu_use[MAX_CPUS];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} shm_t;
 | 
					} shm_t;
 | 
				
			||||||
| 
						 | 
					@ -126,7 +131,8 @@ static shm_t *common = (void *)-1;
 | 
				
			||||||
static int shmid, pshmid;
 | 
					static int shmid, pshmid;
 | 
				
			||||||
static void *paddr;
 | 
					static void *paddr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static unsigned long lprocmask, lnodemask;
 | 
					static unsigned long lprocmask[MAX_BITMASK_LEN], lnodemask;
 | 
				
			||||||
 | 
					static int lprocmask_count = 0;
 | 
				
			||||||
static int numprocs = 1;
 | 
					static int numprocs = 1;
 | 
				
			||||||
static int numnodes = 1;
 | 
					static int numnodes = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -177,70 +183,114 @@ static inline int rcount(unsigned long number) {
 | 
				
			||||||
  than sizeof(unsigned long). On 64 bits, the limit 
 | 
					  than sizeof(unsigned long). On 64 bits, the limit 
 | 
				
			||||||
  is 64. On 32 bits, it is 32.
 | 
					  is 64. On 32 bits, it is 32.
 | 
				
			||||||
***/
 | 
					***/
 | 
				
			||||||
static inline unsigned long get_cpumap(int node) {
 | 
					static inline void get_cpumap(int node, unsigned long * node_info) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int infile;
 | 
					  int infile;
 | 
				
			||||||
  unsigned long affinity;
 | 
					  unsigned long affinity[32];
 | 
				
			||||||
  char name[160];
 | 
					  char name[160];
 | 
				
			||||||
  char cpumap[160];
 | 
					  char cpumap[160];
 | 
				
			||||||
  char *p, *dummy;
 | 
					  char *dummy;
 | 
				
			||||||
  int i=0;
 | 
					  int i=0;
 | 
				
			||||||
 | 
					  int count=0;
 | 
				
			||||||
 | 
					  int k=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  sprintf(name, CPUMAP_NAME, node);
 | 
					  sprintf(name, CPUMAP_NAME, node);
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  infile = open(name, O_RDONLY);
 | 
					  infile = open(name, O_RDONLY);
 | 
				
			||||||
 | 
					  for(i=0; i<32; i++){
 | 
				
			||||||
  affinity = 0;
 | 
					    affinity[i] = 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (infile != -1) {
 | 
					  if (infile != -1) {
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    read(infile, cpumap, sizeof(cpumap));
 | 
					    read(infile, cpumap, sizeof(cpumap));
 | 
				
			||||||
    p = cpumap;
 | 
					
 | 
				
			||||||
    while (*p != '\n' && i<160){
 | 
					    for(i=0; i<160; i++){
 | 
				
			||||||
      if(*p != ',') {
 | 
					      if(cpumap[i] == '\n')
 | 
				
			||||||
	name[i++]=*p;
 | 
						break;
 | 
				
			||||||
 | 
					      if(cpumap[i] != ','){
 | 
				
			||||||
 | 
						name[k++]=cpumap[i];
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						//Enough data for Hex
 | 
				
			||||||
 | 
						if(k >= NCPUBITS/4){
 | 
				
			||||||
 | 
						  affinity[count++] = strtoul(name, &dummy, 16);
 | 
				
			||||||
 | 
						  k=0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
      p++;
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    p = name;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    //    while ((*p == '0') || (*p == ',')) p++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    affinity = strtoul(p, &dummy, 16);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if(k!=0){
 | 
				
			||||||
 | 
					      name[k]='\0';
 | 
				
			||||||
 | 
					      affinity[count++] = strtoul(name, &dummy, 16);
 | 
				
			||||||
 | 
					      k=0;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // 0-63bit -> node_info[0], 64-128bit -> node_info[1] ....
 | 
				
			||||||
 | 
					    // revert the sequence
 | 
				
			||||||
 | 
					    for(i=0; i<count && i<MAX_BITMASK_LEN; i++){
 | 
				
			||||||
 | 
					      node_info[i]=affinity[count-i-1];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
    close(infile);
 | 
					    close(infile);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return affinity;
 | 
					  return ;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline unsigned long get_share(int cpu, int level) {
 | 
					static inline void get_share(int cpu, int level, unsigned long * share) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int infile;
 | 
					  int infile;
 | 
				
			||||||
  unsigned long affinity;
 | 
					  unsigned long affinity[32];
 | 
				
			||||||
 | 
					  char cpumap[160];
 | 
				
			||||||
  char name[160];
 | 
					  char name[160];
 | 
				
			||||||
  char *p;
 | 
					  char *dummy;
 | 
				
			||||||
 | 
					  int count=0;
 | 
				
			||||||
 | 
					  int i=0,k=0;
 | 
				
			||||||
 | 
					  int bitmask_idx = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  sprintf(name, SHARE_NAME, cpu, level);
 | 
					  sprintf(name, SHARE_NAME, cpu, level);
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  infile = open(name, O_RDONLY);
 | 
					  infile = open(name, O_RDONLY);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  affinity = (1UL << cpu);
 | 
					  //  Init share
 | 
				
			||||||
 | 
					  for(i=0; i<MAX_BITMASK_LEN; i++){
 | 
				
			||||||
 | 
					    share[i]=0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  bitmask_idx = CPUELT(cpu);
 | 
				
			||||||
 | 
					  share[bitmask_idx] = CPUMASK(cpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (infile != -1) {
 | 
					  if (infile != -1) {
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    read(infile, name, sizeof(name));
 | 
					    read(infile, cpumap, sizeof(cpumap));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    p = name;
 | 
					    for(i=0; i<160; i++){
 | 
				
			||||||
 | 
					      if(cpumap[i] == '\n')
 | 
				
			||||||
 | 
						break;
 | 
				
			||||||
 | 
					      if(cpumap[i] != ','){
 | 
				
			||||||
 | 
						name[k++]=cpumap[i];
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
    while ((*p == '0') || (*p == ',')) p++;
 | 
						//Enough data 
 | 
				
			||||||
 | 
						if(k >= NCPUBITS/4){
 | 
				
			||||||
 | 
						  affinity[count++] = strtoul(name, &dummy, 16);
 | 
				
			||||||
 | 
						  k=0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if(k!=0){
 | 
				
			||||||
 | 
					      name[k]='\0';
 | 
				
			||||||
 | 
					      affinity[count++] = strtoul(name, &dummy, 16);
 | 
				
			||||||
 | 
					      k=0;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // 0-63bit -> node_info[0], 64-128bit -> node_info[1] ....
 | 
				
			||||||
 | 
					    // revert the sequence
 | 
				
			||||||
 | 
					    for(i=0; i<count && i<MAX_BITMASK_LEN; i++){
 | 
				
			||||||
 | 
					      share[i]=affinity[count-i-1];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
   
 | 
					   
 | 
				
			||||||
    affinity = strtol(p, &p, 16);
 | 
					 | 
				
			||||||
   
 | 
					   
 | 
				
			||||||
    close(infile);
 | 
					    close(infile);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return affinity;
 | 
					  return ;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int numa_check(void) {
 | 
					static int numa_check(void) {
 | 
				
			||||||
| 
						 | 
					@ -248,6 +298,7 @@ static int numa_check(void) {
 | 
				
			||||||
  DIR *dp;
 | 
					  DIR *dp;
 | 
				
			||||||
  struct dirent *dir;
 | 
					  struct dirent *dir;
 | 
				
			||||||
  int node;
 | 
					  int node;
 | 
				
			||||||
 | 
					  int j;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  common -> num_nodes = 0;
 | 
					  common -> num_nodes = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -258,7 +309,9 @@ static int numa_check(void) {
 | 
				
			||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for (node = 0; node < MAX_NODES; node ++) common -> node_info[node] = 0;
 | 
					  for (node = 0; node < MAX_NODES; node ++) {
 | 
				
			||||||
 | 
					    for (j = 0; j<MAX_BITMASK_LEN; j++) common -> node_info[node][j] = 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  while ((dir = readdir(dp)) != NULL) {
 | 
					  while ((dir = readdir(dp)) != NULL) {
 | 
				
			||||||
    if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
 | 
					    if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
 | 
				
			||||||
| 
						 | 
					@ -266,12 +319,12 @@ static int numa_check(void) {
 | 
				
			||||||
      node = atoi(&dir -> d_name[4]);
 | 
					      node = atoi(&dir -> d_name[4]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      if (node > MAX_NODES) {
 | 
					      if (node > MAX_NODES) {
 | 
				
			||||||
	fprintf(stderr, "\nGotoBLAS Warining : MAX_NODES (NUMA) is too small. Terminated.\n");
 | 
						fprintf(stderr, "\nOpenBLAS Warning : MAX_NODES (NUMA) is too small. Terminated.\n");
 | 
				
			||||||
	exit(1);
 | 
						exit(1);
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      common -> num_nodes ++;
 | 
					      common -> num_nodes ++;
 | 
				
			||||||
      common -> node_info[node] = get_cpumap(node);
 | 
					      get_cpumap(node, common->node_info[node]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
| 
						 | 
					@ -284,7 +337,7 @@ static int numa_check(void) {
 | 
				
			||||||
  fprintf(stderr, "Numa found : number of Nodes = %2d\n", common -> num_nodes);
 | 
					  fprintf(stderr, "Numa found : number of Nodes = %2d\n", common -> num_nodes);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for (node = 0; node < common -> num_nodes; node ++)
 | 
					  for (node = 0; node < common -> num_nodes; node ++)
 | 
				
			||||||
    fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node]);
 | 
					    fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node][0]);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return common -> num_nodes;
 | 
					  return common -> num_nodes;
 | 
				
			||||||
| 
						 | 
					@ -296,11 +349,13 @@ static void numa_mapping(void) {
 | 
				
			||||||
  int i, j, h;
 | 
					  int i, j, h;
 | 
				
			||||||
  unsigned long work, bit;
 | 
					  unsigned long work, bit;
 | 
				
			||||||
  int count = 0;
 | 
					  int count = 0;
 | 
				
			||||||
 | 
					  int bitmask_idx = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for (node = 0; node < common -> num_nodes; node ++) {
 | 
					  for (node = 0; node < common -> num_nodes; node ++) {
 | 
				
			||||||
    core = 0;
 | 
					    core = 0;
 | 
				
			||||||
    for (cpu = 0; cpu < common -> num_procs; cpu ++) {
 | 
					    for (cpu = 0; cpu < common -> num_procs; cpu ++) {
 | 
				
			||||||
      if (common -> node_info[node] & common -> avail & (1UL << cpu)) {
 | 
					      bitmask_idx = CPUELT(cpu);
 | 
				
			||||||
 | 
					      if (common -> node_info[node][bitmask_idx] & common -> avail[bitmask_idx] & CPUMASK(cpu)) {
 | 
				
			||||||
	common -> cpu_info[count] = WRITE_CORE(core) | WRITE_NODE(node) | WRITE_CPU(cpu);
 | 
						common -> cpu_info[count] = WRITE_CORE(core) | WRITE_NODE(node) | WRITE_CPU(cpu);
 | 
				
			||||||
	count ++;
 | 
						count ++;
 | 
				
			||||||
	core ++;
 | 
						core ++;
 | 
				
			||||||
| 
						 | 
					@ -357,58 +412,89 @@ static void numa_mapping(void) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void disable_hyperthread(void) {
 | 
					static void disable_hyperthread(void) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  unsigned long share;
 | 
					  unsigned long share[MAX_BITMASK_LEN];
 | 
				
			||||||
  int cpu;
 | 
					  int cpu;
 | 
				
			||||||
 | 
					  int bitmask_idx = 0;
 | 
				
			||||||
 | 
					  int i=0, count=0;
 | 
				
			||||||
 | 
					  bitmask_idx = CPUELT(common -> num_procs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if(common->num_procs > 64){
 | 
					  for(i=0; i< bitmask_idx; i++){
 | 
				
			||||||
    fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs);
 | 
					    common -> avail[count++] = 0xFFFFFFFFFFFFFFFFUL;
 | 
				
			||||||
    exit(1);
 | 
					  }
 | 
				
			||||||
  }else if(common->num_procs == 64){
 | 
					  if(CPUMASK(common -> num_procs) != 1){
 | 
				
			||||||
    common -> avail = 0xFFFFFFFFFFFFFFFFUL;
 | 
					    common -> avail[count++] = CPUMASK(common -> num_procs) - 1;
 | 
				
			||||||
  }else
 | 
					  }
 | 
				
			||||||
    common -> avail = (1UL << common -> num_procs) - 1;
 | 
					  common -> avail_count = count;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /* if(common->num_procs > 64){ */
 | 
				
			||||||
 | 
					  /*   fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); */
 | 
				
			||||||
 | 
					  /*   exit(1); */
 | 
				
			||||||
 | 
					  /* }else if(common->num_procs == 64){ */
 | 
				
			||||||
 | 
					  /*   common -> avail = 0xFFFFFFFFFFFFFFFFUL; */
 | 
				
			||||||
 | 
					  /* }else */
 | 
				
			||||||
 | 
					  /*   common -> avail = (1UL << common -> num_procs) - 1; */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef DEBUG
 | 
					#ifdef DEBUG
 | 
				
			||||||
  fprintf(stderr, "\nAvail CPUs    : %04lx.\n", common -> avail);
 | 
					  fprintf(stderr, "\nAvail CPUs    : ");
 | 
				
			||||||
 | 
					  for(i=0; i<count; i++)
 | 
				
			||||||
 | 
					    fprintf(stderr, "%04lx ", common -> avail[i]);
 | 
				
			||||||
 | 
					  fprintf(stderr, ".\n");
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for (cpu = 0; cpu < common -> num_procs; cpu ++) {
 | 
					  for (cpu = 0; cpu < common -> num_procs; cpu ++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    share = (get_share(cpu, 1) & common -> avail);
 | 
					    get_share(cpu, 1, share);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (popcount(share) > 1) {
 | 
					    //When the shared cpu are in different element of share & avail array, this may be a bug.
 | 
				
			||||||
 | 
					    for (i = 0; i < count ; i++){
 | 
				
			||||||
 | 
					      if (popcount(share[i]) > 1) {
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
#ifdef DEBUG
 | 
					#ifdef DEBUG
 | 
				
			||||||
	fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
 | 
						fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
 | 
				
			||||||
	      cpu, share & ~(1UL << cpu));
 | 
							cpu, share[i] & ~(CPUMASK(cpu)));
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      common -> avail &= ~((share & ~(1UL << cpu)));
 | 
						common -> avail[i] &= ~((share[i] & ~ CPUMASK(cpu)));
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void disable_affinity(void) {
 | 
					static void disable_affinity(void) {
 | 
				
			||||||
 | 
					  int i=0;
 | 
				
			||||||
 | 
					  int bitmask_idx=0;
 | 
				
			||||||
 | 
					  int count=0;
 | 
				
			||||||
#ifdef DEBUG
 | 
					#ifdef DEBUG
 | 
				
			||||||
    fprintf(stderr, "Final all available CPUs  : %04lx.\n\n", common -> avail);
 | 
					    fprintf(stderr, "Final all available CPUs  : %04lx.\n\n", common -> avail[0]);
 | 
				
			||||||
    fprintf(stderr, "CPU mask                  : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
 | 
					    fprintf(stderr, "CPU mask                  : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if(common->final_num_procs > 64){
 | 
					  /* if(common->final_num_procs > 64){ */
 | 
				
			||||||
    fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs);
 | 
					  /*   fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs); */
 | 
				
			||||||
    exit(1);
 | 
					  /*   exit(1); */
 | 
				
			||||||
  }else if(common->final_num_procs == 64){
 | 
					  /* }else if(common->final_num_procs == 64){ */
 | 
				
			||||||
    lprocmask = 0xFFFFFFFFFFFFFFFFUL;
 | 
					  /*   lprocmask = 0xFFFFFFFFFFFFFFFFUL; */
 | 
				
			||||||
  }else
 | 
					  /* }else */
 | 
				
			||||||
    lprocmask = (1UL << common -> final_num_procs) - 1;
 | 
					  /*   lprocmask = (1UL << common -> final_num_procs) - 1; */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  bitmask_idx = CPUELT(common -> final_num_procs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(i=0; i< bitmask_idx; i++){
 | 
				
			||||||
 | 
					    lprocmask[count++] = 0xFFFFFFFFFFFFFFFFUL;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(CPUMASK(common -> final_num_procs) != 1){
 | 
				
			||||||
 | 
					    lprocmask[count++] = CPUMASK(common -> final_num_procs) - 1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  lprocmask_count = count;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef USE_OPENMP
 | 
					#ifndef USE_OPENMP
 | 
				
			||||||
  lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
 | 
					  for(i=0; i< count; i++){
 | 
				
			||||||
 | 
					    lprocmask[i] &= ((unsigned long *)&cpu_orig_mask[0])[i];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef DEBUG
 | 
					#ifdef DEBUG
 | 
				
			||||||
    fprintf(stderr, "I choose these CPUs  : %04lx.\n\n", lprocmask);
 | 
					    fprintf(stderr, "I choose these CPUs  : %04lx.\n\n", lprocmask[0]);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -498,7 +584,7 @@ static void create_pshmem(void) {
 | 
				
			||||||
static void local_cpu_map(void) {
 | 
					static void local_cpu_map(void) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int cpu, id, mapping;
 | 
					  int cpu, id, mapping;
 | 
				
			||||||
 | 
					  int bitmask_idx = 0;
 | 
				
			||||||
  cpu = 0;
 | 
					  cpu = 0;
 | 
				
			||||||
  mapping = 0;
 | 
					  mapping = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -509,7 +595,8 @@ static void local_cpu_map(void) {
 | 
				
			||||||
      if (is_dead(id)) common -> cpu_use[cpu] = 0;
 | 
					      if (is_dead(id)) common -> cpu_use[cpu] = 0;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    if ((common -> cpu_use[cpu] == 0) && (lprocmask & (1UL << cpu))) {
 | 
					    bitmask_idx = CPUELT(cpu);
 | 
				
			||||||
 | 
					    if ((common -> cpu_use[cpu] == 0) && (lprocmask[bitmask_idx] & CPUMASK(cpu))) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      common -> cpu_use[cpu] = pshmid;
 | 
					      common -> cpu_use[cpu] = pshmid;
 | 
				
			||||||
      cpu_mapping[mapping] = READ_CPU(common -> cpu_info[cpu]);
 | 
					      cpu_mapping[mapping] = READ_CPU(common -> cpu_info[cpu]);
 | 
				
			||||||
| 
						 | 
					@ -595,6 +682,7 @@ void gotoblas_affinity_init(void) {
 | 
				
			||||||
#ifndef USE_OPENMP
 | 
					#ifndef USE_OPENMP
 | 
				
			||||||
  cpu_set_t cpu_mask;
 | 
					  cpu_set_t cpu_mask;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					  int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (initialized) return;
 | 
					  if (initialized) return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -646,6 +734,11 @@ void gotoblas_affinity_init(void) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    common -> num_procs = get_nprocs();
 | 
					    common -> num_procs = get_nprocs();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(common -> num_procs > MAX_CPUS) {
 | 
				
			||||||
 | 
					      fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
 | 
				
			||||||
 | 
					      exit(1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (cpu = 0; cpu < common -> num_procs; cpu++) common -> cpu_info[cpu] = cpu;
 | 
					    for (cpu = 0; cpu < common -> num_procs; cpu++) common -> cpu_info[cpu] = cpu;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    numa_check();
 | 
					    numa_check();
 | 
				
			||||||
| 
						 | 
					@ -654,7 +747,8 @@ void gotoblas_affinity_init(void) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (common -> num_nodes > 1) numa_mapping();
 | 
					    if (common -> num_nodes > 1) numa_mapping();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    common -> final_num_procs = popcount(common -> avail);
 | 
					    common -> final_num_procs = 0;
 | 
				
			||||||
 | 
					    for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += popcount(common -> avail[i]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] =  0;
 | 
					    for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] =  0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -664,7 +758,8 @@ void gotoblas_affinity_init(void) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  disable_affinity();
 | 
					  disable_affinity();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  num_avail = popcount(lprocmask);
 | 
					  num_avail = 0;
 | 
				
			||||||
 | 
					  for(i=0; i<lprocmask_count; i++) num_avail += popcount(lprocmask[i]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if ((numprocs <= 0) || (numprocs > num_avail)) numprocs = num_avail;
 | 
					  if ((numprocs <= 0) || (numprocs > num_avail)) numprocs = num_avail;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue