Merge pull request #863 from ashwinyes/develop_20160429_update_numa_binding

Update NUMA CPU binding
This commit is contained in:
Zhang Xianyi 2016-04-29 11:46:24 -04:00
commit 2df60f7315
1 changed file with 81 additions and 24 deletions

View File

@ -361,6 +361,9 @@ static void numa_mapping(void) {
unsigned long work, bit; unsigned long work, bit;
int count = 0; int count = 0;
int bitmask_idx = 0; int bitmask_idx = 0;
int current_cpu;
int current_node = 0;
int cpu_count = 0;
for (node = 0; node < common -> num_nodes; node ++) { for (node = 0; node < common -> num_nodes; node ++) {
core = 0; core = 0;
@ -382,33 +385,84 @@ static void numa_mapping(void) {
fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]); fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]);
#endif #endif
h = 1; current_cpu = sched_getcpu();
for (cpu = 0; cpu < count; cpu++) {
if (READ_CPU(common -> cpu_info[cpu]) == current_cpu) {
current_node = READ_NODE(common -> cpu_info[cpu]);
break;
}
}
for (i = 0; i < MAX_BITMASK_LEN; i++)
cpu_count += popcount(common -> node_info[current_node][i] & common -> avail[i]);
while (h < count) h = 2 * h + 1; /*
* If all the processes can be accommodated in the
* current node itself, then bind to cores
* from the current node only
*/
if (numprocs <= cpu_count) {
/*
* First sort all the cores in order from the current node.
* Then take remaining nodes one by one in order,
* and sort their cores in order.
*/
for (i = 0; i < count; i++) {
for (j = 0; j < count - 1; j++) {
int node_1, node_2;
int core_1, core_2;
int swap = 0;
while (h > 1) { node_1 = READ_NODE(common -> cpu_info[j]);
h /= 2; node_2 = READ_NODE(common -> cpu_info[j + 1]);
for (i = h; i < count; i++) { core_1 = READ_CORE(common -> cpu_info[j]);
work = common -> cpu_info[i]; core_2 = READ_CORE(common -> cpu_info[j + 1]);
bit = CPU_ISSET(i, &cpu_orig_mask[0]);
j = i - h; if (node_1 == node_2) {
while (work < common -> cpu_info[j]) { if (core_1 > core_2)
common -> cpu_info[j + h] = common -> cpu_info[j]; swap = 1;
if (CPU_ISSET(j, &cpu_orig_mask[0])) { } else {
CPU_SET(j + h, &cpu_orig_mask[0]); if ((node_2 == current_node) ||
} else { ((node_1 != current_node) && (node_1 > node_2)))
CPU_CLR(j + h, &cpu_orig_mask[0]); swap = 1;
} }
j -= h; if (swap) {
if (j < 0) break; unsigned long temp;
}
common -> cpu_info[j + h] = work; temp = common->cpu_info[j];
if (bit) { common->cpu_info[j] = common->cpu_info[j + 1];
CPU_SET(j + h, &cpu_orig_mask[0]); common->cpu_info[j + 1] = temp;
} else { }
CPU_CLR(j + h, &cpu_orig_mask[0]);
} }
}
} else {
h = 1;
while (h < count) h = 2 * h + 1;
while (h > 1) {
h /= 2;
for (i = h; i < count; i++) {
work = common -> cpu_info[i];
bit = CPU_ISSET(i, &cpu_orig_mask[0]);
j = i - h;
while (work < common -> cpu_info[j]) {
common -> cpu_info[j + h] = common -> cpu_info[j];
if (CPU_ISSET(j, &cpu_orig_mask[0])) {
CPU_SET(j + h, &cpu_orig_mask[0]);
} else {
CPU_CLR(j + h, &cpu_orig_mask[0]);
}
j -= h;
if (j < 0) break;
}
common -> cpu_info[j + h] = work;
if (bit) {
CPU_SET(j + h, &cpu_orig_mask[0]);
} else {
CPU_CLR(j + h, &cpu_orig_mask[0]);
}
}
} }
} }
@ -416,7 +470,10 @@ static void numa_mapping(void) {
fprintf(stderr, "\nSorting ...\n\n"); fprintf(stderr, "\nSorting ...\n\n");
for (cpu = 0; cpu < count; cpu++) for (cpu = 0; cpu < count; cpu++)
fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]); fprintf(stderr, "CPUINFO (%2d) : %08lx (CPU=%3lu CORE=%3lu NODE=%3lu)\n", cpu, common -> cpu_info[cpu],
READ_CPU(common -> cpu_info[cpu]),
READ_CORE(common -> cpu_info[cpu]),
READ_NODE(common -> cpu_info[cpu]));
#endif #endif
} }