summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2015-03-20 10:37:03 +0000
committerPeter Maydell <peter.maydell@linaro.org>2015-03-20 10:37:03 +0000
commit4eef86486d4090d7587e94d3f1a2203b94899989 (patch)
treed0eed668a15a46c812db9ad4eda03bbd3f8d01d1
parente7e9b49f8e9ea4c5c9d07f6d8c9071c64dae816a (diff)
parent549fc54b8cfe16a475d8f6b8f838e53b45452b4a (diff)
downloadqemu-4eef86486d4090d7587e94d3f1a2203b94899989.tar.gz
Merge remote-tracking branch 'remotes/ehabkost/tags/work/numa-verify-cpus-pull-request' into staging
NUMA queue 2015-03-19

# gpg: Signature made Thu Mar 19 19:25:53 2015 GMT using RSA key ID 984DC5A6
# gpg: Can't check signature: public key not found

* remotes/ehabkost/tags/work/numa-verify-cpus-pull-request:
  numa: Print warning if no node is assigned to a CPU
  pc: fix default VCPU to NUMA node mapping
  numa: introduce machine callback for VCPU to node mapping
  numa: Reject configuration if CPU appears on multiple nodes
  numa: Reject CPU indexes > max_cpus
  numa: Fix off-by-one error at MAX_CPUMASK_BITS check

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--hw/i386/pc.c9
-rw-r--r--include/hw/boards.h5
-rw-r--r--include/sysemu/numa.h3
-rw-r--r--numa.c73
-rw-r--r--vl.c2
5 files changed, 82 insertions, 10 deletions
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 4b46c299c3..a52d2aff7c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1851,6 +1851,14 @@ static void pc_machine_initfn(Object *obj)
NULL, NULL);
}
+/*
+ * Return the package (socket) ID that x86_topo_ids_from_idx() computes
+ * for @cpu_index under the current smp_cores/smp_threads settings.
+ * The core and thread IDs it also produces are discarded.
+ */
+static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index)
+{
+    unsigned socket, core, thread;
+
+    x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index,
+                          &socket, &core, &thread);
+    return socket;
+}
+
static void pc_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -1859,6 +1867,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->get_hotplug_handler = mc->get_hotplug_handler;
mc->get_hotplug_handler = pc_get_hotpug_handler;
+ mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
hc->plug = pc_machine_device_plug_cb;
hc->unplug_request = pc_machine_device_unplug_request_cb;
hc->unplug = pc_machine_device_unplug_cb;
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 1feea2b176..78838d13d4 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -82,6 +82,10 @@ bool machine_mem_merge(MachineState *machine);
* of HotplugHandler object, which handles hotplug operation
* for a given @dev. It may return NULL if @dev doesn't require
* any actions to be performed by hotplug handler.
+ * @cpu_index_to_socket_id:
+ * maps a @cpu_index to a socket number, allowing a machine to group
+ * CPU threads that belong to the same socket/package.
+ * Returns: the socket number that the given @cpu_index belongs to.
*/
struct MachineClass {
/*< private >*/
@@ -118,6 +122,7 @@ struct MachineClass {
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
DeviceState *dev);
+ unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
};
/**
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 5633b856a8..6523b4d7f9 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -6,6 +6,7 @@
#include "qemu/option.h"
#include "sysemu/sysemu.h"
#include "sysemu/hostmem.h"
+#include "hw/boards.h"
extern int nb_numa_nodes; /* Number of NUMA nodes */
@@ -16,7 +17,7 @@ typedef struct node_info {
bool present;
} NodeInfo;
extern NodeInfo numa_info[MAX_NODES];
-void parse_numa_opts(void);
+void parse_numa_opts(MachineClass *mc);
void numa_post_machine_init(void);
void query_numa_node_mem(uint64_t node_mem[]);
extern QemuOptsList qemu_numa_opts;
diff --git a/numa.c b/numa.c
index ffbec68fd8..c975fb2682 100644
--- a/numa.c
+++ b/numa.c
@@ -76,9 +76,11 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
}
for (cpus = node->cpus; cpus; cpus = cpus->next) {
- if (cpus->value > MAX_CPUMASK_BITS) {
- error_setg(errp, "CPU number %" PRIu16 " is bigger than %d",
- cpus->value, MAX_CPUMASK_BITS);
+ if (cpus->value >= max_cpus) {
+ error_setg(errp,
+ "CPU index (%" PRIu16 ")"
+ " should be smaller than maxcpus (%d)",
+ cpus->value, max_cpus);
return;
}
bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
@@ -165,7 +167,52 @@ error:
return -1;
}
-void parse_numa_opts(void)
+/*
+ * Build a space-separated list of the indexes of the bits set in @cpus,
+ * considering only bit positions below @max_cpus.
+ *
+ * Returns: a newly allocated string; the caller releases it with g_free().
+ */
+static char *enumerate_cpus(unsigned long *cpus, int max_cpus)
+{
+    GString *list = g_string_new(NULL);
+    const char *sep = "";
+    int bit = find_first_bit(cpus, max_cpus);
+
+    /* find_*_bit() yield a value >= max_cpus once no set bit is left */
+    while (bit < max_cpus) {
+        g_string_append_printf(list, "%s%d", sep, bit);
+        sep = " ";
+        bit = find_next_bit(cpus, max_cpus, bit + 1);
+    }
+
+    /* FALSE: keep the character data and hand it to the caller */
+    return g_string_free(list, FALSE);
+}
+
+/*
+ * Check the CPU-to-node assignment stored in numa_info[].
+ *
+ * If any CPU is set in the node_cpu bitmap of more than one node, an
+ * error listing the offending CPUs is reported and the process exits
+ * with EXIT_FAILURE.  If some CPU index below max_cpus is not set in
+ * any node, only a warning listing those CPUs is printed.
+ */
+static void validate_numa_cpus(void)
+{
+    int i;
+    char *msg;
+    DECLARE_BITMAP(seen_cpus, MAX_CPUMASK_BITS);
+
+    bitmap_zero(seen_cpus, MAX_CPUMASK_BITS);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        if (bitmap_intersects(seen_cpus, numa_info[i].node_cpu,
+                              MAX_CPUMASK_BITS)) {
+            /* Narrow seen_cpus down to just the CPUs claimed twice */
+            bitmap_and(seen_cpus, seen_cpus,
+                       numa_info[i].node_cpu, MAX_CPUMASK_BITS);
+            msg = enumerate_cpus(seen_cpus, max_cpus);
+            error_report("CPU(s) present in multiple NUMA nodes: %s", msg);
+            g_free(msg);
+            exit(EXIT_FAILURE);
+        }
+        bitmap_or(seen_cpus, seen_cpus,
+                  numa_info[i].node_cpu, MAX_CPUMASK_BITS);
+    }
+
+    if (!bitmap_full(seen_cpus, max_cpus)) {
+        /* Invert the mask so it holds the CPUs no node has claimed */
+        bitmap_complement(seen_cpus, seen_cpus, max_cpus);
+        msg = enumerate_cpus(seen_cpus, max_cpus);
+        error_report("warning: CPU(s) not present in any NUMA nodes: %s", msg);
+        error_report("warning: All CPU(s) up to maxcpus should be described "
+                     "in NUMA config");
+        g_free(msg);
+    }
+}
+
+void parse_numa_opts(MachineClass *mc)
{
int i;
@@ -233,15 +280,25 @@ void parse_numa_opts(void)
break;
}
}
- /* assigning the VCPUs round-robin is easier to implement, guest OSes
- * must cope with this anyway, because there are BIOSes out there in
- * real machines which also use this scheme.
+ /* Historically, VCPUs were assigned to NUMA nodes in round-robin
+ * order. However, this causes issues with guests that do not handle
+ * it well when cores/threads from a multicore CPU appear on
+ * different nodes. So allow boards to override the default
+ * distribution rule and group VCPUs by socket, so that VCPUs from
+ * the same socket end up on the same node.
*/
if (i == nb_numa_nodes) {
for (i = 0; i < max_cpus; i++) {
- set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
+ unsigned node_id = i % nb_numa_nodes;
+ if (mc->cpu_index_to_socket_id) {
+ node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes;
+ }
+
+ set_bit(i, numa_info[node_id].node_cpu);
}
}
+
+ validate_numa_cpus();
}
}
diff --git a/vl.c b/vl.c
index 69617d640a..75ec292216 100644
--- a/vl.c
+++ b/vl.c
@@ -4170,7 +4170,7 @@ int main(int argc, char **argv, char **envp)
default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
- parse_numa_opts();
+ parse_numa_opts(machine_class);
if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
exit(1);