summaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2017-05-15 14:11:55 +0100
committerStefan Hajnoczi <stefanha@redhat.com>2017-05-15 14:12:03 +0100
commitba9915e1f87fec742775d64859e881e4ab611429 (patch)
treee30831b545b12c20525707f4526a534989d44f22 /hw
parent43ad494c0439e0af9f77cd455ec1a217a05b8fc0 (diff)
parent08b277ac46da8b02e50cec455eca7cb2d12ffcf0 (diff)
downloadqemu-ba9915e1f87fec742775d64859e881e4ab611429.tar.gz
Merge remote-tracking branch 'ehabkost/tags/x86-and-machine-pull-request' into staging
x86 and machine queue, 2017-05-11 Highlights: * New "-numa cpu" option * NUMA distance configuration * migration/i386 vmstatification # gpg: Signature made Thu 11 May 2017 08:16:07 PM BST # gpg: using RSA key 0x2807936F984DC5A6 # gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>" # gpg: Note: This key has expired! # Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF D1AA 2807 936F 984D C5A6 * ehabkost/tags/x86-and-machine-pull-request: (29 commits) migration/i386: Remove support for pre-0.12 formats vmstatification: i386 FPReg migration/i386: Remove old non-softfloat 64bit FP support tests: check -numa node,cpu=props_list usecase numa: add '-numa cpu,...' option for property based node mapping numa: remove node_cpu bitmaps as they are no longer used numa: use possible_cpus for not mapped CPUs check machine: call machine init from wrapper numa: remove no longer need numa_post_machine_init() tests: numa: add case for QMP command query-cpus QMP: include CpuInstanceProperties into query_cpus output output virt-arm: get numa node mapping from possible_cpus instead of numa_get_node_for_cpu() spapr: get numa node mapping from possible_cpus instead of numa_get_node_for_cpu() pc: get numa node mapping from possible_cpus instead of numa_get_node_for_cpu() numa: do default mapping based on possible_cpus instead of node_cpu bitmaps numa: mirror cpu to node mapping in MachineState::possible_cpus numa: add check that board supports cpu_index to node mapping virt-arm: add node-id property to CPU pc: add node-id property to CPU spapr: add node-id property to sPAPR core ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r--hw/acpi/aml-build.c26
-rw-r--r--hw/acpi/cpu.c7
-rw-r--r--hw/arm/virt-acpi-build.c19
-rw-r--r--hw/arm/virt.c123
-rw-r--r--hw/core/machine.c162
-rw-r--r--hw/i386/acpi-build.c15
-rw-r--r--hw/i386/pc.c54
-rw-r--r--hw/i386/pc_piix.c2
-rw-r--r--hw/i386/pc_q35.c2
-rw-r--r--hw/ppc/spapr.c46
-rw-r--r--hw/ppc/spapr_cpu_core.c21
11 files changed, 400 insertions, 77 deletions
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index c6f2032dec..be496c817c 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -24,6 +24,7 @@
#include "hw/acpi/aml-build.h"
#include "qemu/bswap.h"
#include "qemu/bitops.h"
+#include "sysemu/numa.h"
static GArray *build_alloc_array(void)
{
@@ -1609,3 +1610,28 @@ void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
numamem->base_addr = cpu_to_le64(base);
numamem->range_length = cpu_to_le64(len);
}
+
+/*
+ * ACPI spec 5.2.17 System Locality Distance Information Table
+ * (Revision 2.0 or later)
+ */
+void build_slit(GArray *table_data, BIOSLinker *linker)
+{
+ int slit_start, i, j;
+ slit_start = table_data->len;
+
+ acpi_data_push(table_data, sizeof(AcpiTableHeader));
+
+ build_append_int_noprefix(table_data, nb_numa_nodes, 8);
+ for (i = 0; i < nb_numa_nodes; i++) {
+ for (j = 0; j < nb_numa_nodes; j++) {
+ assert(numa_info[i].distance[j]);
+ build_append_int_noprefix(table_data, numa_info[i].distance[j], 1);
+ }
+ }
+
+ build_header(linker, table_data,
+ (void *)(table_data->data + slit_start),
+ "SLIT",
+ table_data->len - slit_start, 1, NULL, NULL);
+}
diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 8c719d3f9d..a233fe17cf 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -503,7 +503,6 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
/* build Processor object for each processor */
for (i = 0; i < arch_ids->len; i++) {
- int j;
Aml *dev;
Aml *uid = aml_int(i);
GArray *madt_buf = g_array_new(0, 1, 1);
@@ -557,9 +556,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
* as a result _PXM is required for all CPUs which might
* be hot-plugged. For simplicity, add it for all CPUs.
*/
- j = numa_get_node_for_cpu(i);
- if (j < nb_numa_nodes) {
- aml_append(dev, aml_name_decl("_PXM", aml_int(j)));
+ if (arch_ids->cpus[i].props.has_node_id) {
+ aml_append(dev, aml_name_decl("_PXM",
+ aml_int(arch_ids->cpus[i].props.node_id)));
}
aml_append(cpus_dev, dev);
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 0835e59bb2..ce7499c9ca 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -486,30 +486,25 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
AcpiSystemResourceAffinityTable *srat;
AcpiSratProcessorGiccAffinity *core;
AcpiSratMemoryAffinity *numamem;
- int i, j, srat_start;
+ int i, srat_start;
uint64_t mem_base;
- uint32_t *cpu_node = g_malloc0(vms->smp_cpus * sizeof(uint32_t));
-
- for (i = 0; i < vms->smp_cpus; i++) {
- j = numa_get_node_for_cpu(i);
- if (j < nb_numa_nodes) {
- cpu_node[i] = j;
- }
- }
+ MachineClass *mc = MACHINE_GET_CLASS(vms);
+ const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(MACHINE(vms));
srat_start = table_data->len;
srat = acpi_data_push(table_data, sizeof(*srat));
srat->reserved1 = cpu_to_le32(1);
- for (i = 0; i < vms->smp_cpus; ++i) {
+ for (i = 0; i < cpu_list->len; ++i) {
+ int node_id = cpu_list->cpus[i].props.has_node_id ?
+ cpu_list->cpus[i].props.node_id : 0;
core = acpi_data_push(table_data, sizeof(*core));
core->type = ACPI_SRAT_PROCESSOR_GICC;
core->length = sizeof(*core);
- core->proximity = cpu_to_le32(cpu_node[i]);
+ core->proximity = cpu_to_le32(node_id);
core->acpi_processor_uid = cpu_to_le32(i);
core->flags = cpu_to_le32(1);
}
- g_free(cpu_node);
mem_base = vms->memmap[VIRT_MEM].base;
for (i = 0; i < nb_numa_nodes; ++i) {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5f62a0321e..c7c8159dfd 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -338,7 +338,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
{
int cpu;
int addr_cells = 1;
- unsigned int i;
+ const MachineState *ms = MACHINE(vms);
/*
* From Documentation/devicetree/bindings/arm/cpus.txt
@@ -369,6 +369,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) {
char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
+ CPUState *cs = CPU(armcpu);
qemu_fdt_add_subnode(vms->fdt, nodename);
qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu");
@@ -389,9 +390,9 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
armcpu->mp_affinity);
}
- i = numa_get_node_for_cpu(cpu);
- if (i < nb_numa_nodes) {
- qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", i);
+ if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id",
+ ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
}
g_free(nodename);
@@ -1194,10 +1195,35 @@ void virt_machine_done(Notifier *notifier, void *data)
virt_build_smbios(vms);
}
+static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
+{
+ uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER;
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
+
+ if (!vmc->disallow_affinity_adjustment) {
+ /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the
+ * GIC's target-list limitations. 32-bit KVM hosts currently
+ * always create clusters of 4 CPUs, but that is expected to
+ * change when they gain support for gicv3. When KVM is enabled
+ * it will override the changes we make here, therefore our
+ * purposes are to make TCG consistent (with 64-bit KVM hosts)
+ * and to improve SGI efficiency.
+ */
+ if (vms->gic_version == 3) {
+ clustersz = GICV3_TARGETLIST_BITS;
+ } else {
+ clustersz = GIC_TARGETLIST_BITS;
+ }
+ }
+ return arm_cpu_mp_affinity(idx, clustersz);
+}
+
static void machvirt_init(MachineState *machine)
{
VirtMachineState *vms = VIRT_MACHINE(machine);
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+ const CPUArchIdList *possible_cpus;
qemu_irq pic[NUM_IRQS];
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *secure_sysmem = NULL;
@@ -1210,7 +1236,6 @@ static void machvirt_init(MachineState *machine)
CPUClass *cc;
Error *err = NULL;
bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
- uint8_t clustersz;
if (!cpu_model) {
cpu_model = "cortex-a15";
@@ -1263,10 +1288,8 @@ static void machvirt_init(MachineState *machine)
*/
if (vms->gic_version == 3) {
virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000;
- clustersz = GICV3_TARGETLIST_BITS;
} else {
virt_max_cpus = GIC_NCPU;
- clustersz = GIC_TARGETLIST_BITS;
}
if (max_cpus > virt_max_cpus) {
@@ -1324,21 +1347,35 @@ static void machvirt_init(MachineState *machine)
exit(1);
}
- for (n = 0; n < smp_cpus; n++) {
- Object *cpuobj = object_new(typename);
- if (!vmc->disallow_affinity_adjustment) {
- /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the
- * GIC's target-list limitations. 32-bit KVM hosts currently
- * always create clusters of 4 CPUs, but that is expected to
- * change when they gain support for gicv3. When KVM is enabled
- * it will override the changes we make here, therefore our
- * purposes are to make TCG consistent (with 64-bit KVM hosts)
- * and to improve SGI efficiency.
- */
- uint8_t aff1 = n / clustersz;
- uint8_t aff0 = n % clustersz;
- object_property_set_int(cpuobj, (aff1 << ARM_AFF1_SHIFT) | aff0,
- "mp-affinity", NULL);
+ possible_cpus = mc->possible_cpu_arch_ids(machine);
+ for (n = 0; n < possible_cpus->len; n++) {
+ Object *cpuobj;
+ CPUState *cs;
+ int node_id;
+
+ if (n >= smp_cpus) {
+ break;
+ }
+
+ cpuobj = object_new(typename);
+ object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id,
+ "mp-affinity", NULL);
+
+ cs = CPU(cpuobj);
+ cs->cpu_index = n;
+
+ node_id = possible_cpus->cpus[cs->cpu_index].props.node_id;
+ if (!possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
+ /* by default CPUState::numa_node was 0 if it's not set via CLI
+ * keep it this way for now but in future we probably should
+ * refuse to start up with incomplete numa mapping */
+ node_id = 0;
+ }
+ if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) {
+ cs->numa_node = node_id;
+ } else {
+ /* CPU isn't device_add compatible yet, this shouldn't happen */
+ error_setg(&error_abort, "user set node-id not implemented");
}
if (!vms->secure) {
@@ -1518,6 +1555,46 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp)
}
}
+static CpuInstanceProperties
+virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
+{
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
+ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
+
+ assert(cpu_index < possible_cpus->len);
+ return possible_cpus->cpus[cpu_index].props;
+}
+
+static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
+{
+ int n;
+ VirtMachineState *vms = VIRT_MACHINE(ms);
+
+ if (ms->possible_cpus) {
+ assert(ms->possible_cpus->len == max_cpus);
+ return ms->possible_cpus;
+ }
+
+ ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
+ sizeof(CPUArchId) * max_cpus);
+ ms->possible_cpus->len = max_cpus;
+ for (n = 0; n < ms->possible_cpus->len; n++) {
+ ms->possible_cpus->cpus[n].arch_id =
+ virt_cpu_mp_affinity(vms, n);
+ ms->possible_cpus->cpus[n].props.has_thread_id = true;
+ ms->possible_cpus->cpus[n].props.thread_id = n;
+
+ /* default distribution of CPUs over NUMA nodes */
+ if (nb_numa_nodes) {
+ /* preset values but do not enable them i.e. 'has_node_id = false',
+ * numa init code will enable them later if manual mapping wasn't
+ * present on CLI */
+ ms->possible_cpus->cpus[n].props.node_id = n % nb_numa_nodes;
+ }
+ }
+ return ms->possible_cpus;
+}
+
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -1534,6 +1611,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
mc->pci_allow_0_address = true;
/* We know we will never create a pre-ARMv7 CPU which needs 1K pages */
mc->minimum_page_bits = 12;
+ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
+ mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
}
static const TypeInfo virt_machine_info = {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index ada9eea483..fd6a436064 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -17,8 +17,10 @@
#include "qapi/visitor.h"
#include "hw/sysbus.h"
#include "sysemu/sysemu.h"
+#include "sysemu/numa.h"
#include "qemu/error-report.h"
#include "qemu/cutils.h"
+#include "sysemu/numa.h"
static char *machine_get_accel(Object *obj, Error **errp)
{
@@ -388,6 +390,102 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine)
return head;
}
+/**
+ * machine_set_cpu_numa_node:
+ * @machine: machine object to modify
+ * @props: specifies which cpu objects to assign to
+ * numa node specified by @props.node_id
+ * @errp: if an error occurs, a pointer to an area to store the error
+ *
+ * Associate NUMA node specified by @props.node_id with cpu slots that
+ * match socket/core/thread-ids specified by @props. It's recommended to use
+ * query-hotpluggable-cpus.props values to specify affected cpu slots,
+ * which would lead to exact 1:1 mapping of cpu slots to NUMA node.
+ *
+ * However for CLI convenience it's possible to pass in subset of properties,
+ * which would affect all cpu slots that match it.
+ * Ex for pc machine:
+ * -smp 4,cores=2,sockets=2 -numa node,nodeid=0 -numa node,nodeid=1 \
+ * -numa cpu,node-id=0,socket_id=0 \
+ * -numa cpu,node-id=1,socket_id=1
+ * will assign all child cores of socket 0 to node 0 and
+ * of socket 1 to node 1.
+ *
+ * On attempt of reassigning (already assigned) cpu slot to another NUMA node,
+ * return error.
+ * Empty subset is disallowed and function will return with error in this case.
+ */
+void machine_set_cpu_numa_node(MachineState *machine,
+ const CpuInstanceProperties *props, Error **errp)
+{
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+ bool match = false;
+ int i;
+
+ if (!mc->possible_cpu_arch_ids) {
+ error_setg(errp, "mapping of CPUs to NUMA node is not supported");
+ return;
+ }
+
+ /* disabling node mapping is not supported, forbid it */
+ assert(props->has_node_id);
+
+ /* force board to initialize possible_cpus if it hasn't been done yet */
+ mc->possible_cpu_arch_ids(machine);
+
+ for (i = 0; i < machine->possible_cpus->len; i++) {
+ CPUArchId *slot = &machine->possible_cpus->cpus[i];
+
+ /* reject unsupported by board properties */
+ if (props->has_thread_id && !slot->props.has_thread_id) {
+ error_setg(errp, "thread-id is not supported");
+ return;
+ }
+
+ if (props->has_core_id && !slot->props.has_core_id) {
+ error_setg(errp, "core-id is not supported");
+ return;
+ }
+
+ if (props->has_socket_id && !slot->props.has_socket_id) {
+ error_setg(errp, "socket-id is not supported");
+ return;
+ }
+
+ /* skip slots with explicit mismatch */
+ if (props->has_thread_id && props->thread_id != slot->props.thread_id) {
+ continue;
+ }
+
+ if (props->has_core_id && props->core_id != slot->props.core_id) {
+ continue;
+ }
+
+ if (props->has_socket_id && props->socket_id != slot->props.socket_id) {
+ continue;
+ }
+
+ /* reject assignment if slot is already assigned, for compatibility
+ * of legacy cpu_index mapping with SPAPR core based mapping do not
+ * error out if cpu thread and matched core have the same node-id */
+ if (slot->props.has_node_id &&
+ slot->props.node_id != props->node_id) {
+ error_setg(errp, "CPU is already assigned to node-id: %" PRId64,
+ slot->props.node_id);
+ return;
+ }
+
+ /* assign slot to node as it's matched '-numa cpu' key */
+ match = true;
+ slot->props.node_id = props->node_id;
+ slot->props.has_node_id = props->has_node_id;
+ }
+
+ if (!match) {
+ error_setg(errp, "no match found");
+ }
+}
+
static void machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -400,6 +498,7 @@ static void machine_class_init(ObjectClass *oc, void *data)
* On Linux, each node's border has to be 8MB aligned
*/
mc->numa_mem_align_shift = 23;
+ mc->numa_auto_assign_ram = numa_default_auto_assign_ram;
object_class_property_add_str(oc, "accel",
machine_get_accel, machine_set_accel, &error_abort);
@@ -580,6 +679,69 @@ bool machine_mem_merge(MachineState *machine)
return machine->mem_merge;
}
+static char *cpu_slot_to_string(const CPUArchId *cpu)
+{
+ GString *s = g_string_new(NULL);
+ if (cpu->props.has_socket_id) {
+ g_string_append_printf(s, "socket-id: %"PRId64, cpu->props.socket_id);
+ }
+ if (cpu->props.has_core_id) {
+ if (s->len) {
+ g_string_append_printf(s, ", ");
+ }
+ g_string_append_printf(s, "core-id: %"PRId64, cpu->props.core_id);
+ }
+ if (cpu->props.has_thread_id) {
+ if (s->len) {
+ g_string_append_printf(s, ", ");
+ }
+ g_string_append_printf(s, "thread-id: %"PRId64, cpu->props.thread_id);
+ }
+ return g_string_free(s, false);
+}
+
+static void machine_numa_validate(MachineState *machine)
+{
+ int i;
+ GString *s = g_string_new(NULL);
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(machine);
+
+ assert(nb_numa_nodes);
+ for (i = 0; i < possible_cpus->len; i++) {
+ const CPUArchId *cpu_slot = &possible_cpus->cpus[i];
+
+ /* at this point numa mappings are initilized by CLI options
+ * or with default mappings so it's sufficient to list
+ * all not yet mapped CPUs here */
+ /* TODO: make it hard error in future */
+ if (!cpu_slot->props.has_node_id) {
+ char *cpu_str = cpu_slot_to_string(cpu_slot);
+ g_string_append_printf(s, "%sCPU %d [%s]", s->len ? ", " : "", i,
+ cpu_str);
+ g_free(cpu_str);
+ }
+ }
+ if (s->len) {
+ error_report("warning: CPU(s) not present in any NUMA nodes: %s",
+ s->str);
+ error_report("warning: All CPU(s) up to maxcpus should be described "
+ "in NUMA config, ability to start up with partial NUMA "
+ "mappings is obsoleted and will be removed in future");
+ }
+ g_string_free(s, true);
+}
+
+void machine_run_board_init(MachineState *machine)
+{
+ MachineClass *machine_class = MACHINE_GET_CLASS(machine);
+
+ if (nb_numa_nodes) {
+ machine_numa_validate(machine);
+ }
+ machine_class->init(machine);
+}
+
static void machine_class_finalize(ObjectClass *klass, void *data)
{
MachineClass *mc = MACHINE_CLASS(klass);
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 1d8c645ed3..cc0418f327 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2335,7 +2335,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
srat->reserved1 = cpu_to_le32(1);
for (i = 0; i < apic_ids->len; i++) {
- int j = numa_get_node_for_cpu(i);
+ int node_id = apic_ids->cpus[i].props.has_node_id ?
+ apic_ids->cpus[i].props.node_id : 0;
uint32_t apic_id = apic_ids->cpus[i].arch_id;
if (apic_id < 255) {
@@ -2345,9 +2346,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
core->type = ACPI_SRAT_PROCESSOR_APIC;
core->length = sizeof(*core);
core->local_apic_id = apic_id;
- if (j < nb_numa_nodes) {
- core->proximity_lo = j;
- }
+ core->proximity_lo = node_id;
memset(core->proximity_hi, 0, 3);
core->local_sapic_eid = 0;
core->flags = cpu_to_le32(1);
@@ -2358,9 +2357,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
core->type = ACPI_SRAT_PROCESSOR_x2APIC;
core->length = sizeof(*core);
core->x2apic_id = cpu_to_le32(apic_id);
- if (j < nb_numa_nodes) {
- core->proximity_domain = cpu_to_le32(j);
- }
+ core->proximity_domain = cpu_to_le32(node_id);
core->flags = cpu_to_le32(1);
}
}
@@ -2707,6 +2704,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
if (pcms->numa_nodes) {
acpi_add_table(table_offsets, tables_blob);
build_srat(tables_blob, tables->linker, machine);
+ if (have_numa_distance) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_slit(tables_blob, tables->linker);
+ }
}
if (acpi_get_mcfg(&mcfg)) {
acpi_add_table(table_offsets, tables_blob);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f3b372a18f..e36a375683 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -747,7 +747,9 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
{
FWCfgState *fw_cfg;
uint64_t *numa_fw_cfg;
- int i, j;
+ int i;
+ const CPUArchIdList *cpus;
+ MachineClass *mc = MACHINE_GET_CLASS(pcms);
fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as);
fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
@@ -782,12 +784,12 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
*/
numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes);
numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
- for (i = 0; i < max_cpus; i++) {
- unsigned int apic_id = x86_cpu_apic_id_from_index(i);
+ cpus = mc->possible_cpu_arch_ids(MACHINE(pcms));
+ for (i = 0; i < cpus->len; i++) {
+ unsigned int apic_id = cpus->cpus[i].arch_id;
assert(apic_id < pcms->apic_id_limit);
- j = numa_get_node_for_cpu(i);
- if (j < nb_numa_nodes) {
- numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
+ if (cpus->cpus[i].props.has_node_id) {
+ numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id);
}
}
for (i = 0; i < nb_numa_nodes; i++) {
@@ -1893,6 +1895,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
int idx;
+ int node_id;
CPUState *cs;
CPUArchId *cpu_slot;
X86CPUTopoInfo topo;
@@ -1982,6 +1985,22 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
cs = CPU(cpu);
cs->cpu_index = idx;
+
+ node_id = cpu_slot->props.node_id;
+ if (!cpu_slot->props.has_node_id) {
+ /* by default CPUState::numa_node was 0 if it's not set via CLI
+ * keep it this way for now but in future we probably should
+ * refuse to start up with incomplete numa mapping */
+ node_id = 0;
+ }
+ if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) {
+ cs->numa_node = node_id;
+ } else if (cs->numa_node != node_id) {
+ error_setg(errp, "node-id %d must match numa node specified"
+ "with -numa option for cpu-index %d",
+ cs->numa_node, cs->cpu_index);
+ return;
+ }
}
static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
@@ -2243,12 +2262,14 @@ static void pc_machine_reset(void)
}
}
-static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index)
+static CpuInstanceProperties
+pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
{
- X86CPUTopoInfo topo;
- x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index,
- &topo);
- return topo.pkg_id;
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
+ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
+
+ assert(cpu_index < possible_cpus->len);
+ return possible_cpus->cpus[cpu_index].props;
}
static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
@@ -2280,6 +2301,15 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
ms->possible_cpus->cpus[i].props.has_thread_id = true;
ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id;
+
+ /* default distribution of CPUs over NUMA nodes */
+ if (nb_numa_nodes) {
+ /* preset values but do not enable them i.e. 'has_node_id = false',
+ * numa init code will enable them later if manual mapping wasn't
+ * present on CLI */
+ ms->possible_cpus->cpus[i].props.node_id =
+ topo.pkg_id % nb_numa_nodes;
+ }
}
return ms->possible_cpus;
}
@@ -2322,7 +2352,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->acpi_data_size = 0x20000 + 0x8000;
pcmc->save_tsc_khz = true;
mc->get_hotplug_handler = pc_get_hotpug_handler;
- mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
+ mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
mc->has_hotpluggable_cpus = true;
mc->default_boot_order = "cad";
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 9f102aa388..d468b963fb 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -54,6 +54,7 @@
#endif
#include "migration/migration.h"
#include "kvm_i386.h"
+#include "sysemu/numa.h"
#define MAX_IDE_BUS 2
@@ -442,6 +443,7 @@ static void pc_i440fx_2_9_machine_options(MachineClass *m)
pc_i440fx_machine_options(m);
m->alias = "pc";
m->is_default = 1;
+ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
}
DEFINE_I440FX_MACHINE(v2_9, "pc-i440fx-2.9", NULL,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index dd792a8547..66303a78cf 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -47,6 +47,7 @@
#include "hw/usb.h"
#include "qemu/error-report.h"
#include "migration/migration.h"
+#include "sysemu/numa.h"
/* ICH9 AHCI has 6 ports */
#define MAX_SATA_PORTS 6
@@ -305,6 +306,7 @@ static void pc_q35_2_9_machine_options(MachineClass *m)
{
pc_q35_machine_options(m);
m->alias = "q35";
+ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
}
DEFINE_Q35_MACHINE(v2_9, "pc-q35-2.9", NULL,
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 1b7cadab0c..0980d733cd 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2831,9 +2831,11 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
Error *local_err = NULL;
CPUCore *cc = CPU_CORE(dev);
+ sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev);
char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model);
const char *type = object_get_typename(OBJECT(dev));
CPUArchId *core_slot;
+ int node_id;
int index;
if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
@@ -2868,6 +2870,21 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
goto out;
}
+ node_id = core_slot->props.node_id;
+ if (!core_slot->props.has_node_id) {
+ /* by default CPUState::numa_node was 0 if it's not set via CLI
+ * keep it this way for now but in future we probably should
+ * refuse to start up with incomplete numa mapping */
+ node_id = 0;
+ }
+ if (sc->node_id == CPU_UNSET_NUMA_NODE_ID) {
+ sc->node_id = node_id;
+ } else if (sc->node_id != node_id) {
+ error_setg(&local_err, "node-id %d must match numa node specified"
+ "with -numa option for cpu-index %d", sc->node_id, cc->core_id);
+ goto out;
+ }
+
out:
g_free(base_core_type);
error_propagate(errp, local_err);
@@ -2988,11 +3005,18 @@ static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine,
return NULL;
}
-static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index)
+static CpuInstanceProperties
+spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
{
- /* Allocate to NUMA nodes on a "socket" basis (not that concept of
- * socket means much for the paravirtualized PAPR platform) */
- return cpu_index / smp_threads / smp_cores;
+ CPUArchId *core_slot;
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+ /* make sure possible_cpu are intialized */
+ mc->possible_cpu_arch_ids(machine);
+ /* get CPU core slot containing thread that matches cpu_index */
+ core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
+ assert(core_slot);
+ return core_slot->props;
}
static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
@@ -3019,8 +3043,15 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
machine->possible_cpus->cpus[i].arch_id = core_id;
machine->possible_cpus->cpus[i].props.has_core_id = true;
machine->possible_cpus->cpus[i].props.core_id = core_id;
- /* TODO: add 'has_node/node' here to describe
- to which node core belongs */
+
+ /* default distribution of CPUs over NUMA nodes */
+ if (nb_numa_nodes) {
+ /* preset values but do not enable them i.e. 'has_node_id = false',
+ * numa init code will enable them later if manual mapping wasn't
+ * present on CLI */
+ machine->possible_cpus->cpus[i].props.node_id =
+ core_id / smp_threads / smp_cores % nb_numa_nodes;
+ }
}
return machine->possible_cpus;
}
@@ -3145,7 +3176,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
hc->pre_plug = spapr_machine_device_pre_plug;
hc->plug = spapr_machine_device_plug;
hc->unplug = spapr_machine_device_unplug;
- mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
+ mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
hc->unplug_request = spapr_machine_device_unplug_request;
@@ -3249,6 +3280,7 @@ static void spapr_machine_2_9_class_options(MachineClass *mc)
{
spapr_machine_2_10_class_options(mc);
SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9);
+ mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
}
DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 4389ef4c2a..a17ea07ef1 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -176,13 +176,11 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
const char *typename = object_class_get_name(scc->cpu_class);
size_t size = object_type_get_instance_size(typename);
Error *local_err = NULL;
- int core_node_id = numa_get_node_for_cpu(cc->core_id);;
void *obj;
int i, j;
sc->threads = g_malloc0(size * cc->nr_threads);
for (i = 0; i < cc->nr_threads; i++) {
- int node_id;
char id[32];
CPUState *cs;
@@ -192,17 +190,8 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
cs = CPU(obj);
cs->cpu_index = cc->core_id + i;
- /* Set NUMA node for the added CPUs */
- node_id = numa_get_node_for_cpu(cs->cpu_index);
- if (node_id != core_node_id) {
- error_setg(&local_err, "Invalid node-id=%d of thread[cpu-index: %d]"
- " on CPU[core-id: %d, node-id: %d], node-id must be the same",
- node_id, cs->cpu_index, cc->core_id, core_node_id);
- goto err;
- }
- if (node_id < nb_numa_nodes) {
- cs->numa_node = node_id;
- }
+ /* Set NUMA node for the threads belonged to core */
+ cs->numa_node = sc->node_id;
snprintf(id, sizeof(id), "thread[%d]", i);
object_property_add_child(OBJECT(sc), id, obj, &local_err);
@@ -263,6 +252,11 @@ static const char *spapr_core_models[] = {
"POWER9_v1.0",
};
+static Property spapr_cpu_core_properties[] = {
+ DEFINE_PROP_INT32("node-id", sPAPRCPUCore, node_id, CPU_UNSET_NUMA_NODE_ID),
+ DEFINE_PROP_END_OF_LIST()
+};
+
void spapr_cpu_core_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
@@ -270,6 +264,7 @@ void spapr_cpu_core_class_init(ObjectClass *oc, void *data)
dc->realize = spapr_cpu_core_realize;
dc->unrealize = spapr_cpu_core_unrealizefn;
+ dc->props = spapr_cpu_core_properties;
scc->cpu_class = cpu_class_by_name(TYPE_POWERPC_CPU, data);
g_assert(scc->cpu_class);
}