From cdcab9d94101a6dd9ac8136c6f2cd15b6a997896 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 28 Jun 2016 10:06:46 +0100 Subject: nvdimm: fix memory leak in error code path object_get_canonical_path_component() returns a heap-allocated string that must be freed using g_free(). Reported-by: Paolo Bonzini Signed-off-by: Stefan Hajnoczi Reviewed-by: Igor Mammedov Reviewed-by: Xiao Guangrong Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/nvdimm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 81896c0e84..7895805a23 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -98,6 +98,7 @@ static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) "small to contain nvdimm label (0x%" PRIx64 ") and " "aligned PMEM (0x%" PRIx64 ")", path, memory_region_size(mr), nvdimm->label_size, align); + g_free(path); return; } -- cgit v1.2.1 From eaf8d91cd7ac28803eb0fb5a13b0c10e5096c361 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 14 Jul 2016 16:43:40 +0300 Subject: tests/prom-env-test: increase the test timeout On a slower machine the test can take more than 30 seconds. Increase the timeout to 100 seconds. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Mark Cave-Ayland --- tests/prom-env-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c index 6df57d224b..7a628574c3 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c @@ -30,7 +30,7 @@ static void check_guest_memory(void) int i; /* Poll until code has run and modified memory. 
Wait at most 30 seconds */ - for (i = 0; i < 3000; ++i) { + for (i = 0; i < 10000; ++i) { signature = readl(ADDRESS); if (signature == MAGIC) { break; -- cgit v1.2.1 From 50d3bba9dabaeb89e545ced4dea23d8064a65c7a Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 14 Jul 2016 16:43:41 +0300 Subject: hw/alpha: fix PCI bus initialization Delay the host-bridge 'realization' until the PCI root bus is attached. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Mark Cave-Ayland --- hw/alpha/typhoon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index 97721b535d..883db13f96 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -824,7 +824,6 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus, int i; dev = qdev_create(NULL, TYPE_TYPHOON_PCI_HOST_BRIDGE); - qdev_init_nofail(dev); s = TYPHOON_PCI_HOST_BRIDGE(dev); phb = PCI_HOST_BRIDGE(dev); @@ -889,6 +888,7 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus, &s->pchip.reg_mem, &s->pchip.reg_io, 0, 64, TYPE_PCI_BUS); phb->bus = b; + qdev_init_nofail(dev); /* Host memory as seen from the PCI side, via the IOMMU. */ memory_region_init_iommu(&s->pchip.iommu, OBJECT(s), &typhoon_iommu_ops, -- cgit v1.2.1 From a8c1a75343940fdc9d47e6a9b7723476bb5e1774 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 14 Jul 2016 16:43:42 +0300 Subject: hw/mips: fix PCI bus initialization Delay the host-bridge 'realization' until the PCI root bus is attached. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Tested-by: Mark Cave-Ayland Acked-by: Leon Alrae Tested-by: Leon Alrae --- hw/mips/gt64xxx_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c index 3f4523df22..4811843ab6 100644 --- a/hw/mips/gt64xxx_pci.c +++ b/hw/mips/gt64xxx_pci.c @@ -1167,7 +1167,6 @@ PCIBus *gt64120_register(qemu_irq *pic) DeviceState *dev; dev = qdev_create(NULL, TYPE_GT64120_PCI_HOST_BRIDGE); - qdev_init_nofail(dev); d = GT64120_PCI_HOST_BRIDGE(dev); phb = PCI_HOST_BRIDGE(dev); memory_region_init(&d->pci0_mem, OBJECT(dev), "pci0-mem", UINT32_MAX); @@ -1178,6 +1177,7 @@ PCIBus *gt64120_register(qemu_irq *pic) &d->pci0_mem, get_system_io(), PCI_DEVFN(18, 0), 4, TYPE_PCI_BUS); + qdev_init_nofail(dev); memory_region_init_io(&d->ISD_mem, OBJECT(dev), &isd_mem_ops, d, "isd-mem", 0x1000); pci_create_simple(phb->bus, PCI_DEVFN(0, 0), "gt64120_pci"); -- cgit v1.2.1 From 2f3ae0b2d4343712ee2a1b23b51bfe5272c52138 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 14 Jul 2016 16:43:43 +0300 Subject: hw/apb: fix PCI bus initialization Create and connect the PCI root bus to the host bridge before the latter is 'realized'. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Tested-by: Mark Cave-Ayland --- hw/pci-host/apb.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index babbbef0c2..16587f8373 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -670,6 +670,13 @@ PCIBus *pci_apb_init(hwaddr special_base, /* Ultrasparc PBM main bus */ dev = qdev_create(NULL, TYPE_APB); + d = APB_DEVICE(dev); + phb = PCI_HOST_BRIDGE(dev); + phb->bus = pci_register_bus(DEVICE(phb), "pci", + pci_apb_set_irq, pci_pbm_map_irq, d, + &d->pci_mmio, + get_system_io(), + 0, 32, TYPE_PCI_BUS); qdev_init_nofail(dev); s = SYS_BUS_DEVICE(dev); /* apb_config */ @@ -678,18 +685,10 @@ PCIBus *pci_apb_init(hwaddr special_base, sysbus_mmio_map(s, 1, special_base + 0x1000000ULL); /* pci_ioport */ sysbus_mmio_map(s, 2, special_base + 0x2000000ULL); - d = APB_DEVICE(dev); memory_region_init(&d->pci_mmio, OBJECT(s), "pci-mmio", 0x100000000ULL); memory_region_add_subregion(get_system_memory(), mem_base, &d->pci_mmio); - phb = PCI_HOST_BRIDGE(dev); - phb->bus = pci_register_bus(DEVICE(phb), "pci", - pci_apb_set_irq, pci_pbm_map_irq, d, - &d->pci_mmio, - get_system_io(), - 0, 32, TYPE_PCI_BUS); - *pbm_irqs = d->pbm_irqs; d->ivec_irqs = ivec_irqs; -- cgit v1.2.1 From 3c3c1e32033ffa7e0613d26dfb15255466dc9829 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 14 Jul 2016 16:43:44 +0300 Subject: hw/grackle: fix PCI bus initialization Delay the host-bridge 'realization' until the PCI root bus is attached. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Tested-by: Mark Cave-Ayland --- hw/pci-host/grackle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c index 8f91216157..2c8acdaaca 100644 --- a/hw/pci-host/grackle.c +++ b/hw/pci-host/grackle.c @@ -72,7 +72,6 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic, GrackleState *d; dev = qdev_create(NULL, TYPE_GRACKLE_PCI_HOST_BRIDGE); - qdev_init_nofail(dev); s = SYS_BUS_DEVICE(dev); phb = PCI_HOST_BRIDGE(dev); d = GRACKLE_PCI_HOST_BRIDGE(dev); @@ -92,6 +91,7 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic, 0, 4, TYPE_PCI_BUS); pci_create_simple(phb->bus, 0, "grackle"); + qdev_init_nofail(dev); sysbus_mmio_map(s, 0, base); sysbus_mmio_map(s, 1, base + 0x00200000); -- cgit v1.2.1 From 685f9a3428f625f580af0123aa95f4838d86cac3 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 14 Jul 2016 16:43:45 +0300 Subject: hw/prep: realize the PCI root bus as part of the prep init 'Realize' the PCI root bus manually since the 'realize' mechanism does not propagate to child devices yet. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Mark Cave-Ayland --- hw/pci-host/prep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c index 487e32ecbf..5580293f93 100644 --- a/hw/pci-host/prep.c +++ b/hw/pci-host/prep.c @@ -247,6 +247,7 @@ static void raven_pcihost_realizefn(DeviceState *d, Error **errp) memory_region_add_subregion(address_space_mem, 0xbffffff0, &s->pci_intack); /* TODO Remove once realize propagates to child devices. 
*/ + object_property_set_bool(OBJECT(&s->pci_bus), true, "realized", errp); object_property_set_bool(OBJECT(&s->pci_dev), true, "realized", errp); } -- cgit v1.2.1 From b1af7959a66610669e1a019b9a84f6ed3a7936c6 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 14 Jul 2016 16:43:46 +0300 Subject: hw/versatile: realize the PCI root bus as part of the versatile init 'Realize' the PCI root bus manually since the 'realize' mechanism does not propagate to child devices yet. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Mark Cave-Ayland --- hw/pci-host/versatile.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c index 0792c4501c..467cbb9cb8 100644 --- a/hw/pci-host/versatile.c +++ b/hw/pci-host/versatile.c @@ -455,6 +455,7 @@ static void pci_vpb_realize(DeviceState *dev, Error **errp) } /* TODO Remove once realize propagates to child devices. */ + object_property_set_bool(OBJECT(&s->pci_bus), true, "realized", errp); object_property_set_bool(OBJECT(&s->pci_dev), true, "realized", errp); } -- cgit v1.2.1 From 1c7955c4503211f45a83e6480bd876f079ba5cd8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:10 +0800 Subject: x86-iommu: introduce parent class Introducing parent class for intel-iommu devices named "x86-iommu". This is preparation work to abstract shared functionalities out from Intel and AMD IOMMUs. Currently, only the parent class is introduced. It does nothing yet. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/Makefile.objs | 2 +- hw/i386/intel_iommu.c | 5 ++-- hw/i386/x86-iommu.c | 53 +++++++++++++++++++++++++++++++++++++++++++ include/hw/i386/intel_iommu.h | 3 ++- include/hw/i386/x86-iommu.h | 46 +++++++++++++++++++++++++++++++++++++ 5 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 hw/i386/x86-iommu.c create mode 100644 include/hw/i386/x86-iommu.h diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index b52d5b8756..90e94ffefd 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -2,7 +2,7 @@ obj-$(CONFIG_KVM) += kvm/ obj-y += multiboot.o obj-y += pc.o pc_piix.o pc_q35.o obj-y += pc_sysfw.o -obj-y += intel_iommu.o +obj-y += x86-iommu.o intel_iommu.o obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-y += kvmvapic.o diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 464f2a0518..a430d7de1c 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2061,17 +2061,18 @@ static void vtd_realize(DeviceState *dev, Error **errp) static void vtd_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass); dc->reset = vtd_reset; - dc->realize = vtd_realize; dc->vmsd = &vtd_vmstate; dc->props = vtd_properties; dc->hotpluggable = false; + x86_class->realize = vtd_realize; } static const TypeInfo vtd_info = { .name = TYPE_INTEL_IOMMU_DEVICE, - .parent = TYPE_SYS_BUS_DEVICE, + .parent = TYPE_X86_IOMMU_DEVICE, .instance_size = sizeof(IntelIOMMUState), .class_init = vtd_class_init, }; diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c new file mode 100644 index 0000000000..d739afb141 --- /dev/null +++ b/hw/i386/x86-iommu.c @@ -0,0 +1,53 @@ +/* + * QEMU emulation of common X86 IOMMU + * + * Copyright (C) 2016 Peter Xu, Red Hat + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the 
License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "hw/boards.h" +#include "hw/i386/x86-iommu.h" + +static void x86_iommu_realize(DeviceState *dev, Error **errp) +{ + X86IOMMUClass *x86_class = X86_IOMMU_GET_CLASS(dev); + if (x86_class->realize) { + x86_class->realize(dev, errp); + } +} + +static void x86_iommu_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = x86_iommu_realize; +} + +static const TypeInfo x86_iommu_info = { + .name = TYPE_X86_IOMMU_DEVICE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(X86IOMMUState), + .class_init = x86_iommu_class_init, + .class_size = sizeof(X86IOMMUClass), + .abstract = true, +}; + +static void x86_iommu_register_types(void) +{ + type_register_static(&x86_iommu_info); +} + +type_init(x86_iommu_register_types) diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index b024ffa720..680a0c4e5e 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -23,6 +23,7 @@ #define INTEL_IOMMU_H #include "hw/qdev.h" #include "sysemu/dma.h" +#include "hw/i386/x86-iommu.h" #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu" #define INTEL_IOMMU_DEVICE(obj) \ @@ -90,7 +91,7 @@ struct VTDIOTLBEntry { /* The iommu (DMAR) device state struct */ struct IntelIOMMUState { - SysBusDevice busdev; + X86IOMMUState x86_iommu; MemoryRegion csrmem; uint8_t csr[DMAR_REG_SIZE]; /* register values */ uint8_t wmask[DMAR_REG_SIZE]; /* R/W bytes */ diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h new file mode 
100644 index 0000000000..924f39ad25 --- /dev/null +++ b/include/hw/i386/x86-iommu.h @@ -0,0 +1,46 @@ +/* + * Common IOMMU interface for X86 platform + * + * Copyright (C) 2016 Peter Xu, Red Hat + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef IOMMU_COMMON_H +#define IOMMU_COMMON_H + +#include "hw/sysbus.h" + +#define TYPE_X86_IOMMU_DEVICE ("x86-iommu") +#define X86_IOMMU_DEVICE(obj) \ + OBJECT_CHECK(X86IOMMUState, (obj), TYPE_X86_IOMMU_DEVICE) +#define X86_IOMMU_CLASS(klass) \ + OBJECT_CLASS_CHECK(X86IOMMUClass, (klass), TYPE_X86_IOMMU_DEVICE) +#define X86_IOMMU_GET_CLASS(obj) \ + OBJECT_GET_CLASS(X86IOMMUClass, obj, TYPE_X86_IOMMU_DEVICE) + +typedef struct X86IOMMUState X86IOMMUState; +typedef struct X86IOMMUClass X86IOMMUClass; + +struct X86IOMMUClass { + SysBusDeviceClass parent; + /* Intel/AMD specific realize() hook */ + DeviceRealize realize; +}; + +struct X86IOMMUState { + SysBusDevice busdev; +}; + +#endif -- cgit v1.2.1 From 04af0e18bc93c49faa94921b4326ef9261a2fa27 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:11 +0800 Subject: intel_iommu: rename VTD_PCI_DEVFN_MAX to x86-iommu Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 11 +++++++---- include/hw/i386/intel_iommu.h | 1 - include/hw/i386/x86-iommu.h | 2 ++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index a430d7de1c..3ee5782f8e 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -26,6 +26,8 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/i386/pc.h" +#include "hw/boards.h" +#include "hw/i386/x86-iommu.h" /*#define DEBUG_INTEL_IOMMU*/ #ifdef DEBUG_INTEL_IOMMU @@ -192,7 +194,7 @@ static void vtd_reset_context_cache(IntelIOMMUState *s) VTD_DPRINTF(CACHE, "global context_cache_gen=1"); while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) { - for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) { + for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) { vtd_as = vtd_bus->dev_as[devfn_it]; if (!vtd_as) { continue; @@ -964,7 +966,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); if (vtd_bus) { devfn = VTD_SID_TO_DEVFN(source_id); - for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) { + for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) { vtd_as = vtd_bus->dev_as[devfn_it]; if (vtd_as && ((devfn_it & mask) == (devfn & mask))) { VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16, @@ -1916,7 +1918,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) if (!vtd_bus) { /* No corresponding free() */ - vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX); + vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \ + X86_IOMMU_PCI_DEVFN_MAX); vtd_bus->bus = bus; key = (uintptr_t)bus; g_hash_table_insert(s->vtd_as_by_busptr, &key, vtd_bus); @@ -2032,7 +2035,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) IntelIOMMUState *s = opaque; VTDAddressSpace *vtd_as; - 
assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX); + assert(0 <= devfn && devfn <= X86_IOMMU_PCI_DEVFN_MAX); vtd_as = vtd_find_add_as(s, bus, devfn); return &vtd_as->as; diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 680a0c4e5e..07943092b4 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -35,7 +35,6 @@ #define VTD_PCI_BUS_MAX 256 #define VTD_PCI_SLOT_MAX 32 #define VTD_PCI_FUNC_MAX 8 -#define VTD_PCI_DEVFN_MAX 256 #define VTD_PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define VTD_PCI_FUNC(devfn) ((devfn) & 0x07) #define VTD_SID_TO_BUS(sid) (((sid) >> 8) & 0xff) diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 924f39ad25..fac693d1b5 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -30,6 +30,8 @@ #define X86_IOMMU_GET_CLASS(obj) \ OBJECT_GET_CLASS(X86IOMMUClass, obj, TYPE_X86_IOMMU_DEVICE) +#define X86_IOMMU_PCI_DEVFN_MAX 256 + typedef struct X86IOMMUState X86IOMMUState; typedef struct X86IOMMUClass X86IOMMUClass; -- cgit v1.2.1 From 1cf5fd573f536de1eb601ed69127a324e940d37f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:12 +0800 Subject: x86-iommu: provide x86_iommu_get_default Instead of searching the device tree every time, one static variable is declared for the default system x86 IOMMU device. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/acpi-build.c | 9 ++------- hw/i386/x86-iommu.c | 23 +++++++++++++++++++++++ include/hw/i386/x86-iommu.h | 6 ++++++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index fbba461a87..12ecf9564a 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -52,7 +52,7 @@ #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" #include "hw/pci-host/q35.h" -#include "hw/i386/intel_iommu.h" +#include "hw/i386/x86-iommu.h" #include "hw/timer/hpet.h" #include "hw/acpi/aml-build.h" @@ -2539,12 +2539,7 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) static bool acpi_has_iommu(void) { - bool ambiguous; - Object *intel_iommu; - - intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE, - &ambiguous); - return intel_iommu && !ambiguous; + return !!x86_iommu_get_default(); } static diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index d739afb141..f395139b97 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -21,6 +21,28 @@ #include "hw/sysbus.h" #include "hw/boards.h" #include "hw/i386/x86-iommu.h" +#include "qemu/error-report.h" + +/* Default X86 IOMMU device */ +static X86IOMMUState *x86_iommu_default = NULL; + +static void x86_iommu_set_default(X86IOMMUState *x86_iommu) +{ + assert(x86_iommu); + + if (x86_iommu_default) { + error_report("QEMU does not support multiple vIOMMUs " + "for x86 yet."); + exit(1); + } + + x86_iommu_default = x86_iommu; +} + +X86IOMMUState *x86_iommu_get_default(void) +{ + return x86_iommu_default; +} static void x86_iommu_realize(DeviceState *dev, Error **errp) { @@ -28,6 +50,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) if (x86_class->realize) { x86_class->realize(dev, errp); } + x86_iommu_set_default(X86_IOMMU_DEVICE(dev)); } static void x86_iommu_class_init(ObjectClass *klass, void *data) diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index fac693d1b5..b2401a6380 100644 --- 
a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -45,4 +45,10 @@ struct X86IOMMUState { SysBusDevice busdev; }; +/** + * x86_iommu_get_default - get default IOMMU device + * @return: pointer to default IOMMU device + */ +X86IOMMUState *x86_iommu_get_default(void); + #endif -- cgit v1.2.1 From 1121e0afdcfa0cd40e36bd3acff56a3fac4f70fd Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:13 +0800 Subject: x86-iommu: introduce "intremap" property Adding one property for intel-iommu devices to specify whether we should support interrupt remapping. By default, IR is disabled. To enable it, we should use (take Intel IOMMU as example): -device intel-iommu,intremap=on This property can be shared by Intel and future AMD IOMMUs. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/x86-iommu.c | 23 +++++++++++++++++++++++ include/hw/i386/x86-iommu.h | 1 + 2 files changed, 24 insertions(+) diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index f395139b97..4280839860 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -59,9 +59,32 @@ static void x86_iommu_class_init(ObjectClass *klass, void *data) dc->realize = x86_iommu_realize; } +static bool x86_iommu_intremap_prop_get(Object *o, Error **errp) +{ + X86IOMMUState *s = X86_IOMMU_DEVICE(o); + return s->intr_supported; +} + +static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp) +{ + X86IOMMUState *s = X86_IOMMU_DEVICE(o); + s->intr_supported = value; +} + +static void x86_iommu_instance_init(Object *o) +{ + X86IOMMUState *s = X86_IOMMU_DEVICE(o); + + /* By default, do not support IR */ + s->intr_supported = false; + object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get, + x86_iommu_intremap_prop_set, NULL); +} + static const TypeInfo x86_iommu_info = { .name = TYPE_X86_IOMMU_DEVICE, .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = x86_iommu_instance_init, .instance_size = sizeof(X86IOMMUState), 
.class_init = x86_iommu_class_init, .class_size = sizeof(X86IOMMUClass), diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index b2401a6380..699dd0615c 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -43,6 +43,7 @@ struct X86IOMMUClass { struct X86IOMMUState { SysBusDevice busdev; + bool intr_supported; /* Whether vIOMMU supports IR */ }; /** -- cgit v1.2.1 From d46114f9ec40e714ae3fa17c446890d369c32812 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:14 +0800 Subject: acpi: enable INTR for DMAR report structure In ACPI DMA remapping report structure, enable INTR flag when specified. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 14 +++++++++++++- include/hw/i386/intel_iommu.h | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 12ecf9564a..06682f13fc 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -59,6 +59,7 @@ #include "qapi/qmp/qint.h" #include "qom/qom-qobject.h" +#include "hw/i386/x86-iommu.h" #include "hw/acpi/ipmi.h" @@ -2454,6 +2455,10 @@ build_mcfg_q35(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info) build_header(linker, table_data, (void *)mcfg, sig, len, 1, NULL, NULL); } +/* + * VT-d spec 8.1 DMA Remapping Reporting Structure + * (version Oct. 
2014 or later) + */ static void build_dmar_q35(GArray *table_data, BIOSLinker *linker) { @@ -2461,10 +2466,17 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) AcpiTableDmar *dmar; AcpiDmarHardwareUnit *drhd; + uint8_t dmar_flags = 0; + X86IOMMUState *iommu = x86_iommu_get_default(); + + assert(iommu); + if (iommu->intr_supported) { + dmar_flags |= 0x1; /* Flags: 0x1: INT_REMAP */ + } dmar = acpi_data_push(table_data, sizeof(*dmar)); dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; - dmar->flags = 0; /* No intr_remap for now */ + dmar->flags = dmar_flags; /* DMAR Remapping Hardware Unit Definition structure */ drhd = acpi_data_push(table_data, sizeof(*drhd)); diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 07943092b4..741242e1e6 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -44,6 +44,8 @@ #define VTD_HOST_ADDRESS_WIDTH 39 #define VTD_HAW_MASK ((1ULL << VTD_HOST_ADDRESS_WIDTH) - 1) +#define DMAR_REPORT_F_INTR (1) + typedef struct VTDContextEntry VTDContextEntry; typedef struct VTDContextCacheEntry VTDContextCacheEntry; typedef struct IntelIOMMUState IntelIOMMUState; -- cgit v1.2.1 From b79104722fc77974a222929f5590c3bea2941dd7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:15 +0800 Subject: intel_iommu: allow queued invalidation for IR Queued invalidation is required for IR. This patch adds basic support for interrupt cache invalidation requests. Since we have no IR cache implemented yet, we can just skip all interrupt cache invalidation requests for now. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 9 +++++++++ hw/i386/intel_iommu_internal.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 3ee5782f8e..26e322a484 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1404,6 +1404,15 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_IEC: + VTD_DPRINTF(INV, "Interrupt Entry Cache Invalidation " + "not implemented yet"); + /* + * Since currently we do not cache interrupt entries, we can + * just mark this descriptor as "good" and move on. + */ + break; + default: VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type " "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8, diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index e5f514c6e3..b648e694cd 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -286,6 +286,8 @@ typedef struct VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_TYPE 0xf #define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */ #define VTD_INV_DESC_IOTLB 0x2 +#define VTD_INV_DESC_IEC 0x4 /* Interrupt Entry Cache + Invalidate Descriptor */ #define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */ #define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */ -- cgit v1.2.1 From d54bd7f80a5cf7dc5242f745d4c9542c822a81f3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:16 +0800 Subject: intel_iommu: set IR bit for ECAP register Enable IR in IOMMU Extended Capability register. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 6 ++++++ hw/i386/intel_iommu_internal.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 26e322a484..9c7a08424f 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1956,6 +1956,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) */ static void vtd_init(IntelIOMMUState *s) { + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + memset(s->csr, 0, DMAR_REG_SIZE); memset(s->wmask, 0, DMAR_REG_SIZE); memset(s->w1cmask, 0, DMAR_REG_SIZE); @@ -1977,6 +1979,10 @@ static void vtd_init(IntelIOMMUState *s) VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS; s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; + if (x86_iommu->intr_supported) { + s->ecap |= VTD_ECAP_IR; + } + vtd_reset_context_cache(s); vtd_reset_iotlb(s); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index b648e694cd..5b98a1143c 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -176,6 +176,8 @@ /* (offset >> 4) << 8 */ #define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4) #define VTD_ECAP_QI (1ULL << 1) +/* Interrupt Remapping support */ +#define VTD_ECAP_IR (1ULL << 3) /* CAP_REG */ /* (offset >> 4) << 24 */ -- cgit v1.2.1 From cfc13df4621ccc72e21b670cec8f03f91d9d6dcf Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:17 +0800 Subject: acpi: add DMAR scope definition for root IOAPIC To enable interrupt remapping for intel IOMMU device, each IOAPIC device in the system reported via ACPI MADT must be explicitly enumerated under one specific remapping hardware unit. This patch adds the root-complex IOAPIC into the default DMAR device. Please refer to VT-d spec 8.3.1.1 for more information. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/acpi-build.c | 20 +++++++++++++++++--- include/hw/acpi/acpi-defs.h | 13 +++++++++++++ include/hw/pci-host/q35.h | 8 ++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 06682f13fc..77c40d92e2 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -81,6 +81,9 @@ #define ACPI_BUILD_DPRINTF(fmt, ...) #endif +/* Default IOAPIC ID */ +#define ACPI_BUILD_IOAPIC_ID 0x0 + typedef struct AcpiMcfgInfo { uint64_t mcfg_base; uint32_t mcfg_size; @@ -384,7 +387,6 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) io_apic = acpi_data_push(table_data, sizeof *io_apic); io_apic->type = ACPI_APIC_IO; io_apic->length = sizeof(*io_apic); -#define ACPI_BUILD_IOAPIC_ID 0x0 io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); io_apic->interrupt = cpu_to_le32(0); @@ -2468,6 +2470,9 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) AcpiDmarHardwareUnit *drhd; uint8_t dmar_flags = 0; X86IOMMUState *iommu = x86_iommu_get_default(); + AcpiDmarDeviceScope *scope = NULL; + /* Root complex IOAPIC use one path[0] only */ + size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]); assert(iommu); if (iommu->intr_supported) { @@ -2479,13 +2484,22 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) dmar->flags = dmar_flags; /* DMAR Remapping Hardware Unit Definition structure */ - drhd = acpi_data_push(table_data, sizeof(*drhd)); + drhd = acpi_data_push(table_data, sizeof(*drhd) + ioapic_scope_size); drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); - drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */ + drhd->length = cpu_to_le16(sizeof(*drhd) + ioapic_scope_size); drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; drhd->pci_segment = cpu_to_le16(0); drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + /* Scope definition for the root-complex IOAPIC. See VT-d spec + * 8.3.1 (version Oct. 
2014 or later). */ + scope = &drhd->scope[0]; + scope->entry_type = 0x03; /* Type: 0x03 for IOAPIC */ + scope->length = ioapic_scope_size; + scope->enumeration_id = ACPI_BUILD_IOAPIC_ID; + scope->bus = Q35_PSEUDO_BUS_PLATFORM; + scope->path[0] = cpu_to_le16(Q35_PSEUDO_DEVFN_IOAPIC); + build_header(linker, table_data, (void *)(table_data->data + dmar_start), "DMAR", table_data->len - dmar_start, 1, NULL, NULL); } diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index ea9be0bdb1..41c1d95c4c 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -571,6 +571,18 @@ enum { /* * Sub-structures for DMAR */ + +/* Device scope structure for DRHD. */ +struct AcpiDmarDeviceScope { + uint8_t entry_type; + uint8_t length; + uint16_t reserved; + uint8_t enumeration_id; + uint8_t bus; + uint16_t path[0]; /* list of dev:func pairs */ +} QEMU_PACKED; +typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; + /* Type 0: Hardware Unit Definition */ struct AcpiDmarHardwareUnit { uint16_t type; @@ -579,6 +591,7 @@ struct AcpiDmarHardwareUnit { uint8_t reserved; uint16_t pci_segment; /* The PCI Segment associated with this unit */ uint64_t address; /* Base address of remapping hardware register-set */ + AcpiDmarDeviceScope scope[0]; } QEMU_PACKED; typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit; diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 0d64032d87..94486fdd37 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -179,4 +179,12 @@ typedef struct Q35PCIHost { uint64_t mch_mcfg_base(void); +/* + * Arbitary but unique BNF number for IOAPIC device. 
+ * + * TODO: make sure there would have no conflict with real PCI bus + */ +#define Q35_PSEUDO_BUS_PLATFORM (0xff) +#define Q35_PSEUDO_DEVFN_IOAPIC (0x00) + #endif /* HW_Q35_H */ -- cgit v1.2.1 From a58614391d52ef8240071c1db5db6aceaf66a3ea Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:18 +0800 Subject: intel_iommu: define interrupt remap table addr register Defined Interrupt Remap Table Address register to store IR table pointer. Also, do proper handling on global command register writes to store table pointer and its size. One more debug flag "DEBUG_IR" is added for interrupt remapping. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 52 +++++++++++++++++++++++++++++++++++++++++- hw/i386/intel_iommu_internal.h | 4 ++++ include/hw/i386/intel_iommu.h | 5 ++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 9c7a08424f..bf74533240 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -33,7 +33,7 @@ #ifdef DEBUG_INTEL_IOMMU enum { DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, - DEBUG_CACHE, + DEBUG_CACHE, DEBUG_IR, }; #define VTD_DBGBIT(x) (1 << DEBUG_##x) static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); @@ -903,6 +903,19 @@ static void vtd_root_table_setup(IntelIOMMUState *s) (s->root_extended ? 
"(extended)" : "")); } +static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) +{ + uint64_t value = 0; + value = vtd_get_quad_raw(s, DMAR_IRTA_REG); + s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1); + s->intr_root = value & VTD_IRTA_ADDR_MASK; + + /* TODO: invalidate interrupt entry cache */ + + VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32, + s->intr_root, s->intr_size); +} + static void vtd_context_global_invalidate(IntelIOMMUState *s) { s->context_cache_gen++; @@ -1141,6 +1154,16 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS); } +/* Set Interrupt Remap Table Pointer */ +static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) +{ + VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer"); + + vtd_interrupt_remap_table_setup(s); + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); +} + /* Handle Translation Enable/Disable */ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) { @@ -1180,6 +1203,10 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s) /* Queued Invalidation Enable */ vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE); } + if (val & VTD_GCMD_SIRTP) { + /* Set/update the interrupt remapping root-table pointer */ + vtd_handle_gcmd_sirtp(s); + } } /* Handle write to Context Command Register */ @@ -1841,6 +1868,23 @@ static void vtd_mem_write(void *opaque, hwaddr addr, vtd_update_fsts_ppf(s); break; + case DMAR_IRTA_REG: + VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + } + break; + + case DMAR_IRTA_REG_HI: + VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + default: VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64 ", size %d, val 
0x%"PRIx64, addr, size, val); @@ -2032,6 +2076,12 @@ static void vtd_init(IntelIOMMUState *s) /* Fault Recording Registers, 128-bit */ vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0); vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL); + + /* + * Interrupt remapping registers, not support extended interrupt + * mode for now. + */ + vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff00fULL, 0); } /* Should not reset address_spaces when reset because devices will still use diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 5b98a1143c..309833f99a 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -172,6 +172,10 @@ #define VTD_RTADDR_RTT (1ULL << 11) #define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +/* IRTA_REG */ +#define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IRTA_SIZE_MASK (0xfULL) + /* ECAP_REG */ /* (offset >> 4) << 8 */ #define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4) diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 741242e1e6..ce515c42f2 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -125,6 +125,11 @@ struct IntelIOMMUState { MemoryRegionIOMMUOps iommu_ops; GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */ VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */ + + /* interrupt remapping */ + bool intr_enabled; /* Whether guest enabled IR */ + dma_addr_t intr_root; /* Interrupt remapping table pointer */ + uint32_t intr_size; /* Number of IR table entries */ }; /* Find the VTD Address space associated with the given bus pointer, -- cgit v1.2.1 From 80de52ba87d44bf63157900b8dd5ccd5bd795fd4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:19 +0800 Subject: intel_iommu: handle interrupt remap enable Handle writing to IRE bit in global command register. Signed-off-by: Peter Xu Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index bf74533240..6a6cb3be8d 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1183,6 +1183,22 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) } } +/* Handle Interrupt Remap Enable/Disable */ +static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en) +{ + VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? "on" : "off")); + + if (en) { + s->intr_enabled = true; + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES); + } else { + s->intr_enabled = false; + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0); + } +} + /* Handle write to Global Command Register */ static void vtd_handle_gcmd_write(IntelIOMMUState *s) { @@ -1207,6 +1223,10 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s) /* Set/update the interrupt remapping root-table pointer */ vtd_handle_gcmd_sirtp(s); } + if (changed & VTD_GCMD_IRE) { + /* Interrupt remap enable/disable */ + vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE); + } } /* Handle write to Context Command Register */ -- cgit v1.2.1 From 1f91acee179873ce78985b436051479217c46580 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:20 +0800 Subject: intel_iommu: define several structs for IOMMU IR Several data structs are defined to better support the rest of the patches: IRTE to parse remapping table entries, and IOAPIC/MSI related structure bits to parse interrupt entries to be filled in by guest kernel. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/hw/i386/intel_iommu.h | 74 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index ce515c42f2..260aa8ebf3 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -52,6 +52,8 @@ typedef struct IntelIOMMUState IntelIOMMUState; typedef struct VTDAddressSpace VTDAddressSpace; typedef struct VTDIOTLBEntry VTDIOTLBEntry; typedef struct VTDBus VTDBus; +typedef union VTD_IRTE VTD_IRTE; +typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; /* Context-Entry */ struct VTDContextEntry { @@ -90,6 +92,78 @@ struct VTDIOTLBEntry { bool write_flags; }; +/* Interrupt Remapping Table Entry Definition */ +union VTD_IRTE { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t dest_id:32; /* Destination ID */ + uint32_t __reserved_1:8; /* Reserved 1 */ + uint32_t vector:8; /* Interrupt Vector */ + uint32_t irte_mode:1; /* IRTE Mode */ + uint32_t __reserved_0:3; /* Reserved 0 */ + uint32_t __avail:4; /* Available spaces for software */ + uint32_t delivery_mode:3; /* Delivery Mode */ + uint32_t trigger_mode:1; /* Trigger Mode */ + uint32_t redir_hint:1; /* Redirection Hint */ + uint32_t dest_mode:1; /* Destination Mode */ + uint32_t fault_disable:1; /* Fault Processing Disable */ + uint32_t present:1; /* Whether entry present/available */ +#else + uint32_t present:1; /* Whether entry present/available */ + uint32_t fault_disable:1; /* Fault Processing Disable */ + uint32_t dest_mode:1; /* Destination Mode */ + uint32_t redir_hint:1; /* Redirection Hint */ + uint32_t trigger_mode:1; /* Trigger Mode */ + uint32_t delivery_mode:3; /* Delivery Mode */ + uint32_t __avail:4; /* Available spaces for software */ + uint32_t __reserved_0:3; /* Reserved 0 */ + uint32_t irte_mode:1; /* IRTE Mode */ + uint32_t vector:8; /* Interrupt Vector */ + uint32_t __reserved_1:8; /* Reserved 1 */ + uint32_t dest_id:32; /* Destination ID */ +#endif + uint16_t 
source_id:16; /* Source-ID */ +#ifdef HOST_WORDS_BIGENDIAN + uint64_t __reserved_2:44; /* Reserved 2 */ + uint64_t sid_vtype:2; /* Source-ID Validation Type */ + uint64_t sid_q:2; /* Source-ID Qualifier */ +#else + uint64_t sid_q:2; /* Source-ID Qualifier */ + uint64_t sid_vtype:2; /* Source-ID Validation Type */ + uint64_t __reserved_2:44; /* Reserved 2 */ +#endif + } QEMU_PACKED; + uint64_t data[2]; +}; + +#define VTD_IR_INT_FORMAT_COMPAT (0) /* Compatible Interrupt */ +#define VTD_IR_INT_FORMAT_REMAP (1) /* Remappable Interrupt */ + +/* Programming format for MSI/MSI-X addresses */ +union VTD_IR_MSIAddress { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t __head:12; /* Should always be: 0x0fee */ + uint32_t index_l:15; /* Interrupt index bit 14-0 */ + uint32_t int_mode:1; /* Interrupt format */ + uint32_t sub_valid:1; /* SHV: Sub-Handle Valid bit */ + uint32_t index_h:1; /* Interrupt index bit 15 */ + uint32_t __not_care:2; +#else + uint32_t __not_care:2; + uint32_t index_h:1; /* Interrupt index bit 15 */ + uint32_t sub_valid:1; /* SHV: Sub-Handle Valid bit */ + uint32_t int_mode:1; /* Interrupt format */ + uint32_t index_l:15; /* Interrupt index bit 14-0 */ + uint32_t __head:12; /* Should always be: 0x0fee */ +#endif + } QEMU_PACKED; + uint32_t data; +}; + +/* When IR is enabled, all MSI/MSI-X data bits should be zero */ +#define VTD_IR_MSI_DATA (0) + /* The iommu (DMAR) device state struct */ struct IntelIOMMUState { X86IOMMUState x86_iommu; -- cgit v1.2.1 From a4ca297e848a3eda39acaec6941fed4eb35916df Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:21 +0800 Subject: intel_iommu: add IR translation faults defines Adding translation fault definitions for interrupt remapping. Please refer to VT-d spec section 7.1. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu_internal.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 309833f99a..2a9987fbc4 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -271,6 +271,19 @@ typedef enum VTDFaultReason { * context-entry. */ VTD_FR_CONTEXT_ENTRY_TT, + + /* Interrupt remapping transition faults */ + VTD_FR_IR_REQ_RSVD = 0x20, /* One or more IR request reserved + * fields set */ + VTD_FR_IR_INDEX_OVER = 0x21, /* Index value greater than max */ + VTD_FR_IR_ENTRY_P = 0x22, /* Present (P) not set in IRTE */ + VTD_FR_IR_ROOT_INVAL = 0x23, /* IR Root table invalid */ + VTD_FR_IR_IRTE_RSVD = 0x24, /* IRTE Rsvd field non-zero with + * Present flag set */ + VTD_FR_IR_REQ_COMPAT = 0x25, /* Encountered compatible IR + * request while disabled */ + VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ + /* This is not a normal fault reason. We use this to indicate some faults * that are not referenced by the VT-d specification. * Fault event with such reason should not be recorded. -- cgit v1.2.1 From 651e4cefeee8e388919e51f4e299033ab2a8b87d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:22 +0800 Subject: intel_iommu: Add support for PCI MSI remap This patch enables interrupt remapping for PCI devices. To play the trick, one memory region "iommu_ir" is added as child region of the original iommu memory region, covering range 0xfeeXXXXX (which is the address range for APIC). All the writes to this range will be taken as MSI, and translation is carried out only when IR is enabled. Idea suggested by Paolo Bonzini. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 239 +++++++++++++++++++++++++++++++++++++++++ hw/i386/intel_iommu_internal.h | 2 + include/hw/i386/intel_iommu.h | 66 ++++++++++++ 3 files changed, 307 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 6a6cb3be8d..2e57396e6d 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1982,6 +1982,240 @@ static Property vtd_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +/* Read IRTE entry with specific index */ +static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, + VTD_IRTE *entry) +{ + dma_addr_t addr = 0x00; + + addr = iommu->intr_root + index * sizeof(*entry); + if (dma_memory_read(&address_space_memory, addr, entry, + sizeof(*entry))) { + VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64 + " + %"PRIu16, iommu->intr_root, index); + return -VTD_FR_IR_ROOT_INVAL; + } + + if (!entry->present) { + VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE" + " entry index %u value 0x%"PRIx64 " 0x%"PRIx64, + index, le64_to_cpu(entry->data[1]), + le64_to_cpu(entry->data[0])); + return -VTD_FR_IR_ENTRY_P; + } + + if (entry->__reserved_0 || entry->__reserved_1 || \ + entry->__reserved_2) { + VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16 + " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64, + index, le64_to_cpu(entry->data[1]), + le64_to_cpu(entry->data[0])); + return -VTD_FR_IR_IRTE_RSVD; + } + + /* + * TODO: Check Source-ID corresponds to SVT (Source Validation + * Type) bits + */ + + return 0; +} + +/* Fetch IRQ information of specific IR index */ +static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, VTDIrq *irq) +{ + VTD_IRTE irte = { 0 }; + int ret = 0; + + ret = vtd_irte_get(iommu, index, &irte); + if (ret) { + return ret; + } + + irq->trigger_mode = irte.trigger_mode; + irq->vector = irte.vector; + irq->delivery_mode = irte.delivery_mode; + /* Not support EIM yet: please refer to vt-d 9.10 DST bits */ +#define VTD_IR_APIC_DEST_MASK 
(0xff00ULL) +#define VTD_IR_APIC_DEST_SHIFT (8) + irq->dest = (le32_to_cpu(irte.dest_id) & VTD_IR_APIC_DEST_MASK) >> \ + VTD_IR_APIC_DEST_SHIFT; + irq->dest_mode = irte.dest_mode; + irq->redir_hint = irte.redir_hint; + + VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u," + "deliver:%u,dest:%u,dest_mode:%u", index, + irq->trigger_mode, irq->vector, irq->delivery_mode, + irq->dest, irq->dest_mode); + + return 0; +} + +/* Generate one MSI message from VTDIrq info */ +static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out) +{ + VTD_MSIMessage msg = {}; + + /* Generate address bits */ + msg.dest_mode = irq->dest_mode; + msg.redir_hint = irq->redir_hint; + msg.dest = irq->dest; + msg.__addr_head = cpu_to_le32(0xfee); + /* Keep this from original MSI address bits */ + msg.__not_used = irq->msi_addr_last_bits; + + /* Generate data bits */ + msg.vector = irq->vector; + msg.delivery_mode = irq->delivery_mode; + msg.level = 1; + msg.trigger_mode = irq->trigger_mode; + + msg_out->address = msg.msi_addr; + msg_out->data = msg.msi_data; +} + +/* Interrupt remapping for MSI/MSI-X entry */ +static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, + MSIMessage *origin, + MSIMessage *translated) +{ + int ret = 0; + VTD_IR_MSIAddress addr; + uint16_t index; + VTDIrq irq = {0}; + + assert(origin && translated); + + if (!iommu || !iommu->intr_enabled) { + goto do_not_translate; + } + + if (origin->address & VTD_MSI_ADDR_HI_MASK) { + VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero" + " during interrupt remapping: 0x%"PRIx32, + (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \ + VTD_MSI_ADDR_HI_SHIFT)); + return -VTD_FR_IR_REQ_RSVD; + } + + addr.data = origin->address & VTD_MSI_ADDR_LO_MASK; + if (le16_to_cpu(addr.__head) != 0xfee) { + VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: " + "0x%"PRIx32, addr.data); + return -VTD_FR_IR_REQ_RSVD; + } + + /* This is compatible mode. 
*/ + if (addr.int_mode != VTD_IR_INT_FORMAT_REMAP) { + goto do_not_translate; + } + + index = addr.index_h << 15 | le16_to_cpu(addr.index_l); + +#define VTD_IR_MSI_DATA_SUBHANDLE (0x0000ffff) +#define VTD_IR_MSI_DATA_RESERVED (0xffff0000) + + if (addr.sub_valid) { + /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */ + index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE; + } + + ret = vtd_remap_irq_get(iommu, index, &irq); + if (ret) { + return ret; + } + + if (addr.sub_valid) { + VTD_DPRINTF(IR, "received MSI interrupt"); + if (origin->data & VTD_IR_MSI_DATA_RESERVED) { + VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for " + "interrupt remappable entry: 0x%"PRIx32, + origin->data); + return -VTD_FR_IR_REQ_RSVD; + } + } else { + uint8_t vector = origin->data & 0xff; + VTD_DPRINTF(IR, "received IOAPIC interrupt"); + /* IOAPIC entry vector should be aligned with IRTE vector + * (see vt-d spec 5.1.5.1). */ + if (vector != irq.vector) { + VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: " + "entry: %d, IRTE: %d, index: %d", + vector, irq.vector, index); + } + } + + /* + * We'd better keep the last two bits, assuming that guest OS + * might modify them. Keeping them does not hurt after all. 
+ */ + irq.msi_addr_last_bits = addr.__not_care; + + /* Translate VTDIrq to MSI message */ + vtd_generate_msi_message(&irq, translated); + + VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> " + "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data, + translated->address, translated->data); + return 0; + +do_not_translate: + memcpy(translated, origin, sizeof(*origin)); + return 0; +} + +static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + return MEMTX_OK; +} + +static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + int ret = 0; + MSIMessage from = {0}, to = {0}; + + from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST; + from.data = (uint32_t) value; + + ret = vtd_interrupt_remap_msi(opaque, &from, &to); + if (ret) { + /* TODO: report error */ + VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64 + " data 0x%"PRIx32, from.address, from.data); + /* Drop this interrupt */ + return MEMTX_ERROR; + } + + VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32 + " for device sid 0x%04x", + to.address, to.data, sid); + + if (dma_memory_write(&address_space_memory, to.address, + &to.data, size)) { + VTD_DPRINTF(GENERAL, "error: fail to write 0x%"PRIx64 + " value 0x%"PRIx32, to.address, to.data); + } + + return MEMTX_OK; +} + +static const MemoryRegionOps vtd_mem_ir_ops = { + .read_with_attrs = vtd_mem_ir_read, + .write_with_attrs = vtd_mem_ir_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) { @@ -2009,6 +2243,11 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) vtd_dev_as->context_cache_entry.context_cache_gen = 0; memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s), &s->iommu_ops, 
"intel_iommu", UINT64_MAX); + memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s), + &vtd_mem_ir_ops, s, "intel_iommu_ir", + VTD_INTERRUPT_ADDR_SIZE); + memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST, + &vtd_dev_as->iommu_ir); address_space_init(&vtd_dev_as->as, &vtd_dev_as->iommu, "intel_iommu"); } diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 2a9987fbc4..e1a08cb496 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -110,6 +110,8 @@ /* Interrupt Address Range */ #define VTD_INTERRUPT_ADDR_FIRST 0xfee00000ULL #define VTD_INTERRUPT_ADDR_LAST 0xfeefffffULL +#define VTD_INTERRUPT_ADDR_SIZE (VTD_INTERRUPT_ADDR_LAST - \ + VTD_INTERRUPT_ADDR_FIRST + 1) /* The shift of source_id in the key of IOTLB hash table */ #define VTD_IOTLB_SID_SHIFT 36 diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 260aa8ebf3..cdbbddd79e 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -24,6 +24,8 @@ #include "hw/qdev.h" #include "sysemu/dma.h" #include "hw/i386/x86-iommu.h" +#include "hw/i386/ioapic.h" +#include "hw/pci/msi.h" #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu" #define INTEL_IOMMU_DEVICE(obj) \ @@ -46,6 +48,10 @@ #define DMAR_REPORT_F_INTR (1) +#define VTD_MSI_ADDR_HI_MASK (0xffffffff00000000ULL) +#define VTD_MSI_ADDR_HI_SHIFT (32) +#define VTD_MSI_ADDR_LO_MASK (0x00000000ffffffffULL) + typedef struct VTDContextEntry VTDContextEntry; typedef struct VTDContextCacheEntry VTDContextCacheEntry; typedef struct IntelIOMMUState IntelIOMMUState; @@ -54,6 +60,8 @@ typedef struct VTDIOTLBEntry VTDIOTLBEntry; typedef struct VTDBus VTDBus; typedef union VTD_IRTE VTD_IRTE; typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; +typedef struct VTDIrq VTDIrq; +typedef struct VTD_MSIMessage VTD_MSIMessage; /* Context-Entry */ struct VTDContextEntry { @@ -74,6 +82,7 @@ struct VTDAddressSpace { uint8_t devfn; AddressSpace as; MemoryRegion iommu; 
+ MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */ IntelIOMMUState *iommu_state; VTDContextCacheEntry context_cache_entry; }; @@ -161,6 +170,63 @@ union VTD_IR_MSIAddress { uint32_t data; }; +/* Generic IRQ entry information */ +struct VTDIrq { + /* Used by both IOAPIC/MSI interrupt remapping */ + uint8_t trigger_mode; + uint8_t vector; + uint8_t delivery_mode; + uint32_t dest; + uint8_t dest_mode; + + /* only used by MSI interrupt remapping */ + uint8_t redir_hint; + uint8_t msi_addr_last_bits; +}; + +struct VTD_MSIMessage { + union { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t __addr_head:12; /* 0xfee */ + uint32_t dest:8; + uint32_t __reserved:8; + uint32_t redir_hint:1; + uint32_t dest_mode:1; + uint32_t __not_used:2; +#else + uint32_t __not_used:2; + uint32_t dest_mode:1; + uint32_t redir_hint:1; + uint32_t __reserved:8; + uint32_t dest:8; + uint32_t __addr_head:12; /* 0xfee */ +#endif + uint32_t __addr_hi:32; + } QEMU_PACKED; + uint64_t msi_addr; + }; + union { + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint16_t trigger_mode:1; + uint16_t level:1; + uint16_t __resved:3; + uint16_t delivery_mode:3; + uint16_t vector:8; +#else + uint16_t vector:8; + uint16_t delivery_mode:3; + uint16_t __resved:3; + uint16_t level:1; + uint16_t trigger_mode:1; +#endif + uint16_t __resved1:16; + } QEMU_PACKED; + uint32_t msi_data; + }; +}; + /* When IR is enabled, all MSI/MSI-X data bits should be zero */ #define VTD_IR_MSI_DATA (0) -- cgit v1.2.1 From 09cd058a2cf77bb7a3b10ff93c1f80ed88bca364 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 21 Jul 2016 18:42:42 +0300 Subject: intel_iommu: get rid of {0} initializers Correct and portable in theory, but triggers warnings with older gcc versions when -Wmissing-braces is enabled. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119 Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 2e57396e6d..ccfcc69347 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2024,7 +2024,7 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, /* Fetch IRQ information of specific IR index */ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, VTDIrq *irq) { - VTD_IRTE irte = { 0 }; + VTD_IRTE irte = {}; int ret = 0; ret = vtd_irte_get(iommu, index, &irte); @@ -2082,7 +2082,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, int ret = 0; VTD_IR_MSIAddress addr; uint16_t index; - VTDIrq irq = {0}; + VTDIrq irq = {}; assert(origin && translated); @@ -2176,7 +2176,7 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr, MemTxAttrs attrs) { int ret = 0; - MSIMessage from = {0}, to = {0}; + MSIMessage from = {}, to = {}; from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST; from.data = (uint32_t) value; -- cgit v1.2.1 From cb135f59b8059c3a372652377ef92fa4a49ad550 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:23 +0800 Subject: q35: ioapic: add support for emulated IOAPIC IR This patch translates all IOAPIC interrupts into MSI ones. One pseudo ioapic address space is added to transfer the MSI message. By default, it will be system memory address space. When IR is enabled, it will be IOMMU address space. Currently, only emulated IOAPIC is supported. Idea suggested by Jan Kiszka and Rita Sinha in the following patch: https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg01933.html Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 6 +++++- hw/i386/pc.c | 3 +++ hw/intc/ioapic.c | 28 ++++++++++++++++++++++++---- include/hw/i386/apic-msidef.h | 1 + include/hw/i386/ioapic_internal.h | 1 + include/hw/i386/pc.h | 4 ++++ 6 files changed, 38 insertions(+), 5 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index ccfcc69347..6ba5520283 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -28,6 +28,7 @@ #include "hw/i386/pc.h" #include "hw/boards.h" #include "hw/i386/x86-iommu.h" +#include "hw/pci-host/q35.h" /*#define DEBUG_INTEL_IOMMU*/ #ifdef DEBUG_INTEL_IOMMU @@ -2367,7 +2368,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) static void vtd_realize(DeviceState *dev, Error **errp) { - PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus; + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); VTD_DPRINTF(GENERAL, ""); @@ -2383,6 +2385,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) vtd_init(s); sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); pci_setup_iommu(bus, vtd_host_dma_iommu, dev); + /* Pseudo address space under root PCI bus. 
*/ + pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); } static void vtd_class_init(ObjectClass *klass, void *data) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 719884ff88..979f36d99f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1475,6 +1475,9 @@ void pc_memory_init(PCMachineState *pcms, rom_add_option(option_rom[i].name, option_rom[i].bootindex); } pcms->fw_cfg = fw_cfg; + + /* Init default IOAPIC address space */ + pcms->ioapic_as = &address_space_memory; } qemu_irq pc_allocate_cpu_irq(void) diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 273bb0854c..36dd42af20 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -29,6 +29,8 @@ #include "hw/i386/ioapic_internal.h" #include "include/hw/pci/msi.h" #include "sysemu/kvm.h" +#include "target-i386/cpu.h" +#include "hw/i386/apic-msidef.h" //#define DEBUG_IOAPIC @@ -50,13 +52,15 @@ extern int ioapic_no; static void ioapic_service(IOAPICCommonState *s) { + AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as; + uint32_t addr, data; uint8_t i; uint8_t trig_mode; uint8_t vector; uint8_t delivery_mode; uint32_t mask; uint64_t entry; - uint8_t dest; + uint16_t dest_idx; uint8_t dest_mode; for (i = 0; i < IOAPIC_NUM_PINS; i++) { @@ -67,7 +71,14 @@ static void ioapic_service(IOAPICCommonState *s) entry = s->ioredtbl[i]; if (!(entry & IOAPIC_LVT_MASKED)) { trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1); - dest = entry >> IOAPIC_LVT_DEST_SHIFT; + /* + * By default, this would be dest_id[8] + + * reserved[8]. When IR is enabled, this would be + * interrupt_index[15] + interrupt_format[1]. This + * field never means anything, but only used to + * generate corresponding MSI. 
+ */ + dest_idx = entry >> IOAPIC_LVT_DEST_IDX_SHIFT; dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK; @@ -97,8 +108,17 @@ static void ioapic_service(IOAPICCommonState *s) #else (void)coalesce; #endif - apic_deliver_irq(dest, dest_mode, delivery_mode, vector, - trig_mode); + /* No matter whether IR is enabled, we translate + * the IOAPIC message into a MSI one, and its + * address space will decide whether we need a + * translation. */ + addr = APIC_DEFAULT_ADDRESS | \ + (dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | + (dest_mode << MSI_ADDR_DEST_MODE_SHIFT); + data = (vector << MSI_DATA_VECTOR_SHIFT) | + (trig_mode << MSI_DATA_TRIGGER_SHIFT) | + (delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT); + stl_le_phys(ioapic_as, addr, data); } } } diff --git a/include/hw/i386/apic-msidef.h b/include/hw/i386/apic-msidef.h index 6e2eb71f2f..8b4d4cca55 100644 --- a/include/hw/i386/apic-msidef.h +++ b/include/hw/i386/apic-msidef.h @@ -25,6 +25,7 @@ #define MSI_ADDR_REDIRECTION_SHIFT 3 #define MSI_ADDR_DEST_ID_SHIFT 12 +#define MSI_ADDR_DEST_IDX_SHIFT 4 #define MSI_ADDR_DEST_ID_MASK 0x00ffff0 #endif /* HW_APIC_MSIDEF_H */ diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h index 0542aa1131..5c901ae78f 100644 --- a/include/hw/i386/ioapic_internal.h +++ b/include/hw/i386/ioapic_internal.h @@ -31,6 +31,7 @@ #define IOAPIC_VERSION 0x11 #define IOAPIC_LVT_DEST_SHIFT 56 +#define IOAPIC_LVT_DEST_IDX_SHIFT 48 #define IOAPIC_LVT_MASKED_SHIFT 16 #define IOAPIC_LVT_TRIGGER_MODE_SHIFT 15 #define IOAPIC_LVT_REMOTE_IRR_SHIFT 14 diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index e38c95a4da..9811125492 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -72,6 +72,10 @@ struct PCMachineState { /* NUMA information: */ uint64_t numa_nodes; uint64_t *node_mem; + + /* Address space used by IOAPIC device. 
All IOAPIC interrupts + * will be translated to MSI messages in the address space. */ + AddressSpace *ioapic_as; }; #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" -- cgit v1.2.1 From c15fa0bea982766c5c3de28d1abd245e3c44a17f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:24 +0800 Subject: ioapic: introduce ioapic_entry_parse() helper Abstract IOAPIC entry parsing logic into a helper function. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/intc/ioapic.c | 110 +++++++++++++++++++++++++++---------------------------- 1 file changed, 54 insertions(+), 56 deletions(-) diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 36dd42af20..cfc7b7b958 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -50,18 +50,56 @@ static IOAPICCommonState *ioapics[MAX_IOAPICS]; /* global variable from ioapic_common.c */ extern int ioapic_no; +struct ioapic_entry_info { + /* fields parsed from IOAPIC entries */ + uint8_t masked; + uint8_t trig_mode; + uint16_t dest_idx; + uint8_t dest_mode; + uint8_t delivery_mode; + uint8_t vector; + + /* MSI message generated from above parsed fields */ + uint32_t addr; + uint32_t data; +}; + +static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info) +{ + memset(info, 0, sizeof(*info)); + info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1; + info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1; + /* + * By default, this would be dest_id[8] + reserved[8]. When IR + * is enabled, this would be interrupt_index[15] + + * interrupt_format[1]. This field never means anything, but + * only used to generate corresponding MSI. 
+ */ + info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff; + info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; + info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \ + & IOAPIC_DM_MASK; + if (info->delivery_mode == IOAPIC_DM_EXTINT) { + info->vector = pic_read_irq(isa_pic); + } else { + info->vector = entry & IOAPIC_VECTOR_MASK; + } + + info->addr = APIC_DEFAULT_ADDRESS | \ + (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \ + (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT); + info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \ + (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \ + (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT); +} + static void ioapic_service(IOAPICCommonState *s) { AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as; - uint32_t addr, data; + struct ioapic_entry_info info; uint8_t i; - uint8_t trig_mode; - uint8_t vector; - uint8_t delivery_mode; uint32_t mask; uint64_t entry; - uint16_t dest_idx; - uint8_t dest_mode; for (i = 0; i < IOAPIC_NUM_PINS; i++) { mask = 1 << i; @@ -69,33 +107,18 @@ static void ioapic_service(IOAPICCommonState *s) int coalesce = 0; entry = s->ioredtbl[i]; - if (!(entry & IOAPIC_LVT_MASKED)) { - trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1); - /* - * By default, this would be dest_id[8] + - * reserved[8]. When IR is enabled, this would be - * interrupt_index[15] + interrupt_format[1]. This - * field never means anything, but only used to - * generate corresponding MSI. 
- */ - dest_idx = entry >> IOAPIC_LVT_DEST_IDX_SHIFT; - dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; - delivery_mode = - (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK; - if (trig_mode == IOAPIC_TRIGGER_EDGE) { + ioapic_entry_parse(entry, &info); + if (!info.masked) { + if (info.trig_mode == IOAPIC_TRIGGER_EDGE) { s->irr &= ~mask; } else { coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR; s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR; } - if (delivery_mode == IOAPIC_DM_EXTINT) { - vector = pic_read_irq(isa_pic); - } else { - vector = entry & IOAPIC_VECTOR_MASK; - } + #ifdef CONFIG_KVM if (kvm_irqchip_is_split()) { - if (trig_mode == IOAPIC_TRIGGER_EDGE) { + if (info.trig_mode == IOAPIC_TRIGGER_EDGE) { kvm_set_irq(kvm_state, i, 1); kvm_set_irq(kvm_state, i, 0); } else { @@ -112,13 +135,7 @@ static void ioapic_service(IOAPICCommonState *s) * the IOAPIC message into a MSI one, and its * address space will decide whether we need a * translation. */ - addr = APIC_DEFAULT_ADDRESS | \ - (dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | - (dest_mode << MSI_ADDR_DEST_MODE_SHIFT); - data = (vector << MSI_DATA_VECTOR_SHIFT) | - (trig_mode << MSI_DATA_TRIGGER_SHIFT) | - (delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT); - stl_le_phys(ioapic_as, addr, data); + stl_le_phys(ioapic_as, info.addr, info.data); } } } @@ -169,30 +186,11 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) if (kvm_irqchip_is_split()) { for (i = 0; i < IOAPIC_NUM_PINS; i++) { - uint64_t entry = s->ioredtbl[i]; - uint8_t trig_mode; - uint8_t delivery_mode; - uint8_t dest; - uint8_t dest_mode; - uint64_t pin_polarity; MSIMessage msg; - - trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1); - dest = entry >> IOAPIC_LVT_DEST_SHIFT; - dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; - pin_polarity = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1; - delivery_mode = - (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK; - - msg.address = APIC_DEFAULT_ADDRESS; - msg.address |= 
dest_mode << 2; - msg.address |= dest << 12; - - msg.data = entry & IOAPIC_VECTOR_MASK; - msg.data |= delivery_mode << APIC_DELIVERY_MODE_SHIFT; - msg.data |= pin_polarity << APIC_POLARITY_SHIFT; - msg.data |= trig_mode << APIC_TRIG_MODE_SHIFT; - + struct ioapic_entry_info info; + ioapic_entry_parse(s->ioredtbl[i], &info); + msg.address = info.addr; + msg.data = info.data; kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); } kvm_irqchip_commit_routes(kvm_state); -- cgit v1.2.1 From 8b5ed7dffa1fa2835a782a8db8d4f3f1f772ada9 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:25 +0800 Subject: intel_iommu: add support for split irqchip In split irqchip mode, IOAPIC is working in user space, only update kernel irq routes when entry changed. When IR is enabled, we directly update the kernel with translated messages. It works just like a kernel cache for the remapping entries. Since KVM irqfd is using kernel gsi routes to deliver interrupts, as long as we can support split irqchip, we will support irqfd as well. Also, since kernel gsi routes will cache translated interrupts, irqfd delivery will not suffer from any performance impact due to IR. And, since we supported irqfd, vhost devices will be able to work seamlessly with IR now. Logically this should contain both vhost-net and vhost-user case. Signed-off-by: Paolo Bonzini [move trace-events lines into target-i386/trace-events] Signed-off-by: Peter Xu Reviewed-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- Makefile.objs | 1 + hw/i386/intel_iommu.c | 7 +++++++ include/hw/i386/intel_iommu.h | 1 + include/hw/i386/x86-iommu.h | 5 +++++ target-i386/kvm.c | 27 +++++++++++++++++++++++++++ target-i386/trace-events | 4 ++++ 6 files changed, 45 insertions(+) create mode 100644 target-i386/trace-events diff --git a/Makefile.objs b/Makefile.objs index 7f1f0a3ffd..6d5ddcfd3e 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -153,6 +153,7 @@ trace-events-y += hw/alpha/trace-events trace-events-y += ui/trace-events trace-events-y += audio/trace-events trace-events-y += net/trace-events +trace-events-y += target-i386/trace-events trace-events-y += target-sparc/trace-events trace-events-y += target-s390x/trace-events trace-events-y += target-ppc/trace-events diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 6ba5520283..c7ded0fc81 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2165,6 +2165,12 @@ do_not_translate: return 0; } +static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src, + MSIMessage *dst, uint16_t sid) +{ + return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu), src, dst); +} + static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr, uint64_t *data, unsigned size, MemTxAttrs attrs) @@ -2399,6 +2405,7 @@ static void vtd_class_init(ObjectClass *klass, void *data) dc->props = vtd_properties; dc->hotpluggable = false; x86_class->realize = vtd_realize; + x86_class->int_remap = vtd_int_remap; } static const TypeInfo vtd_info = { diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index cdbbddd79e..e048ced5d5 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -26,6 +26,7 @@ #include "hw/i386/x86-iommu.h" #include "hw/i386/ioapic.h" #include "hw/pci/msi.h" +#include "hw/sysbus.h" #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu" #define INTEL_IOMMU_DEVICE(obj) \ diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 699dd0615c..fa6ce31b74 100644 --- 
a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -21,6 +21,7 @@ #define IOMMU_COMMON_H #include "hw/sysbus.h" +#include "hw/pci/pci.h" #define TYPE_X86_IOMMU_DEVICE ("x86-iommu") #define X86_IOMMU_DEVICE(obj) \ @@ -31,6 +32,7 @@ OBJECT_GET_CLASS(X86IOMMUClass, obj, TYPE_X86_IOMMU_DEVICE) #define X86_IOMMU_PCI_DEVFN_MAX 256 +#define X86_IOMMU_SID_INVALID (0xffff) typedef struct X86IOMMUState X86IOMMUState; typedef struct X86IOMMUClass X86IOMMUClass; @@ -39,6 +41,9 @@ struct X86IOMMUClass { SysBusDeviceClass parent; /* Intel/AMD specific realize() hook */ DeviceRealize realize; + /* MSI-based interrupt remapping */ + int (*int_remap)(X86IOMMUState *iommu, MSIMessage *src, + MSIMessage *dst, uint16_t sid); }; struct X86IOMMUState { diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 93275231ec..9c00e48952 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -35,6 +35,7 @@ #include "hw/i386/apic.h" #include "hw/i386/apic_internal.h" #include "hw/i386/apic-msidef.h" +#include "hw/i386/intel_iommu.h" #include "exec/ioport.h" #include "standard-headers/asm-x86/hyperv.h" @@ -42,6 +43,7 @@ #include "hw/pci/msi.h" #include "migration/migration.h" #include "exec/memattrs.h" +#include "trace.h" //#define DEBUG_KVM @@ -3371,6 +3373,31 @@ int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id) int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, uint64_t address, uint32_t data, PCIDevice *dev) { + X86IOMMUState *iommu = x86_iommu_get_default(); + + if (iommu) { + int ret; + MSIMessage src, dst; + X86IOMMUClass *class = X86_IOMMU_GET_CLASS(iommu); + + src.address = route->u.msi.address_hi; + src.address <<= VTD_MSI_ADDR_HI_SHIFT; + src.address |= route->u.msi.address_lo; + src.data = route->u.msi.data; + + ret = class->int_remap(iommu, &src, &dst, dev ? 
\ + pci_requester_id(dev) : \ + X86_IOMMU_SID_INVALID); + if (ret) { + trace_kvm_x86_fixup_msi_error(route->gsi); + return 1; + } + + route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; + route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; + route->u.msi.data = dst.data; + } + return 0; } diff --git a/target-i386/trace-events b/target-i386/trace-events new file mode 100644 index 0000000000..2113075cd1 --- /dev/null +++ b/target-i386/trace-events @@ -0,0 +1,4 @@ +# See docs/trace-events.txt for syntax documentation. + +# target-i386/kvm.c +kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" PRIu32 -- cgit v1.2.1 From 02a2cbc872df99205eeafd399f01c210e0b797c4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:26 +0800 Subject: x86-iommu: introduce IEC notifiers This patch introduces x86 IOMMU IEC (Interrupt Entry Cache) invalidation notifier list. When vIOMMU receives IEC invalidate request, all the registered units will be notified with specific invalidation requests. Intel IOMMU is the first provider that generates such an event. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 36 +++++++++++++++++++++++++++++------- hw/i386/intel_iommu_internal.h | 24 ++++++++++++++++++++---- hw/i386/trace-events | 3 +++ hw/i386/x86-iommu.c | 29 +++++++++++++++++++++++++++++ include/hw/i386/x86-iommu.h | 40 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 121 insertions(+), 11 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c7ded0fc81..2acec8555d 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -904,6 +904,12 @@ static void vtd_root_table_setup(IntelIOMMUState *s) (s->root_extended ? 
"(extended)" : "")); } +static void vtd_iec_notify_all(IntelIOMMUState *s, bool global, + uint32_t index, uint32_t mask) +{ + x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask); +} + static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) { uint64_t value = 0; @@ -911,7 +917,8 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1); s->intr_root = value & VTD_IRTA_ADDR_MASK; - /* TODO: invalidate interrupt entry cache */ + /* Notify global invalidation */ + vtd_iec_notify_all(s, true, 0, 0); VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32, s->intr_root, s->intr_size); @@ -1413,6 +1420,21 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) return true; } +static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d", + inv_desc->iec.granularity, + inv_desc->iec.index, + inv_desc->iec.index_mask); + + vtd_iec_notify_all(s, !inv_desc->iec.granularity, + inv_desc->iec.index, + inv_desc->iec.index_mask); + + return true; +} + static bool vtd_process_inv_desc(IntelIOMMUState *s) { VTDInvDesc inv_desc; @@ -1453,12 +1475,12 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) break; case VTD_INV_DESC_IEC: - VTD_DPRINTF(INV, "Interrupt Entry Cache Invalidation " - "not implemented yet"); - /* - * Since currently we do not cache interrupt entries, we can - * just mark this descriptor as "good" and move on. 
- */ + VTD_DPRINTF(INV, "Invalidation Interrupt Entry Cache " + "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, + inv_desc.hi, inv_desc.lo); + if (!vtd_process_inv_iec_desc(s, &inv_desc)) { + return false; + } break; default: diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index e1a08cb496..10c20fef20 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -296,12 +296,28 @@ typedef enum VTDFaultReason { #define VTD_CONTEXT_CACHE_GEN_MAX 0xffffffffUL +/* Interrupt Entry Cache Invalidation Descriptor: VT-d 6.5.2.7. */ +struct VTDInvDescIEC { + uint32_t type:4; /* Should always be 0x4 */ + uint32_t granularity:1; /* If set, it's global IR invalidation */ + uint32_t resved_1:22; + uint32_t index_mask:5; /* 2^N for continuous int invalidation */ + uint32_t index:16; /* Start index to invalidate */ + uint32_t reserved_2:16; +}; +typedef struct VTDInvDescIEC VTDInvDescIEC; + /* Queued Invalidation Descriptor */ -struct VTDInvDesc { - uint64_t lo; - uint64_t hi; +union VTDInvDesc { + struct { + uint64_t lo; + uint64_t hi; + }; + union { + VTDInvDescIEC iec; + }; }; -typedef struct VTDInvDesc VTDInvDesc; +typedef union VTDInvDesc VTDInvDesc; /* Masks for struct VTDInvDesc */ #define VTD_INV_DESC_TYPE 0xf diff --git a/hw/i386/trace-events b/hw/i386/trace-events index ea77bc24c3..b4882c1157 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -10,3 +10,6 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad # hw/i386/pc.c mhp_pc_dimm_assigned_slot(int slot) "0x%d" mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64 + +# hw/i386/x86-iommu.c +x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32 diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index 4280839860..ce26b2a71d 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -22,6 +22,33 @@ #include "hw/boards.h" #include 
"hw/i386/x86-iommu.h" #include "qemu/error-report.h" +#include "trace.h" + +void x86_iommu_iec_register_notifier(X86IOMMUState *iommu, + iec_notify_fn fn, void *data) +{ + IEC_Notifier *notifier = g_new0(IEC_Notifier, 1); + + notifier->iec_notify = fn; + notifier->private = data; + + QLIST_INSERT_HEAD(&iommu->iec_notifiers, notifier, list); +} + +void x86_iommu_iec_notify_all(X86IOMMUState *iommu, bool global, + uint32_t index, uint32_t mask) +{ + IEC_Notifier *notifier; + + trace_x86_iommu_iec_notify(global, index, mask); + + QLIST_FOREACH(notifier, &iommu->iec_notifiers, list) { + if (notifier->iec_notify) { + notifier->iec_notify(notifier->private, global, + index, mask); + } + } +} /* Default X86 IOMMU device */ static X86IOMMUState *x86_iommu_default = NULL; @@ -46,7 +73,9 @@ X86IOMMUState *x86_iommu_get_default(void) static void x86_iommu_realize(DeviceState *dev, Error **errp) { + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); X86IOMMUClass *x86_class = X86_IOMMU_GET_CLASS(dev); + QLIST_INIT(&x86_iommu->iec_notifiers); if (x86_class->realize) { x86_class->realize(dev, errp); } diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index fa6ce31b74..c48e8dd597 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -46,9 +46,28 @@ struct X86IOMMUClass { MSIMessage *dst, uint16_t sid); }; +/** + * iec_notify_fn - IEC (Interrupt Entry Cache) notifier hook, + * triggered when IR invalidation happens. 
+ * @private: private data + * @global: whether this is a global IEC invalidation + * @index: IRTE index to invalidate (start from) + * @mask: invalidation mask + */ +typedef void (*iec_notify_fn)(void *private, bool global, + uint32_t index, uint32_t mask); + +struct IEC_Notifier { + iec_notify_fn iec_notify; + void *private; + QLIST_ENTRY(IEC_Notifier) list; +}; +typedef struct IEC_Notifier IEC_Notifier; + struct X86IOMMUState { SysBusDevice busdev; bool intr_supported; /* Whether vIOMMU supports IR */ + QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ }; /** @@ -57,4 +76,25 @@ struct X86IOMMUState { */ X86IOMMUState *x86_iommu_get_default(void); +/** + * x86_iommu_iec_register_notifier - register IEC (Interrupt Entry + * Cache) notifiers + * @iommu: IOMMU device to register + * @fn: IEC notifier hook function + * @data: notifier private data + */ +void x86_iommu_iec_register_notifier(X86IOMMUState *iommu, + iec_notify_fn fn, void *data); + +/** + * x86_iommu_iec_notify_all - Notify IEC invalidations + * @iommu: IOMMU device that sends the notification + * @global: whether this is a global invalidation. If true, @index + * and @mask are undefined. + * @index: starting index of interrupt entry to invalidate + * @mask: index mask for the invalidation + */ +void x86_iommu_iec_notify_all(X86IOMMUState *iommu, bool global, + uint32_t index, uint32_t mask); + #endif -- cgit v1.2.1 From e3d9c92507df61608896a579b5b0d7c218d5353e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:27 +0800 Subject: ioapic: register IOMMU IEC notifier for ioapic Let IOAPIC be the first consumer of x86 IOMMU IEC invalidation notifiers. This is only used for split irqchip case, when vIOMMU receives IR invalidation requests, IOAPIC will be notified to update kernel irq routes. For simplicity, we just update all IOAPIC routes, even if the invalidated entries are not IOAPIC ones.
Since now we are creating IOMMUs using "-device" parameter, IOMMU device will be created after IOAPIC. We need to do the registration after machine done by leveraging machine_done notifier. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/intc/ioapic.c | 31 +++++++++++++++++++++++++++++++ include/hw/i386/ioapic_internal.h | 2 ++ 2 files changed, 33 insertions(+) diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index cfc7b7b958..2d3282a864 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -31,6 +31,7 @@ #include "sysemu/kvm.h" #include "target-i386/cpu.h" #include "hw/i386/apic-msidef.h" +#include "hw/i386/x86-iommu.h" //#define DEBUG_IOAPIC @@ -198,6 +199,16 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) #endif } +#ifdef CONFIG_KVM +static void ioapic_iec_notifier(void *private, bool global, + uint32_t index, uint32_t mask) +{ + IOAPICCommonState *s = (IOAPICCommonState *)private; + /* For simplicity, we just update all the routes */ + ioapic_update_kvm_routes(s); +} +#endif + void ioapic_eoi_broadcast(int vector) { IOAPICCommonState *s; @@ -354,6 +365,24 @@ static const MemoryRegionOps ioapic_io_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; +static void ioapic_machine_done_notify(Notifier *notifier, void *data) +{ +#ifdef CONFIG_KVM + IOAPICCommonState *s = container_of(notifier, IOAPICCommonState, + machine_done); + + if (kvm_irqchip_is_split()) { + X86IOMMUState *iommu = x86_iommu_get_default(); + if (iommu) { + /* Register this IOAPIC with IOMMU IEC notifier, so that + * when there are IR invalidates, we can be notified to + * update kernel IR cache. 
*/ + x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s); + } + } +#endif +} + static void ioapic_realize(DeviceState *dev, Error **errp) { IOAPICCommonState *s = IOAPIC_COMMON(dev); @@ -364,6 +393,8 @@ static void ioapic_realize(DeviceState *dev, Error **errp) qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS); ioapics[ioapic_no] = s; + s->machine_done.notify = ioapic_machine_done_notify; + qemu_add_machine_init_done_notifier(&s->machine_done); } static void ioapic_class_init(ObjectClass *klass, void *data) diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h index 5c901ae78f..d89ea1b63b 100644 --- a/include/hw/i386/ioapic_internal.h +++ b/include/hw/i386/ioapic_internal.h @@ -25,6 +25,7 @@ #include "hw/hw.h" #include "exec/memory.h" #include "hw/sysbus.h" +#include "qemu/notify.h" #define MAX_IOAPICS 1 @@ -107,6 +108,7 @@ struct IOAPICCommonState { uint8_t ioregsel; uint32_t irr; uint64_t ioredtbl[IOAPIC_NUM_PINS]; + Notifier machine_done; }; void ioapic_reset_common(DeviceState *dev); -- cgit v1.2.1 From 28589311b326398e88bf6804db8575a6da426b39 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 14 Jul 2016 13:56:28 +0800 Subject: intel_iommu: Add support for Extended Interrupt Mode As neither QEMU nor KVM support more than 255 CPUs so far, this is simple: we only need to switch the destination ID translation in vtd_remap_irq_get if EIME is set. Once CFI support is there, it will have to take EIM into account as well. So far, nothing to do for this. This patch allows to use x2APIC in split irqchip mode of KVM. Signed-off-by: Jan Kiszka [use le32_to_cpu() to retrieve dest_id] Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 16 +++++++++------- hw/i386/intel_iommu_internal.h | 2 ++ include/hw/i386/intel_iommu.h | 1 + 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 2acec8555d..a605b5852e 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -916,6 +916,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) value = vtd_get_quad_raw(s, DMAR_IRTA_REG); s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1); s->intr_root = value & VTD_IRTA_ADDR_MASK; + s->intr_eime = value & VTD_IRTA_EIME; /* Notify global invalidation */ vtd_iec_notify_all(s, true, 0, 0); @@ -2058,11 +2059,13 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, VTDIrq *irq irq->trigger_mode = irte.trigger_mode; irq->vector = irte.vector; irq->delivery_mode = irte.delivery_mode; - /* Not support EIM yet: please refer to vt-d 9.10 DST bits */ + irq->dest = le32_to_cpu(irte.dest_id); + if (!iommu->intr_eime) { #define VTD_IR_APIC_DEST_MASK (0xff00ULL) #define VTD_IR_APIC_DEST_SHIFT (8) - irq->dest = (le32_to_cpu(irte.dest_id) & VTD_IR_APIC_DEST_MASK) >> \ - VTD_IR_APIC_DEST_SHIFT; + irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >> + VTD_IR_APIC_DEST_SHIFT; + } irq->dest_mode = irte.dest_mode; irq->redir_hint = irte.redir_hint; @@ -2312,7 +2315,7 @@ static void vtd_init(IntelIOMMUState *s) s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; if (x86_iommu->intr_supported) { - s->ecap |= VTD_ECAP_IR; + s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM; } vtd_reset_context_cache(s); @@ -2366,10 +2369,9 @@ static void vtd_init(IntelIOMMUState *s) vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL); /* - * Interrupt remapping registers, not support extended interrupt - * mode for now. + * Interrupt remapping registers. 
*/ - vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff00fULL, 0); + vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0); } /* Should not reset address_spaces when reset because devices will still use diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 10c20fef20..72b0114927 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -176,6 +176,7 @@ /* IRTA_REG */ #define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IRTA_EIME (1ULL << 11) #define VTD_IRTA_SIZE_MASK (0xfULL) /* ECAP_REG */ @@ -184,6 +185,7 @@ #define VTD_ECAP_QI (1ULL << 1) /* Interrupt Remapping support */ #define VTD_ECAP_IR (1ULL << 3) +#define VTD_ECAP_EIM (1ULL << 4) /* CAP_REG */ /* (offset >> 4) << 24 */ diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index e048ced5d5..745b4e7687 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -271,6 +271,7 @@ struct IntelIOMMUState { bool intr_enabled; /* Whether guest enabled IR */ dma_addr_t intr_root; /* Interrupt remapping table pointer */ uint32_t intr_size; /* Number of IR table entries */ + bool intr_eime; /* Extended interrupt mode enabled */ }; /* Find the VTD Address space associated with the given bus pointer, -- cgit v1.2.1 From ede9c94acf6cd1968de4188c0228b714ab871a86 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:29 +0800 Subject: intel_iommu: add SID validation for IR This patch enables SID validation. Invalid interrupts will be dropped. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 69 ++++++++++++++++++++++++++++++++++++------- include/hw/i386/intel_iommu.h | 17 +++++++++++ 2 files changed, 75 insertions(+), 11 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index a605b5852e..5a97548be5 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2008,9 +2008,13 @@ static Property vtd_properties[] = { /* Read IRTE entry with specific index */ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, - VTD_IRTE *entry) + VTD_IRTE *entry, uint16_t sid) { + static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \ + {0xffff, 0xfffb, 0xfff9, 0xfff8}; dma_addr_t addr = 0x00; + uint16_t mask, source_id; + uint8_t bus, bus_max, bus_min; addr = iommu->intr_root + index * sizeof(*entry); if (dma_memory_read(&address_space_memory, addr, entry, @@ -2037,21 +2041,56 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, return -VTD_FR_IR_IRTE_RSVD; } - /* - * TODO: Check Source-ID corresponds to SVT (Source Validation - * Type) bits - */ + if (sid != X86_IOMMU_SID_INVALID) { + /* Validate IRTE SID */ + source_id = le32_to_cpu(entry->source_id); + switch (entry->sid_vtype) { + case VTD_SVT_NONE: + VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index); + break; + + case VTD_SVT_ALL: + mask = vtd_svt_mask[entry->sid_q]; + if ((source_id & mask) != (sid & mask)) { + VTD_DPRINTF(GENERAL, "SID validation for IRTE index " + "%d failed (reqid 0x%04x sid 0x%04x)", index, + sid, source_id); + return -VTD_FR_IR_SID_ERR; + } + break; + + case VTD_SVT_BUS: + bus_max = source_id >> 8; + bus_min = source_id & 0xff; + bus = sid >> 8; + if (bus > bus_max || bus < bus_min) { + VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d " + "failed (bus %d outside %d-%d)", index, bus, + bus_min, bus_max); + return -VTD_FR_IR_SID_ERR; + } + break; + + default: + VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index " + "%d", entry->sid_vtype, index); + /* Take this as verification failure. 
*/ + return -VTD_FR_IR_SID_ERR; + break; + } + } return 0; } /* Fetch IRQ information of specific IR index */ -static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, VTDIrq *irq) +static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, + VTDIrq *irq, uint16_t sid) { VTD_IRTE irte = {}; int ret = 0; - ret = vtd_irte_get(iommu, index, &irte); + ret = vtd_irte_get(iommu, index, &irte, sid); if (ret) { return ret; } @@ -2103,7 +2142,8 @@ static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out) /* Interrupt remapping for MSI/MSI-X entry */ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, MSIMessage *origin, - MSIMessage *translated) + MSIMessage *translated, + uint16_t sid) { int ret = 0; VTD_IR_MSIAddress addr; @@ -2146,7 +2186,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE; } - ret = vtd_remap_irq_get(iommu, index, &irq); + ret = vtd_remap_irq_get(iommu, index, &irq, sid); if (ret) { return ret; } @@ -2193,7 +2233,8 @@ do_not_translate: static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src, MSIMessage *dst, uint16_t sid) { - return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu), src, dst); + return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu), + src, dst, sid); } static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr, @@ -2209,11 +2250,17 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr, { int ret = 0; MSIMessage from = {}, to = {}; + uint16_t sid = X86_IOMMU_SID_INVALID; from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST; from.data = (uint32_t) value; - ret = vtd_interrupt_remap_msi(opaque, &from, &to); + if (!attrs.unspecified) { + /* We have explicit Source ID */ + sid = attrs.requester_id; + } + + ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid); if (ret) { /* TODO: report error */ VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64 diff --git a/include/hw/i386/intel_iommu.h 
b/include/hw/i386/intel_iommu.h index 745b4e7687..2eba7ed4db 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -102,6 +102,23 @@ struct VTDIOTLBEntry { bool write_flags; }; +/* VT-d Source-ID Qualifier types */ +enum { + VTD_SQ_FULL = 0x00, /* Full SID verification */ + VTD_SQ_IGN_3 = 0x01, /* Ignore bit 3 */ + VTD_SQ_IGN_2_3 = 0x02, /* Ignore bits 2 & 3 */ + VTD_SQ_IGN_1_3 = 0x03, /* Ignore bits 1-3 */ + VTD_SQ_MAX, +}; + +/* VT-d Source Validation Types */ +enum { + VTD_SVT_NONE = 0x00, /* No validation */ + VTD_SVT_ALL = 0x01, /* Do full validation */ + VTD_SVT_BUS = 0x02, /* Validate bus range */ + VTD_SVT_MAX, +}; + /* Interrupt Remapping Table Entry Definition */ union VTD_IRTE { struct { -- cgit v1.2.1 From d1f6af6a17a66f58c238e1c26b928cf71c0c11da Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:30 +0800 Subject: kvm-irqchip: simplify kvm_irqchip_add_msi_route Changing the original MSIMessage parameter in kvm_irqchip_add_msi_route into the vector number. Vector index provides more information than the MSIMessage, we can retrieve the MSIMessage using the vector easily. This will avoid fetching MSIMessage every time before adding MSI routes. Meanwhile, the vector info will be used in the coming patches to further enable gsi route update notifications. Signed-off-by: Peter Xu Reviewed-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/kvm/pci-assign.c | 8 ++------ hw/misc/ivshmem.c | 3 +-- hw/vfio/pci.c | 11 +++++------ hw/virtio/virtio-pci.c | 9 +++------ include/sysemu/kvm.h | 13 ++++++++++++- kvm-all.c | 18 ++++++++++++++++-- kvm-stub.c | 2 +- target-i386/kvm.c | 3 +-- 8 files changed, 41 insertions(+), 26 deletions(-) diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index 1a429e5402..334dfc4433 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -974,10 +974,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev) } if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) { - MSIMessage msg = msi_get_message(pci_dev, 0); int virq; - virq = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev); + virq = kvm_irqchip_add_msi_route(kvm_state, 0, pci_dev); if (virq < 0) { perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route"); return; @@ -1042,7 +1041,6 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) uint16_t entries_nr = 0; int i, r = 0; MSIXTableEntry *entry = adev->msix_table; - MSIMessage msg; /* Get the usable entry number for allocating */ for (i = 0; i < adev->msix_max; i++, entry++) { @@ -1079,9 +1077,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) continue; } - msg.address = entry->addr_lo | ((uint64_t)entry->addr_hi << 32); - msg.data = entry->data; - r = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev); + r = kvm_irqchip_add_msi_route(kvm_state, i, pci_dev); if (r < 0) { return r; } diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 7e7c843b32..023da84004 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -441,13 +441,12 @@ static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, Error **errp) { PCIDevice *pdev = PCI_DEVICE(s); - MSIMessage msg = msix_get_message(pdev, vector); int ret; IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); assert(!s->msi_vectors[vector].pdev); - ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev); + ret = 
kvm_irqchip_add_msi_route(kvm_state, vector, pdev); if (ret < 0) { error_setg(errp, "kvm_irqchip_add_msi_route failed"); return; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index c8436a19d6..87a6f05c65 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -417,11 +417,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) } static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, - MSIMessage *msg, bool msix) + int vector_n, bool msix) { int virq; - if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi) || !msg) { + if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { return; } @@ -429,7 +429,7 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, return; } - virq = kvm_irqchip_add_msi_route(kvm_state, *msg, &vdev->pdev); + virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev); if (virq < 0) { event_notifier_cleanup(&vector->kvm_interrupt); return; @@ -495,7 +495,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vfio_update_kvm_msi_virq(vector, *msg, pdev); } } else { - vfio_add_kvm_msi_virq(vdev, vector, msg, true); + vfio_add_kvm_msi_virq(vdev, vector, nr, true); } /* @@ -639,7 +639,6 @@ retry: for (i = 0; i < vdev->nr_vectors; i++) { VFIOMSIVector *vector = &vdev->msi_vectors[i]; - MSIMessage msg = msi_get_message(&vdev->pdev, i); vector->vdev = vdev; vector->virq = -1; @@ -656,7 +655,7 @@ retry: * Attempt to enable route through KVM irqchip, * default to userspace handling if unavailable. 
*/ - vfio_add_kvm_msi_virq(vdev, vector, &msg, false); + vfio_add_kvm_msi_virq(vdev, vector, i, false); } /* Set interrupt type prior to possible interrupts */ diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 2b34b43060..cbdfd59071 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -699,14 +699,13 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev, static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, unsigned int queue_no, - unsigned int vector, - MSIMessage msg) + unsigned int vector) { VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; int ret; if (irqfd->users == 0) { - ret = kvm_irqchip_add_msi_route(kvm_state, msg, &proxy->pci_dev); + ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev); if (ret < 0) { return ret; } @@ -757,7 +756,6 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); unsigned int vector; int ret, queue_no; - MSIMessage msg; for (queue_no = 0; queue_no < nvqs; queue_no++) { if (!virtio_queue_get_num(vdev, queue_no)) { @@ -767,8 +765,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) if (vector >= msix_nr_vectors_allocated(dev)) { continue; } - msg = msix_get_message(dev, vector); - ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg); + ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector); if (ret < 0) { goto undo; } diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index ad6f837bb4..e5d90bdf18 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -474,7 +474,18 @@ static inline void cpu_synchronize_post_init(CPUState *cpu) } } -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev); +/** + * kvm_irqchip_add_msi_route - Add MSI route for specific vector + * @s: KVM state + * @vector: which vector to add. This can be either MSI/MSIX + * vector. 
The function will automatically detect whether + * MSI/MSIX is enabled, and fetch corresponding MSI + * message. + * @dev: Owner PCI device to add the route. If @dev is specified + * as @NULL, an empty MSI message will be inited. + * @return: virq (>=0) when success, errno (<0) when failed. + */ +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev); int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, PCIDevice *dev); void kvm_irqchip_release_virq(KVMState *s, int virq); diff --git a/kvm-all.c b/kvm-all.c index a88f917fda..d94c0e4855 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -25,6 +25,7 @@ #include "qemu/error-report.h" #include "hw/hw.h" #include "hw/pci/msi.h" +#include "hw/pci/msix.h" #include "hw/s390x/adapter.h" #include "exec/gdbstub.h" #include "sysemu/kvm_int.h" @@ -1237,10 +1238,23 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) return kvm_set_irq(s, route->kroute.gsi, 1); } -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev) +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) { struct kvm_irq_routing_entry kroute = {}; int virq; + MSIMessage msg = {0, 0}; + + if (dev) { + if (msix_enabled(dev)) { + msg = msix_get_message(dev, vector); + } else if (msi_enabled(dev)) { + msg = msi_get_message(dev, vector); + } else { + /* Should never happen */ + error_report("%s: unknown interrupt type", __func__); + abort(); + } + } if (kvm_gsi_direct_mapping()) { return kvm_arch_msi_data_to_gsi(msg.data); @@ -1390,7 +1404,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) abort(); } -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) { return -ENOSYS; } diff --git a/kvm-stub.c b/kvm-stub.c index 07c09d1141..982e5900b7 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -116,7 +116,7 @@ int kvm_on_sigbus(int code, void *addr) } #ifndef CONFIG_USER_ONLY -int kvm_irqchip_add_msi_route(KVMState *s, 
MSIMessage msg, PCIDevice *dev) +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) { return -ENOSYS; } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 9c00e48952..f5745135f2 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -3202,8 +3202,7 @@ void kvm_arch_init_irq_routing(KVMState *s) /* If the ioapic is in QEMU and the lapics are in KVM, reserve MSI routes for signaling interrupts to the local apics. */ for (i = 0; i < IOAPIC_NUM_PINS; i++) { - struct MSIMessage msg = { 0x0, 0x0 }; - if (kvm_irqchip_add_msi_route(s, msg, NULL) < 0) { + if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) { error_report("Could not enable split IRQ mode."); exit(1); } -- cgit v1.2.1 From 38d87493f37e5dc442c7419c5843fcf60bb39d63 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:31 +0800 Subject: kvm-irqchip: i386: add hook for add/remove virq Adding two hooks to be notified when adding/removing msi routes. There are two kinds of MSI routes: - in kvm_irqchip_add_msi_route(): before assigning IRQFD. Used by vhost, vfio, etc. - in kvm_irqchip_send_msi(): when sending direct MSI message, if direct MSI is not allowed, we will first create one MSI route entry in the kernel, then trigger it. This patch only hooks the first one (irqfd case). We do not need to take care of the 2nd one, since it's only used by QEMU userspace (kvm-apic) and the messages are always translated just in time when triggered. For the 1st one, however, we need to note the routes down so that we can notify the kernel when cache invalidation happens. Also, we do not hook IOAPIC msi routes (we have an explicit notifier for IOAPIC to keep its cache updated). We only need to care about irqfd users. Signed-off-by: Peter Xu Reviewed-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- include/sysemu/kvm.h | 6 ++++++ kvm-all.c | 2 ++ target-arm/kvm.c | 11 +++++++++++ target-i386/kvm.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ target-i386/trace-events | 2 ++ target-mips/kvm.c | 11 +++++++++++ target-ppc/kvm.c | 11 +++++++++++ target-s390x/kvm.c | 11 +++++++++++ 8 files changed, 102 insertions(+) diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index e5d90bdf18..0a16e0eb10 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -359,6 +359,12 @@ void kvm_arch_init_irq_routing(KVMState *s); int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, uint64_t address, uint32_t data, PCIDevice *dev); +/* Notify arch about newly added MSI routes */ +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev); +/* Notify arch about released MSI routes */ +int kvm_arch_release_virq_post(int virq); + int kvm_arch_msi_data_to_gsi(uint32_t data); int kvm_set_irq(KVMState *s, int irq, int level); diff --git a/kvm-all.c b/kvm-all.c index d94c0e4855..69ff658f4d 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1133,6 +1133,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq) } } clear_gsi(s, virq); + kvm_arch_release_virq_post(virq); } static unsigned int kvm_hash_msi(uint32_t data) @@ -1281,6 +1282,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) } kvm_add_routing_entry(s, &kroute); + kvm_arch_add_msi_route_post(&kroute, vector, dev); kvm_irqchip_commit_routes(s); return virq; diff --git a/target-arm/kvm.c b/target-arm/kvm.c index 5c2bd7a10b..dbe393c109 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -622,6 +622,17 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, return 0; } +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + return 0; +} + int kvm_arch_msi_data_to_gsi(uint32_t data) { return (data - 32) & 
0xffff; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index f5745135f2..8875034500 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -3400,6 +3400,54 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, return 0; } +typedef struct MSIRouteEntry MSIRouteEntry; + +struct MSIRouteEntry { + PCIDevice *dev; /* Device pointer */ + int vector; /* MSI/MSIX vector index */ + int virq; /* Virtual IRQ index */ + QLIST_ENTRY(MSIRouteEntry) list; +}; + +/* List of used GSI routes */ +static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \ + QLIST_HEAD_INITIALIZER(msi_route_list); + +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + MSIRouteEntry *entry; + + if (!dev) { + /* These are (possibly) IOAPIC routes only used for split + * kernel irqchip mode, while what we are housekeeping are + * PCI devices only. */ + return 0; + } + + entry = g_new0(MSIRouteEntry, 1); + entry->dev = dev; + entry->vector = vector; + entry->virq = route->gsi; + QLIST_INSERT_HEAD(&msi_route_list, entry, list); + + trace_kvm_x86_add_msi_route(route->gsi); + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + MSIRouteEntry *entry, *next; + QLIST_FOREACH_SAFE(entry, &msi_route_list, list, next) { + if (entry->virq == virq) { + trace_kvm_x86_remove_msi_route(virq); + QLIST_REMOVE(entry, list); + break; + } + } + return 0; +} + int kvm_arch_msi_data_to_gsi(uint32_t data) { abort(); diff --git a/target-i386/trace-events b/target-i386/trace-events index 2113075cd1..818058c4ac 100644 --- a/target-i386/trace-events +++ b/target-i386/trace-events @@ -2,3 +2,5 @@ # target-i386/kvm.c kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" PRIu32 +kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" +kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" diff --git a/target-mips/kvm.c b/target-mips/kvm.c index f3f832d498..dcf5fbba0c 100644 --- a/target-mips/kvm.c +++ 
b/target-mips/kvm.c @@ -1043,6 +1043,17 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, return 0; } +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + return 0; +} + int kvm_arch_msi_data_to_gsi(uint32_t data) { abort(); diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 7a8f5559d9..91e6daf4fd 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -2621,6 +2621,17 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, return 0; } +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + return 0; +} + int kvm_arch_msi_data_to_gsi(uint32_t data) { return data & 0xffff; diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 2991bff6a4..80ac6215fb 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -2267,6 +2267,17 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, return 0; } +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + return 0; +} + int kvm_arch_msi_data_to_gsi(uint32_t data) { abort(); -- cgit v1.2.1 From e1d4fb2de594ab0cbe78846bc79617b9bd50c867 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:32 +0800 Subject: kvm-irqchip: x86: add msi route notify fn One more IEC notifier is added to let msi routes know about the IEC changes. When interrupt invalidation happens, all registered msi routes will be updated for all PCI devices. Since both vfio and vhost are possible gsi route consumers, this patch will go one step further to keep them safe in split irqchip mode and when irqfd is enabled. Reviewed-by: Paolo Bonzini [move trace-events lines into target-i386/trace-events] Signed-off-by: Peter Xu Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 15 +++++++++++++++ include/hw/pci/pci.h | 2 ++ kvm-all.c | 10 +--------- target-i386/kvm.c | 30 ++++++++++++++++++++++++++++++ target-i386/trace-events | 1 + 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 149994b815..728c6d4b3b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2596,6 +2596,21 @@ PCIDevice *pci_get_function_0(PCIDevice *pci_dev) } } +MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) +{ + MSIMessage msg; + if (msix_enabled(dev)) { + msg = msix_get_message(dev, vector); + } else if (msi_enabled(dev)) { + msg = msi_get_message(dev, vector); + } else { + /* Should never happen */ + error_report("%s: unknown interrupt type", __func__); + abort(); + } + return msg; +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 9ed1624f09..74d797d1cf 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -805,4 +805,6 @@ extern const VMStateDescription vmstate_pci_device; .offset = vmstate_offset_pointer(_state, _field, PCIDevice), \ } +MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); + #endif diff --git a/kvm-all.c b/kvm-all.c index 69ff658f4d..ca30a58a11 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1246,15 +1246,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) MSIMessage msg = {0, 0}; if (dev) { - if (msix_enabled(dev)) { - msg = msix_get_message(dev, vector); - } else if (msi_enabled(dev)) { - msg = msi_get_message(dev, vector); - } else { - /* Should never happen */ - error_report("%s: unknown interrupt type", __func__); - abort(); - } + msg = pci_get_msi_message(dev, vector); } if (kvm_gsi_direct_mapping()) { diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 8875034500..61f57f9f20 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -36,6 +36,7 @@ #include 
"hw/i386/apic_internal.h" #include "hw/i386/apic-msidef.h" #include "hw/i386/intel_iommu.h" +#include "hw/i386/x86-iommu.h" #include "exec/ioport.h" #include "standard-headers/asm-x86/hyperv.h" @@ -3413,9 +3414,26 @@ struct MSIRouteEntry { static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \ QLIST_HEAD_INITIALIZER(msi_route_list); +static void kvm_update_msi_routes_all(void *private, bool global, + uint32_t index, uint32_t mask) +{ + int cnt = 0; + MSIRouteEntry *entry; + MSIMessage msg; + /* TODO: explicit route update */ + QLIST_FOREACH(entry, &msi_route_list, list) { + cnt++; + msg = pci_get_msi_message(entry->dev, entry->vector); + kvm_irqchip_update_msi_route(kvm_state, entry->virq, + msg, entry->dev); + } + trace_kvm_x86_update_msi_routes(cnt); +} + int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, int vector, PCIDevice *dev) { + static bool notify_list_inited = false; MSIRouteEntry *entry; if (!dev) { @@ -3432,6 +3450,18 @@ int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, QLIST_INSERT_HEAD(&msi_route_list, entry, list); trace_kvm_x86_add_msi_route(route->gsi); + + if (!notify_list_inited) { + /* For the first time we do add route, add ourselves into + * IOMMU's IEC notify list if needed. 
*/ + X86IOMMUState *iommu = x86_iommu_get_default(); + if (iommu) { + x86_iommu_iec_register_notifier(iommu, + kvm_update_msi_routes_all, + NULL); + } + notify_list_inited = true; + } return 0; } diff --git a/target-i386/trace-events b/target-i386/trace-events index 818058c4ac..ccc49e31e8 100644 --- a/target-i386/trace-events +++ b/target-i386/trace-events @@ -4,3 +4,4 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" PRIu32 kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" +kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" -- cgit v1.2.1 From 3f1fea0fb5bfd78ede2ceae5dae0b24c0380f423 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:33 +0800 Subject: kvm-irqchip: do explicit commit when update irq In the past, we did a gsi route commit for each irqchip route update. This is not efficient if we are updating lots of routes at the same time. This patch removes the committing phase in kvm_irqchip_update_msi_route(). Instead, we do an explicit commit after all routes are updated. Signed-off-by: Peter Xu Reviewed-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/i386/kvm/pci-assign.c | 2 ++ hw/misc/ivshmem.c | 1 + hw/vfio/pci.c | 1 + hw/virtio/virtio-pci.c | 1 + include/sysemu/kvm.h | 2 +- kvm-all.c | 2 -- kvm-stub.c | 4 ++++ target-i386/kvm.c | 1 + 8 files changed, 11 insertions(+), 3 deletions(-) diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index 334dfc4433..8238fbc630 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -1015,6 +1015,7 @@ static void assigned_dev_update_msi_msg(PCIDevice *pci_dev) kvm_irqchip_update_msi_route(kvm_state, assigned_dev->msi_virq[0], msi_get_message(pci_dev, 0), pci_dev); + kvm_irqchip_commit_routes(kvm_state); } static bool assigned_dev_msix_masked(MSIXTableEntry *entry) @@ -1602,6 +1603,7 @@ static void assigned_dev_msix_mmio_write(void *opaque, hwaddr addr, if (ret) { error_report("Error updating irq routing entry (%d)", ret); } + kvm_irqchip_commit_routes(kvm_state); } } } diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 023da84004..40a2ebca20 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -322,6 +322,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, if (ret < 0) { return ret; } + kvm_irqchip_commit_routes(kvm_state); return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 87a6f05c65..7bfa17ce38 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -458,6 +458,7 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, PCIDevice *pdev) { kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev); + kvm_irqchip_commit_routes(kvm_state); } static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index cbdfd59071..f0677b73d8 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -842,6 +842,7 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, if (ret < 0) { return ret; } + kvm_irqchip_commit_routes(kvm_state); 
} } diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 0a16e0eb10..c9c243631e 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -371,7 +371,6 @@ int kvm_set_irq(KVMState *s, int irq, int level); int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); -void kvm_irqchip_commit_routes(KVMState *s); void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); @@ -494,6 +493,7 @@ static inline void cpu_synchronize_post_init(CPUState *cpu) int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev); int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, PCIDevice *dev); +void kvm_irqchip_commit_routes(KVMState *s); void kvm_irqchip_release_virq(KVMState *s, int virq); int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter); diff --git a/kvm-all.c b/kvm-all.c index ca30a58a11..3764ba9743 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1094,8 +1094,6 @@ static int kvm_update_routing_entry(KVMState *s, *entry = *new_entry; - kvm_irqchip_commit_routes(s); - return 0; } diff --git a/kvm-stub.c b/kvm-stub.c index 982e5900b7..64e23f6be0 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -135,6 +135,10 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, return -ENOSYS; } +void kvm_irqchip_commit_routes(KVMState *s) +{ +} + int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter) { return -ENOSYS; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 61f57f9f20..0a09be656e 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -3427,6 +3427,7 @@ static void kvm_update_msi_routes_all(void *private, bool global, kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, entry->dev); } + kvm_irqchip_commit_routes(kvm_state); trace_kvm_x86_update_msi_routes(cnt); } -- cgit v1.2.1 From a3f409cb4a35d9aa6a4d24a7a1e05423e189cb7a Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 14 Jul 2016 13:56:34 +0800 Subject: intel_iommu: support all masks in interrupt entry cache invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux guests do not gracefully handle cases when the invalidation mask they wanted is not supported, probably because real hardware always allowed all. We can just say that all 16 masks are supported, because both ioapic_iec_notifier and kvm_update_msi_routes_all invalidate all caches. Signed-off-by: Radim Krčmář Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 2 +- hw/i386/intel_iommu_internal.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 5a97548be5..df2678b214 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2362,7 +2362,7 @@ static void vtd_init(IntelIOMMUState *s) s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; if (x86_iommu->intr_supported) { - s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM; + s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM | VTD_ECAP_MHMV; } vtd_reset_context_cache(s); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 72b0114927..0829a5064f 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -186,6 +186,7 @@ /* Interrupt Remapping support */ #define VTD_ECAP_IR (1ULL << 3) #define VTD_ECAP_EIM (1ULL << 4) +#define VTD_ECAP_MHMV (15ULL << 20) /* CAP_REG */ /* (offset >> 4) << 24 */ -- cgit v1.2.1 From 54a6c11b20bb635ac5bb5d9369782bf00d0c7e19 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:35 +0800 Subject: kvm-all: add trace events for kvm irqchip ops These will help us monitoring irqchip route activities more easily. Signed-off-by: Peter Xu Reviewed-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- kvm-all.c | 5 +++++ trace-events | 3 +++ 2 files changed, 8 insertions(+) diff --git a/kvm-all.c b/kvm-all.c index 3764ba9743..ef81ca532a 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1048,6 +1048,7 @@ void kvm_irqchip_commit_routes(KVMState *s) int ret; s->irq_routes->flags = 0; + trace_kvm_irqchip_commit_routes(); ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); assert(ret == 0); } @@ -1271,6 +1272,8 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) return -EINVAL; } + trace_kvm_irqchip_add_msi_route(virq); + kvm_add_routing_entry(s, &kroute); kvm_arch_add_msi_route_post(&kroute, vector, dev); kvm_irqchip_commit_routes(s); @@ -1301,6 +1304,8 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, return -EINVAL; } + trace_kvm_irqchip_update_msi_route(virq); + return kvm_update_routing_entry(s, &kroute); } diff --git a/trace-events b/trace-events index 476705996b..52c6a6cccf 100644 --- a/trace-events +++ b/trace-events @@ -118,6 +118,9 @@ kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d" kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" +kvm_irqchip_commit_routes(void) "" +kvm_irqchip_add_msi_route(int virq) "Adding MSI route virq=%d" +kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" # TCG related tracing (mostly disabled by default) # cpu-exec.c -- cgit v1.2.1 From 4684a2041005b598ecc92fbaf36463d2fd12b5de Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 14 Jul 2016 13:56:36 +0800 Subject: intel_iommu: disallow kernel-irqchip=on with IR When the user specifies "intremap=on" together with "-M kernel-irqchip=on", report an error and quit. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/i386/intel_iommu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index df2678b214..0e139d1945 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -20,6 +20,7 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "hw/sysbus.h" #include "exec/address-spaces.h" #include "intel_iommu_internal.h" @@ -29,6 +30,7 @@ #include "hw/boards.h" #include "hw/i386/x86-iommu.h" #include "hw/pci-host/q35.h" +#include "sysemu/kvm.h" /*#define DEBUG_INTEL_IOMMU*/ #ifdef DEBUG_INTEL_IOMMU @@ -2448,6 +2450,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); VTD_DPRINTF(GENERAL, ""); memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); @@ -2464,6 +2467,14 @@ static void vtd_realize(DeviceState *dev, Error **errp) pci_setup_iommu(bus, vtd_host_dma_iommu, dev); /* Pseudo address space under root PCI bus. */ pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); + + /* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */ + if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() && + !kvm_irqchip_is_split()) { + error_report("Intel Interrupt Remapping cannot work with " + "kernel-irqchip=on, please use 'split|off'."); + exit(1); + } } static void vtd_class_init(ObjectClass *klass, void *data) -- cgit v1.2.1 From bf1780b0d57af0ef3c14a036bc6be1e509dd72fc Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 Jul 2016 13:09:43 +0800 Subject: virtio: Add typedef for handle_output The function pointer signature has been repeated a few times, using a typedef may make coding easier. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Stefan Hajnoczi Acked-by: Paolo Bonzini --- hw/virtio/virtio.c | 9 ++++----- include/hw/virtio/virtio.h | 5 +++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 18153d5a39..2cc68d2465 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -95,8 +95,8 @@ struct VirtQueue int inuse; uint16_t vector; - void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); - void (*handle_aio_output)(VirtIODevice *vdev, VirtQueue *vq); + VirtIOHandleOutput handle_output; + VirtIOHandleOutput handle_aio_output; VirtIODevice *vdev; EventNotifier guest_notifier; EventNotifier host_notifier; @@ -1131,7 +1131,7 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) } VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - void (*handle_output)(VirtIODevice *, VirtQueue *)) + VirtIOHandleOutput handle_output) { int i; @@ -1804,8 +1804,7 @@ static void virtio_queue_host_notifier_aio_read(EventNotifier *n) } void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, - void (*handle_output)(VirtIODevice *, - VirtQueue *)) + VirtIOHandleOutput handle_output) { if (handle_output) { vq->handle_aio_output = handle_output; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 8a681f56f1..3670829501 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -138,9 +138,10 @@ void virtio_cleanup(VirtIODevice *vdev); /* Set the child bus name. 
*/ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name); +typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *); + VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - void (*handle_output)(VirtIODevice *, - VirtQueue *)); + VirtIOHandleOutput handle_output); void virtio_del_queue(VirtIODevice *vdev, int n); -- cgit v1.2.1 From 872dd82c83745a603d2e07a03d34313eb6467ae4 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 Jul 2016 13:09:44 +0800 Subject: virtio: Introduce virtio_add_queue_aio Using this function instead of virtio_add_queue marks the vq as aio based. This differentiation will be useful in later patches. Distinguish between virtqueue processing in the iohandler context and main loop AioContext. iohandler context is isolated from AioContexts and therefore does not run during aio_poll(). Signed-off-by: Fam Zheng Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Stefan Hajnoczi Acked-by: Paolo Bonzini --- hw/virtio/virtio.c | 38 ++++++++++++++++++++++++++++++++++---- include/hw/virtio/virtio.h | 3 +++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 2cc68d2465..2fbed0c749 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -97,6 +97,7 @@ struct VirtQueue uint16_t vector; VirtIOHandleOutput handle_output; VirtIOHandleOutput handle_aio_output; + bool use_aio; VirtIODevice *vdev; EventNotifier guest_notifier; EventNotifier host_notifier; @@ -1130,8 +1131,9 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) } } -VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - VirtIOHandleOutput handle_output) +static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output, + bool use_aio) { int i; @@ -1148,10 +1150,28 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, vdev->vq[i].vring.align = 
VIRTIO_PCI_VRING_ALIGN; vdev->vq[i].handle_output = handle_output; vdev->vq[i].handle_aio_output = NULL; + vdev->vq[i].use_aio = use_aio; return &vdev->vq[i]; } +/* Add a virt queue and mark AIO. + * An AIO queue will use the AioContext based event interface instead of the + * default IOHandler and EventNotifier interface. + */ +VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output) +{ + return virtio_add_queue_internal(vdev, queue_size, handle_output, true); +} + +/* Add a normal virt queue (on the contrary to the AIO version above. */ +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output) +{ + return virtio_add_queue_internal(vdev, queue_size, handle_output, false); +} + void virtio_del_queue(VirtIODevice *vdev, int n) { if (n < 0 || n >= VIRTIO_QUEUE_MAX) { @@ -1830,11 +1850,21 @@ static void virtio_queue_host_notifier_read(EventNotifier *n) void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, bool set_handler) { + AioContext *ctx = qemu_get_aio_context(); if (assign && set_handler) { - event_notifier_set_handler(&vq->host_notifier, true, + if (vq->use_aio) { + aio_set_event_notifier(ctx, &vq->host_notifier, true, virtio_queue_host_notifier_read); + } else { + event_notifier_set_handler(&vq->host_notifier, true, + virtio_queue_host_notifier_read); + } } else { - event_notifier_set_handler(&vq->host_notifier, true, NULL); + if (vq->use_aio) { + aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL); + } else { + event_notifier_set_handler(&vq->host_notifier, true, NULL); + } } if (!assign) { /* Test and clear notifier before after disabling event, diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 3670829501..7a82f79a2a 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -143,6 +143,9 @@ typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *); VirtQueue *virtio_add_queue(VirtIODevice 
*vdev, int queue_size, VirtIOHandleOutput handle_output); +VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output); + void virtio_del_queue(VirtIODevice *vdev, int n); void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num); -- cgit v1.2.1 From 0ff841f6d138904d514efa1d885bcaf54583852d Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 Jul 2016 13:09:45 +0800 Subject: virtio-blk: Call virtio_add_queue_aio AIO based handler is more appropriate here because it will then cooperate with bdrv_drained_begin/end. It is needed by the coming revert patch. Signed-off-by: Fam Zheng Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Stefan Hajnoczi Acked-by: Paolo Bonzini --- hw/block/virtio-blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 357ff9081e..728b27802c 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -914,7 +914,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; for (i = 0; i < conf->num_queues; i++) { - virtio_add_queue(vdev, 128, virtio_blk_handle_output); + virtio_add_queue_aio(vdev, 128, virtio_blk_handle_output); } virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); if (err != NULL) { -- cgit v1.2.1 From 1c627137c10ee2dcf59e0383ade8a9abfa2d4355 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 Jul 2016 13:09:46 +0800 Subject: virtio-scsi: Call virtio_add_queue_aio AIO based handler is more appropriate here because it will then cooperate with bdrv_drained_begin/end. It is needed by the coming revert patch. Signed-off-by: Fam Zheng Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Stefan Hajnoczi Acked-by: Paolo Bonzini --- hw/scsi/virtio-scsi.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 722c93e5fc..45e2ee8a63 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -846,13 +846,10 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp, s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; - s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, - ctrl); - s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, - evt); + s->ctrl_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, ctrl); + s->event_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, evt); for (i = 0; i < s->conf.num_queues; i++) { - s->cmd_vqs[i] = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, - cmd); + s->cmd_vqs[i] = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, cmd); } if (s->conf.iothread) { -- cgit v1.2.1 From d4a92a8420ac764f21652322aa7d4e49cfcbc607 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 Jul 2016 13:09:47 +0800 Subject: Revert "mirror: Workaround for unexpected iohandler events during completion" This reverts commit ab27c3b5e7408693dde0b565f050aa55c4a1bcef. The virtio storage device host notifiers now work with bdrv_drained_begin/end, so we don't need this hack any more. Signed-off-by: Fam Zheng Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Stefan Hajnoczi Acked-by: Paolo Bonzini --- block/mirror.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index b1e633ecad..9ae11e5276 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -508,9 +508,6 @@ static void mirror_exit(BlockJob *job, void *opaque) block_job_completed(&s->common, data->ret); g_free(data); bdrv_drained_end(src); - if (qemu_get_aio_context() == bdrv_get_aio_context(src)) { - aio_enable_external(iohandler_get_aio_context()); - } bdrv_unref(src); } @@ -734,12 +731,6 @@ immediate_exit: /* Before we switch to target in mirror_exit, make sure data doesn't * change. */ bdrv_drained_begin(bs); - if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) { - /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the - * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we - * need a block layer API change to achieve this. */ - aio_disable_external(iohandler_get_aio_context()); - } block_job_defer_to_main_loop(&s->common, mirror_exit, data); } -- cgit v1.2.1 From 209b27bbe9453d3f76eb801fdd63bcbafd405df0 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 Jul 2016 13:09:48 +0800 Subject: virtio-scsi: Replace HandleOutput typedef There is a new common one in virtio.h, use it. Signed-off-by: Fam Zheng Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Stefan Hajnoczi Acked-by: Paolo Bonzini --- hw/scsi/virtio-scsi.c | 5 +++-- include/hw/virtio/virtio-scsi.h | 6 ++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 45e2ee8a63..88d4bf03fb 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -824,8 +824,9 @@ static struct SCSIBusInfo virtio_scsi_scsi_info = { }; void virtio_scsi_common_realize(DeviceState *dev, Error **errp, - HandleOutput ctrl, HandleOutput evt, - HandleOutput cmd) + VirtIOHandleOutput ctrl, + VirtIOHandleOutput evt, + VirtIOHandleOutput cmd) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev); diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 5e3f088f9a..a1e0cfb449 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -121,11 +121,9 @@ typedef struct VirtIOSCSIReq { } req; } VirtIOSCSIReq; -typedef void (*HandleOutput)(VirtIODevice *, VirtQueue *); - void virtio_scsi_common_realize(DeviceState *dev, Error **errp, - HandleOutput ctrl, HandleOutput evt, - HandleOutput cmd); + VirtIOHandleOutput ctrl, VirtIOHandleOutput evt, + VirtIOHandleOutput cmd); void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp); void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); -- cgit v1.2.1 From 76010cb320b08d23a9719e692197407ccd293dfc Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:43 +0100 Subject: virtio-net: Remove old migration version support virtio-net has had version 11 since 0ce0e8f4 in 2009 (v0.11.0-rc0-1480-g0ce0e8f) - remove the code to support loading anything earlier. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Amit Shah Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/net/virtio-net.c | 87 +++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 53 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 999989934e..550db30930 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1543,7 +1543,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) VirtIONet *n = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(n); - if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) + if (version_id != VIRTIO_NET_VM_VERSION) return -EINVAL; return virtio_load(vdev, f, version_id); @@ -1562,68 +1562,49 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); - if (version_id >= 3) - n->status = qemu_get_be16(f); + n->status = qemu_get_be16(f); - if (version_id >= 4) { - if (version_id < 8) { - n->promisc = qemu_get_be32(f); - n->allmulti = qemu_get_be32(f); - } else { - n->promisc = qemu_get_byte(f); - n->allmulti = qemu_get_byte(f); - } - } + n->promisc = qemu_get_byte(f); + n->allmulti = qemu_get_byte(f); - if (version_id >= 5) { - n->mac_table.in_use = qemu_get_be32(f); - /* MAC_TABLE_ENTRIES may be different from the saved image */ - if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) { - qemu_get_buffer(f, n->mac_table.macs, - n->mac_table.in_use * ETH_ALEN); - } else { - int64_t i; - - /* Overflow detected - can happen if source has a larger MAC table. - * We simply set overflow flag so there's no need to maintain the - * table of addresses, discard them all. - * Note: 64 bit math to avoid integer overflow. 
- */ - for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) { - qemu_get_byte(f); - } - n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1; - n->mac_table.in_use = 0; + n->mac_table.in_use = qemu_get_be32(f); + /* MAC_TABLE_ENTRIES may be different from the saved image */ + if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) { + qemu_get_buffer(f, n->mac_table.macs, + n->mac_table.in_use * ETH_ALEN); + } else { + int64_t i; + + /* Overflow detected - can happen if source has a larger MAC table. + * We simply set overflow flag so there's no need to maintain the + * table of addresses, discard them all. + * Note: 64 bit math to avoid integer overflow. + */ + for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) { + qemu_get_byte(f); } + n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1; + n->mac_table.in_use = 0; } - if (version_id >= 6) - qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3); + qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3); - if (version_id >= 7) { - if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) { - error_report("virtio-net: saved image requires vnet_hdr=on"); - return -1; - } + if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) { + error_report("virtio-net: saved image requires vnet_hdr=on"); + return -1; } - if (version_id >= 9) { - n->mac_table.multi_overflow = qemu_get_byte(f); - n->mac_table.uni_overflow = qemu_get_byte(f); - } + n->mac_table.multi_overflow = qemu_get_byte(f); + n->mac_table.uni_overflow = qemu_get_byte(f); - if (version_id >= 10) { - n->alluni = qemu_get_byte(f); - n->nomulti = qemu_get_byte(f); - n->nouni = qemu_get_byte(f); - n->nobcast = qemu_get_byte(f); - } + n->alluni = qemu_get_byte(f); + n->nomulti = qemu_get_byte(f); + n->nouni = qemu_get_byte(f); + n->nobcast = qemu_get_byte(f); - if (version_id >= 11) { - if (qemu_get_byte(f) && !peer_has_ufo(n)) { - error_report("virtio-net: saved image requires TUN_F_UFO support"); - return -1; - } + if (qemu_get_byte(f) && !peer_has_ufo(n)) { 
+ error_report("virtio-net: saved image requires TUN_F_UFO support"); + return -1; } if (n->max_queues > 1) { -- cgit v1.2.1 From 71945ae1647a6e5f07ea668ee6330f0d539f59cb Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:44 +0100 Subject: virtio-serial: Remove old migration version support virtio-serial-bus has had version 3 since 37f95bf3d0 in 0.13-rc0; it's time to clean it up a bit. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Amit Shah Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/char/virtio-serial-bus.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 6e5de6dec2..490b5ea243 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -685,7 +685,7 @@ static void virtio_serial_post_load_timer_cb(void *opaque) s->post_load = NULL; } -static int fetch_active_ports_list(QEMUFile *f, int version_id, +static int fetch_active_ports_list(QEMUFile *f, VirtIOSerial *s, uint32_t nr_active_ports) { uint32_t i; @@ -702,6 +702,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id, /* Items in struct VirtIOSerialPort */ for (i = 0; i < nr_active_ports; i++) { VirtIOSerialPort *port; + uint32_t elem_popped; uint32_t id; id = qemu_get_be32(f); @@ -714,23 +715,19 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id, s->post_load->connected[i].port = port; s->post_load->connected[i].host_connected = qemu_get_byte(f); - if (version_id > 2) { - uint32_t elem_popped; - - qemu_get_be32s(f, &elem_popped); - if (elem_popped) { - qemu_get_be32s(f, &port->iov_idx); - qemu_get_be64s(f, &port->iov_offset); + qemu_get_be32s(f, &elem_popped); + if (elem_popped) { + qemu_get_be32s(f, &port->iov_idx); + qemu_get_be64s(f, &port->iov_offset); - port->elem = - qemu_get_virtqueue_element(f, sizeof(VirtQueueElement)); + port->elem = + qemu_get_virtqueue_element(f, 
sizeof(VirtQueueElement)); - /* - * Port was throttled on source machine. Let's - * unthrottle it here so data starts flowing again. - */ - virtio_serial_throttle_port(port, false); - } + /* + * Port was throttled on source machine. Let's + * unthrottle it here so data starts flowing again. + */ + virtio_serial_throttle_port(port, false); } } timer_mod(s->post_load->timer, 1); @@ -739,7 +736,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id, static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id) { - if (version_id > 3) { + if (version_id != 3) { return -EINVAL; } @@ -756,10 +753,6 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f, int ret; uint32_t tmp; - if (version_id < 2) { - return 0; - } - /* Unused */ qemu_get_be16s(f, (uint16_t *) &tmp); qemu_get_be16s(f, (uint16_t *) &tmp); @@ -781,7 +774,7 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f, qemu_get_be32s(f, &nr_active_ports); if (nr_active_ports) { - ret = fetch_active_ports_list(f, version_id, s, nr_active_ports); + ret = fetch_active_ports_list(f, s, nr_active_ports); if (ret) { return ret; } -- cgit v1.2.1 From 5943124cc0fe37b3a7541a628cf3ffefd8b3414a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:45 +0100 Subject: virtio: Migration helper function and macro To make conversion of virtio devices to VMState simple at first add a helper function for the simple virtio_save case and a helper macro that defines the VMState structure. These will probably go away or change as more of the virtio code gets converted. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio.c | 6 ++++++ include/hw/virtio/virtio.h | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 2fbed0c749..752b2715d0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1464,6 +1464,12 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) vmstate_save_state(f, &vmstate_virtio, vdev, NULL); } +/* A wrapper for use as a VMState .put function */ +void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size) +{ + virtio_save(VIRTIO_DEVICE(opaque), f); +} + static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 7a82f79a2a..d2490c1975 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -171,6 +171,26 @@ bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq); void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); void virtio_save(VirtIODevice *vdev, QEMUFile *f); +void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size); + +#define VMSTATE_VIRTIO_DEVICE(devname, v, getf, putf) \ + static const VMStateDescription vmstate_virtio_ ## devname = { \ + .name = "virtio-" #devname , \ + .minimum_version_id = v, \ + .version_id = v, \ + .fields = (VMStateField[]) { \ + { \ + .name = "virtio", \ + .info = &(const VMStateInfo) {\ + .name = "virtio", \ + .get = getf, \ + .put = putf, \ + }, \ + .flags = VMS_SINGLE, \ + }, \ + VMSTATE_END_OF_LIST() \ + } \ + } int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); -- cgit v1.2.1 From 5a289a28836915e90916cea4c585689df2a43163 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:46 +0100 Subject: virtio-scsi: Wrap in vmstate Forcibly convert it to a vmstate wrapper; proper conversion comes later. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/scsi/virtio-scsi.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 88d4bf03fb..ce57ef6248 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -663,22 +663,17 @@ static void virtio_scsi_reset(VirtIODevice *vdev) /* The device does not have anything to save beyond the virtio data. * Request data is saved with callbacks from SCSI devices. */ -static void virtio_scsi_save(QEMUFile *f, void *opaque) +static void virtio_scsi_save(QEMUFile *f, void *opaque, size_t size) { VirtIODevice *vdev = VIRTIO_DEVICE(opaque); virtio_save(vdev, f); } -static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_scsi_load(QEMUFile *f, void *opaque, size_t size) { VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - int ret; - ret = virtio_load(vdev, f, version_id); - if (ret) { - return ret; - } - return 0; + return virtio_load(vdev, f, 1); } void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, @@ -862,7 +857,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSCSI *s = VIRTIO_SCSI(dev); - static int virtio_scsi_id; Error *err = NULL; virtio_scsi_common_realize(dev, &err, virtio_scsi_handle_ctrl, @@ -885,9 +879,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) return; } } - - register_savevm(dev, "virtio-scsi", virtio_scsi_id++, 1, - virtio_scsi_save, virtio_scsi_load, s); } static void virtio_scsi_instance_init(Object *obj) @@ -911,9 +902,6 @@ void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp) static void virtio_scsi_device_unrealize(DeviceState *dev, Error **errp) { - VirtIOSCSI *s = VIRTIO_SCSI(dev); - - unregister_savevm(dev, "virtio-scsi", s); virtio_scsi_common_unrealize(dev, errp); } @@ -930,6 +918,8 @@ static Property virtio_scsi_properties[] = { DEFINE_PROP_END_OF_LIST(), }; 
+VMSTATE_VIRTIO_DEVICE(scsi, 1, virtio_scsi_load, virtio_scsi_save); + static void virtio_scsi_common_class_init(ObjectClass *klass, void *data) { VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); @@ -946,6 +936,7 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data) HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); dc->props = virtio_scsi_properties; + dc->vmsd = &vmstate_virtio_scsi; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); vdc->realize = virtio_scsi_device_realize; vdc->unrealize = virtio_scsi_device_unrealize; -- cgit v1.2.1 From bbded32c644839ae56b497408841b67980fddd90 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:47 +0100 Subject: virtio-blk: Wrap in vmstate Forcibly convert it to a vmstate wrapper; proper conversion comes later. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/virtio-blk.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 728b27802c..475a822f5a 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -798,7 +798,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) } } -static void virtio_blk_save(QEMUFile *f, void *opaque) +static void virtio_blk_save(QEMUFile *f, void *opaque, size_t size) { VirtIODevice *vdev = VIRTIO_DEVICE(opaque); @@ -823,15 +823,12 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) qemu_put_sbyte(f, 0); } -static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_blk_load(QEMUFile *f, void *opaque, size_t size) { VirtIOBlock *s = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(s); - if (version_id != 2) - return -EINVAL; - - return virtio_load(vdev, f, version_id); + return virtio_load(vdev, f, 2); } static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, @@ -880,7 +877,6 @@ static 
void virtio_blk_device_realize(DeviceState *dev, Error **errp) VirtIOBlock *s = VIRTIO_BLK(dev); VirtIOBlkConf *conf = &s->conf; Error *err = NULL; - static int virtio_blk_id; unsigned i; if (!conf->conf.blk) { @@ -924,8 +920,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) } s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); - register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, - virtio_blk_save, virtio_blk_load, s); blk_set_dev_ops(s->blk, &virtio_block_ops, s); blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size); @@ -940,7 +934,6 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) virtio_blk_data_plane_destroy(s->dataplane); s->dataplane = NULL; qemu_del_vm_change_state_handler(s->change); - unregister_savevm(dev, "virtio-blk", s); blockdev_mark_auto_del(s->blk); virtio_cleanup(vdev); } @@ -958,6 +951,8 @@ static void virtio_blk_instance_init(Object *obj) DEVICE(obj), NULL); } +VMSTATE_VIRTIO_DEVICE(blk, 2, virtio_blk_load, virtio_blk_save); + static Property virtio_blk_properties[] = { DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf), DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf), @@ -979,6 +974,7 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_blk_properties; + dc->vmsd = &vmstate_virtio_blk; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); vdc->realize = virtio_blk_device_realize; vdc->unrealize = virtio_blk_device_unrealize; -- cgit v1.2.1 From b607579386764c45695ee47001178b97346f99e2 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:48 +0100 Subject: virtio-rng: Wrap in vmstate Forcibly convert it to a vmstate wrapper; proper conversion comes later. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-rng.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index 6b991a7642..cd8ca10177 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -120,22 +120,12 @@ static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp) return f; } -static void virtio_rng_save(QEMUFile *f, void *opaque) -{ - VirtIODevice *vdev = opaque; - - virtio_save(vdev, f); -} - -static int virtio_rng_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_rng_load(QEMUFile *f, void *opaque, size_t size) { VirtIORNG *vrng = opaque; int ret; - if (version_id != 1) { - return -EINVAL; - } - ret = virtio_load(VIRTIO_DEVICE(vrng), f, version_id); + ret = virtio_load(VIRTIO_DEVICE(vrng), f, 1); if (ret != 0) { return ret; } @@ -214,8 +204,6 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) vrng->rate_limit_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, check_rate_limit, vrng); vrng->activate_timer = true; - register_savevm(dev, "virtio-rng", -1, 1, virtio_rng_save, - virtio_rng_load, vrng); } static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) @@ -225,10 +213,11 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) timer_del(vrng->rate_limit_timer); timer_free(vrng->rate_limit_timer); - unregister_savevm(dev, "virtio-rng", vrng); virtio_cleanup(vdev); } +VMSTATE_VIRTIO_DEVICE(rng, 1, virtio_rng_load, virtio_vmstate_save); + static Property virtio_rng_properties[] = { /* Set a default rate limit of 2^47 bytes per minute or roughly 2TB/s. 
If * you have an entropy source capable of generating more entropy than this @@ -246,6 +235,7 @@ static void virtio_rng_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_rng_properties; + dc->vmsd = &vmstate_virtio_rng; set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->realize = virtio_rng_device_realize; vdc->unrealize = virtio_rng_device_unrealize; -- cgit v1.2.1 From 7f1ca9b23b3641107d05196ca78fc022a39fd65c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:49 +0100 Subject: virtio-balloon: Wrap in vmstate Forcibly convert it to a vmstate wrapper; proper conversion comes later. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-balloon.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 1a22e6d993..5af429a58a 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -396,11 +396,6 @@ static void virtio_balloon_to_target(void *opaque, ram_addr_t target) trace_virtio_balloon_to_target(target, dev->num_pages); } -static void virtio_balloon_save(QEMUFile *f, void *opaque) -{ - virtio_save(VIRTIO_DEVICE(opaque), f); -} - static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); @@ -409,12 +404,9 @@ static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f) qemu_put_be32(f, s->actual); } -static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_balloon_load(QEMUFile *f, void *opaque, size_t size) { - if (version_id != 1) - return -EINVAL; - - return virtio_load(VIRTIO_DEVICE(opaque), f, version_id); + return virtio_load(VIRTIO_DEVICE(opaque), f, 1); } static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f, @@ -454,9 +446,6 @@ static 
void virtio_balloon_device_realize(DeviceState *dev, Error **errp) s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); reset_stats(s); - - register_savevm(dev, "virtio-balloon", -1, 1, - virtio_balloon_save, virtio_balloon_load, s); } static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp) @@ -466,7 +455,6 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp) balloon_stats_destroy_timer(s); qemu_remove_balloon_handler(s); - unregister_savevm(dev, "virtio-balloon", s); virtio_cleanup(vdev); } @@ -493,6 +481,8 @@ static void virtio_balloon_instance_init(Object *obj) NULL, s, NULL); } +VMSTATE_VIRTIO_DEVICE(balloon, 1, virtio_balloon_load, virtio_vmstate_save); + static Property virtio_balloon_properties[] = { DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), @@ -505,6 +495,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_balloon_properties; + dc->vmsd = &vmstate_virtio_balloon; set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->realize = virtio_balloon_device_realize; vdc->unrealize = virtio_balloon_device_unrealize; -- cgit v1.2.1 From 290c2428453632502249df3bb4b99f1b3625137c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:50 +0100 Subject: virtio-net: Wrap in vmstate Forcibly convert it to a vmstate wrapper; proper conversion comes later. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/net/virtio-net.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 550db30930..bb311c4587 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1492,7 +1492,7 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) virtio_net_set_queues(n); } -static void virtio_net_save(QEMUFile *f, void *opaque) +static void virtio_net_save(QEMUFile *f, void *opaque, size_t size) { VirtIONet *n = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(n); @@ -1538,15 +1538,12 @@ static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f) } } -static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_net_load(QEMUFile *f, void *opaque, size_t size) { VirtIONet *n = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(n); - if (version_id != VIRTIO_NET_VM_VERSION) - return -EINVAL; - - return virtio_load(vdev, f, version_id); + return virtio_load(vdev, f, VIRTIO_NET_VM_VERSION); } static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, @@ -1790,8 +1787,6 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) nc->rxfilter_notify_enabled = 1; n->qdev = dev; - register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION, - virtio_net_save, virtio_net_load, n); } static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) @@ -1803,8 +1798,6 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) /* This will stop vhost backend if appropriate. 
*/ virtio_net_set_status(vdev, 0); - unregister_savevm(dev, "virtio-net", n); - g_free(n->netclient_name); n->netclient_name = NULL; g_free(n->netclient_type); @@ -1839,6 +1832,9 @@ static void virtio_net_instance_init(Object *obj) DEVICE(n), NULL); } +VMSTATE_VIRTIO_DEVICE(net, VIRTIO_NET_VM_VERSION, virtio_net_load, + virtio_net_save); + static Property virtio_net_properties[] = { DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true), DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features, @@ -1893,6 +1889,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_net_properties; + dc->vmsd = &vmstate_virtio_net; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); vdc->realize = virtio_net_device_realize; vdc->unrealize = virtio_net_device_unrealize; -- cgit v1.2.1 From 42e6c0390b1c4bc04ca73c862f2f017609c7cc7e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:51 +0100 Subject: virtio-serial: Wrap in vmstate Forcibly convert it to a vmstate wrapper; proper conversion comes later. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/char/virtio-serial-bus.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 490b5ea243..db57a38546 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -594,12 +594,6 @@ static void vser_reset(VirtIODevice *vdev) guest_reset(vser); } -static void virtio_serial_save(QEMUFile *f, void *opaque) -{ - /* The virtio device */ - virtio_save(VIRTIO_DEVICE(opaque), f); -} - static void virtio_serial_save_device(VirtIODevice *vdev, QEMUFile *f) { VirtIOSerial *s = VIRTIO_SERIAL(vdev); @@ -734,14 +728,10 @@ static int fetch_active_ports_list(QEMUFile *f, return 0; } -static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_serial_load(QEMUFile *f, void *opaque, size_t size) { - if (version_id != 3) { - return -EINVAL; - } - /* The virtio device */ - return virtio_load(VIRTIO_DEVICE(opaque), f, version_id); + return virtio_load(VIRTIO_DEVICE(opaque), f, 3); } static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f, @@ -1042,13 +1032,6 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp) vser->post_load = NULL; - /* - * Register for the savevm section with the virtio-console name - * to preserve backward compat - */ - register_savevm(dev, "virtio-console", -1, 3, virtio_serial_save, - virtio_serial_load, vser); - QLIST_INSERT_HEAD(&vserdevices.devices, vser, next); } @@ -1079,8 +1062,6 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp) QLIST_REMOVE(vser, next); - unregister_savevm(dev, "virtio-console", vser); - g_free(vser->ivqs); g_free(vser->ovqs); g_free(vser->ports_map); @@ -1093,6 +1074,9 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp) virtio_cleanup(vdev); } +/* Note: 'console' is used for backwards compatibility */ +VMSTATE_VIRTIO_DEVICE(console, 3, virtio_serial_load, virtio_vmstate_save); + static 
Property virtio_serial_properties[] = { DEFINE_PROP_UINT32("max_ports", VirtIOSerial, serial.max_virtserial_ports, 31), @@ -1108,6 +1092,7 @@ static void virtio_serial_class_init(ObjectClass *klass, void *data) QLIST_INIT(&vserdevices.devices); dc->props = virtio_serial_properties; + dc->vmsd = &vmstate_virtio_console; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); vdc->realize = virtio_serial_device_realize; vdc->unrealize = virtio_serial_device_unrealize; -- cgit v1.2.1 From 18e0e5b240ae865554863fa35e8b9af9580f971b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:52 +0100 Subject: 9pfs: Wrap in vmstate Forcibly convert it to a vmstate wrapper; proper conversion comes later. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Greg Kurz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/9pfs/virtio-9p-device.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 494e85e029..009b43f6d0 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -97,14 +97,9 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config) g_free(cfg); } -static void virtio_9p_save(QEMUFile *f, void *opaque) +static int virtio_9p_load(QEMUFile *f, void *opaque, size_t size) { - virtio_save(VIRTIO_DEVICE(opaque), f); -} - -static int virtio_9p_load(QEMUFile *f, void *opaque, int version_id) -{ - return virtio_load(VIRTIO_DEVICE(opaque), f, version_id); + return virtio_load(VIRTIO_DEVICE(opaque), f, 1); } static void virtio_9p_device_realize(DeviceState *dev, Error **errp) @@ -120,7 +115,6 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp) v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag); virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size); v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output); - register_savevm(dev, "virtio-9p", -1, 
1, virtio_9p_save, virtio_9p_load, v); out: return; @@ -133,7 +127,6 @@ static void virtio_9p_device_unrealize(DeviceState *dev, Error **errp) V9fsState *s = &v->state; virtio_cleanup(vdev); - unregister_savevm(dev, "virtio-9p", v); v9fs_device_unrealize_common(s, errp); } @@ -175,6 +168,8 @@ void virtio_init_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, /* virtio-9p device */ +VMSTATE_VIRTIO_DEVICE(9p, 1, virtio_9p_load, virtio_vmstate_save); + static Property virtio_9p_properties[] = { DEFINE_PROP_STRING("mount_tag", V9fsVirtioState, state.fsconf.tag), DEFINE_PROP_STRING("fsdev", V9fsVirtioState, state.fsconf.fsdev_id), @@ -187,6 +182,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_9p_properties; + dc->vmsd = &vmstate_virtio_9p; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); vdc->realize = virtio_9p_device_realize; vdc->unrealize = virtio_9p_device_unrealize; -- cgit v1.2.1 From 428d2ed2c84d71fa7cb25d12e409ee9b7f0a953c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:53 +0100 Subject: virtio-input: Wrap in vmstate Forcibly convert it to a vmstate wrapper; proper conversion comes later. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/input/virtio-input.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c index edf69903a6..a87fd6862e 100644 --- a/hw/input/virtio-input.c +++ b/hw/input/virtio-input.c @@ -217,26 +217,14 @@ static void virtio_input_reset(VirtIODevice *vdev) } } -static void virtio_input_save(QEMUFile *f, void *opaque) -{ - VirtIOInput *vinput = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(vinput); - - virtio_save(vdev, f); -} - -static int virtio_input_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_input_load(QEMUFile *f, void *opaque, size_t size) { VirtIOInput *vinput = opaque; VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(vinput); VirtIODevice *vdev = VIRTIO_DEVICE(vinput); int ret; - if (version_id != VIRTIO_INPUT_VM_VERSION) { - return -EINVAL; - } - - ret = virtio_load(vdev, f, version_id); + ret = virtio_load(vdev, f, VIRTIO_INPUT_VM_VERSION); if (ret) { return ret; } @@ -280,20 +268,14 @@ static void virtio_input_device_realize(DeviceState *dev, Error **errp) vinput->cfg_size); vinput->evt = virtio_add_queue(vdev, 64, virtio_input_handle_evt); vinput->sts = virtio_add_queue(vdev, 64, virtio_input_handle_sts); - - register_savevm(dev, "virtio-input", -1, VIRTIO_INPUT_VM_VERSION, - virtio_input_save, virtio_input_load, vinput); } static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) { VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VirtIOInput *vinput = VIRTIO_INPUT(dev); Error *local_err = NULL; - unregister_savevm(dev, "virtio-input", vinput); - if (vic->unrealize) { vic->unrealize(dev, &local_err); if (local_err) { @@ -304,6 +286,9 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) virtio_cleanup(vdev); } +VMSTATE_VIRTIO_DEVICE(input, VIRTIO_INPUT_VM_VERSION, virtio_input_load, + virtio_vmstate_save); + static Property virtio_input_properties[] = { 
DEFINE_PROP_STRING("serial", VirtIOInput, serial), DEFINE_PROP_END_OF_LIST(), @@ -315,6 +300,7 @@ static void virtio_input_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); dc->props = virtio_input_properties; + dc->vmsd = &vmstate_virtio_input; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); vdc->realize = virtio_input_device_realize; vdc->unrealize = virtio_input_device_unrealize; -- cgit v1.2.1 From de8892215e27caed9c26358a7435ea4877e022b1 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:54 +0100 Subject: virtio-gpu: Use migrate_add_blocker for virgl migration blocking virgl conditionally registers a vmstate as unmigratable when virgl is enabled; instead use the migrate_add_blocker mechanism. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Gerd Hoffmann --- hw/display/virtio-gpu.c | 19 +++++++++++++------ include/hw/virtio/virtio-gpu.h | 2 ++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 929c3c8b8a..cc87eb7cf7 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -19,6 +19,7 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-gpu.h" #include "hw/virtio/virtio-bus.h" +#include "migration/migration.h" #include "qemu/log.h" #include "qapi/error.h" @@ -986,11 +987,6 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = { }, }; -static const VMStateDescription vmstate_virtio_gpu_unmigratable = { - .name = "virtio-gpu-with-virgl", - .unmigratable = 1, -}; - static void virtio_gpu_save(QEMUFile *f, void *opaque) { VirtIOGPU *g = opaque; @@ -1169,13 +1165,23 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) } if (virtio_gpu_virgl_enabled(g->conf)) { - vmstate_register(qdev, -1, &vmstate_virtio_gpu_unmigratable, g); + error_setg(&g->migration_blocker, "virgl is not 
yet migratable"); + migrate_add_blocker(g->migration_blocker); } else { register_savevm(qdev, "virtio-gpu", -1, VIRTIO_GPU_VM_VERSION, virtio_gpu_save, virtio_gpu_load, g); } } +static void virtio_gpu_device_unrealize(DeviceState *qdev, Error **errp) +{ + VirtIOGPU *g = VIRTIO_GPU(qdev); + if (g->migration_blocker) { + migrate_del_blocker(g->migration_blocker); + error_free(g->migration_blocker); + } +} + static void virtio_gpu_instance_init(Object *obj) { } @@ -1237,6 +1243,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data) VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); vdc->realize = virtio_gpu_device_realize; + vdc->unrealize = virtio_gpu_device_unrealize; vdc->get_config = virtio_gpu_get_config; vdc->set_config = virtio_gpu_set_config; vdc->get_features = virtio_gpu_get_features; diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 325354f9f3..e4f424ad4a 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -118,6 +118,8 @@ typedef struct VirtIOGPU { uint32_t req_3d; uint32_t bytes_3d; } stats; + + Error *migration_blocker; } VirtIOGPU; extern const GraphicHwOps virtio_gpu_ops; -- cgit v1.2.1 From 0fc07498dabc1fbe42ee733c477306e2730c5ccf Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:55 +0100 Subject: virtio-gpu: Wrap in vmstate Forcibly convert it to a vmstate wrapper; proper conversion comes later. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Gerd Hoffmann --- hw/display/virtio-gpu.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index cc87eb7cf7..7fe6ed8bf0 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -987,7 +987,7 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = { }, }; -static void virtio_gpu_save(QEMUFile *f, void *opaque) +static void virtio_gpu_save(QEMUFile *f, void *opaque, size_t size) { VirtIOGPU *g = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(g); @@ -1017,7 +1017,7 @@ static void virtio_gpu_save(QEMUFile *f, void *opaque) vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL); } -static int virtio_gpu_load(QEMUFile *f, void *opaque, int version_id) +static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size) { VirtIOGPU *g = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(g); @@ -1026,11 +1026,7 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, int version_id) uint32_t resource_id, pformat; int i, ret; - if (version_id != VIRTIO_GPU_VM_VERSION) { - return -EINVAL; - } - - ret = virtio_load(vdev, f, version_id); + ret = virtio_load(vdev, f, VIRTIO_GPU_VM_VERSION); if (ret) { return ret; } @@ -1167,9 +1163,6 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) if (virtio_gpu_virgl_enabled(g->conf)) { error_setg(&g->migration_blocker, "virgl is not yet migratable"); migrate_add_blocker(g->migration_blocker); - } else { - register_savevm(qdev, "virtio-gpu", -1, VIRTIO_GPU_VM_VERSION, - virtio_gpu_save, virtio_gpu_load, g); } } @@ -1226,6 +1219,9 @@ static void virtio_gpu_reset(VirtIODevice *vdev) #endif } +VMSTATE_VIRTIO_DEVICE(gpu, VIRTIO_GPU_VM_VERSION, virtio_gpu_load, + virtio_gpu_save); + static Property virtio_gpu_properties[] = { DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1), #ifdef CONFIG_VIRGL @@ -1252,6 +1248,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data) 
vdc->reset = virtio_gpu_reset; dc->props = virtio_gpu_properties; + dc->vmsd = &vmstate_virtio_gpu; } static const TypeInfo virtio_gpu_info = { -- cgit v1.2.1 From 1a210f631b1fc7547b0096717b208d199b16c4d6 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 14 Jul 2016 18:22:56 +0100 Subject: virtio: Update migration docs Remove references to register_savevm. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/virtio-migration.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/virtio-migration.txt b/docs/virtio-migration.txt index cf66458b97..98a6b0ffb5 100644 --- a/docs/virtio-migration.txt +++ b/docs/virtio-migration.txt @@ -28,7 +28,8 @@ virtio core virtio transport virtio device ----------- ---------------- ------------- save() function registered - via register_savevm() + via VMState wrapper on + device class virtio_save() <---------- ------> save_config() - save proxy device @@ -63,7 +64,8 @@ virtio core virtio transport virtio device ----------- ---------------- ------------- load() function registered - via register_savevm() + via VMState wrapper on + device class virtio_load() <---------- ------> load_config() - load proxy device -- cgit v1.2.1 From bc38ee10fc26338e21c01485540f815be1f3db28 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 21 Jul 2016 18:54:10 +0300 Subject: intel_iommu: avoid unnamed fields Also avoid unnamed fields for portability. Also, rename VTD_IRTE to VTD_IR_TableEntry for coding style compliance. Signed-off-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 42 +++++++++++++++++++++--------------------- include/hw/i386/intel_iommu.h | 8 ++++---- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 0e139d1945..28c31a2cdf 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2010,7 +2010,7 @@ static Property vtd_properties[] = { /* Read IRTE entry with specific index */ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, - VTD_IRTE *entry, uint16_t sid) + VTD_IR_TableEntry *entry, uint16_t sid) { static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \ {0xffff, 0xfffb, 0xfff9, 0xfff8}; @@ -2026,7 +2026,7 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, return -VTD_FR_IR_ROOT_INVAL; } - if (!entry->present) { + if (!entry->irte.present) { VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE" " entry index %u value 0x%"PRIx64 " 0x%"PRIx64, index, le64_to_cpu(entry->data[1]), @@ -2034,8 +2034,8 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, return -VTD_FR_IR_ENTRY_P; } - if (entry->__reserved_0 || entry->__reserved_1 || \ - entry->__reserved_2) { + if (entry->irte.__reserved_0 || entry->irte.__reserved_1 || + entry->irte.__reserved_2) { VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16 " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64, index, le64_to_cpu(entry->data[1]), @@ -2045,14 +2045,14 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, if (sid != X86_IOMMU_SID_INVALID) { /* Validate IRTE SID */ - source_id = le32_to_cpu(entry->source_id); - switch (entry->sid_vtype) { + source_id = le32_to_cpu(entry->irte.source_id); + switch (entry->irte.sid_vtype) { case VTD_SVT_NONE: VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index); break; case VTD_SVT_ALL: - mask = vtd_svt_mask[entry->sid_q]; + mask = vtd_svt_mask[entry->irte.sid_q]; if ((source_id & mask) != (sid & mask)) { VTD_DPRINTF(GENERAL, "SID validation for IRTE index " "%d 
failed (reqid 0x%04x sid 0x%04x)", index, @@ -2075,7 +2075,7 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, default: VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index " - "%d", entry->sid_vtype, index); + "%d", entry->irte.sid_vtype, index); /* Take this as verification failure. */ return -VTD_FR_IR_SID_ERR; break; @@ -2089,7 +2089,7 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, VTDIrq *irq, uint16_t sid) { - VTD_IRTE irte = {}; + VTD_IR_TableEntry irte = {}; int ret = 0; ret = vtd_irte_get(iommu, index, &irte, sid); @@ -2097,18 +2097,18 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, return ret; } - irq->trigger_mode = irte.trigger_mode; - irq->vector = irte.vector; - irq->delivery_mode = irte.delivery_mode; - irq->dest = le32_to_cpu(irte.dest_id); + irq->trigger_mode = irte.irte.trigger_mode; + irq->vector = irte.irte.vector; + irq->delivery_mode = irte.irte.delivery_mode; + irq->dest = le32_to_cpu(irte.irte.dest_id); if (!iommu->intr_eime) { #define VTD_IR_APIC_DEST_MASK (0xff00ULL) #define VTD_IR_APIC_DEST_SHIFT (8) irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >> VTD_IR_APIC_DEST_SHIFT; } - irq->dest_mode = irte.dest_mode; - irq->redir_hint = irte.redir_hint; + irq->dest_mode = irte.irte.dest_mode; + irq->redir_hint = irte.irte.redir_hint; VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u," "deliver:%u,dest:%u,dest_mode:%u", index, @@ -2167,23 +2167,23 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, } addr.data = origin->address & VTD_MSI_ADDR_LO_MASK; - if (le16_to_cpu(addr.__head) != 0xfee) { + if (le16_to_cpu(addr.addr.__head) != 0xfee) { VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: " "0x%"PRIx32, addr.data); return -VTD_FR_IR_REQ_RSVD; } /* This is compatible mode. 
*/ - if (addr.int_mode != VTD_IR_INT_FORMAT_REMAP) { + if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) { goto do_not_translate; } - index = addr.index_h << 15 | le16_to_cpu(addr.index_l); + index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l); #define VTD_IR_MSI_DATA_SUBHANDLE (0x0000ffff) #define VTD_IR_MSI_DATA_RESERVED (0xffff0000) - if (addr.sub_valid) { + if (addr.addr.sub_valid) { /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */ index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE; } @@ -2193,7 +2193,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, return ret; } - if (addr.sub_valid) { + if (addr.addr.sub_valid) { VTD_DPRINTF(IR, "received MSI interrupt"); if (origin->data & VTD_IR_MSI_DATA_RESERVED) { VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for " @@ -2217,7 +2217,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, * We'd better keep the last two bits, assuming that guest OS * might modify it. Keep it does not hurt after all. */ - irq.msi_addr_last_bits = addr.__not_care; + irq.msi_addr_last_bits = addr.addr.__not_care; /* Translate VTDIrq to MSI message */ vtd_generate_msi_message(&irq, translated); diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 2eba7ed4db..a42dbd745a 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -59,7 +59,7 @@ typedef struct IntelIOMMUState IntelIOMMUState; typedef struct VTDAddressSpace VTDAddressSpace; typedef struct VTDIOTLBEntry VTDIOTLBEntry; typedef struct VTDBus VTDBus; -typedef union VTD_IRTE VTD_IRTE; +typedef union VTD_IR_TableEntry VTD_IR_TableEntry; typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; typedef struct VTDIrq VTDIrq; typedef struct VTD_MSIMessage VTD_MSIMessage; @@ -120,7 +120,7 @@ enum { }; /* Interrupt Remapping Table Entry Definition */ -union VTD_IRTE { +union VTD_IR_TableEntry { struct { #ifdef HOST_WORDS_BIGENDIAN uint32_t dest_id:32; /* Destination ID */ @@ -159,7 +159,7 @@ union VTD_IRTE 
{ uint64_t sid_vtype:2; /* Source-ID Validation Type */ uint64_t __reserved_2:44; /* Reserved 2 */ #endif - } QEMU_PACKED; + } QEMU_PACKED irte; uint64_t data[2]; }; @@ -184,7 +184,7 @@ union VTD_IR_MSIAddress { uint32_t index_l:15; /* Interrupt index bit 14-0 */ uint32_t __head:12; /* Should always be: 0x0fee */ #endif - } QEMU_PACKED; + } QEMU_PACKED addr; uint32_t data; }; -- cgit v1.2.1