From 7c4228b4771acddcb8815079bc116007cec8a1ff Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 15 Jan 2014 10:07:26 -0700 Subject: vfio: Destroy memory regions Somehow this has been lurking for a while; we remove our subregions from the base BAR and VGA region mappings, but we don't destroy them, creating a leak and more serious problems when we try to migrate after removing these devices. Add the trivial bit of final cleanup to remove these entirely. Signed-off-by: Alex Williamson --- hw/misc/vfio.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'hw/misc') diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 9aecaa82bc..ec9f41b98d 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -1968,6 +1968,7 @@ static void vfio_vga_quirk_teardown(VFIODevice *vdev) while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem); + memory_region_destroy(&quirk->mem); QLIST_REMOVE(quirk, next); g_free(quirk); } @@ -1990,6 +1991,7 @@ static void vfio_bar_quirk_teardown(VFIODevice *vdev, int nr) while (!QLIST_EMPTY(&bar->quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); memory_region_del_subregion(&bar->mem, &quirk->mem); + memory_region_destroy(&quirk->mem); QLIST_REMOVE(quirk, next); g_free(quirk); } @@ -2412,10 +2414,12 @@ static void vfio_unmap_bar(VFIODevice *vdev, int nr) memory_region_del_subregion(&bar->mem, &bar->mmap_mem); munmap(bar->mmap, memory_region_size(&bar->mmap_mem)); + memory_region_destroy(&bar->mmap_mem); if (vdev->msix && vdev->msix->table_bar == nr) { memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem); munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); + memory_region_destroy(&vdev->msix->mmap_mem); } memory_region_destroy(&bar->mem); -- cgit v1.2.1 From d20b43dfea1587b561aae17e4fa0f7407779d253 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Wed, 15 Jan 2014 10:11:06 -0700 Subject: vfio: warn if host device rom can't be read If the device rom can't be read, report an error to the user. This alerts the user that the device has a bad state that is causing rom read failure or option rom loading has been disabled from the device boot menu (among other reasons). Signed-off-by: Bandan Das Signed-off-by: Alex Williamson --- hw/misc/vfio.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'hw/misc') diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index ec9f41b98d..ef615fc28f 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -1125,6 +1125,13 @@ static void vfio_pci_load_rom(VFIODevice *vdev) vdev->rom_offset = reg_info.offset; if (!vdev->rom_size) { + error_report("vfio-pci: Cannot read device rom at " + "%04x:%02x:%02x.%x\n", + vdev->host.domain, vdev->host.bus, vdev->host.slot, + vdev->host.function); + error_printf("Device option ROM contents are probably invalid " + "(check dmesg).\nSkip option ROM probe with rombar=0, " + "or load from file with romfile=\n"); return; } -- cgit v1.2.1 From e638073c569e801ce9def2016a84f955cbbca779 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Wed, 15 Jan 2014 10:11:52 -0700 Subject: vfio: Do not reattempt a failed rom read During lazy rom loading, if rom read fails, and the guest attempts a read again, vfio will again attempt it. Add a boolean to prevent this. There could be a case where a failed rom read might succeed the next time because of a device reset or such, but it's best to exclude unpredictable behavior Signed-off-by: Bandan Das Signed-off-by: Alex Williamson --- hw/misc/vfio.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'hw/misc') diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index ef615fc28f..30b1a78f9c 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -191,6 +191,7 @@ typedef struct VFIODevice { bool has_flr; bool has_pm_reset; bool needs_reset; + bool rom_read_failed; } VFIODevice; typedef struct VFIOGroup { @@ -1125,6 +1126,7 @@ static void vfio_pci_load_rom(VFIODevice *vdev) vdev->rom_offset = reg_info.offset; if (!vdev->rom_size) { + vdev->rom_read_failed = true; error_report("vfio-pci: Cannot read device rom at " "%04x:%02x:%02x.%x\n", vdev->host.domain, vdev->host.bus, vdev->host.slot, @@ -1163,6 +1165,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) /* Load the ROM lazily when the guest tries to read it */ if (unlikely(!vdev->rom)) { vfio_pci_load_rom(vdev); + if (unlikely(!vdev->rom && !vdev->rom_read_failed)) { + vfio_pci_load_rom(vdev); + } } memcpy(&val, vdev->rom + addr, @@ -1230,6 +1235,7 @@ static void vfio_pci_size_rom(VFIODevice *vdev) PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); vdev->pdev.has_rom = true; + vdev->rom_read_failed = false; } static void vfio_vga_write(void *opaque, hwaddr addr, -- cgit v1.2.1 From d3a2fd9b29e43e202315d5e99399b99622469c4a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 16 Jan 2014 09:22:07 -0700 Subject: vfio: Filter out bogus mappings Since 57271d63 we now see spurious mappings with the upper bits set if 64bit PCI BARs are sized while enabled. The guest writes a mask of 0xffffffff to the lower BAR to size it, then restores it, then writes the same mask to the upper BAR resulting in a spurious BAR mapping into the last 4G of the 64bit address space. Most architectures do not support or make use of the full 64bits address space for PCI BARs, so we filter out mappings with the high bit set. Long term, we probably need to think about vfio telling us the address width limitations of the IOMMU. Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin --- hw/misc/vfio.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'hw/misc') diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 30b1a78f9c..d304213bf1 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -2156,7 +2156,14 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, static bool vfio_listener_skipped_section(MemoryRegionSection *section) { - return !memory_region_is_ram(section->mr); + return !memory_region_is_ram(section->mr) || + /* + * Sizing an enabled 64-bit BAR can cause spurious mappings to + * addresses in the upper part of the 64-bit address space. These + * are never accessed by the CPU and beyond the address width of + * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. + */ + section->offset_within_address_space & (1ULL << 63); } static void vfio_listener_region_add(MemoryListener *listener, -- cgit v1.2.1 From 87ca1f77b1c406137fe36ab73b2dc91fb75f8d0a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 16 Jan 2014 09:22:07 -0700 Subject: vfio-pci: Fail initfn on DMA mapping errors The vfio-pci initfn will currently succeed even if DMA mappings fail. A typical reason for failure is if the user does not have sufficient privilege to lock all the memory for the guest. In this case, the device gets attached, but can only access a portion of guest memory and is extremely unlikely to work. DMA mappings are done via a MemoryListener, which provides no direct error return path. We therefore stuff the errno into our container structure and check for error after registration completes. We can also test for mapping errors during runtime, but our only option for resolution at that point is to kill the guest with a hw_error. Signed-off-by: Alex Williamson --- hw/misc/vfio.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'hw/misc') diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index d304213bf1..432547ce16 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -135,12 +135,18 @@ enum { struct VFIOGroup; +typedef struct VFIOType1 { + MemoryListener listener; + int error; + bool initialized; +} VFIOType1; + typedef struct VFIOContainer { int fd; /* /dev/vfio/vfio, empowered by the attached groups */ struct { /* enable abstraction to support various iommu backends */ union { - MemoryListener listener; /* Used by type1 iommu */ + VFIOType1 type1; }; void (*release)(struct VFIOContainer *); } iommu_data; @@ -2170,7 +2176,7 @@ static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.listener); + iommu_data.type1.listener); hwaddr iova, end; void *vaddr; int ret; @@ -2212,6 +2218,19 @@ static void vfio_listener_region_add(MemoryListener *listener, error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) = %d (%m)", container, iova, end - iova, vaddr, ret); + + /* + * On the initfn path, store the first error in the container so we + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. + */ + if (!container->iommu_data.type1.initialized) { + if (!container->iommu_data.type1.error) { + container->iommu_data.type1.error = ret; + } + } else { + hw_error("vfio: DMA mapping failed, unable to continue\n"); + } } } @@ -2219,7 +2238,7 @@ static void vfio_listener_region_del(MemoryListener *listener, MemoryRegionSection *section) { VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.listener); + iommu_data.type1.listener); hwaddr iova, end; int ret; @@ -2264,7 +2283,7 @@ static MemoryListener vfio_memory_listener = { static void vfio_listener_release(VFIOContainer *container) { - memory_listener_unregister(&container->iommu_data.listener); + memory_listener_unregister(&container->iommu_data.type1.listener); } /* @@ -3236,10 +3255,23 @@ static int vfio_connect_container(VFIOGroup *group) return -errno; } - container->iommu_data.listener = vfio_memory_listener; + container->iommu_data.type1.listener = vfio_memory_listener; container->iommu_data.release = vfio_listener_release; - memory_listener_register(&container->iommu_data.listener, &address_space_memory); + memory_listener_register(&container->iommu_data.type1.listener, + &address_space_memory); + + if (container->iommu_data.type1.error) { + ret = container->iommu_data.type1.error; + vfio_listener_release(container); + g_free(container); + close(fd); + error_report("vfio: memory listener initialization failed for container\n"); + return ret; + } + + container->iommu_data.type1.initialized = true; + } else { error_report("vfio: No available IOMMU models"); g_free(container); -- cgit v1.2.1 From 8d7b5a1da0e06aa7addd7f084d9ec9d433c4bafb Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 17 Jan 2014 11:12:56 -0700 Subject: vfio: fix mapping of MSIX bar VFIO virtualizes MSIX table for the guest but not mapping the part of a BAR which contains an MSIX table. Since vfio_mmap_bar() mmaps chunks before and after the MSIX table, they have to be aligned to the host page size which may be TARGET_PAGE_MASK (4K) or 64K in case of PPC64. This fixes boundaries calculations to use the real host page size. Without the patch, the chunk before MSIX table may overlap with the MSIX table and mmap will fail in the host kernel. The result will be serious slowdown as the whole BAR will be emulated by QEMU. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alex Williamson --- hw/misc/vfio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'hw/misc') diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 432547ce16..8a1f1a124d 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -2544,7 +2544,7 @@ static void vfio_map_bar(VFIODevice *vdev, int nr) * potentially insert a direct-mapped subregion before and after it. */ if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & TARGET_PAGE_MASK; + size = vdev->msix->table_offset & qemu_host_page_mask; } strncat(name, " mmap", sizeof(name) - strlen(name) - 1); @@ -2556,8 +2556,8 @@ static void vfio_map_bar(VFIODevice *vdev, int nr) if (vdev->msix && vdev->msix->table_bar == nr) { unsigned start; - start = TARGET_PAGE_ALIGN(vdev->msix->table_offset + - (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + start = HOST_PAGE_ALIGN(vdev->msix->table_offset + + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); size = start < bar->size ? bar->size - start : 0; strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); -- cgit v1.2.1 From 8b6d14087d487203f4d1a67aeaddc3be6c73f49f Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 28 Jan 2014 08:23:19 -0700 Subject: vfio: correct debug macro typo Change to DEBUG_VFIO in vfio_msi_interrupt() for debug messages to get printed Signed-off-by: Bandan Das Signed-off-by: Alex Williamson --- hw/misc/vfio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw/misc') diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 8a1f1a124d..8db182fa3d 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -599,7 +599,7 @@ static void vfio_msi_interrupt(void *opaque) return; } -#ifdef VFIO_DEBUG +#ifdef DEBUG_VFIO MSIMessage msg; if (vdev->interrupt == VFIO_INT_MSIX) { -- cgit v1.2.1