summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2017-02-20 09:53:59 +0000
committerPeter Maydell <peter.maydell@linaro.org>2017-02-20 09:53:59 +0000
commitd514cfd763b271b4e97a9fc6adaabc8fd50084ab (patch)
tree146f1c9b310813894b79976e28a092b5cbf2ed12
parentad584d37f2a86b392c25f3f00cc1f1532676c2d1 (diff)
parent7e58326ad7e79b8c5dbcc6f24e9dc1523d84c11b (diff)
downloadqemu-d514cfd763b271b4e97a9fc6adaabc8fd50084ab.tar.gz
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pci: fixes, features virtio is using region caches for performance iommu support for IOTLBs misc fixes Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Fri 17 Feb 2017 19:53:02 GMT # gpg: using RSA key 0x281F0DB8D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * remotes/mst/tags/for_upstream: (23 commits) intel_iommu: vtd_slpt_level_shift check level intel_iommu: convert dbg macros to trace for trans intel_iommu: convert dbg macros to traces for inv intel_iommu: renaming gpa to iova where proper intel_iommu: simplify irq region translation intel_iommu: add "caching-mode" option vfio: allow to notify unmap for very large region vfio: introduce vfio_get_vaddr() vfio: trace map/unmap for notify as well pcie: simplify pcie_add_capability() virtio: Fix no interrupt when not creating msi controller virtio: use VRingMemoryRegionCaches for avail and used rings virtio: check for vring setup in virtio_queue_update_used_idx virtio: use VRingMemoryRegionCaches for descriptor ring virtio: add MemoryListener to cache ring translations virtio: use MemoryRegionCache to access descriptors exec: make address_space_cache_destroy idempotent virtio: use address_space_map/unmap to access descriptors virtio: add virtio_*_phys_cached memory: make memory_listener_unregister idempotent ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--docs/nvdimm.txt124
-rw-r--r--exec.c1
-rw-r--r--hw/block/dataplane/virtio-blk.c4
-rw-r--r--hw/block/virtio-blk.c12
-rw-r--r--hw/i386/intel_iommu.c238
-rw-r--r--hw/i386/intel_iommu_internal.h1
-rw-r--r--hw/i386/trace-events28
-rw-r--r--hw/net/virtio-net.c14
-rw-r--r--hw/pci/pcie.c23
-rw-r--r--hw/scsi/virtio-scsi-dataplane.c14
-rw-r--r--hw/scsi/virtio-scsi.c14
-rw-r--r--hw/vfio/common.c65
-rw-r--r--hw/vfio/trace-events2
-rw-r--r--hw/virtio/virtio.c364
-rw-r--r--include/exec/memory.h2
-rw-r--r--include/hw/i386/intel_iommu.h2
-rw-r--r--include/hw/virtio/virtio-access.h52
-rw-r--r--include/hw/virtio/virtio-blk.h2
-rw-r--r--include/hw/virtio/virtio-scsi.h6
-rw-r--r--include/hw/virtio/virtio.h5
-rw-r--r--memory.c5
21 files changed, 702 insertions, 276 deletions
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
new file mode 100644
index 0000000000..2d9f8c0e8c
--- /dev/null
+++ b/docs/nvdimm.txt
@@ -0,0 +1,124 @@
+QEMU Virtual NVDIMM
+===================
+
+This document explains the usage of virtual NVDIMM (vNVDIMM) feature
+which is available since QEMU v2.6.0.
+
+The current QEMU only implements the persistent memory mode of vNVDIMM
+device and not the block window mode.
+
+Basic Usage
+-----------
+
+The storage of a vNVDIMM device in QEMU is provided by the memory
+backend (i.e. memory-backend-file and memory-backend-ram). A simple
+way to create a vNVDIMM device at startup time is done via the
+following command line options:
+
+ -machine pc,nvdimm
+ -m $RAM_SIZE,slots=$N,maxmem=$MAX_SIZE
+ -object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE
+ -device nvdimm,id=nvdimm1,memdev=mem1
+
+Where,
+
+ - the "nvdimm" machine option enables vNVDIMM feature.
+
+ - "slots=$N" should be equal to or larger than the total number of
+ normal RAM devices and vNVDIMM devices, e.g. $N should be >= 2 here.
+
+ - "maxmem=$MAX_SIZE" should be equal to or larger than the total size
+ of normal RAM devices and vNVDIMM devices, e.g. $MAX_SIZE should be
+ >= $RAM_SIZE + $NVDIMM_SIZE here.
+
+ - "object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE"
+ creates a backend storage of size $NVDIMM_SIZE on a file $PATH. All
+ accesses to the virtual NVDIMM device go to the file $PATH.
+
+ "share=on/off" controls the visibility of guest writes. If
+ "share=on", then guest writes will be applied to the backend
+ file. If another guest uses the same backend file with option
+ "share=on", then above writes will be visible to it as well. If
+ "share=off", then guest writes won't be applied to the backend
+ file and thus will be invisible to other guests.
+
+ - "device nvdimm,id=nvdimm1,memdev=mem1" creates a virtual NVDIMM
+ device whose storage is provided by above memory backend device.
+
+Multiple vNVDIMM devices can be created if multiple pairs of "-object"
+and "-device" are provided.
+
+For the above command line options, if the guest OS has the proper NVDIMM
+driver, it should be able to detect an NVDIMM device which is in the
+persistent memory mode and whose size is $NVDIMM_SIZE.
+
+Note:
+
+1. Prior to QEMU v2.8.0, if memory-backend-file is used and the actual
+ backend file size is not equal to the size given by "size" option,
+ QEMU will truncate the backend file by ftruncate(2), which will
+ corrupt the existing data in the backend file, especially for the
+ shrink case.
+
+ QEMU v2.8.0 and later check the backend file size and the "size"
+ option. If they do not match, QEMU will report errors and abort in
+ order to avoid the data corruption.
+
+2. QEMU v2.6.0 only puts a basic alignment requirement on the "size"
+ option of memory-backend-file, e.g. 4KB alignment on x86. However,
+ QEMU v2.7.0 puts an additional alignment requirement, which may
+ require a larger value than the basic one, e.g. 2MB on x86. This
+ change breaks the usage of memory-backend-file that only satisfies
+ the basic alignment.
+
+ QEMU v2.8.0 and later remove the additional alignment on non-s390x
+ architectures, so the broken memory-backend-file can work again.
+
+Label
+-----
+
+QEMU v2.7.0 and later implement the label support for vNVDIMM devices.
+To enable label on vNVDIMM devices, users can simply add
+"label-size=$SZ" option to "-device nvdimm", e.g.
+
+ -device nvdimm,id=nvdimm1,memdev=mem1,label-size=128K
+
+Note:
+
+1. The minimal label size is 128KB.
+
+2. QEMU v2.7.0 and later store labels at the end of backend storage.
+ If a memory backend file, which was previously used as the backend
+ of a vNVDIMM device without labels, is now used for a vNVDIMM
+ device with label, the data in the label area at the end of file
+ will be inaccessible to the guest. If any useful data (e.g. the
+ meta-data of the file system) was stored there, the latter usage
+ may result in guest data corruption (e.g. breakage of guest file
+ system).
+
+Hotplug
+-------
+
+QEMU v2.8.0 and later implement the hotplug support for vNVDIMM
+devices. Similarly to the RAM hotplug, the vNVDIMM hotplug is
+accomplished by two monitor commands "object_add" and "device_add".
+
+For example, the following commands add another 4GB vNVDIMM device to
+the guest:
+
+ (qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=new_nvdimm.img,size=4G
+ (qemu) device_add nvdimm,id=nvdimm2,memdev=mem2
+
+Note:
+
+1. Each hotplugged vNVDIMM device consumes one memory slot. Users
+ should always ensure the memory option "-m ...,slots=N" specifies
+ a sufficient number of slots, i.e.
+ N >= number of RAM devices +
+ number of statically plugged vNVDIMM devices +
+ number of hotplugged vNVDIMM devices
+
+2. A similar constraint applies to the memory option "-m ...,maxmem=M", i.e.
+ M >= size of RAM devices +
+ size of statically plugged vNVDIMM devices +
+ size of hotplugged vNVDIMM devices
diff --git a/exec.c b/exec.c
index 6fa337b8d8..865a1e8295 100644
--- a/exec.c
+++ b/exec.c
@@ -3166,6 +3166,7 @@ void address_space_cache_destroy(MemoryRegionCache *cache)
xen_invalidate_map_cache_entry(cache->ptr);
}
memory_region_unref(cache->mr);
+ cache->mr = NULL;
}
/* Called from RCU critical section. This function has the same
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index d1f9f63eaf..5556f0e64e 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -147,7 +147,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
g_free(s);
}
-static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
+static bool virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOBlock *s = (VirtIOBlock *)vdev;
@@ -155,7 +155,7 @@ static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev,
assert(s->dataplane);
assert(s->dataplane_started);
- virtio_blk_handle_vq(s, vq);
+ return virtio_blk_handle_vq(s, vq);
}
/* Context: QEMU global mutex held */
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 702eda863e..baaa19593f 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -581,10 +581,11 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
return 0;
}
-void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
+bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
{
VirtIOBlockReq *req;
MultiReqBuffer mrb = {};
+ bool progress = false;
blk_io_plug(s->blk);
@@ -592,6 +593,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
virtio_queue_set_notification(vq, 0);
while ((req = virtio_blk_get_request(s, vq))) {
+ progress = true;
if (virtio_blk_handle_request(req, &mrb)) {
virtqueue_detach_element(req->vq, &req->elem, 0);
virtio_blk_free_request(req);
@@ -607,6 +609,12 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
}
blk_io_unplug(s->blk);
+ return progress;
+}
+
+static void virtio_blk_handle_output_do(VirtIOBlock *s, VirtQueue *vq)
+{
+ virtio_blk_handle_vq(s, vq);
}
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
@@ -622,7 +630,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
return;
}
}
- virtio_blk_handle_vq(s, vq);
+ virtio_blk_handle_output_do(s, vq);
}
static void virtio_blk_dma_restart_bh(void *opaque)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3270fb9162..22d8226e43 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
#include "sysemu/kvm.h"
#include "hw/i386/apic_internal.h"
#include "kvm_i386.h"
+#include "trace.h"
/*#define DEBUG_INTEL_IOMMU*/
#ifdef DEBUG_INTEL_IOMMU
@@ -167,6 +168,7 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
/* The shift of an addr for a certain level of paging structure */
static inline uint32_t vtd_slpt_level_shift(uint32_t level)
{
+ assert(level != 0);
return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
}
@@ -259,11 +261,9 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint64_t *key = g_malloc(sizeof(*key));
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
- VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
- " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte,
- domain_id);
+ trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
- VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset");
+ trace_vtd_iotlb_reset("iotlb exceeds size limit");
vtd_reset_iotlb(s);
}
@@ -474,22 +474,19 @@ static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
/* Set the IWC field and try to generate an invalidation completion interrupt */
static void vtd_generate_completion_event(IntelIOMMUState *s)
{
- VTD_DPRINTF(INV, "completes an invalidation wait command with "
- "Interrupt Flag");
if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
- VTD_DPRINTF(INV, "there is a previous interrupt condition to be "
- "serviced by software, "
- "new invalidation event is not generated");
+ trace_vtd_inv_desc_wait_irq("One pending, skip current");
return;
}
vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
- VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation "
- "event is not generated");
+ trace_vtd_inv_desc_wait_irq("IM in IECTL_REG is set, "
+ "new event not generated");
return;
} else {
/* Generate the interrupt event */
+ trace_vtd_inv_desc_wait_irq("Generating complete event");
vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
}
@@ -507,8 +504,7 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
addr = s->root + index * sizeof(*re);
if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
- VTD_DPRINTF(GENERAL, "error: fail to access root-entry at 0x%"PRIx64
- " + %"PRIu8, s->root, index);
+ trace_vtd_re_invalid(re->rsvd, re->val);
re->val = 0;
return -VTD_FR_ROOT_TABLE_INV;
}
@@ -526,15 +522,10 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
{
dma_addr_t addr;
- if (!vtd_root_entry_present(root)) {
- VTD_DPRINTF(GENERAL, "error: root-entry is not present");
- return -VTD_FR_ROOT_ENTRY_P;
- }
+ /* we have checked that root entry is present */
addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
- VTD_DPRINTF(GENERAL, "error: fail to access context-entry at 0x%"PRIx64
- " + %"PRIu8,
- (uint64_t)(root->val & VTD_ROOT_ENTRY_CTP), index);
+ trace_vtd_re_invalid(root->rsvd, root->val);
return -VTD_FR_CONTEXT_TABLE_INV;
}
ce->lo = le64_to_cpu(ce->lo);
@@ -575,12 +566,12 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
return slpte;
}
-/* Given a gpa and the level of paging structure, return the offset of current
- * level.
+/* Given an iova and the level of paging structure, return the offset
+ * of current level.
*/
-static inline uint32_t vtd_gpa_level_offset(uint64_t gpa, uint32_t level)
+static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
{
- return (gpa >> vtd_slpt_level_shift(level)) &
+ return (iova >> vtd_slpt_level_shift(level)) &
((1ULL << VTD_SL_LEVEL_BITS) - 1);
}
@@ -628,12 +619,12 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
}
}
-/* Given the @gpa, get relevant @slptep. @slpte_level will be the last level
+/* Given the @iova, get relevant @slptep. @slpte_level will be the last level
* of the translation, can be used for deciding the size of large page.
*/
-static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
- uint64_t *slptep, uint32_t *slpte_level,
- bool *reads, bool *writes)
+static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
+ uint64_t *slptep, uint32_t *slpte_level,
+ bool *reads, bool *writes)
{
dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
uint32_t level = vtd_get_level_from_context_entry(ce);
@@ -642,11 +633,11 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
uint64_t access_right_check;
- /* Check if @gpa is above 2^X-1, where X is the minimum of MGAW in CAP_REG
- * and AW in context-entry.
+ /* Check if @iova is above 2^X-1, where X is the minimum of MGAW
+ * in CAP_REG and AW in context-entry.
*/
- if (gpa & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
- VTD_DPRINTF(GENERAL, "error: gpa 0x%"PRIx64 " exceeds limits", gpa);
+ if (iova & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
+ VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
return -VTD_FR_ADDR_BEYOND_MGAW;
}
@@ -654,13 +645,13 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
access_right_check = is_write ? VTD_SL_W : VTD_SL_R;
while (true) {
- offset = vtd_gpa_level_offset(gpa, level);
+ offset = vtd_iova_level_offset(iova, level);
slpte = vtd_get_slpte(addr, offset);
if (slpte == (uint64_t)-1) {
VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
- "entry at level %"PRIu32 " for gpa 0x%"PRIx64,
- level, gpa);
+ "entry at level %"PRIu32 " for iova 0x%"PRIx64,
+ level, iova);
if (level == vtd_get_level_from_context_entry(ce)) {
/* Invalid programming of context-entry */
return -VTD_FR_CONTEXT_ENTRY_INV;
@@ -672,8 +663,8 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
*writes = (*writes) && (slpte & VTD_SL_W);
if (!(slpte & access_right_check)) {
VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
- "gpa 0x%"PRIx64 " slpte 0x%"PRIx64,
- (is_write ? "write" : "read"), gpa, slpte);
+ "iova 0x%"PRIx64 " slpte 0x%"PRIx64,
+ (is_write ? "write" : "read"), iova, slpte);
return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
}
if (vtd_slpte_nonzero_rsvd(slpte, level)) {
@@ -706,12 +697,11 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
}
if (!vtd_root_entry_present(&re)) {
- VTD_DPRINTF(GENERAL, "error: root-entry #%"PRIu8 " is not present",
- bus_num);
+ /* Not error - it's okay we don't have root entry. */
+ trace_vtd_re_not_present(bus_num);
return -VTD_FR_ROOT_ENTRY_P;
} else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in root-entry "
- "hi 0x%"PRIx64 " lo 0x%"PRIx64, re.rsvd, re.val);
+ trace_vtd_re_invalid(re.rsvd, re.val);
return -VTD_FR_ROOT_ENTRY_RSVD;
}
@@ -721,22 +711,17 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
}
if (!vtd_context_entry_present(ce)) {
- VTD_DPRINTF(GENERAL,
- "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
- "is not present", devfn, bus_num);
+ /* Not error - it's okay we don't have context entry. */
+ trace_vtd_ce_not_present(bus_num, devfn);
return -VTD_FR_CONTEXT_ENTRY_P;
} else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
(ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
- VTD_DPRINTF(GENERAL,
- "error: non-zero reserved field in context-entry "
- "hi 0x%"PRIx64 " lo 0x%"PRIx64, ce->hi, ce->lo);
+ trace_vtd_ce_invalid(ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_RSVD;
}
/* Check if the programming of context-entry is valid */
if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) {
- VTD_DPRINTF(GENERAL, "error: unsupported Address Width value in "
- "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
- ce->hi, ce->lo);
+ trace_vtd_ce_invalid(ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_INV;
} else {
switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
@@ -745,9 +730,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
case VTD_CONTEXT_TT_DEV_IOTLB:
break;
default:
- VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
- "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
- ce->hi, ce->lo);
+ trace_vtd_ce_invalid(ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_INV;
}
}
@@ -818,34 +801,17 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
bool writes = true;
VTDIOTLBEntry *iotlb_entry;
- /* Check if the request is in interrupt address range */
- if (vtd_is_interrupt_addr(addr)) {
- if (is_write) {
- /* FIXME: since we don't know the length of the access here, we
- * treat Non-DWORD length write requests without PASID as
- * interrupt requests, too. Withoud interrupt remapping support,
- * we just use 1:1 mapping.
- */
- VTD_DPRINTF(MMU, "write request to interrupt address "
- "gpa 0x%"PRIx64, addr);
- entry->iova = addr & VTD_PAGE_MASK_4K;
- entry->translated_addr = addr & VTD_PAGE_MASK_4K;
- entry->addr_mask = ~VTD_PAGE_MASK_4K;
- entry->perm = IOMMU_WO;
- return;
- } else {
- VTD_DPRINTF(GENERAL, "error: read request from interrupt address "
- "gpa 0x%"PRIx64, addr);
- vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write);
- return;
- }
- }
+ /*
+ * We have standalone memory region for interrupt addresses, we
+ * should never receive translation requests in this region.
+ */
+ assert(!vtd_is_interrupt_addr(addr));
+
/* Try to fetch slpte form IOTLB */
iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
if (iotlb_entry) {
- VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
- " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr,
- iotlb_entry->slpte, iotlb_entry->domain_id);
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ iotlb_entry->domain_id);
slpte = iotlb_entry->slpte;
reads = iotlb_entry->read_flags;
writes = iotlb_entry->write_flags;
@@ -854,10 +820,9 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
}
/* Try to fetch context-entry from cache first */
if (cc_entry->context_cache_gen == s->context_cache_gen) {
- VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d "
- "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 ")",
- bus_num, devfn, cc_entry->context_entry.hi,
- cc_entry->context_entry.lo, cc_entry->context_cache_gen);
+ trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
+ cc_entry->context_entry.lo,
+ cc_entry->context_cache_gen);
ce = cc_entry->context_entry;
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
} else {
@@ -866,30 +831,26 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
if (ret_fr) {
ret_fr = -ret_fr;
if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
- VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
- "requests through this context-entry "
- "(with FPD Set)");
+ trace_vtd_fault_disabled();
} else {
vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
}
return;
}
/* Update context-cache */
- VTD_DPRINTF(CACHE, "update context-cache bus %d devfn %d "
- "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 "->%"PRIu32 ")",
- bus_num, devfn, ce.hi, ce.lo,
- cc_entry->context_cache_gen, s->context_cache_gen);
+ trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
+ cc_entry->context_cache_gen,
+ s->context_cache_gen);
cc_entry->context_entry = ce;
cc_entry->context_cache_gen = s->context_cache_gen;
}
- ret_fr = vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level,
- &reads, &writes);
+ ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
+ &reads, &writes);
if (ret_fr) {
ret_fr = -ret_fr;
if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
- VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests "
- "through this context-entry (with FPD Set)");
+ trace_vtd_fault_disabled();
} else {
vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
}
@@ -939,6 +900,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
static void vtd_context_global_invalidate(IntelIOMMUState *s)
{
+ trace_vtd_inv_desc_cc_global();
s->context_cache_gen++;
if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
vtd_reset_context_cache(s);
@@ -978,9 +940,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
uint16_t mask;
VTDBus *vtd_bus;
VTDAddressSpace *vtd_as;
- uint16_t devfn;
+ uint8_t bus_n, devfn;
uint16_t devfn_it;
+ trace_vtd_inv_desc_cc_devices(source_id, func_mask);
+
switch (func_mask & 3) {
case 0:
mask = 0; /* No bits in the SID field masked */
@@ -996,16 +960,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
break;
}
mask = ~mask;
- VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
- " mask %"PRIu16, source_id, mask);
- vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
+
+ bus_n = VTD_SID_TO_BUS(source_id);
+ vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
if (vtd_bus) {
devfn = VTD_SID_TO_DEVFN(source_id);
for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
vtd_as = vtd_bus->dev_as[devfn_it];
if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
- VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
- devfn_it);
+ trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
+ VTD_PCI_FUNC(devfn_it));
vtd_as->context_cache_entry.context_cache_gen = 0;
}
}
@@ -1046,6 +1010,7 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
{
+ trace_vtd_iotlb_reset("global invalidation recved");
vtd_reset_iotlb(s);
}
@@ -1318,9 +1283,7 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
(inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Invalidation "
- "Wait Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
@@ -1332,21 +1295,18 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
/* FIXME: need to be masked with HAW? */
dma_addr_t status_addr = inv_desc->hi;
- VTD_DPRINTF(INV, "status data 0x%x, status addr 0x%"PRIx64,
- status_data, status_addr);
+ trace_vtd_inv_desc_wait_sw(status_addr, status_data);
status_data = cpu_to_le32(status_data);
if (dma_memory_write(&address_space_memory, status_addr, &status_data,
sizeof(status_data))) {
- VTD_DPRINTF(GENERAL, "error: fail to perform a coherent write");
+ trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
return false;
}
} else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
/* Interrupt flag */
- VTD_DPRINTF(INV, "Invalidation Wait Descriptor interrupt completion");
vtd_generate_completion_event(s);
} else {
- VTD_DPRINTF(GENERAL, "error: invalid Invalidation Wait Descriptor: "
- "hi 0x%"PRIx64 " lo 0x%"PRIx64, inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
return true;
@@ -1355,30 +1315,29 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
+ uint16_t sid, fmask;
+
if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Context-cache "
- "Invalidate Descriptor");
+ trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
case VTD_INV_DESC_CC_DOMAIN:
- VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
- (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
+ trace_vtd_inv_desc_cc_domain(
+ (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
/* Fall through */
case VTD_INV_DESC_CC_GLOBAL:
- VTD_DPRINTF(INV, "global invalidation");
vtd_context_global_invalidate(s);
break;
case VTD_INV_DESC_CC_DEVICE:
- vtd_context_device_invalidate(s, VTD_INV_DESC_CC_SID(inv_desc->lo),
- VTD_INV_DESC_CC_FM(inv_desc->lo));
+ sid = VTD_INV_DESC_CC_SID(inv_desc->lo);
+ fmask = VTD_INV_DESC_CC_FM(inv_desc->lo);
+ vtd_context_device_invalidate(s, sid, fmask);
break;
default:
- VTD_DPRINTF(GENERAL, "error: invalid granularity in Context-cache "
- "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
return true;
@@ -1392,22 +1351,19 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
(inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
- VTD_DPRINTF(GENERAL, "error: non-zero reserved field in IOTLB "
- "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
case VTD_INV_DESC_IOTLB_GLOBAL:
- VTD_DPRINTF(INV, "global invalidation");
+ trace_vtd_inv_desc_iotlb_global();
vtd_iotlb_global_invalidate(s);
break;
case VTD_INV_DESC_IOTLB_DOMAIN:
domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
- VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
- domain_id);
+ trace_vtd_inv_desc_iotlb_domain(domain_id);
vtd_iotlb_domain_invalidate(s, domain_id);
break;
@@ -1415,20 +1371,16 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
- VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
- " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
+ trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
if (am > VTD_MAMV) {
- VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
- "%"PRIu8, (uint8_t)VTD_MAMV);
+ trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
vtd_iotlb_page_invalidate(s, domain_id, addr, am);
break;
default:
- VTD_DPRINTF(GENERAL, "error: invalid granularity in IOTLB Invalidate "
- "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc->hi, inv_desc->lo);
+ trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
return false;
}
return true;
@@ -1527,33 +1479,28 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
switch (desc_type) {
case VTD_INV_DESC_CC:
- VTD_DPRINTF(INV, "Context-cache Invalidate Descriptor hi 0x%"PRIx64
- " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+ trace_vtd_inv_desc("context-cache", inv_desc.hi, inv_desc.lo);
if (!vtd_process_context_cache_desc(s, &inv_desc)) {
return false;
}
break;
case VTD_INV_DESC_IOTLB:
- VTD_DPRINTF(INV, "IOTLB Invalidate Descriptor hi 0x%"PRIx64
- " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+ trace_vtd_inv_desc("iotlb", inv_desc.hi, inv_desc.lo);
if (!vtd_process_iotlb_desc(s, &inv_desc)) {
return false;
}
break;
case VTD_INV_DESC_WAIT:
- VTD_DPRINTF(INV, "Invalidation Wait Descriptor hi 0x%"PRIx64
- " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+ trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
if (!vtd_process_wait_desc(s, &inv_desc)) {
return false;
}
break;
case VTD_INV_DESC_IEC:
- VTD_DPRINTF(INV, "Invalidation Interrupt Entry Cache "
- "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
- inv_desc.hi, inv_desc.lo);
+ trace_vtd_inv_desc("iec", inv_desc.hi, inv_desc.lo);
if (!vtd_process_inv_iec_desc(s, &inv_desc)) {
return false;
}
@@ -1568,9 +1515,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
break;
default:
- VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
- "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
- inv_desc.hi, inv_desc.lo, desc_type);
+ trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo);
return false;
}
s->iq_head++;
@@ -2049,7 +1994,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
is_write, &ret);
VTD_DPRINTF(MMU,
"bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
- " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
+ " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
vtd_as->devfn, addr, ret.translated_addr);
return ret;
@@ -2115,6 +2060,7 @@ static Property vtd_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
+ DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_END_OF_LIST(),
};
@@ -2496,6 +2442,10 @@ static void vtd_init(IntelIOMMUState *s)
s->ecap |= VTD_ECAP_DT;
}
+ if (s->caching_mode) {
+ s->cap |= VTD_CAP_CM;
+ }
+
vtd_reset_context_cache(s);
vtd_reset_iotlb(s);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 356f188b73..41041219ba 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -202,6 +202,7 @@
#define VTD_CAP_MAMV (VTD_MAMV << 48)
#define VTD_CAP_PSI (1ULL << 39)
#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
+#define VTD_CAP_CM (1ULL << 7)
/* Supported Adjusted Guest Address Widths */
#define VTD_CAP_SAGAW_SHIFT 8
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 1cc4a10a07..88ad5e4c43 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -3,6 +3,34 @@
# hw/i386/x86-iommu.c
x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
+# hw/i386/intel_iommu.c
+vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
+vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64
+vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16
+vtd_inv_desc_cc_global(void) "context invalidate globally"
+vtd_inv_desc_cc_device(uint8_t bus, uint8_t dev, uint8_t fn) "context invalidate device %02"PRIx8":%02"PRIx8".%02"PRIx8
+vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate devices sid 0x%"PRIx16" fmask 0x%"PRIx16
+vtd_inv_desc_cc_invalid(uint64_t hi, uint64_t lo) "invalid context-cache desc hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_inv_desc_iotlb_global(void) "iotlb invalidate global"
+vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16
+vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8
+vtd_inv_desc_iotlb_invalid(uint64_t hi, uint64_t lo) "invalid iotlb desc hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32
+vtd_inv_desc_wait_irq(const char *msg) "%s"
+vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
+vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
+vtd_ce_invalid(uint64_t hi, uint64_t lo) "invalid context entry hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
+vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
+vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32
+vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
+vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
+vtd_fault_disabled(void) "Fault processing disabled for context entry"
+
# hw/i386/amd_iommu.c
amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
amdvi_cache_update(uint16_t domid, uint8_t bus, uint8_t slot, uint8_t func, uint64_t gpa, uint64_t txaddr) " update iotlb domid 0x%"PRIx16" devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 354a19eab8..c32168077a 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1130,7 +1130,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
return 0;
}
-static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
+ size_t size)
{
VirtIONet *n = qemu_get_nic_opaque(nc);
VirtIONetQueue *q = virtio_net_get_subqueue(nc);
@@ -1233,6 +1234,17 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
return size;
}
+/*
+ * Receive entry point: runs virtio_net_receive_rcu() inside an RCU
+ * read-side critical section and returns its result unchanged.
+ */
+static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
+ size_t size)
+{
+ ssize_t r;
+
+ rcu_read_lock();
+ r = virtio_net_receive_rcu(nc, buf, size);
+ rcu_read_unlock();
+ return r;
+}
+
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index cbd4bb4f8c..fc54bfd53d 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -610,7 +610,8 @@ bool pcie_cap_is_arifwd_enabled(const PCIDevice *dev)
* uint16_t ext_cap_size
*/
-static uint16_t pcie_find_capability_list(PCIDevice *dev, uint16_t cap_id,
+/* Passing a cap_id value > 0xffff will return 0 and put end of list in prev */
+static uint16_t pcie_find_capability_list(PCIDevice *dev, uint32_t cap_id,
uint16_t *prev_p)
{
uint16_t prev = 0;
@@ -664,30 +665,24 @@ void pcie_add_capability(PCIDevice *dev,
uint16_t cap_id, uint8_t cap_ver,
uint16_t offset, uint16_t size)
{
- uint32_t header;
- uint16_t next;
-
assert(offset >= PCI_CONFIG_SPACE_SIZE);
assert(offset < offset + size);
assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
assert(size >= 8);
assert(pci_is_express(dev));
- if (offset == PCI_CONFIG_SPACE_SIZE) {
- header = pci_get_long(dev->config + offset);
- next = PCI_EXT_CAP_NEXT(header);
- } else {
+ if (offset != PCI_CONFIG_SPACE_SIZE) {
uint16_t prev;
- /* 0 is reserved cap id. use internally to find the last capability
- in the linked list */
- next = pcie_find_capability_list(dev, 0, &prev);
-
+ /*
+ * 0xffffffff is not a valid cap id (it's a 16 bit field). use
+ * internally to find the last capability in the linked list.
+ */
+ pcie_find_capability_list(dev, 0xffffffff, &prev);
assert(prev >= PCI_CONFIG_SPACE_SIZE);
- assert(next == 0);
pcie_ext_cap_set_next(dev, prev, offset);
}
- pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, next));
+ pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, 0));
/* Make capability read-only by default */
memset(dev->wmask + offset, 0, size);
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 6b8d0f0024..74c95e0e60 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -49,35 +49,35 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
}
}
-static void virtio_scsi_data_plane_handle_cmd(VirtIODevice *vdev,
+static bool virtio_scsi_data_plane_handle_cmd(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOSCSI *s = (VirtIOSCSI *)vdev;
assert(s->ctx && s->dataplane_started);
- virtio_scsi_handle_cmd_vq(s, vq);
+ return virtio_scsi_handle_cmd_vq(s, vq);
}
-static void virtio_scsi_data_plane_handle_ctrl(VirtIODevice *vdev,
+static bool virtio_scsi_data_plane_handle_ctrl(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
assert(s->ctx && s->dataplane_started);
- virtio_scsi_handle_ctrl_vq(s, vq);
+ return virtio_scsi_handle_ctrl_vq(s, vq);
}
-static void virtio_scsi_data_plane_handle_event(VirtIODevice *vdev,
+static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev,
VirtQueue *vq)
{
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
assert(s->ctx && s->dataplane_started);
- virtio_scsi_handle_event_vq(s, vq);
+ return virtio_scsi_handle_event_vq(s, vq);
}
static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
- void (*fn)(VirtIODevice *vdev, VirtQueue *vq))
+ VirtIOHandleAIOOutput fn)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s)));
int rc;
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index ce19efffc8..b01030b745 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -436,13 +436,16 @@ static inline void virtio_scsi_release(VirtIOSCSI *s)
}
}
-void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
+bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
{
VirtIOSCSIReq *req;
+ bool progress = false;
while ((req = virtio_scsi_pop_req(s, vq))) {
+ progress = true;
virtio_scsi_handle_ctrl_req(s, req);
}
+ return progress;
}
static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
@@ -591,10 +594,11 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req)
scsi_req_unref(sreq);
}
-void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
+bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
{
VirtIOSCSIReq *req, *next;
int ret = 0;
+ bool progress = false;
QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
@@ -602,6 +606,7 @@ void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
virtio_queue_set_notification(vq, 0);
while ((req = virtio_scsi_pop_req(s, vq))) {
+ progress = true;
ret = virtio_scsi_handle_cmd_req_prepare(s, req);
if (!ret) {
QTAILQ_INSERT_TAIL(&reqs, req, next);
@@ -624,6 +629,7 @@ void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
virtio_scsi_handle_cmd_req_submit(s, req);
}
+ return progress;
}
static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq)
@@ -752,11 +758,13 @@ out:
virtio_scsi_release(s);
}
-void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
+bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
{
if (s->events_dropped) {
virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
+ return true;
}
+ return false;
}
static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 801578b4b9..f3ba9b9007 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -294,53 +294,78 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
section->offset_within_address_space & (1ULL << 63);
}
-static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+/* Called with rcu_read_lock held. */
+static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
+ bool *read_only)
{
- VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
- VFIOContainer *container = giommu->container;
- hwaddr iova = iotlb->iova + giommu->iommu_offset;
MemoryRegion *mr;
hwaddr xlat;
hwaddr len = iotlb->addr_mask + 1;
- void *vaddr;
- int ret;
-
- trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
-
- if (iotlb->target_as != &address_space_memory) {
- error_report("Wrong target AS \"%s\", only system memory is allowed",
- iotlb->target_as->name ? iotlb->target_as->name : "none");
- return;
- }
+ bool writable = iotlb->perm & IOMMU_WO;
/*
* The IOMMU TLB entry we have just covers translation through
* this IOMMU to its immediate target. We need to translate
* it the rest of the way through to memory.
*/
- rcu_read_lock();
mr = address_space_translate(&address_space_memory,
iotlb->translated_addr,
- &xlat, &len, iotlb->perm & IOMMU_WO);
+ &xlat, &len, writable);
if (!memory_region_is_ram(mr)) {
error_report("iommu map to non memory area %"HWADDR_PRIx"",
xlat);
- goto out;
+ return false;
}
+
/*
* Translation truncates length to the IOMMU page size,
* check that it did not truncate too much.
*/
if (len & iotlb->addr_mask) {
error_report("iommu has granularity incompatible with target AS");
- goto out;
+ return false;
}
+ *vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ *read_only = !writable || mr->readonly;
+
+ return true;
+}
+
+static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+ VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
+ VFIOContainer *container = giommu->container;
+ hwaddr iova = iotlb->iova + giommu->iommu_offset;
+ bool read_only;
+ void *vaddr;
+ int ret;
+
+ trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
+ iova, iova + iotlb->addr_mask);
+
+ if (iotlb->target_as != &address_space_memory) {
+ error_report("Wrong target AS \"%s\", only system memory is allowed",
+ iotlb->target_as->name ? iotlb->target_as->name : "none");
+ return;
+ }
+
+ rcu_read_lock();
+
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
- vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
+ goto out;
+ }
+ /*
+ * vaddr is only valid until rcu_read_unlock(). But after
+ * vfio_dma_map has set up the mapping the pages will be
+ * pinned by the kernel. This makes sure that the RAM backend
+ * of vaddr will always be there, even if the memory object is
+ * destroyed and its backing memory munmap-ed.
+ */
ret = vfio_dma_map(container, iova,
iotlb->addr_mask + 1, vaddr,
- !(iotlb->perm & IOMMU_WO) || mr->readonly);
+ read_only);
if (ret) {
error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%m)",
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 8de8281357..2561c6d31a 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -84,7 +84,7 @@ vfio_pci_igd_lpc_bridge_enabled(const char *name) "%s"
# hw/vfio/common.c
vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"
vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64
-vfio_iommu_map_notify(uint64_t iova_start, uint64_t iova_end) "iommu map @ %"PRIx64" - %"PRIx64
+vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "iommu %s @ %"PRIx64" - %"PRIx64
vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add %"PRIx64" - %"PRIx64
vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] %"PRIx64" - %"PRIx64
vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] %"PRIx64" - %"PRIx64" [%p]"
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 63657066e7..23483c752f 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -60,6 +60,13 @@ typedef struct VRingUsed
VRingUsedElem ring[0];
} VRingUsed;
+/*
+ * One MemoryRegionCache per vring area (descriptor table, avail ring,
+ * used ring). The rcu head is the member that virtio_init_region_cache()
+ * names when it hands a replaced set to call_rcu().
+ */
+typedef struct VRingMemoryRegionCaches {
+ struct rcu_head rcu;
+ MemoryRegionCache desc;
+ MemoryRegionCache avail;
+ MemoryRegionCache used;
+} VRingMemoryRegionCaches;
+
typedef struct VRing
{
unsigned int num;
@@ -68,6 +75,7 @@ typedef struct VRing
hwaddr desc;
hwaddr avail;
hwaddr used;
+ VRingMemoryRegionCaches *caches;
} VRing;
struct VirtQueue
@@ -97,13 +105,58 @@ struct VirtQueue
uint16_t vector;
VirtIOHandleOutput handle_output;
- VirtIOHandleOutput handle_aio_output;
+ VirtIOHandleAIOOutput handle_aio_output;
VirtIODevice *vdev;
EventNotifier guest_notifier;
EventNotifier host_notifier;
QLIST_ENTRY(VirtQueue) node;
};
+/*
+ * Destroy the desc, avail and used caches of a set and free the set
+ * itself. A NULL argument is accepted and ignored. This is the callback
+ * virtio_init_region_cache() passes to call_rcu() for a replaced set.
+ */
+static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
+{
+ if (!caches) {
+ return;
+ }
+
+ address_space_cache_destroy(&caches->desc);
+ address_space_cache_destroy(&caches->avail);
+ address_space_cache_destroy(&caches->used);
+ g_free(caches);
+}
+
+/*
+ * Build a fresh set of region caches for queue n of vdev from the
+ * current vring.desc/avail/used addresses, publish it with
+ * atomic_rcu_set(), and hand the previously published set (if any) to
+ * call_rcu() for deferred freeing.
+ *
+ * If the descriptor address is 0 the function returns before touching
+ * anything, so an already published set is left in place.
+ *
+ * NOTE(review): the length returned by address_space_cache_init() is
+ * discarded for all three caches here, whereas the indirect-table
+ * callers compare it against the requested size. A short mapping of the
+ * avail or used ring is therefore not detected at this point - confirm
+ * that the cached accessors cope with that.
+ */
+static void virtio_init_region_cache(VirtIODevice *vdev, int n)
+{
+ VirtQueue *vq = &vdev->vq[n];
+ VRingMemoryRegionCaches *old = vq->vring.caches;
+ VRingMemoryRegionCaches *new;
+ hwaddr addr, size;
+ int event_size;
+
+ /*
+ * 2 extra bytes per ring when EVENT_IDX is negotiated; presumably the
+ * 16-bit event slot that follows the last ring element - confirm.
+ */
+ event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+
+ addr = vq->vring.desc;
+ if (!addr) {
+ return;
+ }
+ new = g_new0(VRingMemoryRegionCaches, 1);
+ size = virtio_queue_get_desc_size(vdev, n);
+ address_space_cache_init(&new->desc, vdev->dma_as,
+ addr, size, false);
+
+ /* Only the used cache is initialised with a true final argument. */
+ size = virtio_queue_get_used_size(vdev, n) + event_size;
+ address_space_cache_init(&new->used, vdev->dma_as,
+ vq->vring.used, size, true);
+
+ size = virtio_queue_get_avail_size(vdev, n) + event_size;
+ address_space_cache_init(&new->avail, vdev->dma_as,
+ vq->vring.avail, size, false);
+
+ /* Publish the new set first; the old one is freed via call_rcu(). */
+ atomic_rcu_set(&vq->vring.caches, new);
+ if (old) {
+ call_rcu(old, virtio_free_region_cache, rcu);
+ }
+}
+
/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
@@ -117,101 +170,125 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
vring->used = vring_align(vring->avail +
offsetof(VRingAvail, ring[vring->num]),
vring->align);
+ virtio_init_region_cache(vdev, n);
}
+/* Called within rcu_read_lock(). */
static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
- hwaddr desc_pa, int i)
+ MemoryRegionCache *cache, int i)
{
- address_space_read(vdev->dma_as, desc_pa + i * sizeof(VRingDesc),
- MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
+ address_space_read_cached(cache, i * sizeof(VRingDesc),
+ desc, sizeof(VRingDesc));
virtio_tswap64s(vdev, &desc->addr);
virtio_tswap32s(vdev, &desc->len);
virtio_tswap16s(vdev, &desc->flags);
virtio_tswap16s(vdev, &desc->next);
}
+/* Called within rcu_read_lock(). */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
- hwaddr pa;
- pa = vq->vring.avail + offsetof(VRingAvail, flags);
- return virtio_lduw_phys(vq->vdev, pa);
+ VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+ hwaddr pa = offsetof(VRingAvail, flags);
+ return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}
+/* Called within rcu_read_lock(). */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
- hwaddr pa;
- pa = vq->vring.avail + offsetof(VRingAvail, idx);
- vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
+ VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+ hwaddr pa = offsetof(VRingAvail, idx);
+ vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
return vq->shadow_avail_idx;
}
+/* Called within rcu_read_lock(). */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
- hwaddr pa;
- pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
- return virtio_lduw_phys(vq->vdev, pa);
+ VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+ hwaddr pa = offsetof(VRingAvail, ring[i]);
+ return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}
+/* Called within rcu_read_lock(). */
static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
return vring_avail_ring(vq, vq->vring.num);
}
+/* Called within rcu_read_lock(). */
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
int i)
{
- hwaddr pa;
+ VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+ hwaddr pa = offsetof(VRingUsed, ring[i]);
virtio_tswap32s(vq->vdev, &uelem->id);
virtio_tswap32s(vq->vdev, &uelem->len);
- pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
- address_space_write(vq->vdev->dma_as, pa, MEMTXATTRS_UNSPECIFIED,
- (void *)uelem, sizeof(VRingUsedElem));
+ address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
+ address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
}
+/* Called within rcu_read_lock(). */
static uint16_t vring_used_idx(VirtQueue *vq)
{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, idx);
- return virtio_lduw_phys(vq->vdev, pa);
+ VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+ hwaddr pa = offsetof(VRingUsed, idx);
+ return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}
+/* Called within rcu_read_lock(). */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, idx);
- virtio_stw_phys(vq->vdev, pa, val);
+ VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+ hwaddr pa = offsetof(VRingUsed, idx);
+ virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
+ address_space_cache_invalidate(&caches->used, pa, sizeof(val));
vq->used_idx = val;
}
+/* Called within rcu_read_lock(). */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
+ VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
VirtIODevice *vdev = vq->vdev;
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, flags);
- virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
+ hwaddr pa = offsetof(VRingUsed, flags);
+ uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
+
+ virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
+ address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
+/* Called within rcu_read_lock(). */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
+ VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
VirtIODevice *vdev = vq->vdev;
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, flags);
- virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
+ hwaddr pa = offsetof(VRingUsed, flags);
+ uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
+
+ virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
+ address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
+/*
+ * Store val in the 16-bit slot located at the offset of
+ * ring[vring.num] in the used ring, i.e. right after the last used
+ * element. Does nothing while notifications are disabled for the queue.
+ *
+ * Called within rcu_read_lock().
+ */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
+ VRingMemoryRegionCaches *caches;
hwaddr pa;
if (!vq->notification) {
return;
}
- pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
- virtio_stw_phys(vq->vdev, pa, val);
+
+ caches = atomic_rcu_read(&vq->vring.caches);
+ pa = offsetof(VRingUsed, ring[vq->vring.num]);
+ virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
+ /*
+ * Complete the cached write the same way every other store to the
+ * used ring in this file does; without it this write is the only one
+ * that is never reported through address_space_cache_invalidate().
+ */
+ address_space_cache_invalidate(&caches->used, pa, sizeof(val));
}
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
vq->notification = enable;
+
+ rcu_read_lock();
if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vring_avail_idx(vq));
} else if (enable) {
@@ -223,6 +300,7 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable)
/* Expose avail event/used flags before caller checks the avail idx. */
smp_mb();
}
+ rcu_read_unlock();
}
int virtio_queue_ready(VirtQueue *vq)
@@ -231,8 +309,9 @@ int virtio_queue_ready(VirtQueue *vq)
}
/* Fetch avail_idx from VQ memory only when we really need to know if
- * guest has added some buffers. */
-int virtio_queue_empty(VirtQueue *vq)
+ * guest has added some buffers.
+ * Called within rcu_read_lock(). */
+static int virtio_queue_empty_rcu(VirtQueue *vq)
{
if (vq->shadow_avail_idx != vq->last_avail_idx) {
return 0;
@@ -241,6 +320,20 @@ int virtio_queue_empty(VirtQueue *vq)
return vring_avail_idx(vq) == vq->last_avail_idx;
}
+/*
+ * Variant of virtio_queue_empty_rcu() that takes the RCU read lock itself.
+ * Returns 0 without locking when the shadow avail index already differs
+ * from last_avail_idx; otherwise re-reads the avail index under
+ * rcu_read_lock() and reports whether it still equals last_avail_idx.
+ */
+int virtio_queue_empty(VirtQueue *vq)
+{
+ bool empty;
+
+ if (vq->shadow_avail_idx != vq->last_avail_idx) {
+ return 0;
+ }
+
+ rcu_read_lock();
+ empty = vring_avail_idx(vq) == vq->last_avail_idx;
+ rcu_read_unlock();
+ return empty;
+}
+
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
@@ -319,6 +412,7 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
return true;
}
+/* Called within rcu_read_lock(). */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len, unsigned int idx)
{
@@ -339,6 +433,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
vring_used_write(vq, &uelem, idx);
}
+/* Called within rcu_read_lock(). */
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
uint16_t old, new;
@@ -362,10 +457,13 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
+ rcu_read_lock();
virtqueue_fill(vq, elem, len, 0);
virtqueue_flush(vq, 1);
+ rcu_read_unlock();
}
+/* Called within rcu_read_lock(). */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
uint16_t num_heads = vring_avail_idx(vq) - idx;
@@ -385,6 +483,7 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
return num_heads;
}
+/* Called within rcu_read_lock(). */
static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
unsigned int *head)
{
@@ -408,7 +507,7 @@ enum {
};
static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
- hwaddr desc_pa, unsigned int max,
+ MemoryRegionCache *desc_cache, unsigned int max,
unsigned int *next)
{
/* If this descriptor says it doesn't chain, we're done. */
@@ -426,7 +525,7 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
return VIRTQUEUE_READ_DESC_ERROR;
}
- vring_desc_read(vdev, desc, desc_pa, *next);
+ vring_desc_read(vdev, desc, desc_cache, *next);
return VIRTQUEUE_READ_DESC_MORE;
}
@@ -434,29 +533,38 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
unsigned int *out_bytes,
unsigned max_in_bytes, unsigned max_out_bytes)
{
- unsigned int idx;
+ VirtIODevice *vdev = vq->vdev;
+ unsigned int max, idx;
unsigned int total_bufs, in_total, out_total;
+ VRingMemoryRegionCaches *caches;
+ MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+ int64_t len = 0;
int rc;
+ rcu_read_lock();
idx = vq->last_avail_idx;
-
total_bufs = in_total = out_total = 0;
+
+ max = vq->vring.num;
+ caches = atomic_rcu_read(&vq->vring.caches);
+ if (caches->desc.len < max * sizeof(VRingDesc)) {
+ virtio_error(vdev, "Cannot map descriptor ring");
+ goto err;
+ }
+
while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
- VirtIODevice *vdev = vq->vdev;
- unsigned int max, num_bufs, indirect = 0;
+ MemoryRegionCache *desc_cache = &caches->desc;
+ unsigned int num_bufs;
VRingDesc desc;
- hwaddr desc_pa;
unsigned int i;
- max = vq->vring.num;
num_bufs = total_bufs;
if (!virtqueue_get_head(vq, idx++, &i)) {
goto err;
}
- desc_pa = vq->vring.desc;
- vring_desc_read(vdev, &desc, desc_pa, i);
+ vring_desc_read(vdev, &desc, desc_cache, i);
if (desc.flags & VRING_DESC_F_INDIRECT) {
if (desc.len % sizeof(VRingDesc)) {
@@ -471,11 +579,18 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
}
/* loop over the indirect descriptor table */
- indirect = 1;
+ len = address_space_cache_init(&indirect_desc_cache,
+ vdev->dma_as,
+ desc.addr, desc.len, false);
+ desc_cache = &indirect_desc_cache;
+ if (len < desc.len) {
+ virtio_error(vdev, "Cannot map indirect buffer");
+ goto err;
+ }
+
max = desc.len / sizeof(VRingDesc);
- desc_pa = desc.addr;
num_bufs = i = 0;
- vring_desc_read(vdev, &desc, desc_pa, i);
+ vring_desc_read(vdev, &desc, desc_cache, i);
}
do {
@@ -494,17 +609,19 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
goto done;
}
- rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
+ rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
} while (rc == VIRTQUEUE_READ_DESC_MORE);
if (rc == VIRTQUEUE_READ_DESC_ERROR) {
goto err;
}
- if (!indirect)
- total_bufs = num_bufs;
- else
+ if (desc_cache == &indirect_desc_cache) {
+ address_space_cache_destroy(&indirect_desc_cache);
total_bufs++;
+ } else {
+ total_bufs = num_bufs;
+ }
}
if (rc < 0) {
@@ -512,12 +629,14 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
}
done:
+ address_space_cache_destroy(&indirect_desc_cache);
if (in_bytes) {
*in_bytes = in_total;
}
if (out_bytes) {
*out_bytes = out_total;
}
+ rcu_read_unlock();
return;
err:
@@ -651,9 +770,12 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
unsigned int i, head, max;
- hwaddr desc_pa = vq->vring.desc;
+ VRingMemoryRegionCaches *caches;
+ MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+ MemoryRegionCache *desc_cache;
+ int64_t len;
VirtIODevice *vdev = vq->vdev;
- VirtQueueElement *elem;
+ VirtQueueElement *elem = NULL;
unsigned out_num, in_num;
hwaddr addr[VIRTQUEUE_MAX_SIZE];
struct iovec iov[VIRTQUEUE_MAX_SIZE];
@@ -663,8 +785,9 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
if (unlikely(vdev->broken)) {
return NULL;
}
- if (virtio_queue_empty(vq)) {
- return NULL;
+ rcu_read_lock();
+ if (virtio_queue_empty_rcu(vq)) {
+ goto done;
}
/* Needed after virtio_queue_empty(), see comment in
* virtqueue_num_heads(). */
@@ -677,11 +800,11 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
if (vq->inuse >= vq->vring.num) {
virtio_error(vdev, "Virtqueue size exceeded");
- return NULL;
+ goto done;
}
if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
- return NULL;
+ goto done;
}
if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
@@ -689,18 +812,33 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
}
i = head;
- vring_desc_read(vdev, &desc, desc_pa, i);
+
+ caches = atomic_rcu_read(&vq->vring.caches);
+ if (caches->desc.len < max * sizeof(VRingDesc)) {
+ virtio_error(vdev, "Cannot map descriptor ring");
+ goto done;
+ }
+
+ desc_cache = &caches->desc;
+ vring_desc_read(vdev, &desc, desc_cache, i);
if (desc.flags & VRING_DESC_F_INDIRECT) {
if (desc.len % sizeof(VRingDesc)) {
virtio_error(vdev, "Invalid size for indirect buffer table");
- return NULL;
+ goto done;
}
/* loop over the indirect descriptor table */
+ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
+ desc.addr, desc.len, false);
+ desc_cache = &indirect_desc_cache;
+ if (len < desc.len) {
+ virtio_error(vdev, "Cannot map indirect buffer");
+ goto done;
+ }
+
max = desc.len / sizeof(VRingDesc);
- desc_pa = desc.addr;
i = 0;
- vring_desc_read(vdev, &desc, desc_pa, i);
+ vring_desc_read(vdev, &desc, desc_cache, i);
}
/* Collect all the descriptors */
@@ -731,7 +869,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
goto err_undo_map;
}
- rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
+ rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
} while (rc == VIRTQUEUE_READ_DESC_MORE);
if (rc == VIRTQUEUE_READ_DESC_ERROR) {
@@ -753,11 +891,15 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
vq->inuse++;
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
+done:
+ address_space_cache_destroy(&indirect_desc_cache);
+ rcu_read_unlock();
+
return elem;
err_undo_map:
virtqueue_undo_map_desc(out_num, in_num, iov);
- return NULL;
+ goto done;
}
/* virtqueue_drop_all:
@@ -1219,6 +1361,7 @@ void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
vdev->vq[n].vring.desc = desc;
vdev->vq[n].vring.avail = avail;
vdev->vq[n].vring.used = used;
+ virtio_init_region_cache(vdev, n);
}
void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
@@ -1287,14 +1430,16 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
virtio_queue_update_rings(vdev, n);
}
-static void virtio_queue_notify_aio_vq(VirtQueue *vq)
+static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
{
if (vq->vring.desc && vq->handle_aio_output) {
VirtIODevice *vdev = vq->vdev;
trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
- vq->handle_aio_output(vdev, vq);
+ return vq->handle_aio_output(vdev, vq);
}
+
+ return false;
}
static void virtio_queue_notify_vq(VirtQueue *vq)
@@ -1383,6 +1528,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
}
}
+/* Called within rcu_read_lock(). */
static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
uint16_t old, new;
@@ -1408,7 +1554,12 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
- if (!virtio_should_notify(vdev, vq)) {
+ bool should_notify;
+ rcu_read_lock();
+ should_notify = virtio_should_notify(vdev, vq);
+ rcu_read_unlock();
+
+ if (!should_notify) {
return;
}
@@ -1433,15 +1584,25 @@ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
event_notifier_set(&vq->guest_notifier);
}
+/*
+ * Raise the queue interrupt: set ISR value 0x1 on the queue's device,
+ * then notify the vector assigned to the queue.
+ */
+static void virtio_irq(VirtQueue *vq)
+{
+ virtio_set_isr(vq->vdev, 0x1);
+ virtio_notify_vector(vq->vdev, vq->vector);
+}
+
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
- if (!virtio_should_notify(vdev, vq)) {
+ bool should_notify;
+ rcu_read_lock();
+ should_notify = virtio_should_notify(vdev, vq);
+ rcu_read_unlock();
+
+ if (!should_notify) {
return;
}
trace_virtio_notify(vdev, vq);
- virtio_set_isr(vq->vdev, 0x1);
- virtio_notify_vector(vdev, vq->vector);
+ virtio_irq(vq);
}
void virtio_notify_config(VirtIODevice *vdev)
@@ -1896,6 +2057,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
}
}
+ rcu_read_lock();
for (i = 0; i < num; i++) {
if (vdev->vq[i].vring.desc) {
uint16_t nheads;
@@ -1930,6 +2092,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
}
}
}
+ rcu_read_unlock();
return 0;
}
@@ -1937,9 +2100,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
void virtio_cleanup(VirtIODevice *vdev)
{
qemu_del_vm_change_state_handler(vdev->vmstate);
- g_free(vdev->config);
- g_free(vdev->vq);
- g_free(vdev->vector_queues);
}
static void virtio_vmstate_change(void *opaque, int running, RunState state)
@@ -2059,7 +2219,11 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
{
- vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
+ rcu_read_lock();
+ if (vdev->vq[n].vring.desc) {
+ vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
+ }
+ rcu_read_unlock();
}
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
@@ -2081,7 +2245,7 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
if (event_notifier_test_and_clear(n)) {
- virtio_notify_vector(vq->vdev, vq->vector);
+ virtio_irq(vq);
}
}
@@ -2125,16 +2289,17 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
EventNotifier *n = opaque;
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
+ bool progress;
if (virtio_queue_empty(vq)) {
return false;
}
- virtio_queue_notify_aio_vq(vq);
+ progress = virtio_queue_notify_aio_vq(vq);
/* In case the handler function re-enabled notifications */
virtio_queue_set_notification(vq, 0);
- return true;
+ return progress;
}
static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
@@ -2146,7 +2311,7 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
}
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
- VirtIOHandleOutput handle_output)
+ VirtIOHandleAIOOutput handle_output)
{
if (handle_output) {
vq->handle_aio_output = handle_output;
@@ -2200,6 +2365,19 @@ void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
}
}
+static void virtio_memory_listener_commit(MemoryListener *listener)
+{
+ VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
+ int i;
+
+ for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+ if (vdev->vq[i].vring.num == 0) {
+ break;
+ }
+ virtio_init_region_cache(vdev, i);
+ }
+}
+
static void virtio_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -2222,6 +2400,9 @@ static void virtio_device_realize(DeviceState *dev, Error **errp)
error_propagate(errp, err);
return;
}
+
+ vdev->listener.commit = virtio_memory_listener_commit;
+ memory_listener_register(&vdev->listener, vdev->dma_as);
}
static void virtio_device_unrealize(DeviceState *dev, Error **errp)
@@ -2244,6 +2425,36 @@ static void virtio_device_unrealize(DeviceState *dev, Error **errp)
vdev->bus_name = NULL;
}
+static void virtio_device_free_virtqueues(VirtIODevice *vdev)
+{
+ int i;
+ if (!vdev->vq) {
+ return;
+ }
+
+ for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+ VRingMemoryRegionCaches *caches;
+ if (vdev->vq[i].vring.num == 0) {
+ break;
+ }
+ caches = atomic_read(&vdev->vq[i].vring.caches);
+ atomic_set(&vdev->vq[i].vring.caches, NULL);
+ virtio_free_region_cache(caches);
+ }
+ g_free(vdev->vq);
+}
+
+static void virtio_device_instance_finalize(Object *obj)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(obj);
+
+ memory_listener_unregister(&vdev->listener);
+ virtio_device_free_virtqueues(vdev);
+
+ g_free(vdev->config);
+ g_free(vdev->vector_queues);
+}
+
static Property virtio_properties[] = {
DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
DEFINE_PROP_END_OF_LIST(),
@@ -2370,6 +2581,7 @@ static const TypeInfo virtio_device_info = {
.parent = TYPE_DEVICE,
.instance_size = sizeof(VirtIODevice),
.class_init = virtio_device_class_init,
+ .instance_finalize = virtio_device_instance_finalize,
.abstract = true,
.class_size = sizeof(VirtioDeviceClass),
};
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 987f9251c6..691102317c 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1426,6 +1426,8 @@ struct MemoryRegionCache {
bool is_write;
};
+#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mr = NULL })
+
/* address_space_cache_init: prepare for repeated access to a physical
* memory region
*
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 405c9d122e..fe645aa93a 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -257,6 +257,8 @@ struct IntelIOMMUState {
uint8_t womask[DMAR_REG_SIZE]; /* WO (write only - read returns 0) */
uint32_t version;
+ bool caching_mode; /* RO - is cap CM enabled? */
+
dma_addr_t root; /* Current root table pointer */
bool root_extended; /* Type of root table (extended or not) */
bool dmar_enabled; /* Set if DMA remapping is enabled */
diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
index 91ae14d254..2e92074bd1 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -156,6 +156,58 @@ static inline uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s)
#endif
}
+static inline uint16_t virtio_lduw_phys_cached(VirtIODevice *vdev,
+ MemoryRegionCache *cache,
+ hwaddr pa)
+{
+ if (virtio_access_is_big_endian(vdev)) {
+ return lduw_be_phys_cached(cache, pa);
+ }
+ return lduw_le_phys_cached(cache, pa);
+}
+
+static inline uint32_t virtio_ldl_phys_cached(VirtIODevice *vdev,
+ MemoryRegionCache *cache,
+ hwaddr pa)
+{
+ if (virtio_access_is_big_endian(vdev)) {
+ return ldl_be_phys_cached(cache, pa);
+ }
+ return ldl_le_phys_cached(cache, pa);
+}
+
+static inline uint64_t virtio_ldq_phys_cached(VirtIODevice *vdev,
+ MemoryRegionCache *cache,
+ hwaddr pa)
+{
+ if (virtio_access_is_big_endian(vdev)) {
+ return ldq_be_phys_cached(cache, pa);
+ }
+ return ldq_le_phys_cached(cache, pa);
+}
+
+static inline void virtio_stw_phys_cached(VirtIODevice *vdev,
+ MemoryRegionCache *cache,
+ hwaddr pa, uint16_t value)
+{
+ if (virtio_access_is_big_endian(vdev)) {
+ stw_be_phys_cached(cache, pa, value);
+ } else {
+ stw_le_phys_cached(cache, pa, value);
+ }
+}
+
+static inline void virtio_stl_phys_cached(VirtIODevice *vdev,
+ MemoryRegionCache *cache,
+ hwaddr pa, uint32_t value)
+{
+ if (virtio_access_is_big_endian(vdev)) {
+ stl_be_phys_cached(cache, pa, value);
+ } else {
+ stl_le_phys_cached(cache, pa, value);
+ }
+}
+
static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s)
{
*s = virtio_tswap16(vdev, *s);
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 9734b4c446..d3c8a6fa8c 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -80,6 +80,6 @@ typedef struct MultiReqBuffer {
bool is_write;
} MultiReqBuffer;
-void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);
+bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);
#endif
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index 73751969ba..f536f77e68 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -126,9 +126,9 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp,
VirtIOHandleOutput cmd);
void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp);
-void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq);
-void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq);
-void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq);
+bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq);
+bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq);
+bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq);
void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req);
void virtio_scsi_free_req(VirtIOSCSIReq *req);
void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 525da24222..15efcf2057 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -85,6 +85,7 @@ struct VirtIODevice
uint32_t generation;
int nvectors;
VirtQueue *vq;
+ MemoryListener listener;
uint16_t device_id;
bool vm_running;
bool broken; /* device in invalid state, needs reset */
@@ -154,6 +155,7 @@ void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);
typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *);
+typedef bool (*VirtIOHandleAIOOutput)(VirtIODevice *, VirtQueue *);
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
VirtIOHandleOutput handle_output);
@@ -284,8 +286,7 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
void virtio_queue_host_notifier_read(EventNotifier *n);
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
- void (*fn)(VirtIODevice *,
- VirtQueue *));
+ VirtIOHandleAIOOutput handle_output);
VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
VirtQueue *virtio_vector_next_queue(VirtQueue *vq);
diff --git a/memory.c b/memory.c
index 6c58373422..ed8b5aa83e 100644
--- a/memory.c
+++ b/memory.c
@@ -2371,8 +2371,13 @@ void memory_listener_register(MemoryListener *listener, AddressSpace *as)
void memory_listener_unregister(MemoryListener *listener)
{
+ if (!listener->address_space) {
+ return;
+ }
+
QTAILQ_REMOVE(&memory_listeners, listener, link);
QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);
+ listener->address_space = NULL;
}
void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)