137 files changed, 1961 insertions, 769 deletions
diff --git a/.travis.yml b/.travis.yml
index 27a2d9cfb3..3c7a5cbe25 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -86,6 +86,9 @@ matrix:
     - env: CONFIG="--enable-trace-backends=ust"
            TEST_CMD=""
       compiler: gcc
+    - env: CONFIG="--disable-tcg"
+           TEST_CMD=""
+      compiler: gcc
     - env: CONFIG=""
       os: osx
       compiler: clang
diff --git a/audio/rate_template.h b/audio/rate_template.h
index bd4b1c7685..6e93588877 100644
--- a/audio/rate_template.h
+++ b/audio/rate_template.h
@@ -71,6 +71,12 @@ void NAME (void *opaque, struct st_sample *ibuf, struct st_sample *obuf,
         while (rate->ipos <= (rate->opos >> 32)) {
             ilast = *ibuf++;
             rate->ipos++;
+
+            /* if ipos overflows, there is an infinite loop */
+            if (rate->ipos == 0xffffffff) {
+                rate->ipos = 1;
+                rate->opos = rate->opos & 0xffffffff;
+            }
             /* See if we finished the input buffer yet */
             if (ibuf >= iend) {
                 goto the_end;
diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
index 04a7ac362b..38977be73e 100644
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -28,7 +28,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
     }
 
     path = object_get_canonical_path_component(OBJECT(backend));
-    memory_region_init_ram(&backend->mr, OBJECT(backend), path,
+    memory_region_init_ram_nomigrate(&backend->mr, OBJECT(backend), path,
                            backend->size, errp);
     g_free(path);
 }
diff --git a/block/io.c b/block/io.c
index b413727524..aece54c015 100644
--- a/block/io.c
+++ b/block/io.c
@@ -149,6 +149,37 @@ bool bdrv_requests_pending(BlockDriverState *bs)
     return false;
 }
 
+typedef struct {
+    Coroutine *co;          /* coroutine associated with the request */
+    BlockDriverState *bs;   /* node the request operates on */
+    bool done;              /* set once bdrv_drain_invoke_entry() finishes */
+} BdrvCoDrainData;
+
+static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
+{
+    BdrvCoDrainData *data = opaque;     /* passed by bdrv_drain_invoke() */
+    BlockDriverState *bs = data->bs;
+
+    bs->drv->bdrv_co_drain(bs);         /* non-NULL: checked by the caller */
+
+    /* Set data->done before reading bs->wakeup.  */
+    atomic_mb_set(&data->done, true);
+    bdrv_wakeup(bs);
+}
+
+static void bdrv_drain_invoke(BlockDriverState *bs)
+{
+    BdrvCoDrainData data = { .bs = bs, .done = false };
+
+    if (!bs->drv || !bs->drv->bdrv_co_drain) {
+        return;     /* no driver or no drain callback: nothing to run */
+    }
+
+    data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
+    bdrv_coroutine_enter(bs, data.co);
+    BDRV_POLL_WHILE(bs, !data.done);    /* data is on our stack: wait here */
+}
+
 static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
     BdrvChild *child, *tmp;
@@ -156,9 +187,8 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
 
     waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
 
-    if (bs->drv && bs->drv->bdrv_drain) {
-        bs->drv->bdrv_drain(bs);
-    }
+    /* Ensure any pending metadata writes are submitted to bs->file.  */
+    bdrv_drain_invoke(bs);
 
     QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
         BlockDriverState *bs = child->bs;
@@ -184,12 +214,6 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
     return waited;
 }
 
-typedef struct {
-    Coroutine *co;
-    BlockDriverState *bs;
-    bool done;
-} BdrvCoDrainData;
-
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
     BdrvCoDrainData *data = opaque;
diff --git a/block/qcow2.c b/block/qcow2.c
index c144ea5620..d5790af1e0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2025,8 +2025,6 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
     ret = 0;
 
 fail:
-    qemu_co_mutex_unlock(&s->lock);
-
     while (l2meta != NULL) {
         QCowL2Meta *next;
 
@@ -2040,6 +2038,8 @@ fail:
         l2meta = next;
     }
 
+    qemu_co_mutex_unlock(&s->lock);
+
     qemu_iovec_destroy(&hd_qiov);
     qemu_vfree(cluster_data);
     trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
diff --git a/block/qed-cluster.c b/block/qed-cluster.c
index d8d6e66a0f..672e2e654b 100644
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -85,6 +85,8 @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
  *
  * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1
  * table offset, respectively. len is number of contiguous unallocated bytes.
+ *
+ * Called with table_lock held.
  */
 int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
                                   uint64_t pos, size_t *len,
@@ -112,7 +114,6 @@ int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
     }
 
     ret = qed_read_l2_table(s, request, l2_offset);
-    qed_acquire(s);
     if (ret) {
         goto out;
     }
@@ -137,6 +138,5 @@ int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
 
 out:
     *img_offset = offset;
-    qed_release(s);
     return ret;
 }
diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c
index 5cba794650..b548362398 100644
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -101,6 +101,8 @@ CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
 /**
  * Decrease an entry's reference count and free if necessary when the reference
  * count drops to zero.
+ *
+ * Called with table_lock held.
  */
 void qed_unref_l2_cache_entry(CachedL2Table *entry)
 {
@@ -122,6 +124,8 @@ void qed_unref_l2_cache_entry(CachedL2Table *entry)
  *
  * For a cached entry, this function increases the reference count and returns
  * the entry.
+ *
+ * Called with table_lock held.
  */
 CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
 {
@@ -150,6 +154,8 @@ CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
  * N.B. This function steals a reference to the l2_table from the caller so the
  * caller must obtain a new reference by issuing a call to
  * qed_find_l2_cache_entry().
+ *
+ * Called with table_lock held.
  */
 void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
 {
diff --git a/block/qed-table.c b/block/qed-table.c
index ebee2c50f0..eead8b0fc7 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -18,6 +18,7 @@
 #include "qed.h"
 #include "qemu/bswap.h"
 
+/* Called either from qed_check or with table_lock held.  */
 static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 {
     QEMUIOVector qiov;
@@ -32,18 +33,22 @@ static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 
     trace_qed_read_table(s, offset, table);
 
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
     ret = bdrv_preadv(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
     if (ret < 0) {
         goto out;
     }
 
     /* Byteswap offsets */
-    qed_acquire(s);
     noffsets = qiov.size / sizeof(uint64_t);
     for (i = 0; i < noffsets; i++) {
         table->offsets[i] = le64_to_cpu(table->offsets[i]);
     }
-    qed_release(s);
 
     ret = 0;
 out:
@@ -61,6 +66,8 @@ out:
  * @index:      Index of first element
  * @n:          Number of elements
  * @flush:      Whether or not to sync to disk
+ *
+ * Called either from qed_check or with table_lock held.
  */
 static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
                            unsigned int index, unsigned int n, bool flush)
@@ -97,16 +104,20 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
     /* Adjust for offset into table */
     offset += start * sizeof(uint64_t);
 
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
     ret = bdrv_pwritev(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
     trace_qed_write_table_cb(s, table, flush, ret);
     if (ret < 0) {
         goto out;
     }
 
     if (flush) {
-        qed_acquire(s);
         ret = bdrv_flush(s->bs);
-        qed_release(s);
         if (ret < 0) {
             goto out;
         }
@@ -123,6 +134,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
     return qed_read_table(s, s->header.l1_table_offset, s->l1_table);
 }
 
+/* Called either from qed_check or with table_lock held.  */
 int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n)
 {
     BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
@@ -136,6 +148,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
     return qed_write_l1_table(s, index, n);
 }
 
+/* Called either from qed_check or with table_lock held.  */
 int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
 {
     int ret;
@@ -154,7 +167,6 @@ int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
     BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
     ret = qed_read_table(s, offset, request->l2_table->table);
 
-    qed_acquire(s);
     if (ret) {
         /* can't trust loaded L2 table anymore */
         qed_unref_l2_cache_entry(request->l2_table);
@@ -170,7 +182,6 @@ int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
         request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
         assert(request->l2_table != NULL);
     }
-    qed_release(s);
 
     return ret;
 }
@@ -180,6 +191,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
     return qed_read_l2_table(s, request, offset);
 }
 
+/* Called either from qed_check or with table_lock held.  */
 int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush)
 {
diff --git a/block/qed.c b/block/qed.c
index 86cad2188c..dc54bf4a93 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -93,6 +93,8 @@ int qed_write_header_sync(BDRVQEDState *s)
  *
  * This function only updates known header fields in-place and does not affect
  * extra data after the QED header.
+ *
+ * No new allocating reqs can start while this function runs.
  */
 static int coroutine_fn qed_write_header(BDRVQEDState *s)
 {
@@ -109,6 +111,8 @@ static int coroutine_fn qed_write_header(BDRVQEDState *s)
     QEMUIOVector qiov;
     int ret;
 
+    assert(s->allocating_acb || s->allocating_write_reqs_plugged);
+
     buf = qemu_blockalign(s->bs, len);
     iov = (struct iovec) {
         .iov_base = buf,
@@ -219,6 +223,8 @@ static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
  * This function only produces the offset where the new clusters should be
  * written.  It updates BDRVQEDState but does not make any changes to the image
  * file.
+ *
+ * Called with table_lock held.
  */
 static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
 {
@@ -236,6 +242,8 @@ QEDTable *qed_alloc_table(BDRVQEDState *s)
 
 /**
  * Allocate a new zeroed L2 table
+ *
+ * Called with table_lock held.
  */
 static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
 {
@@ -249,19 +257,32 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
     return l2_table;
 }
 
-static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+static bool qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
+    qemu_co_mutex_lock(&s->table_lock);
+
+    /* No reentrancy is allowed.  */
     assert(!s->allocating_write_reqs_plugged);
+    if (s->allocating_acb != NULL) {
+        /* Another allocating write came concurrently.  This cannot happen
+         * from bdrv_qed_co_drain, but it can happen when the timer runs.
+         */
+        qemu_co_mutex_unlock(&s->table_lock);
+        return false;
+    }
 
     s->allocating_write_reqs_plugged = true;
+    qemu_co_mutex_unlock(&s->table_lock);
+    return true;
 }
 
 static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
 {
+    qemu_co_mutex_lock(&s->table_lock);
     assert(s->allocating_write_reqs_plugged);
-
     s->allocating_write_reqs_plugged = false;
-    qemu_co_enter_next(&s->allocating_write_reqs);
+    qemu_co_queue_next(&s->allocating_write_reqs);
+    qemu_co_mutex_unlock(&s->table_lock);
 }
 
 static void coroutine_fn qed_need_check_timer_entry(void *opaque)
@@ -269,17 +290,14 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque)
     BDRVQEDState *s = opaque;
     int ret;
 
-    /* The timer should only fire when allocating writes have drained */
-    assert(!s->allocating_acb);
-
     trace_qed_need_check_timer_cb(s);
 
-    qed_acquire(s);
-    qed_plug_allocating_write_reqs(s);
+    if (!qed_plug_allocating_write_reqs(s)) {
+        return;
+    }
 
     /* Ensure writes are on disk before clearing flag */
     ret = bdrv_co_flush(s->bs->file->bs);
-    qed_release(s);
     if (ret < 0) {
         qed_unplug_allocating_write_reqs(s);
         return;
@@ -301,16 +319,6 @@ static void qed_need_check_timer_cb(void *opaque)
     qemu_coroutine_enter(co);
 }
 
-void qed_acquire(BDRVQEDState *s)
-{
-    aio_context_acquire(bdrv_get_aio_context(s->bs));
-}
-
-void qed_release(BDRVQEDState *s)
-{
-    aio_context_release(bdrv_get_aio_context(s->bs));
-}
-
 static void qed_start_need_check_timer(BDRVQEDState *s)
 {
     trace_qed_start_need_check_timer(s);
@@ -350,7 +358,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
     }
 }
 
-static void bdrv_qed_drain(BlockDriverState *bs)
+static void coroutine_fn bdrv_qed_co_drain(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
 
@@ -359,10 +367,20 @@ static void bdrv_qed_drain(BlockDriverState *bs)
      */
     if (s->need_check_timer && timer_pending(s->need_check_timer)) {
         qed_cancel_need_check_timer(s);
-        qed_need_check_timer_cb(s);
+        qed_need_check_timer_entry(s);
     }
 }
 
+static void bdrv_qed_init_state(BlockDriverState *bs)
+{
+    BDRVQEDState *s = bs->opaque;
+
+    memset(s, 0, sizeof(BDRVQEDState));     /* wipe all previous state */
+    s->bs = bs;
+    qemu_co_mutex_init(&s->table_lock);     /* initialised after the memset */
+    qemu_co_queue_init(&s->allocating_write_reqs);
+}
+
 static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
                             Error **errp)
 {
@@ -371,9 +389,6 @@ static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
     int64_t file_size;
     int ret;
 
-    s->bs = bs;
-    qemu_co_queue_init(&s->allocating_write_reqs);
-
     ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
     if (ret < 0) {
         return ret;
@@ -507,6 +522,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
+    bdrv_qed_init_state(bs);
     return bdrv_qed_do_open(bs, options, flags, errp);
 }
 
@@ -681,6 +697,7 @@ typedef struct {
     BlockDriverState **file;
 } QEDIsAllocatedCB;
 
+/* Called with table_lock held.  */
 static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
 {
     QEDIsAllocatedCB *cb = opaque;
@@ -728,6 +745,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
     uint64_t offset;
     int ret;
 
+    qemu_co_mutex_lock(&s->table_lock);
     ret = qed_find_cluster(s, &request, cb.pos, &len, &offset);
     qed_is_allocated_cb(&cb, ret, offset, len);
 
@@ -735,6 +753,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
     assert(cb.status != BDRV_BLOCK_OFFSET_MASK);
 
     qed_unref_l2_cache_entry(request.l2_table);
+    qemu_co_mutex_unlock(&s->table_lock);
 
     return cb.status;
 }
@@ -865,6 +884,8 @@ out:
  *
  * The cluster offset may be an allocated byte offset in the image file, the
  * zero cluster marker, or the unallocated cluster marker.
+ *
+ * Called with table_lock held.
  */
 static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
                                              int index, unsigned int n,
@@ -880,6 +901,7 @@ static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
     }
 }
 
+/* Called with table_lock held.  */
 static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
@@ -903,7 +925,7 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
     if (acb == s->allocating_acb) {
         s->allocating_acb = NULL;
         if (!qemu_co_queue_empty(&s->allocating_write_reqs)) {
-            qemu_co_enter_next(&s->allocating_write_reqs);
+            qemu_co_queue_next(&s->allocating_write_reqs);
         } else if (s->header.features & QED_F_NEED_CHECK) {
             qed_start_need_check_timer(s);
         }
@@ -912,6 +934,8 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
 
 /**
  * Update L1 table with new L2 table offset and write it out
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
 {
@@ -940,6 +964,8 @@ static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
 
 /**
  * Update L2 table with new cluster offsets and write them out
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 {
@@ -976,50 +1002,26 @@ static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 
 /**
  * Write data to the image file
+ *
+ * Called with table_lock *not* held.
  */
 static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t offset = acb->cur_cluster +
                       qed_offset_into_cluster(s, acb->cur_pos);
-    int ret;
 
     trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size);
 
     BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
-                          &acb->cur_qiov, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (acb->find_cluster_ret != QED_CLUSTER_FOUND) {
-        if (s->bs->backing) {
-            /*
-             * Flush new data clusters before updating the L2 table
-             *
-             * This flush is necessary when a backing file is in use.  A crash
-             * during an allocating write could result in empty clusters in the
-             * image.  If the write only touched a subregion of the cluster,
-             * then backing image sectors have been lost in the untouched
-             * region.  The solution is to flush after writing a new data
-             * cluster and before updating the L2 table.
-             */
-            ret = bdrv_co_flush(s->bs->file->bs);
-            if (ret < 0) {
-                return ret;
-            }
-        }
-        ret = qed_aio_write_l2_update(acb, acb->cur_cluster);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-    return 0;
+    return bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
+                           &acb->cur_qiov, 0);
 }
 
 /**
  * Populate untouched regions of new data cluster
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
 {
@@ -1027,6 +1029,8 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
     uint64_t start, len, offset;
     int ret;
 
+    qemu_co_mutex_unlock(&s->table_lock);
+
     /* Populate front untouched region of new data cluster */
     start = qed_start_of_cluster(s, acb->cur_pos);
     len = qed_offset_into_cluster(s, acb->cur_pos);
@@ -1034,7 +1038,7 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
     trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
     ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
 
     /* Populate back untouched region of new data cluster */
@@ -1047,10 +1051,31 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
     trace_qed_aio_write_postfill(s, acb, start, len, offset);
     ret = qed_copy_from_backing_file(s, start, len, offset);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
 
-    return qed_aio_write_main(acb);
+    ret = qed_aio_write_main(acb);
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (s->bs->backing) {
+        /*
+         * Flush new data clusters before updating the L2 table
+         *
+         * This flush is necessary when a backing file is in use.  A crash
+         * during an allocating write could result in empty clusters in the
+         * image.  If the write only touched a subregion of the cluster,
+         * then backing image sectors have been lost in the untouched
+         * region.  The solution is to flush after writing a new data
+         * cluster and before updating the L2 table.
+         */
+        ret = bdrv_co_flush(s->bs->file->bs);
+    }
+
+out:
+    qemu_co_mutex_lock(&s->table_lock);
+    return ret;
 }
 
 /**
@@ -1073,6 +1098,8 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
  * @len:        Length in bytes
  *
  * This path is taken when writing to previously unallocated clusters.
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
@@ -1087,7 +1114,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     /* Freeze this request if another allocating write is in progress */
     if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) {
         if (s->allocating_acb != NULL) {
-            qemu_co_queue_wait(&s->allocating_write_reqs, NULL);
+            qemu_co_queue_wait(&s->allocating_write_reqs, &s->table_lock);
             assert(s->allocating_acb == NULL);
         }
         s->allocating_acb = acb;
@@ -1103,6 +1130,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
         if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
             return 0;
         }
+        acb->cur_cluster = 1;
     } else {
         acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
     }
@@ -1115,15 +1143,14 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
         }
     }
 
-    if (acb->flags & QED_AIOCB_ZERO) {
-        ret = qed_aio_write_l2_update(acb, 1);
-    } else {
+    if (!(acb->flags & QED_AIOCB_ZERO)) {
         ret = qed_aio_write_cow(acb);
+        if (ret < 0) {
+            return ret;
+        }
     }
-    if (ret < 0) {
-        return ret;
-    }
-    return 0;
+
+    return qed_aio_write_l2_update(acb, acb->cur_cluster);
 }
 
 /**
@@ -1134,10 +1161,17 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
  * @len:        Length in bytes
  *
  * This path is taken when writing to already allocated clusters.
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
                                               size_t len)
 {
+    BDRVQEDState *s = acb_to_s(acb);
+    int r;
+
+    qemu_co_mutex_unlock(&s->table_lock);
+
     /* Allocate buffer for zero writes */
     if (acb->flags & QED_AIOCB_ZERO) {
         struct iovec *iov = acb->qiov->iov;
@@ -1145,7 +1179,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
         if (!iov->iov_base) {
             iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len);
             if (iov->iov_base == NULL) {
-                return -ENOMEM;
+                r = -ENOMEM;
+                goto out;
             }
             memset(iov->iov_base, 0, iov->iov_len);
         }
@@ -1155,8 +1190,11 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
     acb->cur_cluster = offset;
     qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Do the actual write */
-    return qed_aio_write_main(acb);
+    /* Do the actual write.  */
+    r = qed_aio_write_main(acb);
+out:
+    qemu_co_mutex_lock(&s->table_lock);
+    return r;
 }
 
 /**
@@ -1166,6 +1204,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
  * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
  * @offset:     Cluster offset in bytes
  * @len:        Length in bytes
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
                                            uint64_t offset, size_t len)
@@ -1197,6 +1237,8 @@ static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
  * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
  * @offset:     Cluster offset in bytes
  * @len:        Length in bytes
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
                                           uint64_t offset, size_t len)
@@ -1204,6 +1246,9 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
     BlockDriverState *bs = acb->bs;
+    int r;
+
+    qemu_co_mutex_unlock(&s->table_lock);
 
     /* Adjust offset into cluster */
     offset += qed_offset_into_cluster(s, acb->cur_pos);
@@ -1212,22 +1257,23 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
 
     qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Handle zero cluster and backing file reads */
+    /* Handle zero cluster and backing file reads, otherwise read
+     * data cluster directly.
+     */
     if (ret == QED_CLUSTER_ZERO) {
         qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
-        return 0;
+        r = 0;
     } else if (ret != QED_CLUSTER_FOUND) {
-        return qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                                     &acb->backing_qiov);
+        r = qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+                                  &acb->backing_qiov);
+    } else {
+        BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+        r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
+                           &acb->cur_qiov, 0);
     }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
-                         &acb->cur_qiov, 0);
-    if (ret < 0) {
-        return ret;
-    }
-    return 0;
+    qemu_co_mutex_lock(&s->table_lock);
+    return r;
 }
 
 /**
@@ -1240,6 +1286,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
     size_t len;
     int ret;
 
+    qemu_co_mutex_lock(&s->table_lock);
     while (1) {
         trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size);
 
@@ -1279,6 +1326,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
 
     trace_qed_aio_complete(s, acb, ret);
     qed_aio_complete(acb);
+    qemu_co_mutex_unlock(&s->table_lock);
     return ret;
 }
 
@@ -1474,8 +1522,14 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
 
     bdrv_qed_close(bs);
 
-    memset(s, 0, sizeof(BDRVQEDState));
+    bdrv_qed_init_state(bs);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
     ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
     if (local_err) {
         error_propagate(errp, local_err);
         error_prepend(errp, "Could not reopen qed layer: ");
@@ -1554,7 +1608,7 @@ static BlockDriver bdrv_qed = {
     .bdrv_check               = bdrv_qed_check,
     .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
     .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
-    .bdrv_drain               = bdrv_qed_drain,
+    .bdrv_co_drain            = bdrv_qed_co_drain,
 };
 
 static void bdrv_qed_init(void)
diff --git a/block/qed.h b/block/qed.h
index dd3a2d5519..f35341f134 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -151,15 +151,21 @@ typedef struct QEDAIOCB {
 
 typedef struct {
     BlockDriverState *bs;           /* device */
-    uint64_t file_size;             /* length of image file, in bytes */
 
+    /* Written only by an allocating write or the timer handler (the latter
+     * while allocating reqs are plugged).
+     */
     QEDHeader header;               /* always cpu-endian */
+
+    /* Protected by table_lock.  */
+    CoMutex table_lock;
     QEDTable *l1_table;
     L2TableCache l2_cache;          /* l2 table cache */
     uint32_t table_nelems;
     uint32_t l1_shift;
     uint32_t l2_shift;
     uint32_t l2_mask;
+    uint64_t file_size;             /* length of image file, in bytes */
 
     /* Allocating write request queue */
     QEDAIOCB *allocating_acb;
@@ -177,9 +183,6 @@ enum {
     QED_CLUSTER_L1,            /* cluster missing in L1 */
 };
 
-void qed_acquire(BDRVQEDState *s);
-void qed_release(BDRVQEDState *s);
-
 /**
  * Header functions
  */
diff --git a/block/sheepdog.c b/block/sheepdog.c
index b7b7e6bbe5..abb2e79065 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -390,6 +390,7 @@ struct BDRVSheepdogState {
     QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
     QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
 
+    CoMutex queue_lock;
     CoQueue overlapping_queue;
     QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
 };
@@ -488,7 +489,7 @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
 retry:
     QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
         if (AIOCBOverlapping(acb, cb)) {
-            qemu_co_queue_wait(&s->overlapping_queue, NULL);
+            qemu_co_queue_wait(&s->overlapping_queue, &s->queue_lock);
             goto retry;
         }
     }
@@ -525,8 +526,10 @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
         return;
     }
 
+    qemu_co_mutex_lock(&s->queue_lock);
     wait_for_overlapping_aiocb(s, acb);
     QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
 }
 
 static SocketAddress *sd_socket_address(const char *path,
@@ -785,6 +788,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
      * have to move all the inflight requests to the failed queue before
      * resend_aioreq() is called.
      */
+    qemu_co_mutex_lock(&s->queue_lock);
     QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) {
         QLIST_REMOVE(aio_req, aio_siblings);
         QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings);
@@ -794,8 +798,11 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
     while (!QLIST_EMPTY(&s->failed_aio_head)) {
         aio_req = QLIST_FIRST(&s->failed_aio_head);
         QLIST_REMOVE(aio_req, aio_siblings);
+        qemu_co_mutex_unlock(&s->queue_lock);
         resend_aioreq(s, aio_req);
+        qemu_co_mutex_lock(&s->queue_lock);
     }
+    qemu_co_mutex_unlock(&s->queue_lock);
 }
 
 /*
@@ -887,7 +894,10 @@ static void coroutine_fn aio_read_response(void *opaque)
     */
     s->co_recv = NULL;
 
+    qemu_co_mutex_lock(&s->queue_lock);
     QLIST_REMOVE(aio_req, aio_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
+
     switch (rsp.result) {
     case SD_RES_SUCCESS:
         break;
@@ -1307,7 +1317,9 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     uint64_t old_oid = aio_req->base_oid;
     bool create = aio_req->create;
 
+    qemu_co_mutex_lock(&s->queue_lock);
     QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
 
     if (!nr_copies) {
         error_report("bug");
@@ -1678,6 +1690,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
     pstrcpy(s->name, sizeof(s->name), vdi);
     qemu_co_mutex_init(&s->lock);
+    qemu_co_mutex_init(&s->queue_lock);
     qemu_co_queue_init(&s->overlapping_queue);
     qemu_opts_del(opts);
     g_free(buf);
@@ -2438,12 +2451,16 @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
 
 static void sd_aio_complete(SheepdogAIOCB *acb)
 {
+    BDRVSheepdogState *s;
     if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
         return;
     }
 
+    s = acb->s;
+    qemu_co_mutex_lock(&s->queue_lock);
     QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&acb->s->overlapping_queue);
+    qemu_co_queue_restart_all(&s->overlapping_queue);
+    qemu_co_mutex_unlock(&s->queue_lock);
 }
 
 static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
diff --git a/block/ssh.c b/block/ssh.c
index 07a57eb466..e8f0404c03 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -888,13 +888,22 @@ static int ssh_has_zero_init(BlockDriverState *bs)
     return has_zero_init;
 }
 
+typedef struct BDRVSSHRestart {
+    BlockDriverState *bs;
+    Coroutine *co;
+} BDRVSSHRestart;
+
 static void restart_coroutine(void *opaque)
 {
-    Coroutine *co = opaque;
+    BDRVSSHRestart *restart = opaque;
+    BlockDriverState *bs = restart->bs;
+    BDRVSSHState *s = bs->opaque;
+    AioContext *ctx = bdrv_get_aio_context(bs);
 
-    DPRINTF("co=%p", co);
+    DPRINTF("co=%p", restart->co);
+    aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL);
 
-    aio_co_wake(co);
+    aio_co_wake(restart->co);
 }
 
 /* A non-blocking call returned EAGAIN, so yield, ensuring the
@@ -905,7 +914,10 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
 {
     int r;
     IOHandler *rd_handler = NULL, *wr_handler = NULL;
-    Coroutine *co = qemu_coroutine_self();
+    BDRVSSHRestart restart = {
+        .bs = bs,
+        .co = qemu_coroutine_self()
+    };
 
     r = libssh2_session_block_directions(s->session);
 
@@ -920,11 +932,9 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
             rd_handler, wr_handler);
 
     aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, rd_handler, wr_handler, NULL, co);
+                       false, rd_handler, wr_handler, NULL, &restart);
     qemu_coroutine_yield();
     DPRINTF("s->sock=%d - back", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
-                       NULL, NULL, NULL, NULL);
 }
 
 /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
diff --git a/block/vdi.c b/block/vdi.c
index 2b6e8fa1ed..8da5dfc897 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -172,7 +172,7 @@ typedef struct {
     /* VDI header (converted to host endianness). */
     VdiHeader header;
 
-    CoMutex write_lock;
+    CoRwlock bmap_lock;
 
     Error *migration_blocker;
 } BDRVVdiState;
@@ -485,7 +485,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail_free_bmap;
     }
 
-    qemu_co_mutex_init(&s->write_lock);
+    qemu_co_rwlock_init(&s->bmap_lock);
 
     return 0;
 
@@ -557,7 +557,9 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                n_bytes, offset);
 
         /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
         bmap_entry = le32_to_cpu(s->bmap[block_index]);
+        qemu_co_rwlock_unlock(&s->bmap_lock);
         if (!VDI_IS_ALLOCATED(bmap_entry)) {
             /* Block not allocated, return zeros, no need to wait. */
             qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
@@ -595,6 +597,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     uint32_t block_index;
     uint32_t offset_in_block;
     uint32_t n_bytes;
+    uint64_t data_offset;
     uint32_t bmap_first = VDI_UNALLOCATED;
     uint32_t bmap_last = VDI_UNALLOCATED;
     uint8_t *block = NULL;
@@ -614,10 +617,19 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                n_bytes, offset);
 
         /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
         bmap_entry = le32_to_cpu(s->bmap[block_index]);
         if (!VDI_IS_ALLOCATED(bmap_entry)) {
             /* Allocate new block and write to it. */
             uint64_t data_offset;
+            qemu_co_rwlock_upgrade(&s->bmap_lock);
+            bmap_entry = le32_to_cpu(s->bmap[block_index]);
+            if (VDI_IS_ALLOCATED(bmap_entry)) {
+                /* A concurrent allocation did the work for us.  */
+                qemu_co_rwlock_downgrade(&s->bmap_lock);
+                goto nonallocating_write;
+            }
+
             bmap_entry = s->header.blocks_allocated;
             s->bmap[block_index] = cpu_to_le32(bmap_entry);
             s->header.blocks_allocated++;
@@ -635,30 +647,18 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
             memset(block + offset_in_block + n_bytes, 0,
                    s->block_size - n_bytes - offset_in_block);
 
-            /* Note that this coroutine does not yield anywhere from reading the
-             * bmap entry until here, so in regards to all the coroutines trying
-             * to write to this cluster, the one doing the allocation will
-             * always be the first to try to acquire the lock.
-             * Therefore, it is also the first that will actually be able to
-             * acquire the lock and thus the padded cluster is written before
-             * the other coroutines can write to the affected area. */
-            qemu_co_mutex_lock(&s->write_lock);
+            /* Write the new block under CoRwLock write-side protection,
+             * so this full-cluster write does not overlap a partial write
+             * of the same cluster, issued from the "else" branch.
+             */
             ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
-            qemu_co_mutex_unlock(&s->write_lock);
+            qemu_co_rwlock_unlock(&s->bmap_lock);
         } else {
-            uint64_t data_offset = s->header.offset_data +
-                                   (uint64_t)bmap_entry * s->block_size +
-                                   offset_in_block;
-            qemu_co_mutex_lock(&s->write_lock);
-            /* This lock is only used to make sure the following write operation
-             * is executed after the write issued by the coroutine allocating
-             * this cluster, therefore we do not need to keep it locked.
-             * As stated above, the allocating coroutine will always try to lock
-             * the mutex before all the other concurrent accesses to that
-             * cluster, therefore at this point we can be absolutely certain
-             * that that write operation has returned (there may be other writes
-             * in flight, but they do not concern this very operation). */
-            qemu_co_mutex_unlock(&s->write_lock);
+nonallocating_write:
+            data_offset = s->header.offset_data +
+                           (uint64_t)bmap_entry * s->block_size +
+                           offset_in_block;
+            qemu_co_rwlock_unlock(&s->bmap_lock);
 
             qemu_iovec_reset(&local_qiov);
             qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
diff --git a/block/vpc.c b/block/vpc.c
index 9a6f8173a5..8057d42a23 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -496,12 +496,6 @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
     return block_offset;
 }
 
-static inline int64_t get_sector_offset(BlockDriverState *bs,
-                                        int64_t sector_num, bool write)
-{
-    return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
-}
-
 /*
  * Writes the footer to the end of the image file. This is needed when the
  * file grows as it overwrites the old footer
@@ -696,6 +690,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
     VHDFooter *footer = (VHDFooter*) s->footer_buf;
     int64_t start, offset;
     bool allocated;
+    int64_t ret;
     int n;
 
     if (be32_to_cpu(footer->type) == VHD_FIXED) {
@@ -705,10 +700,13 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
                (sector_num << BDRV_SECTOR_BITS);
     }
 
-    offset = get_sector_offset(bs, sector_num, 0);
+    qemu_co_mutex_lock(&s->lock);
+
+    offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false);
     start = offset;
     allocated = (offset != -1);
     *pnum = 0;
+    ret = 0;
 
     do {
         /* All sectors in a block are contiguous (without using the bitmap) */
@@ -723,15 +721,17 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
          * sectors since there is always a bitmap in between. */
         if (allocated) {
             *file = bs->file->bs;
-            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+            break;
         }
         if (nb_sectors == 0) {
             break;
         }
-        offset = get_sector_offset(bs, sector_num, 0);
+        offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false);
     } while (offset == -1);
 
-    return 0;
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
 }
 
 /*
diff --git a/block/vvfat.c b/block/vvfat.c
index 4fd28e1e87..4dae790203 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3078,8 +3078,14 @@ static int coroutine_fn
 write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                     QEMUIOVector *qiov, int flags)
 {
+    int ret;
+
     BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
-    return try_commit(s);
+    qemu_co_mutex_lock(&s->lock);
+    ret = try_commit(s);
+    qemu_co_mutex_unlock(&s->lock);
+
+    return ret;
 }
 
 static void write_target_close(BlockDriverState *bs) {
diff --git a/docs/devel/memory.txt b/docs/devel/memory.txt
index 811b1bd3c5..8ed810f8b9 100644
--- a/docs/devel/memory.txt
+++ b/docs/devel/memory.txt
@@ -91,6 +91,37 @@ one of whose subregions is a low priority "background" region covering
 the whole address range; this is often clearer and is preferred.
 Subregions cannot be added to an alias region.
 
+Migration
+---------
+
+Where the memory region is backed by host memory (RAM, ROM and
+ROM device memory region types), this host memory needs to be
+copied to the destination on migration. The following APIs, which
+allocate the host memory for you, also register the memory so that
+it is migrated:
+ - memory_region_init_ram()
+ - memory_region_init_rom()
+ - memory_region_init_rom_device()
+
+For most devices and boards this is the correct thing. If you
+have a special case where you need to manage the migration of
+the backing memory yourself, you can call the functions:
+ - memory_region_init_ram_nomigrate()
+ - memory_region_init_rom_nomigrate()
+ - memory_region_init_rom_device_nomigrate()
+which only initialize the MemoryRegion and leave handling
+migration to the caller.
+
+The functions:
+ - memory_region_init_resizeable_ram()
+ - memory_region_init_ram_from_file()
+ - memory_region_init_ram_from_fd()
+ - memory_region_init_ram_ptr()
+ - memory_region_init_ram_device_ptr()
+are for special cases only, and so they do not automatically
+register the backing memory for migration; the caller must
+manage migration if necessary.
+
 Region names
 ------------
 
diff --git a/hmp.c b/hmp.c
index d970ea9855..b42ae59a29 100644
--- a/hmp.c
+++ b/hmp.c
@@ -600,50 +600,92 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict)
     qapi_free_BlockStatsList(stats_list);
 }
 
+/* Helper for hmp_info_vnc_clients, _servers */
+static void hmp_info_VncBasicInfo(Monitor *mon, VncBasicInfo *info,
+                                  const char *name)
+{
+    monitor_printf(mon, "  %s: %s:%s (%s%s)\n",
+                   name,
+                   info->host,
+                   info->service,
+                   NetworkAddressFamily_lookup[info->family],
+                   info->websocket ? " (Websocket)" : "");
+}
+
+/* Helper printing the auth scheme and VeNCrypt sub-auth ("none" if NULL) */
+static void hmp_info_vnc_authcrypt(Monitor *mon, const char *indent,
+                                   VncPrimaryAuth auth,
+                                   VncVencryptSubAuth *vencrypt)
+{
+    monitor_printf(mon, "%sAuth: %s (Sub: %s)\n", indent,
+                   VncPrimaryAuth_lookup[auth],
+                   vencrypt ? VncVencryptSubAuth_lookup[*vencrypt] : "none");
+}
+
+static void hmp_info_vnc_clients(Monitor *mon, VncClientInfoList *client)
+{
+    while (client) {
+        VncClientInfo *cinfo = client->value;
+
+        hmp_info_VncBasicInfo(mon, qapi_VncClientInfo_base(cinfo), "Client");
+        monitor_printf(mon, "    x509_dname: %s\n",
+                       cinfo->has_x509_dname ?
+                       cinfo->x509_dname : "none");
+        monitor_printf(mon, "    sasl_username: %s\n",
+                       cinfo->has_sasl_username ?
+                       cinfo->sasl_username : "none");
+
+        client = client->next;
+    }
+}
+
+static void hmp_info_vnc_servers(Monitor *mon, VncServerInfo2List *server)
+{
+    while (server) {
+        VncServerInfo2 *sinfo = server->value;
+        hmp_info_VncBasicInfo(mon, qapi_VncServerInfo2_base(sinfo), "Server");
+        hmp_info_vnc_authcrypt(mon, "    ", sinfo->auth,
+                               sinfo->has_vencrypt ? &sinfo->vencrypt : NULL);
+        server = server->next;
+    }
+}
+
 void hmp_info_vnc(Monitor *mon, const QDict *qdict)
 {
-    VncInfo *info;
+    VncInfo2List *info2l;
     Error *err = NULL;
-    VncClientInfoList *client;
 
-    info = qmp_query_vnc(&err);
+    info2l = qmp_query_vnc_servers(&err);
     if (err) {
         error_report_err(err);
         return;
     }
-
-    if (!info->enabled) {
-        monitor_printf(mon, "Server: disabled\n");
-        goto out;
-    }
-
-    monitor_printf(mon, "Server:\n");
-    if (info->has_host && info->has_service) {
-        monitor_printf(mon, "     address: %s:%s\n", info->host, info->service);
-    }
-    if (info->has_auth) {
-        monitor_printf(mon, "        auth: %s\n", info->auth);
+    if (!info2l) {
+        monitor_printf(mon, "None\n");
+        return;
     }
 
-    if (!info->has_clients || info->clients == NULL) {
-        monitor_printf(mon, "Client: none\n");
-    } else {
-        for (client = info->clients; client; client = client->next) {
-            monitor_printf(mon, "Client:\n");
-            monitor_printf(mon, "     address: %s:%s\n",
-                           client->value->host,
-                           client->value->service);
-            monitor_printf(mon, "  x509_dname: %s\n",
-                           client->value->x509_dname ?
-                           client->value->x509_dname : "none");
-            monitor_printf(mon, "    username: %s\n",
-                           client->value->has_sasl_username ?
-                           client->value->sasl_username : "none");
+    while (info2l) {
+        VncInfo2 *info = info2l->value;
+        monitor_printf(mon, "%s:\n", info->id);
+        hmp_info_vnc_servers(mon, info->server);
+        hmp_info_vnc_clients(mon, info->clients);
+        if (!info->server) {
+            /* The server entry displays its auth, we only
+             * need to display in the case of 'reverse' connections
+             * where there's no server.
+             */
+            hmp_info_vnc_authcrypt(mon, "  ", info->auth,
+                               info->has_vencrypt ? &info->vencrypt : NULL);
         }
+        if (info->has_display) {
+            monitor_printf(mon, "  Display: %s\n", info->display);
+        }
+        info2l = info2l->next;
     }
 
-out:
-    qapi_free_VncInfo(info);
+    qapi_free_VncInfo2List(info2l);
+
 }
 
 #ifdef CONFIG_SPICE
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index db3f6d20c6..0c5635f300 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -216,7 +216,7 @@ static void aspeed_board_init(MachineState *machine,
          * SoC and 128MB for the AST2500 SoC, which is twice as big as
          * needed by the flash modules of the Aspeed machines.
          */
-        memory_region_init_rom(boot_rom, OBJECT(bmc), "aspeed.boot_rom",
+        memory_region_init_rom_nomigrate(boot_rom, OBJECT(bmc), "aspeed.boot_rom",
                                fl->size, &error_abort);
         memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR,
                                     boot_rom);
diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c
index 3034849c80..5529024edf 100644
--- a/hw/arm/aspeed_soc.c
+++ b/hw/arm/aspeed_soc.c
@@ -211,7 +211,7 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp)
     }
 
     /* SRAM */
-    memory_region_init_ram(&s->sram, OBJECT(dev), "aspeed.sram",
+    memory_region_init_ram_nomigrate(&s->sram, OBJECT(dev), "aspeed.sram",
                            sc->info->sram_size, &err);
     if (err) {
         error_propagate(errp, err);
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
index ee851e3ae5..f9e79f3ebb 100644
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -281,7 +281,6 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem)
     /* Internal ROM */
     memory_region_init_ram(&s->irom_mem, NULL, "exynos4210.irom",
                            EXYNOS4210_IROM_SIZE, &error_fatal);
-    vmstate_register_ram_global(&s->irom_mem);
     memory_region_set_readonly(&s->irom_mem, true);
     memory_region_add_subregion(system_mem, EXYNOS4210_IROM_BASE_ADDR,
                                 &s->irom_mem);
@@ -297,7 +296,6 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem)
     /* Internal RAM */
     memory_region_init_ram(&s->iram_mem, NULL, "exynos4210.iram",
                            EXYNOS4210_IRAM_SIZE, &error_fatal);
-    vmstate_register_ram_global(&s->iram_mem);
     memory_region_add_subregion(system_mem, EXYNOS4210_IRAM_BASE_ADDR,
                                 &s->iram_mem);
 
diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c
index 6240b26839..7c03ed32b7 100644
--- a/hw/arm/exynos4_boards.c
+++ b/hw/arm/exynos4_boards.c
@@ -113,7 +113,6 @@ static void exynos4_boards_init_ram(Exynos4BoardState *s,
         memory_region_init_ram(&s->dram1_mem, NULL, "exynos4210.dram1",
                                mem_size - EXYNOS4210_DRAM_MAX_SIZE,
                                &error_fatal);
-        vmstate_register_ram_global(&s->dram1_mem);
         memory_region_add_subregion(system_mem, EXYNOS4210_DRAM1_BASE_ADDR,
                                     &s->dram1_mem);
         mem_size = EXYNOS4210_DRAM_MAX_SIZE;
@@ -121,7 +120,6 @@ static void exynos4_boards_init_ram(Exynos4BoardState *s,
 
     memory_region_init_ram(&s->dram0_mem, NULL, "exynos4210.dram0", mem_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->dram0_mem);
     memory_region_add_subregion(system_mem, EXYNOS4210_DRAM0_BASE_ADDR,
                                 &s->dram0_mem);
 }
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index 40666b68a3..8cff3c1f7b 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -249,7 +249,7 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
     }
 
     /* initialize 2 x 16 KB ROM */
-    memory_region_init_rom(&s->rom[0], NULL,
+    memory_region_init_rom_nomigrate(&s->rom[0], NULL,
                            "imx25.rom0", FSL_IMX25_ROM0_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -257,7 +257,7 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
     }
     memory_region_add_subregion(get_system_memory(), FSL_IMX25_ROM0_ADDR,
                                 &s->rom[0]);
-    memory_region_init_rom(&s->rom[1], NULL,
+    memory_region_init_rom_nomigrate(&s->rom[1], NULL,
                            "imx25.rom1", FSL_IMX25_ROM1_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -275,7 +275,6 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
     }
     memory_region_add_subregion(get_system_memory(), FSL_IMX25_IRAM_ADDR,
                                 &s->iram);
-    vmstate_register_ram_global(&s->iram);
 
     /* internal RAM (128 KB) is aliased over 128 MB - 128 KB */
     memory_region_init_alias(&s->iram_alias, NULL, "imx25.iram_alias",
diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c
index c30130667e..90278758f9 100644
--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -219,7 +219,7 @@ static void fsl_imx31_realize(DeviceState *dev, Error **errp)
     }
 
     /* On a real system, the first 16k is a `secure boot rom' */
-    memory_region_init_rom(&s->secure_rom, NULL, "imx31.secure_rom",
+    memory_region_init_rom_nomigrate(&s->secure_rom, NULL, "imx31.secure_rom",
                            FSL_IMX31_SECURE_ROM_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -229,7 +229,7 @@ static void fsl_imx31_realize(DeviceState *dev, Error **errp)
                                 &s->secure_rom);
 
     /* There is also a 16k ROM */
-    memory_region_init_rom(&s->rom, NULL, "imx31.rom",
+    memory_region_init_rom_nomigrate(&s->rom, NULL, "imx31.rom",
                            FSL_IMX31_ROM_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -247,7 +247,6 @@ static void fsl_imx31_realize(DeviceState *dev, Error **errp)
     }
     memory_region_add_subregion(get_system_memory(), FSL_IMX31_IRAM_ADDR,
                                 &s->iram);
-    vmstate_register_ram_global(&s->iram);
 
     /* internal RAM (16 KB) is aliased over 256 MB - 16 KB */
     memory_region_init_alias(&s->iram_alias, NULL, "imx31.iram_alias",
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index 27773c9c47..576c6631a1 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -399,7 +399,7 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
                                         FSL_IMX6_ENET_MAC_1588_IRQ));
 
     /* ROM memory */
-    memory_region_init_rom(&s->rom, NULL, "imx6.rom",
+    memory_region_init_rom_nomigrate(&s->rom, NULL, "imx6.rom",
                            FSL_IMX6_ROM_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -409,7 +409,7 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
                                 &s->rom);
 
     /* CAAM memory */
-    memory_region_init_rom(&s->caam, NULL, "imx6.caam",
+    memory_region_init_rom_nomigrate(&s->caam, NULL, "imx6.caam",
                            FSL_IMX6_CAAM_MEM_SIZE, &err);
     if (err) {
         error_propagate(errp, err);
@@ -427,7 +427,6 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
     }
     memory_region_add_subregion(get_system_memory(), FSL_IMX6_OCRAM_ADDR,
                                 &s->ocram);
-    vmstate_register_ram_global(&s->ocram);
 
     /* internal OCRAM (256 KB) is aliased over 1 MB */
     memory_region_init_alias(&s->ocram_alias, NULL, "imx6.ocram_alias",
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index 750c463e2a..20e60f15c4 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -276,7 +276,7 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id)
     memory_region_add_subregion(sysmem, 0, dram);
 
     sysram = g_new(MemoryRegion, 1);
-    memory_region_init_ram(sysram, NULL, "highbank.sysram", 0x8000,
+    memory_region_init_ram_nomigrate(sysram, NULL, "highbank.sysram", 0x8000,
                            &error_fatal);
     memory_region_add_subregion(sysmem, 0xfff88000, sysram);
     if (bios_name != NULL) {
diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c
index ca3eca1d16..d79221d166 100644
--- a/hw/arm/integratorcp.c
+++ b/hw/arm/integratorcp.c
@@ -276,7 +276,7 @@ static void integratorcm_init(Object *obj)
     s->cm_init = 0x00000112;
     s->cm_refcnt_offset = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), 24,
                                    1000);
-    memory_region_init_ram(&s->flash, obj, "integrator.flash", 0x100000,
+    memory_region_init_ram_nomigrate(&s->flash, obj, "integrator.flash", 0x100000,
                            &error_fatal);
     vmstate_register_ram_global(&s->flash);
 
diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c
index f962236cf4..fb268e691e 100644
--- a/hw/arm/mainstone.c
+++ b/hw/arm/mainstone.c
@@ -130,7 +130,6 @@ static void mainstone_common_init(MemoryRegion *address_space_mem,
     mpu = pxa270_init(address_space_mem, mainstone_binfo.ram_size, cpu_model);
     memory_region_init_ram(rom, NULL, "mainstone.rom", MAINSTONE_ROM,
                            &error_fatal);
-    vmstate_register_ram_global(rom);
     memory_region_set_readonly(rom, true);
     memory_region_add_subregion(address_space_mem, 0, rom);
 
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index 9c710f74b4..7e8ab3184c 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -1606,7 +1606,6 @@ static void musicpal_init(MachineState *machine)
 
     memory_region_init_ram(sram, NULL, "musicpal.sram", MP_SRAM_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(address_space_mem, MP_SRAM_BASE, sram);
 
     dev = sysbus_create_simple(TYPE_MV88W8618_PIC, MP_PIC_BASE,
diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c
index 54582bd148..3d15ff6779 100644
--- a/hw/arm/omap1.c
+++ b/hw/arm/omap1.c
@@ -3882,7 +3882,6 @@ struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *system_memory,
     memory_region_add_subregion(system_memory, OMAP_EMIFF_BASE, &s->emiff_ram);
     memory_region_init_ram(&s->imif_ram, NULL, "omap1.sram", s->sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->imif_ram);
     memory_region_add_subregion(system_memory, OMAP_IMIF_BASE, &s->imif_ram);
 
     omap_clkm_init(system_memory, 0xfffece00, 0xe1008000, s);
diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c
index 91f573338c..bbf0b7e188 100644
--- a/hw/arm/omap2.c
+++ b/hw/arm/omap2.c
@@ -2280,7 +2280,6 @@ struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sysmem,
     memory_region_add_subregion(sysmem, OMAP2_Q2_BASE, &s->sdram);
     memory_region_init_ram(&s->sram, NULL, "omap2.sram", s->sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->sram);
     memory_region_add_subregion(sysmem, OMAP2_SRAM_BASE, &s->sram);
 
     s->l4 = omap_l4_init(sysmem, OMAP2_L4_BASE, 54);
diff --git a/hw/arm/omap_sx1.c b/hw/arm/omap_sx1.c
index 5d74026cb2..9809106617 100644
--- a/hw/arm/omap_sx1.c
+++ b/hw/arm/omap_sx1.c
@@ -125,7 +125,6 @@ static void sx1_init(MachineState *machine, const int version)
     /* External Flash (EMIFS) */
     memory_region_init_ram(flash, NULL, "omap_sx1.flash0-0", flash_size,
                            &error_fatal);
-    vmstate_register_ram_global(flash);
     memory_region_set_readonly(flash, true);
     memory_region_add_subregion(address_space, OMAP_CS0_BASE, flash);
 
@@ -167,9 +166,8 @@ static void sx1_init(MachineState *machine, const int version)
     if ((version == 1) &&
             (dinfo = drive_get(IF_PFLASH, 0, fl_idx)) != NULL) {
         MemoryRegion *flash_1 = g_new(MemoryRegion, 1);
-        memory_region_init_ram(flash_1, NULL, "omap_sx1.flash1-0", flash1_size,
-                               &error_fatal);
-        vmstate_register_ram_global(flash_1);
+        memory_region_init_ram(flash_1, NULL, "omap_sx1.flash1-0",
+                               flash1_size, &error_fatal);
         memory_region_set_readonly(flash_1, true);
         memory_region_add_subregion(address_space, OMAP_CS1_BASE, flash_1);
 
diff --git a/hw/arm/palm.c b/hw/arm/palm.c
index 7f460732e3..64cf8ca921 100644
--- a/hw/arm/palm.c
+++ b/hw/arm/palm.c
@@ -216,7 +216,6 @@ static void palmte_init(MachineState *machine)
     /* External Flash (EMIFS) */
     memory_region_init_ram(flash, NULL, "palmte.flash", flash_size,
                            &error_fatal);
-    vmstate_register_ram_global(flash);
     memory_region_set_readonly(flash, true);
     memory_region_add_subregion(address_space_mem, OMAP_CS0_BASE, flash);
 
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 731ed08de7..194b0bc808 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -2076,11 +2076,9 @@ PXA2xxState *pxa270_init(MemoryRegion *address_space,
     /* SDRAM & Internal Memory Storage */
     memory_region_init_ram(&s->sdram, NULL, "pxa270.sdram", sdram_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->sdram);
     memory_region_add_subregion(address_space, PXA2XX_SDRAM_BASE, &s->sdram);
     memory_region_init_ram(&s->internal, NULL, "pxa270.internal", 0x40000,
                            &error_fatal);
-    vmstate_register_ram_global(&s->internal);
     memory_region_add_subregion(address_space, PXA2XX_INTERNAL_BASE,
                                 &s->internal);
 
@@ -2208,11 +2206,9 @@ PXA2xxState *pxa255_init(MemoryRegion *address_space, unsigned int sdram_size)
     /* SDRAM & Internal Memory Storage */
     memory_region_init_ram(&s->sdram, NULL, "pxa255.sdram", sdram_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->sdram);
     memory_region_add_subregion(address_space, PXA2XX_SDRAM_BASE, &s->sdram);
     memory_region_init_ram(&s->internal, NULL, "pxa255.internal",
                            PXA2XX_INTERNAL_SIZE, &error_fatal);
-    vmstate_register_ram_global(&s->internal);
     memory_region_add_subregion(address_space, PXA2XX_INTERNAL_BASE,
                                 &s->internal);
 
diff --git a/hw/arm/realview.c b/hw/arm/realview.c
index b7d4753400..76ff5579bc 100644
--- a/hw/arm/realview.c
+++ b/hw/arm/realview.c
@@ -145,13 +145,11 @@ static void realview_init(MachineState *machine,
         ram_size = 0x20000000;
         memory_region_init_ram(ram_lo, NULL, "realview.lowmem", low_ram_size,
                                &error_fatal);
-        vmstate_register_ram_global(ram_lo);
         memory_region_add_subregion(sysmem, 0x20000000, ram_lo);
     }
 
     memory_region_init_ram(ram_hi, NULL, "realview.highmem", ram_size,
                            &error_fatal);
-    vmstate_register_ram_global(ram_hi);
     low_ram_size = ram_size;
     if (low_ram_size > 0x10000000)
       low_ram_size = 0x10000000;
@@ -347,7 +345,6 @@ static void realview_init(MachineState *machine,
        until after Linux boots the secondary CPUs.  */
     memory_region_init_ram(ram_hack, NULL, "realview.hack", 0x1000,
                            &error_fatal);
-    vmstate_register_ram_global(ram_hack);
     memory_region_add_subregion(sysmem, SMP_BOOT_ADDR, ram_hack);
 
     realview_binfo.ram_size = ram_size;
diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c
index 93bde14743..7f588cea21 100644
--- a/hw/arm/spitz.c
+++ b/hw/arm/spitz.c
@@ -920,7 +920,6 @@ static void spitz_common_init(MachineState *machine,
     sl_flash_register(mpu, (model == spitz) ? FLASH_128M : FLASH_1024M);
 
     memory_region_init_ram(rom, NULL, "spitz.rom", SPITZ_ROM, &error_fatal);
-    vmstate_register_ram_global(rom);
     memory_region_set_readonly(rom, true);
     memory_region_add_subregion(address_space_mem, 0, rom);
 
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index cf6e7be083..408c1a14d3 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -1290,13 +1290,11 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model,
     /* Flash programming is done via the SCU, so pretend it is ROM.  */
     memory_region_init_ram(flash, NULL, "stellaris.flash", flash_size,
                            &error_fatal);
-    vmstate_register_ram_global(flash);
     memory_region_set_readonly(flash, true);
     memory_region_add_subregion(system_memory, 0, flash);
 
     memory_region_init_ram(sram, NULL, "stellaris.sram", sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(system_memory, 0x20000000, sram);
 
     nvic = armv7m_init(system_memory, flash_size, NUM_IRQ_LINES,
diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c
index 6e1260d2ed..f61e735f0f 100644
--- a/hw/arm/stm32f205_soc.c
+++ b/hw/arm/stm32f205_soc.c
@@ -100,8 +100,6 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
     memory_region_init_alias(flash_alias, NULL, "STM32F205.flash.alias",
                              flash, 0, FLASH_SIZE);
 
-    vmstate_register_ram_global(flash);
-
     memory_region_set_readonly(flash, true);
     memory_region_set_readonly(flash_alias, true);
 
@@ -110,7 +108,6 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
 
     memory_region_init_ram(sram, NULL, "STM32F205.sram", SRAM_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram);
 
     armv7m = DEVICE(&s->armv7m);
diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c
index 2421b8150d..8b757ff6a3 100644
--- a/hw/arm/tosa.c
+++ b/hw/arm/tosa.c
@@ -235,7 +235,6 @@ static void tosa_init(MachineState *machine)
     mpu = pxa255_init(address_space_mem, tosa_binfo.ram_size);
 
     memory_region_init_ram(rom, NULL, "tosa.rom", TOSA_ROM, &error_fatal);
-    vmstate_register_ram_global(rom);
     memory_region_set_readonly(rom, true);
     memory_region_add_subregion(address_space_mem, 0, rom);
 
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index c6b1e674b4..528c65ddb6 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -392,7 +392,6 @@ static void a15_daughterboard_init(const VexpressMachineState *vms,
     /* 0x2e000000: system SRAM */
     memory_region_init_ram(sram, NULL, "vexpress.a15sram", 0x10000,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(sysmem, 0x2e000000, sram);
 
     /* 0x7ffb0000: DMA330 DMA controller: not modelled */
@@ -675,13 +674,11 @@ static void vexpress_common_init(MachineState *machine)
     sram_size = 0x2000000;
     memory_region_init_ram(sram, NULL, "vexpress.sram", sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(sysmem, map[VE_SRAM], sram);
 
     vram_size = 0x800000;
     memory_region_init_ram(vram, NULL, "vexpress.vram", vram_size,
                            &error_fatal);
-    vmstate_register_ram_global(vram);
     memory_region_add_subregion(sysmem, map[VE_VIDEORAM], vram);
 
     /* 0x4e000000 LAN9118 Ethernet */
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 010f7244bf..31739d75a3 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1155,8 +1155,8 @@ static void create_secure_ram(VirtMachineState *vms,
     hwaddr base = vms->memmap[VIRT_SECURE_MEM].base;
     hwaddr size = vms->memmap[VIRT_SECURE_MEM].size;
 
-    memory_region_init_ram(secram, NULL, "virt.secure-ram", size, &error_fatal);
-    vmstate_register_ram_global(secram);
+    memory_region_init_ram(secram, NULL, "virt.secure-ram", size,
+                           &error_fatal);
     memory_region_add_subregion(secure_sysmem, base, secram);
 
     nodename = g_strdup_printf("/secram@%" PRIx64, base);
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 3985356fc2..6b11a75e67 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -206,7 +206,6 @@ static void zynq_init(MachineState *machine)
     /* 256K of on-chip memory */
     memory_region_init_ram(ocm_ram, NULL, "zynq.ocm_ram", 256 << 10,
                            &error_fatal);
-    vmstate_register_ram_global(ocm_ram);
     memory_region_add_subregion(address_space_mem, 0xFFFC0000, ocm_ram);
 
     DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 64f52f80a5..9eceadbdc8 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -228,7 +228,6 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
 
         memory_region_init_ram(&s->ocm_ram[i], NULL, ocm_name,
                                XLNX_ZYNQMP_OCM_RAM_SIZE, &error_fatal);
-        vmstate_register_ram_global(&s->ocm_ram[i]);
         memory_region_add_subregion(get_system_memory(),
                                     XLNX_ZYNQMP_OCM_RAM_0_ADDRESS +
                                         i * XLNX_ZYNQMP_OCM_RAM_SIZE,
diff --git a/hw/audio/adlib.c b/hw/audio/adlib.c
index c6e0f10c16..97b876c7e0 100644
--- a/hw/audio/adlib.c
+++ b/hw/audio/adlib.c
@@ -74,8 +74,6 @@ typedef struct {
     PortioList port_list;
 } AdlibState;
 
-static AdlibState *glob_adlib;
-
 static void adlib_stop_opl_timer (AdlibState *s, size_t n)
 {
     OPLTimerOver (s->opl, n);
@@ -130,9 +128,9 @@ static uint32_t adlib_read(void *opaque, uint32_t nport)
     return data;
 }
 
-static void timer_handler (int c, double interval_Sec)
+static void timer_handler (void *opaque, int c, double interval_Sec)
 {
-    AdlibState *s = glob_adlib;
+    AdlibState *s = opaque;
     unsigned n = c & 1;
 #ifdef DEBUG
     double interval;
@@ -259,19 +257,13 @@ static void adlib_realizefn (DeviceState *dev, Error **errp)
     AdlibState *s = ADLIB(dev);
     struct audsettings as;
 
-    if (glob_adlib) {
-        error_setg (errp, "Cannot create more than 1 adlib device");
-        return;
-    }
-    glob_adlib = s;
-
     s->opl = OPLCreate (3579545, s->freq);
     if (!s->opl) {
         error_setg (errp, "OPLCreate %d failed", s->freq);
         return;
     }
     else {
-        OPLSetTimerHandler (s->opl, timer_handler, 0);
+        OPLSetTimerHandler(s->opl, timer_handler, s);
         s->enabled = 1;
     }
 
diff --git a/hw/audio/fmopl.c b/hw/audio/fmopl.c
index 202f752c5d..5cfb6a96dd 100644
--- a/hw/audio/fmopl.c
+++ b/hw/audio/fmopl.c
@@ -788,14 +788,18 @@ static void OPLWriteReg(FM_OPL *OPL, int r, int v)
 				{
 					double interval = st2 ? (double)OPL->T[1]*OPL->TimerBase : 0.0;
 					OPL->st[1] = st2;
-					if (OPL->TimerHandler) (OPL->TimerHandler)(OPL->TimerParam+1,interval);
+                    if (OPL->TimerHandler) {
+                        (OPL->TimerHandler)(OPL->TimerParam, 1, interval);
+                    }
 				}
 				/* timer 1 */
 				if(OPL->st[0] != st1)
 				{
 					double interval = st1 ? (double)OPL->T[0]*OPL->TimerBase : 0.0;
 					OPL->st[0] = st1;
-					if (OPL->TimerHandler) (OPL->TimerHandler)(OPL->TimerParam+0,interval);
+                    if (OPL->TimerHandler) {
+                        (OPL->TimerHandler)(OPL->TimerParam, 0, interval);
+                    }
 				}
 			}
 			return;
@@ -1128,10 +1132,11 @@ void OPLDestroy(FM_OPL *OPL)
 
 /* ----------  Option handlers ----------       */
 
-void OPLSetTimerHandler(FM_OPL *OPL,OPL_TIMERHANDLER TimerHandler,int channelOffset)
+void OPLSetTimerHandler(FM_OPL *OPL, OPL_TIMERHANDLER TimerHandler,
+                        void *param)
 {
 	OPL->TimerHandler   = TimerHandler;
-	OPL->TimerParam = channelOffset;
+    OPL->TimerParam = param;
 }
 
 /* ---------- YM3812 I/O interface ---------- */
@@ -1197,6 +1202,9 @@ int OPLTimerOver(FM_OPL *OPL,int c)
 		}
 	}
 	/* reload timer */
-	if (OPL->TimerHandler) (OPL->TimerHandler)(OPL->TimerParam+c,(double)OPL->T[c]*OPL->TimerBase);
+    if (OPL->TimerHandler) {
+        (OPL->TimerHandler)(OPL->TimerParam, c,
+                            (double)OPL->T[c] * OPL->TimerBase);
+    }
 	return OPL->status>>7;
 }
diff --git a/hw/audio/fmopl.h b/hw/audio/fmopl.h
index fc9f16b58a..f4065f425c 100644
--- a/hw/audio/fmopl.h
+++ b/hw/audio/fmopl.h
@@ -3,7 +3,7 @@
 
 #include <stdint.h>
 
-typedef void (*OPL_TIMERHANDLER)(int channel,double interval_Sec);
+typedef void (*OPL_TIMERHANDLER)(void *param, int channel, double interval_Sec);
 
 /* !!!!! here is private section , do not access there member direct !!!!! */
 
@@ -87,13 +87,14 @@ typedef struct fm_opl_f {
 	uint8_t wavesel;
 	/* external event callback handler */
 	OPL_TIMERHANDLER  TimerHandler;		/* TIMER handler   */
-	int TimerParam;						/* TIMER parameter */
+    void *TimerParam; /* TIMER parameter */
 } FM_OPL;
 
 /* ---------- Generic interface section ---------- */
 FM_OPL *OPLCreate(int clock, int rate);
 void OPLDestroy(FM_OPL *OPL);
-void OPLSetTimerHandler(FM_OPL *OPL,OPL_TIMERHANDLER TimerHandler,int channelOffset);
+void OPLSetTimerHandler(FM_OPL *OPL, OPL_TIMERHANDLER TimerHandler,
+                        void *param);
 
 int OPLWrite(FM_OPL *OPL,int a,int v);
 unsigned char OPLRead(FM_OPL *OPL,int a);
diff --git a/hw/block/onenand.c b/hw/block/onenand.c
index ddf5492426..b7423607d9 100644
--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -807,7 +807,7 @@ static int onenand_initfn(SysBusDevice *sbd)
     }
     s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT),
                     0xff, (64 + 2) << PAGE_SHIFT);
-    memory_region_init_ram(&s->ram, OBJECT(s), "onenand.ram",
+    memory_region_init_ram_nomigrate(&s->ram, OBJECT(s), "onenand.ram",
                            0xc000 << s->shift, &error_fatal);
     vmstate_register_ram_global(&s->ram);
     ram = memory_region_get_ram_ptr(&s->ram);
diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 594d4cf6fe..1113ab1ccf 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -753,7 +753,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    vmstate_register_ram(&pfl->mem, DEVICE(pfl));
     pfl->storage = memory_region_get_ram_ptr(&pfl->mem);
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
 
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index e6c5c6c25d..c81ddd3a99 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -629,7 +629,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl));
     pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem);
     pfl->chip_len = chip_len;
 
diff --git a/hw/cris/axis_dev88.c b/hw/cris/axis_dev88.c
index 60df8877c1..80674f6bbb 100644
--- a/hw/cris/axis_dev88.c
+++ b/hw/cris/axis_dev88.c
@@ -281,9 +281,8 @@ void axisdev88_init(MachineState *machine)
 
     /* The ETRAX-FS has 128Kb on chip ram, the docs refer to it as the 
        internal memory.  */
-    memory_region_init_ram(phys_intmem, NULL, "axisdev88.chipram", INTMEM_SIZE,
-                           &error_fatal);
-    vmstate_register_ram_global(phys_intmem);
+    memory_region_init_ram(phys_intmem, NULL, "axisdev88.chipram",
+                           INTMEM_SIZE, &error_fatal);
     memory_region_add_subregion(address_space_mem, 0x38000000, phys_intmem);
 
       /* Attach a NAND flash to CS1.  */
diff --git a/hw/display/cg3.c b/hw/display/cg3.c
index 1de15a1d34..e069c4484c 100644
--- a/hw/display/cg3.c
+++ b/hw/display/cg3.c
@@ -283,7 +283,7 @@ static void cg3_initfn(Object *obj)
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     CG3State *s = CG3(obj);
 
-    memory_region_init_ram(&s->rom, obj, "cg3.prom", FCODE_MAX_ROM_SIZE,
+    memory_region_init_ram_nomigrate(&s->rom, obj, "cg3.prom", FCODE_MAX_ROM_SIZE,
                            &error_fatal);
     memory_region_set_readonly(&s->rom, true);
     sysbus_init_mmio(sbd, &s->rom);
@@ -314,7 +314,6 @@ static void cg3_realizefn(DeviceState *dev, Error **errp)
     memory_region_init_ram(&s->vram_mem, NULL, "cg3.vram", s->vram_size,
                            &error_fatal);
     memory_region_set_log(&s->vram_mem, true, DIRTY_MEMORY_VGA);
-    vmstate_register_ram_global(&s->vram_mem);
     sysbus_init_mmio(sbd, &s->vram_mem);
 
     sysbus_init_irq(sbd, &s->irq);
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 3c1688e7cb..7f8c73b56d 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2091,14 +2091,12 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
     qxl->rom_size = qxl_rom_size();
     memory_region_init_ram(&qxl->rom_bar, OBJECT(qxl), "qxl.vrom",
                            qxl->rom_size, &error_fatal);
-    vmstate_register_ram(&qxl->rom_bar, &qxl->pci.qdev);
     init_qxl_rom(qxl);
     init_qxl_ram(qxl);
 
     qxl->guest_surfaces.cmds = g_new0(QXLPHYSICAL, qxl->ssd.num_surfaces);
     memory_region_init_ram(&qxl->vram_bar, OBJECT(qxl), "qxl.vram",
                            qxl->vram_size, &error_fatal);
-    vmstate_register_ram(&qxl->vram_bar, &qxl->pci.qdev);
     memory_region_init_alias(&qxl->vram32_bar, OBJECT(qxl), "qxl.vram32",
                              &qxl->vram_bar, 0, qxl->vram32_size);
 
@@ -2200,7 +2198,6 @@ static void qxl_realize_secondary(PCIDevice *dev, Error **errp)
     qxl_init_ramsize(qxl);
     memory_region_init_ram(&qxl->vga.vram, OBJECT(dev), "qxl.vgavram",
                            qxl->vga.vram_size, &error_fatal);
-    vmstate_register_ram(&qxl->vga.vram, &qxl->pci.qdev);
     qxl->vga.vram_ptr = memory_region_get_ram_ptr(&qxl->vga.vram);
     qxl->vga.con = graphic_console_init(DEVICE(dev), 0, &qxl_ops, qxl);
 
diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index 9d254ef2e1..af792c533b 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -1578,7 +1578,7 @@ static void sm501_init(SM501State *s, DeviceState *dev,
                   s->local_mem_size_index);
 
     /* local memory */
-    memory_region_init_ram(&s->local_mem_region, OBJECT(dev), "sm501.local",
+    memory_region_init_ram_nomigrate(&s->local_mem_region, OBJECT(dev), "sm501.local",
                            get_local_mem_size(s), &error_fatal);
     vmstate_register_ram_global(&s->local_mem_region);
     memory_region_set_log(&s->local_mem_region, true, DIRTY_MEMORY_VGA);
diff --git a/hw/display/tc6393xb.c b/hw/display/tc6393xb.c
index 92f7120acc..74d10af3d4 100644
--- a/hw/display/tc6393xb.c
+++ b/hw/display/tc6393xb.c
@@ -588,7 +588,6 @@ TC6393xbState *tc6393xb_init(MemoryRegion *sysmem, uint32_t base, qemu_irq irq)
 
     memory_region_init_ram(&s->vram, NULL, "tc6393xb.vram", 0x100000,
                            &error_fatal);
-    vmstate_register_ram_global(&s->vram);
     s->vram_ptr = memory_region_get_ram_ptr(&s->vram);
     memory_region_add_subregion(sysmem, base + 0x100000, &s->vram);
     s->scr_width = 480;
diff --git a/hw/display/tcx.c b/hw/display/tcx.c
index 6593c1d6af..daa93e0929 100644
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c
@@ -752,7 +752,7 @@ static void tcx_initfn(Object *obj)
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     TCXState *s = TCX(obj);
 
-    memory_region_init_ram(&s->rom, obj, "tcx.prom", FCODE_MAX_ROM_SIZE,
+    memory_region_init_ram_nomigrate(&s->rom, obj, "tcx.prom", FCODE_MAX_ROM_SIZE,
                            &error_fatal);
     memory_region_set_readonly(&s->rom, true);
     sysbus_init_mmio(sbd, &s->rom);
@@ -812,7 +812,7 @@ static void tcx_realizefn(DeviceState *dev, Error **errp)
     uint8_t *vram_base;
     char *fcode_filename;
 
-    memory_region_init_ram(&s->vram_mem, OBJECT(s), "tcx.vram",
+    memory_region_init_ram_nomigrate(&s->vram_mem, OBJECT(s), "tcx.vram",
                            s->vram_size * (1 + 4 + 4), &error_fatal);
     vmstate_register_ram_global(&s->vram_mem);
     memory_region_set_log(&s->vram_mem, true, DIRTY_MEMORY_VGA);
diff --git a/hw/display/vga.c b/hw/display/vga.c
index 80508b83f4..63421f9ee8 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -2166,7 +2166,7 @@ void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)
     }
 
     s->is_vbe_vmstate = 1;
-    memory_region_init_ram(&s->vram, obj, "vga.vram", s->vram_size,
+    memory_region_init_ram_nomigrate(&s->vram, obj, "vga.vram", s->vram_size,
                            &error_fatal);
     vmstate_register_ram(&s->vram, global_vmstate ? NULL : DEVICE(obj));
     xen_register_framebuffer(&s->vram);
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 0506d2c1b0..6aae147324 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1092,7 +1092,9 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
 
         dpy_gfx_replace_surface(scanout->con, scanout->ds);
         dpy_gfx_update(scanout->con, 0, 0, scanout->width, scanout->height);
-        update_cursor(g, &scanout->cursor);
+        if (scanout->cursor.resource_id) {
+            update_cursor(g, &scanout->cursor);
+        }
         res->scanout_bitmask |= (1 << i);
     }
 
diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
index c989cef1cd..4a64b41259 100644
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -1241,7 +1241,6 @@ static void vmsvga_init(DeviceState *dev, struct vmsvga_state_s *s,
     s->fifo_size = SVGA_FIFO_SIZE;
     memory_region_init_ram(&s->fifo_ram, NULL, "vmsvga.fifo", s->fifo_size,
                            &error_fatal);
-    vmstate_register_ram_global(&s->fifo_ram);
     s->fifo_ptr = memory_region_get_ram_ptr(&s->fifo_ram);
 
     vga_common_init(&s->vga, OBJECT(dev), true);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index bf541cafd6..22e16031b0 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1443,7 +1443,6 @@ void pc_memory_init(PCMachineState *pcms,
     option_rom_mr = g_malloc(sizeof(*option_rom_mr));
     memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(option_rom_mr);
     memory_region_add_subregion_overlap(rom_memory,
                                         PC_ROM_MIN_VGA,
                                         option_rom_mr,
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index f915ad0a36..6b183747fc 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -59,7 +59,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
     isa_bios = g_malloc(sizeof(*isa_bios));
     memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
                            &error_fatal);
-    vmstate_register_ram_global(isa_bios);
     memory_region_add_subregion_overlap(rom_memory,
                                         0x100000 - isa_bios_size,
                                         isa_bios,
@@ -196,7 +195,6 @@ static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw)
     }
     bios = g_malloc(sizeof(*bios));
     memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
-    vmstate_register_ram_global(bios);
     if (!isapc_ram_fw) {
         memory_region_set_readonly(bios, true);
     }
diff --git a/hw/i386/pci-assign-load-rom.c b/hw/i386/pci-assign-load-rom.c
index fd59076e7a..43429b66be 100644
--- a/hw/i386/pci-assign-load-rom.c
+++ b/hw/i386/pci-assign-load-rom.c
@@ -59,7 +59,7 @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner,
     fseek(fp, 0, SEEK_SET);
 
     snprintf(name, sizeof(name), "%s.rom", object_get_typename(owner));
-    memory_region_init_ram(&dev->rom, owner, name, st.st_size, &error_abort);
+    memory_region_init_ram_nomigrate(&dev->rom, owner, name, st.st_size, &error_abort);
     vmstate_register_ram(&dev->rom, &dev->qdev);
     ptr = memory_region_get_ram_ptr(&dev->rom);
     memset(ptr, 0xff, st.st_size);
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index cffa7e2017..3d951a3794 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -215,7 +215,6 @@ static void xen_ram_init(PCMachineState *pcms,
     memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
                            &error_fatal);
     *ram_memory_p = &ram_memory;
-    vmstate_register_ram_global(&ram_memory);
 
     memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
                              &ram_memory, 0, 0xa0000);
diff --git a/hw/input/milkymist-softusb.c b/hw/input/milkymist-softusb.c
index 40dfca157f..ef8f47cd83 100644
--- a/hw/input/milkymist-softusb.c
+++ b/hw/input/milkymist-softusb.c
@@ -256,12 +256,12 @@ static int milkymist_softusb_init(SysBusDevice *dev)
     sysbus_init_mmio(dev, &s->regs_region);
 
     /* register pmem and dmem */
-    memory_region_init_ram(&s->pmem, OBJECT(s), "milkymist-softusb.pmem",
+    memory_region_init_ram_nomigrate(&s->pmem, OBJECT(s), "milkymist-softusb.pmem",
                            s->pmem_size, &error_fatal);
     vmstate_register_ram_global(&s->pmem);
     s->pmem_ptr = memory_region_get_ram_ptr(&s->pmem);
     sysbus_init_mmio(dev, &s->pmem);
-    memory_region_init_ram(&s->dmem, OBJECT(s), "milkymist-softusb.dmem",
+    memory_region_init_ram_nomigrate(&s->dmem, OBJECT(s), "milkymist-softusb.dmem",
                            s->dmem_size, &error_fatal);
     vmstate_register_ram_global(&s->dmem);
     s->dmem_ptr = memory_region_get_ram_ptr(&s->dmem);
diff --git a/hw/m68k/an5206.c b/hw/m68k/an5206.c
index 142bab98c9..c76244176f 100644
--- a/hw/m68k/an5206.c
+++ b/hw/m68k/an5206.c
@@ -61,7 +61,6 @@ static void an5206_init(MachineState *machine)
 
     /* Internal SRAM.  */
     memory_region_init_ram(sram, NULL, "an5206.sram", 512, &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(address_space_mem, AN5206_RAMBAR_ADDR, sram);
 
     mcf5206_init(address_space_mem, AN5206_MBAR_ADDR, cpu);
diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c
index 656351834e..f4b1387c0d 100644
--- a/hw/m68k/mcf5208.c
+++ b/hw/m68k/mcf5208.c
@@ -249,7 +249,6 @@ static void mcf5208evb_init(MachineState *machine)
 
     /* Internal SRAM.  */
     memory_region_init_ram(sram, NULL, "mcf5208.sram", 16384, &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(address_space_mem, 0x80000000, sram);
 
     /* Internal peripherals.  */
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index 4968bdbb28..b664dc0f9c 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -98,12 +98,10 @@ petalogix_ml605_init(MachineState *machine)
     /* Attach emulated BRAM through the LMB.  */
     memory_region_init_ram(phys_lmb_bram, NULL, "petalogix_ml605.lmb_bram",
                            LMB_BRAM_SIZE, &error_fatal);
-    vmstate_register_ram_global(phys_lmb_bram);
     memory_region_add_subregion(address_space_mem, 0x00000000, phys_lmb_bram);
 
     memory_region_init_ram(phys_ram, NULL, "petalogix_ml605.ram", ram_size,
                            &error_fatal);
-    vmstate_register_ram_global(phys_ram);
     memory_region_add_subregion(address_space_mem, MEMORY_BASEADDR, phys_ram);
 
     dinfo = drive_get(IF_PFLASH, 0, 0);
diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index 423bcd7f6c..5cb4deb69e 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -78,12 +78,10 @@ petalogix_s3adsp1800_init(MachineState *machine)
     memory_region_init_ram(phys_lmb_bram, NULL,
                            "petalogix_s3adsp1800.lmb_bram", LMB_BRAM_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(phys_lmb_bram);
     memory_region_add_subregion(sysmem, 0x00000000, phys_lmb_bram);
 
     memory_region_init_ram(phys_ram, NULL, "petalogix_s3adsp1800.ram",
                            ram_size, &error_fatal);
-    vmstate_register_ram_global(phys_ram);
     memory_region_add_subregion(sysmem, ddr_base, phys_ram);
 
     dinfo = drive_get(IF_PFLASH, 0, 0);
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index 146be2ae74..7985c60dde 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -484,7 +484,7 @@ static void boston_mach_init(MachineState *machine)
     sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1);
 
     flash =  g_new(MemoryRegion, 1);
-    memory_region_init_rom_device(flash, NULL, &boston_flash_ops, s,
+    memory_region_init_rom_device_nomigrate(flash, NULL, &boston_flash_ops, s,
                                   "boston.flash", 128 * M_BYTE, &err);
     memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0);
 
diff --git a/hw/mips/mips_fulong2e.c b/hw/mips/mips_fulong2e.c
index dbe2805acb..3f3cb32651 100644
--- a/hw/mips/mips_fulong2e.c
+++ b/hw/mips/mips_fulong2e.c
@@ -296,7 +296,6 @@ static void mips_fulong2e_init(MachineState *machine)
     memory_region_allocate_system_memory(ram, NULL, "fulong2e.ram", ram_size);
     memory_region_init_ram(bios, NULL, "fulong2e.bios", bios_size,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
     memory_region_set_readonly(bios, true);
 
     memory_region_add_subregion(address_space_mem, 0, ram);
diff --git a/hw/mips/mips_jazz.c b/hw/mips/mips_jazz.c
index 1f69322c15..df2262a2a8 100644
--- a/hw/mips/mips_jazz.c
+++ b/hw/mips/mips_jazz.c
@@ -177,7 +177,6 @@ static void mips_jazz_init(MachineState *machine,
 
     memory_region_init_ram(bios, NULL, "mips_jazz.bios", MAGNUM_BIOS_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
     memory_region_set_readonly(bios, true);
     memory_region_init_alias(bios2, NULL, "mips_jazz.bios", bios,
                              0, MAGNUM_BIOS_SIZE);
@@ -244,7 +243,6 @@ static void mips_jazz_init(MachineState *machine,
             MemoryRegion *rom_mr = g_new(MemoryRegion, 1);
             memory_region_init_ram(rom_mr, NULL, "g364fb.rom", 0x80000,
                                    &error_fatal);
-            vmstate_register_ram_global(rom_mr);
             memory_region_set_readonly(rom_mr, true);
             uint8_t *rom = memory_region_get_ram_ptr(rom_mr);
             memory_region_add_subregion(address_space, 0x60000000, rom_mr);
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 8cb9d3c3ce..3487d16f61 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -1178,7 +1178,7 @@ void mips_malta_init(MachineState *machine)
      * handled by an overlapping region as the resulting ROM code subpage
      * regions are not executable.
      */
-    memory_region_init_ram(bios_copy, NULL, "bios.1fc", BIOS_SIZE,
+    memory_region_init_ram_nomigrate(bios_copy, NULL, "bios.1fc", BIOS_SIZE,
                            &error_fatal);
     if (!rom_copy(memory_region_get_ram_ptr(bios_copy),
                   FLASH_ADDRESS, BIOS_SIZE)) {
diff --git a/hw/mips/mips_mipssim.c b/hw/mips/mips_mipssim.c
index 1b91195006..6990b1b0dd 100644
--- a/hw/mips/mips_mipssim.c
+++ b/hw/mips/mips_mipssim.c
@@ -179,7 +179,6 @@ mips_mipssim_init(MachineState *machine)
                                          ram_size);
     memory_region_init_ram(bios, NULL, "mips_mipssim.bios", BIOS_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
     memory_region_set_readonly(bios, true);
 
     memory_region_add_subregion(address_space_mem, 0, ram);
diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c
index f4de9fc343..690874be2b 100644
--- a/hw/mips/mips_r4k.c
+++ b/hw/mips/mips_r4k.c
@@ -238,7 +238,6 @@ void mips_r4k_init(MachineState *machine)
         bios = g_new(MemoryRegion, 1);
         memory_region_init_ram(bios, NULL, "mips_r4k.bios", BIOS_SIZE,
                                &error_fatal);
-        vmstate_register_ram_global(bios);
         memory_region_set_readonly(bios, true);
         memory_region_add_subregion(get_system_memory(), 0x1fc00000, bios);
 
diff --git a/hw/moxie/moxiesim.c b/hw/moxie/moxiesim.c
index 3069834cf4..df3f1249ae 100644
--- a/hw/moxie/moxiesim.c
+++ b/hw/moxie/moxiesim.c
@@ -129,11 +129,9 @@ static void moxiesim_init(MachineState *machine)
 
     /* Allocate RAM. */
     memory_region_init_ram(ram, NULL, "moxiesim.ram", ram_size, &error_fatal);
-    vmstate_register_ram_global(ram);
     memory_region_add_subregion(address_space_mem, ram_base, ram);
 
-    memory_region_init_ram(rom, NULL, "moxie.rom", 128*0x1000, &error_fatal);
-    vmstate_register_ram_global(rom);
+    memory_region_init_ram(rom, NULL, "moxie.rom", 128 * 0x1000, &error_fatal);
     memory_region_add_subregion(get_system_memory(), 0x1000, rom);
 
     if (kernel_filename) {
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index b53fcaa8bc..f2d2ce344c 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -887,7 +887,7 @@ static void dp8393x_realize(DeviceState *dev, Error **errp)
     s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s);
     s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */
 
-    memory_region_init_ram(&s->prom, OBJECT(dev),
+    memory_region_init_ram_nomigrate(&s->prom, OBJECT(dev),
                            "dp8393x-prom", SONIC_PROM_SIZE, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/hw/net/milkymist-minimac2.c b/hw/net/milkymist-minimac2.c
index c3a12e1197..3eaa19dfde 100644
--- a/hw/net/milkymist-minimac2.c
+++ b/hw/net/milkymist-minimac2.c
@@ -466,7 +466,7 @@ static int milkymist_minimac2_init(SysBusDevice *sbd)
     sysbus_init_mmio(sbd, &s->regs_region);
 
     /* register buffers memory */
-    memory_region_init_ram(&s->buffers, OBJECT(dev), "milkymist-minimac2.buffers",
+    memory_region_init_ram_nomigrate(&s->buffers, OBJECT(dev), "milkymist-minimac2.buffers",
                            buffers_size, &error_fatal);
     vmstate_register_ram_global(&s->buffers);
     s->rx0_buf = memory_region_get_ram_ptr(&s->buffers);
diff --git a/hw/nios2/10m50_devboard.c b/hw/nios2/10m50_devboard.c
index 051be73e9a..b6868b8233 100644
--- a/hw/nios2/10m50_devboard.c
+++ b/hw/nios2/10m50_devboard.c
@@ -57,19 +57,19 @@ static void nios2_10m50_ghrd_init(MachineState *machine)
     int i;
 
     /* Physical TCM (tb_ram_1k) with alias at 0xc0000000 */
-    memory_region_init_ram(phys_tcm, NULL, "nios2.tcm", tcm_size, &error_abort);
+    memory_region_init_ram(phys_tcm, NULL, "nios2.tcm", tcm_size,
+                           &error_abort);
     memory_region_init_alias(phys_tcm_alias, NULL, "nios2.tcm.alias",
                              phys_tcm, 0, tcm_size);
-    vmstate_register_ram_global(phys_tcm);
     memory_region_add_subregion(address_space_mem, tcm_base, phys_tcm);
     memory_region_add_subregion(address_space_mem, 0xc0000000 + tcm_base,
                                 phys_tcm_alias);
 
     /* Physical DRAM with alias at 0xc0000000 */
-    memory_region_init_ram(phys_ram, NULL, "nios2.ram", ram_size, &error_abort);
+    memory_region_init_ram(phys_ram, NULL, "nios2.ram", ram_size,
+                           &error_abort);
     memory_region_init_alias(phys_ram_alias, NULL, "nios2.ram.alias",
                              phys_ram, 0, ram_size);
-    vmstate_register_ram_global(phys_ram);
     memory_region_add_subregion(address_space_mem, ram_base, phys_ram);
     memory_region_add_subregion(address_space_mem, 0xc0000000 + ram_base,
                                 phys_ram_alias);
diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c
index fc0d0967b7..e1eeffc490 100644
--- a/hw/openrisc/openrisc_sim.c
+++ b/hw/openrisc/openrisc_sim.c
@@ -120,7 +120,6 @@ static void openrisc_sim_init(MachineState *machine)
 
     ram = g_malloc(sizeof(*ram));
     memory_region_init_ram(ram, NULL, "openrisc.ram", ram_size, &error_fatal);
-    vmstate_register_ram_global(ram);
     memory_region_add_subregion(get_system_memory(), 0, ram);
 
     cpu_openrisc_pic_init(cpu);
diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c
index 900a6edfcf..8b293ba0f1 100644
--- a/hw/pci-host/prep.c
+++ b/hw/pci-host/prep.c
@@ -304,7 +304,7 @@ static void raven_realize(PCIDevice *d, Error **errp)
     d->config[0x0D] = 0x10; // latency_timer
     d->config[0x34] = 0x00; // capabilities_pointer
 
-    memory_region_init_ram(&s->bios, OBJECT(s), "bios", BIOS_SIZE,
+    memory_region_init_ram_nomigrate(&s->bios, OBJECT(s), "bios", BIOS_SIZE,
                            &error_fatal);
     memory_region_set_readonly(&s->bios, true);
     memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE),
diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c
index 2c78dcfc26..4613dda1d2 100644
--- a/hw/pci-host/xilinx-pcie.c
+++ b/hw/pci-host/xilinx-pcie.c
@@ -120,7 +120,7 @@ static void xilinx_pcie_host_realize(DeviceState *dev, Error **errp)
     memory_region_set_enabled(&s->mmio, false);
 
     /* dummy I/O region */
-    memory_region_init_ram(&s->io, OBJECT(s), "io", 16, NULL);
+    memory_region_init_ram_nomigrate(&s->io, OBJECT(s), "io", 16, NULL);
     memory_region_set_enabled(&s->io, false);
 
     /* interrupt out */
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 0c6f74a347..258fbe51e2 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2236,7 +2236,6 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
     }
     pdev->has_rom = true;
     memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, size, &error_fatal);
-    vmstate_register_ram(&pdev->rom, &pdev->qdev);
     ptr = memory_region_get_ram_ptr(&pdev->rom);
     load_image(path, ptr);
     g_free(path);
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index bae1c0ac99..3056d5f075 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -206,7 +206,6 @@ static void ppc_core99_init(MachineState *machine)
     /* allocate and load BIOS */
     memory_region_init_ram(bios, NULL, "ppc_core99.bios", BIOS_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
 
     if (bios_name == NULL)
         bios_name = PROM_FILENAME;
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 97bb8541d7..f2ae60a360 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -143,7 +143,6 @@ static void ppc_heathrow_init(MachineState *machine)
     /* allocate and load BIOS */
     memory_region_init_ram(bios, NULL, "ppc_heathrow.bios", BIOS_SIZE,
                            &error_fatal);
-    vmstate_register_ram_global(bios);
 
     if (bios_name == NULL)
         bios_name = PROM_FILENAME;
diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
index d01798f245..e92db2c66a 100644
--- a/hw/ppc/ppc405_boards.c
+++ b/hw/ppc/ppc405_boards.c
@@ -220,7 +220,6 @@ static void ref405ep_init(MachineState *machine)
     sram_size = 512 * 1024;
     memory_region_init_ram(sram, NULL, "ef405ep.sram", sram_size,
                            &error_fatal);
-    vmstate_register_ram_global(sram);
     memory_region_add_subregion(sysmem, 0xFFF00000, sram);
     /* allocate and load BIOS */
 #ifdef DEBUG_BOARD_INIT
@@ -255,7 +254,6 @@ static void ref405ep_init(MachineState *machine)
         bios = g_new(MemoryRegion, 1);
         memory_region_init_ram(bios, NULL, "ef405ep.bios", BIOS_SIZE,
                                &error_fatal);
-        vmstate_register_ram_global(bios);
 
         if (bios_name == NULL)
             bios_name = BIOS_FILENAME;
@@ -556,7 +554,6 @@ static void taihu_405ep_init(MachineState *machine)
         bios = g_new(MemoryRegion, 1);
         memory_region_init_ram(bios, NULL, "taihu_405ep.bios", BIOS_SIZE,
                                &error_fatal);
-        vmstate_register_ram_global(bios);
         filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
         if (filename) {
             bios_size = load_image(filename, memory_region_get_ram_ptr(bios));
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
index fc32e96bf4..f6fe3e6f5e 100644
--- a/hw/ppc/ppc405_uc.c
+++ b/hw/ppc/ppc405_uc.c
@@ -980,7 +980,6 @@ static void ppc405_ocm_init(CPUPPCState *env)
     /* XXX: Size is 4096 or 0x04000000 */
     memory_region_init_ram(&ocm->isarc_ram, NULL, "ppc405.ocm", 4096,
                            &error_fatal);
-    vmstate_register_ram_global(&ocm->isarc_ram);
     memory_region_init_alias(&ocm->dsarc_ram, NULL, "ppc405.dsarc", &ocm->isarc_ram,
                              0, 4096);
     qemu_register_reset(&ocm_reset, ocm);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 16638ce80c..970093e6b5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -98,8 +98,6 @@
 
 #define PHANDLE_XICP            0x00001111
 
-#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
-
 static ICSState *spapr_ics_create(sPAPRMachineState *spapr,
                                   const char *type_ics,
                                   int nr_irqs, Error **errp)
@@ -874,6 +872,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
     if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
         add_str(hypertas, "hcall-multi-tce");
     }
+
+    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
+        add_str(hypertas, "hcall-hpt-resize");
+    }
+
     _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
                      hypertas->str, hypertas->len));
     g_string_free(hypertas, TRUE);
@@ -1264,7 +1267,7 @@ static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
     }
 }
 
-static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
+int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
 {
     int shift;
 
@@ -1285,8 +1288,8 @@ void spapr_free_hpt(sPAPRMachineState *spapr)
     close_htab_fd(spapr);
 }
 
-static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
-                                 Error **errp)
+void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
+                          Error **errp)
 {
     long rc;
 
@@ -1334,9 +1337,17 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
 
 void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
 {
-    spapr_reallocate_hpt(spapr,
-                     spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size),
-                     &error_fatal);
+    int hpt_shift;
+
+    if ((spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED)
+        || (spapr->cas_reboot
+            && !spapr_ovec_test(spapr->ov5_cas, OV5_HPT_RESIZE))) {
+        hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
+    } else {
+        hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->ram_size);
+    }
+    spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal);
+
     if (spapr->vrma_adjust) {
         spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
                                           spapr->htab_shift);
@@ -1517,6 +1528,37 @@ static bool version_before_3(void *opaque, int version_id)
     return version_id < 3;
 }
 
+static bool spapr_pending_events_needed(void *opaque)
+{
+    sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
+    return !QTAILQ_EMPTY(&spapr->pending_events);
+}
+
+static const VMStateDescription vmstate_spapr_event_entry = {
+    .name = "spapr_event_log_entry",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(summary, sPAPREventLogEntry),
+        VMSTATE_UINT32(extended_length, sPAPREventLogEntry),
+        VMSTATE_VBUFFER_ALLOC_UINT32(extended_log, sPAPREventLogEntry, 0,
+                                     NULL, extended_length),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static const VMStateDescription vmstate_spapr_pending_events = {
+    .name = "spapr_pending_events",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_pending_events_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_QTAILQ_V(pending_events, sPAPRMachineState, 1,
+                         vmstate_spapr_event_entry, sPAPREventLogEntry, next),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static bool spapr_ov5_cas_needed(void *opaque)
 {
     sPAPRMachineState *spapr = opaque;
@@ -1615,6 +1657,7 @@ static const VMStateDescription vmstate_spapr = {
     .subsections = (const VMStateDescription*[]) {
         &vmstate_spapr_ov5_cas,
         &vmstate_spapr_patb_entry,
+        &vmstate_spapr_pending_events,
         NULL
     }
 };
@@ -2116,12 +2159,41 @@ static void ppc_spapr_init(MachineState *machine)
     hwaddr node0_size = spapr_node0_size();
     long load_limit, fw_size;
     char *filename;
+    Error *resize_hpt_err = NULL;
 
     msi_nonbroken = true;
 
     QLIST_INIT(&spapr->phbs);
     QTAILQ_INIT(&spapr->pending_dimm_unplugs);
 
+    /* Check HPT resizing availability */
+    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
+        /*
+         * If the user explicitly requested a mode we should either
+         * supply it, or fail completely (which we do below).  But if
+         * it's not set explicitly, we reset our mode to something
+         * that works
+         */
+        if (resize_hpt_err) {
+            spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
+            error_free(resize_hpt_err);
+            resize_hpt_err = NULL;
+        } else {
+            spapr->resize_hpt = smc->resize_hpt_default;
+        }
+    }
+
+    assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
+
+    if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
+        /*
+         * User requested HPT resize, but this host can't supply it.  Bail out
+         */
+        error_report_err(resize_hpt_err);
+        exit(1);
+    }
+
     /* Allocate RMA if necessary */
     rma_alloc_size = kvmppc_alloc_rma(&rma);
 
@@ -2190,6 +2262,11 @@ static void ppc_spapr_init(MachineState *machine)
         spapr_ovec_set(spapr->ov5, OV5_HP_EVT);
     }
 
+    /* advertise support for HPT resizing */
+    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
+        spapr_ovec_set(spapr->ov5, OV5_HPT_RESIZE);
+    }
+
     /* init CPUs */
     if (machine->cpu_model == NULL) {
         machine->cpu_model = kvm_enabled() ? "host" : smc->tcg_default_cpu;
@@ -2547,6 +2624,40 @@ static void spapr_set_modern_hotplug_events(Object *obj, bool value,
     spapr->use_hotplug_event_source = value;
 }
 
+static char *spapr_get_resize_hpt(Object *obj, Error **errp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+    switch (spapr->resize_hpt) {
+    case SPAPR_RESIZE_HPT_DEFAULT:
+        return g_strdup("default");
+    case SPAPR_RESIZE_HPT_DISABLED:
+        return g_strdup("disabled");
+    case SPAPR_RESIZE_HPT_ENABLED:
+        return g_strdup("enabled");
+    case SPAPR_RESIZE_HPT_REQUIRED:
+        return g_strdup("required");
+    }
+    g_assert_not_reached();
+}
+
+static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+    if (strcmp(value, "default") == 0) {
+        spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
+    } else if (strcmp(value, "disabled") == 0) {
+        spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
+    } else if (strcmp(value, "enabled") == 0) {
+        spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
+    } else if (strcmp(value, "required") == 0) {
+        spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
+    } else {
+        error_setg(errp, "Bad value for \"resize-hpt\" property");
+    }
+}
+
 static void spapr_machine_initfn(Object *obj)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
@@ -2571,6 +2682,12 @@ static void spapr_machine_initfn(Object *obj)
     ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr,
                             "Maximum permitted CPU compatibility mode",
                             &error_fatal);
+
+    object_property_add_str(obj, "resize-hpt",
+                            spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
+    object_property_set_description(obj, "resize-hpt",
+                                    "Resizing of the Hash Page Table (enabled, disabled, required)",
+                                    NULL);
 }
 
 static void spapr_machine_finalizefn(Object *obj)
@@ -2604,6 +2721,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
     int i, fdt_offset, fdt_size;
     void *fdt;
     uint64_t addr = addr_start;
+    bool hotplugged = spapr_drc_hotplugged(dev);
     Error *local_err = NULL;
 
     for (i = 0; i < nr_lmbs; i++) {
@@ -2621,18 +2739,21 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
                 addr -= SPAPR_MEMORY_BLOCK_SIZE;
                 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
                                       addr / SPAPR_MEMORY_BLOCK_SIZE);
-                spapr_drc_detach(drc, dev, NULL);
+                spapr_drc_detach(drc);
             }
             g_free(fdt);
             error_propagate(errp, local_err);
             return;
         }
+        if (!hotplugged) {
+            spapr_drc_reset(drc);
+        }
         addr += SPAPR_MEMORY_BLOCK_SIZE;
     }
     /* send hotplug notification to the
      * guest only in case of hotplugged memory
      */
-    if (dev->hotplugged) {
+    if (hotplugged) {
         if (dedicated_hp_event_source) {
             drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
                                   addr_start / SPAPR_MEMORY_BLOCK_SIZE);
@@ -2780,8 +2901,10 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
 /* Callback to be called during DRC release. */
 void spapr_lmb_release(DeviceState *dev)
 {
-    HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
-    sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl);
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_hotplug_handler(dev));
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    MemoryRegion *mr = ddc->get_memory_region(dimm);
     sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
 
     /* This information will get lost if a migration occurs
@@ -2802,18 +2925,7 @@ void spapr_lmb_release(DeviceState *dev)
      * Now that all the LMBs have been removed by the guest, call the
      * pc-dimm unplug handler to cleanup up the pc-dimm device.
      */
-    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
-}
-
-static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
-                                Error **errp)
-{
-    sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
-    PCDIMMDevice *dimm = PC_DIMM(dev);
-    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
-    MemoryRegion *mr = ddc->get_memory_region(dimm);
-
-    pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
+    pc_dimm_memory_unplug(dev, &spapr->hotplug_memory, mr);
     object_unparent(OBJECT(dev));
 }
 
@@ -2849,7 +2961,7 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
                               addr / SPAPR_MEMORY_BLOCK_SIZE);
         g_assert(drc);
 
-        spapr_drc_detach(drc, dev, errp);
+        spapr_drc_detach(drc);
         addr += SPAPR_MEMORY_BLOCK_SIZE;
     }
 
@@ -2882,10 +2994,10 @@ static void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset,
     return fdt;
 }
 
-static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
-                              Error **errp)
+/* Callback to be called during DRC release. */
+void spapr_core_release(DeviceState *dev)
 {
-    MachineState *ms = MACHINE(qdev_get_machine());
+    MachineState *ms = MACHINE(qdev_get_hotplug_handler(dev));
     sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
     CPUCore *cc = CPU_CORE(dev);
     CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL);
@@ -2909,22 +3021,12 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     object_unparent(OBJECT(dev));
 }
 
-/* Callback to be called during DRC release. */
-void spapr_core_release(DeviceState *dev)
-{
-    HotplugHandler *hotplug_ctrl;
-
-    hotplug_ctrl = qdev_get_hotplug_handler(dev);
-    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
-}
-
 static
 void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
                                Error **errp)
 {
     int index;
     sPAPRDRConnector *drc;
-    Error *local_err = NULL;
     CPUCore *cc = CPU_CORE(dev);
     int smt = kvmppc_smt_threads();
 
@@ -2941,11 +3043,7 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
     drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index * smt);
     g_assert(drc);
 
-    spapr_drc_detach(drc, dev, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
+    spapr_drc_detach(drc);
 
     spapr_hotplug_req_remove_by_index(drc);
 }
@@ -2961,11 +3059,10 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     CPUState *cs = CPU(core->threads);
     sPAPRDRConnector *drc;
     Error *local_err = NULL;
-    void *fdt = NULL;
-    int fdt_offset = 0;
     int smt = kvmppc_smt_threads();
     CPUArchId *core_slot;
     int index;
+    bool hotplugged = spapr_drc_hotplugged(dev);
 
     core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
     if (!core_slot) {
@@ -2977,24 +3074,30 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     g_assert(drc || !mc->has_hotpluggable_cpus);
 
-    fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr);
-
     if (drc) {
+        void *fdt;
+        int fdt_offset;
+
+        fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr);
+
         spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
         if (local_err) {
             g_free(fdt);
             error_propagate(errp, local_err);
             return;
         }
-    }
 
-    if (dev->hotplugged) {
-        /*
-         * Send hotplug notification interrupt to the guest only in case
-         * of hotplugged CPUs.
-         */
-        spapr_hotplug_req_add_by_index(drc);
+        if (hotplugged) {
+            /*
+             * Send hotplug notification interrupt to the guest only
+             * in case of hotplugged CPUs.
+             */
+            spapr_hotplug_req_add_by_index(drc);
+        } else {
+            spapr_drc_reset(drc);
+        }
     }
+
     core_slot->cpu = OBJECT(dev);
 
     if (smc->pre_2_10_has_unused_icps) {
@@ -3047,9 +3150,9 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
      * total vcpus not a multiple of threads-per-core.
      */
     if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) {
-        error_setg(errp, "invalid nr-threads %d, must be %d",
+        error_setg(&local_err, "invalid nr-threads %d, must be %d",
                    cc->nr_threads, smp_threads);
-        return;
+        goto out;
     }
 
     core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
@@ -3119,27 +3222,6 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
     }
 }
 
-static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
-                                      DeviceState *dev, Error **errp)
-{
-    sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine());
-    MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
-
-    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
-        if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
-            spapr_memory_unplug(hotplug_dev, dev, errp);
-        } else {
-            error_setg(errp, "Memory hot unplug not supported for this guest");
-        }
-    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
-        if (!mc->has_hotpluggable_cpus) {
-            error_setg(errp, "CPU hot unplug not supported on this machine");
-            return;
-        }
-        spapr_core_unplug(hotplug_dev, dev, errp);
-    }
-}
-
 static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
                                                 DeviceState *dev, Error **errp)
 {
@@ -3357,7 +3439,6 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     mc->get_hotplug_handler = spapr_get_hotplug_handler;
     hc->pre_plug = spapr_machine_device_pre_plug;
     hc->plug = spapr_machine_device_plug;
-    hc->unplug = spapr_machine_device_unplug;
     mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
     mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
     hc->unplug_request = spapr_machine_device_unplug_request;
@@ -3365,6 +3446,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->dr_lmb_enabled = true;
     smc->tcg_default_cpu = "POWER8";
     mc->has_hotpluggable_cpus = true;
+    smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
     fwc->get_dev_path = spapr_get_fw_dev_path;
     nc->nmi_monitor_handler = spapr_nmi;
     smc->phb_placement = spapr_phb_placement;
@@ -3471,6 +3553,7 @@ static void spapr_machine_2_9_class_options(MachineClass *mc)
     SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9);
     mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
     smc->pre_2_10_has_unused_icps = true;
+    smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
 }
 
 DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index f34355dad1..0ffcec6fb2 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -48,40 +48,40 @@ uint32_t spapr_drc_index(sPAPRDRConnector *drc)
 
 static uint32_t drc_isolate_physical(sPAPRDRConnector *drc)
 {
-    /* if the guest is configuring a device attached to this DRC, we
-     * should reset the configuration state at this point since it may
-     * no longer be reliable (guest released device and needs to start
-     * over, or unplug occurred so the FDT is no longer valid)
-     */
-    g_free(drc->ccs);
-    drc->ccs = NULL;
+    switch (drc->state) {
+    case SPAPR_DRC_STATE_PHYSICAL_POWERON:
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED:
+        break; /* see below */
+    case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE:
+        return RTAS_OUT_PARAM_ERROR; /* not allowed */
+    default:
+        g_assert_not_reached();
+    }
 
-    drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED;
+    drc->state = SPAPR_DRC_STATE_PHYSICAL_POWERON;
 
-    /* if we're awaiting release, but still in an unconfigured state,
-     * it's likely the guest is still in the process of configuring
-     * the device and is transitioning the devices to an ISOLATED
-     * state as a part of that process. so we only complete the
-     * removal when this transition happens for a device in a
-     * configured state, as suggested by the state diagram from PAPR+
-     * 2.7, 13.4
-     */
-    if (drc->awaiting_release) {
+    if (drc->unplug_requested) {
         uint32_t drc_index = spapr_drc_index(drc);
-        if (drc->configured) {
-            trace_spapr_drc_set_isolation_state_finalizing(drc_index);
-            spapr_drc_detach(drc, DEVICE(drc->dev), NULL);
-        } else {
-            trace_spapr_drc_set_isolation_state_deferring(drc_index);
-        }
+        trace_spapr_drc_set_isolation_state_finalizing(drc_index);
+        spapr_drc_detach(drc);
     }
-    drc->configured = false;
 
     return RTAS_OUT_SUCCESS;
 }
 
 static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc)
 {
+    switch (drc->state) {
+    case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE:
+    case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED:
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    case SPAPR_DRC_STATE_PHYSICAL_POWERON:
+        break; /* see below */
+    default:
+        g_assert_not_reached();
+    }
+
     /* cannot unisolate a non-existent resource, and, or resources
      * which are in an 'UNUSABLE' allocation state. (PAPR 2.7,
      * 13.5.3.5)
@@ -90,20 +90,26 @@ static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc)
         return RTAS_OUT_NO_SUCH_INDICATOR;
     }
 
-    drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
+    drc->state = SPAPR_DRC_STATE_PHYSICAL_UNISOLATE;
+    drc->ccs_offset = drc->fdt_start_offset;
+    drc->ccs_depth = 0;
 
     return RTAS_OUT_SUCCESS;
 }
 
 static uint32_t drc_isolate_logical(sPAPRDRConnector *drc)
 {
-    /* if the guest is configuring a device attached to this DRC, we
-     * should reset the configuration state at this point since it may
-     * no longer be reliable (guest released device and needs to start
-     * over, or unplug occurred so the FDT is no longer valid)
-     */
-    g_free(drc->ccs);
-    drc->ccs = NULL;
+    switch (drc->state) {
+    case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
+    case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
+        break; /* see below */
+    case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
+        return RTAS_OUT_PARAM_ERROR; /* not allowed */
+    default:
+        g_assert_not_reached();
+    }
 
     /*
      * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't
@@ -116,11 +122,11 @@ static uint32_t drc_isolate_logical(sPAPRDRConnector *drc)
      * actually being unplugged, fail the isolation request here.
      */
     if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB
-        && !drc->awaiting_release) {
+        && !drc->unplug_requested) {
         return RTAS_OUT_HW_ERROR;
     }
 
-    drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED;
+    drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE;
 
     /* if we're awaiting release, but still in an unconfigured state,
      * it's likely the guest is still in the process of configuring
@@ -130,38 +136,51 @@ static uint32_t drc_isolate_logical(sPAPRDRConnector *drc)
      * configured state, as suggested by the state diagram from PAPR+
      * 2.7, 13.4
      */
-    if (drc->awaiting_release) {
+    if (drc->unplug_requested) {
         uint32_t drc_index = spapr_drc_index(drc);
-        if (drc->configured) {
-            trace_spapr_drc_set_isolation_state_finalizing(drc_index);
-            spapr_drc_detach(drc, DEVICE(drc->dev), NULL);
-        } else {
-            trace_spapr_drc_set_isolation_state_deferring(drc_index);
-        }
+        trace_spapr_drc_set_isolation_state_finalizing(drc_index);
+        spapr_drc_detach(drc);
     }
-    drc->configured = false;
-
     return RTAS_OUT_SUCCESS;
 }
 
 static uint32_t drc_unisolate_logical(sPAPRDRConnector *drc)
 {
-    /* cannot unisolate a non-existent resource, and, or resources
-     * which are in an 'UNUSABLE' allocation state. (PAPR 2.7,
-     * 13.5.3.5)
-     */
-    if (!drc->dev ||
-        drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
-        return RTAS_OUT_NO_SUCH_INDICATOR;
+    switch (drc->state) {
+    case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
+    case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
+        break; /* see below */
+    case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
+        return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */
+    default:
+        g_assert_not_reached();
     }
 
-    drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
+    /* Move to AVAILABLE state should have ensured device was present */
+    g_assert(drc->dev);
+
+    drc->state = SPAPR_DRC_STATE_LOGICAL_UNISOLATE;
+    drc->ccs_offset = drc->fdt_start_offset;
+    drc->ccs_depth = 0;
 
     return RTAS_OUT_SUCCESS;
 }
 
 static uint32_t drc_set_usable(sPAPRDRConnector *drc)
 {
+    switch (drc->state) {
+    case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
+    case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
+    case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
+        break; /* see below */
+    default:
+        g_assert_not_reached();
+    }
+
     /* if there's no resource/device associated with the DRC, there's
      * no way for us to put it in an allocation state consistent with
      * being 'USABLE'. PAPR 2.7, 13.5.3.4 documents that this should
@@ -170,30 +189,36 @@ static uint32_t drc_set_usable(sPAPRDRConnector *drc)
     if (!drc->dev) {
         return RTAS_OUT_NO_SUCH_INDICATOR;
     }
-    if (drc->awaiting_release && drc->awaiting_allocation) {
-        /* kernel is acknowledging a previous hotplug event
-         * while we are already removing it.
-         * it's safe to ignore awaiting_allocation here since we know the
-         * situation is predicated on the guest either already having done
-         * so (boot-time hotplug), or never being able to acquire in the
-         * first place (hotplug followed by immediate unplug).
-         */
+    if (drc->unplug_requested) {
+        /* Don't allow the guest to move a device away from UNUSABLE
+         * state when we want to unplug it */
         return RTAS_OUT_NO_SUCH_INDICATOR;
     }
 
-    drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
-    drc->awaiting_allocation = false;
+    drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE;
 
     return RTAS_OUT_SUCCESS;
 }
 
 static uint32_t drc_set_unusable(sPAPRDRConnector *drc)
 {
-    drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE;
-    if (drc->awaiting_release) {
+    switch (drc->state) {
+    case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
+        break; /* see below */
+    case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
+    case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
+        return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */
+    default:
+        g_assert_not_reached();
+    }
+
+    drc->state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE;
+    if (drc->unplug_requested) {
         uint32_t drc_index = spapr_drc_index(drc);
         trace_spapr_drc_set_allocation_state_finalizing(drc_index);
-        spapr_drc_detach(drc, DEVICE(drc->dev), NULL);
+        spapr_drc_detach(drc);
     }
 
     return RTAS_OUT_SUCCESS;
@@ -247,11 +272,16 @@ static sPAPRDREntitySense physical_entity_sense(sPAPRDRConnector *drc)
 
 static sPAPRDREntitySense logical_entity_sense(sPAPRDRConnector *drc)
 {
-    if (drc->dev
-        && (drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE)) {
-        return SPAPR_DR_ENTITY_SENSE_PRESENT;
-    } else {
+    switch (drc->state) {
+    case SPAPR_DRC_STATE_LOGICAL_UNUSABLE:
         return SPAPR_DR_ENTITY_SENSE_UNUSABLE;
+    case SPAPR_DRC_STATE_LOGICAL_AVAILABLE:
+    case SPAPR_DRC_STATE_LOGICAL_UNISOLATE:
+    case SPAPR_DRC_STATE_LOGICAL_CONFIGURED:
+        g_assert(drc->dev);
+        return SPAPR_DR_ENTITY_SENSE_PRESENT;
+    default:
+        g_assert_not_reached();
     }
 }
 
@@ -344,23 +374,18 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
 {
     trace_spapr_drc_attach(spapr_drc_index(drc));
 
-    if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) {
+    if (drc->dev) {
         error_setg(errp, "an attached device is still awaiting release");
         return;
     }
-    if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) {
-        g_assert(drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE);
-    }
+    g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE)
+             || (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON));
     g_assert(fdt);
 
     drc->dev = d;
     drc->fdt = fdt;
     drc->fdt_start_offset = fdt_start_offset;
 
-    if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
-        drc->awaiting_allocation = true;
-    }
-
     object_property_add_link(OBJECT(drc), "device",
                              object_get_typename(OBJECT(drc->dev)),
                              (Object **)(&drc->dev),
@@ -373,85 +398,65 @@ static void spapr_drc_release(sPAPRDRConnector *drc)
 
     drck->release(drc->dev);
 
-    drc->awaiting_release = false;
+    drc->unplug_requested = false;
     g_free(drc->fdt);
     drc->fdt = NULL;
     drc->fdt_start_offset = 0;
-    object_property_del(OBJECT(drc), "device", NULL);
+    object_property_del(OBJECT(drc), "device", &error_abort);
     drc->dev = NULL;
 }
 
-void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp)
+void spapr_drc_detach(sPAPRDRConnector *drc)
 {
+    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
     trace_spapr_drc_detach(spapr_drc_index(drc));
 
-    if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) {
-        trace_spapr_drc_awaiting_isolated(spapr_drc_index(drc));
-        drc->awaiting_release = true;
-        return;
-    }
+    g_assert(drc->dev);
 
-    if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI &&
-        drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
-        trace_spapr_drc_awaiting_unusable(spapr_drc_index(drc));
-        drc->awaiting_release = true;
-        return;
-    }
+    drc->unplug_requested = true;
 
-    if (drc->awaiting_allocation) {
-        drc->awaiting_release = true;
-        trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
+    if (drc->state != drck->empty_state) {
+        trace_spapr_drc_awaiting_quiesce(spapr_drc_index(drc));
         return;
     }
 
     spapr_drc_release(drc);
 }
 
-static bool release_pending(sPAPRDRConnector *drc)
+void spapr_drc_reset(sPAPRDRConnector *drc)
 {
-    return drc->awaiting_release;
-}
-
-static void drc_reset(void *opaque)
-{
-    sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(opaque);
+    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 
     trace_spapr_drc_reset(spapr_drc_index(drc));
 
-    g_free(drc->ccs);
-    drc->ccs = NULL;
-
     /* immediately upon reset we can safely assume DRCs whose devices
      * are pending removal can be safely removed.
      */
-    if (drc->awaiting_release) {
+    if (drc->unplug_requested) {
         spapr_drc_release(drc);
     }
 
-    drc->awaiting_allocation = false;
-
     if (drc->dev) {
-        /* A device present at reset is coldplugged */
-        drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
-        if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
-            drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
-        }
-        drc->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE;
+        /* A device present at reset is ready to go, same as coldplugged */
+        drc->state = drck->ready_state;
     } else {
-        /* Otherwise device is absent, but might be hotplugged */
-        drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED;
-        if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
-            drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE;
-        }
-        drc->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE;
+        drc->state = drck->empty_state;
     }
+
+    drc->ccs_offset = -1;
+    drc->ccs_depth = -1;
+}
+
+static void drc_reset(void *opaque)
+{
+    spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque));
 }
 
 static bool spapr_drc_needed(void *opaque)
 {
     sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
     sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-    bool rc = false;
     sPAPRDREntitySense value = drck->dr_entity_sense(drc);
 
     /* If no dev is plugged in there is no need to migrate the DRC state */
@@ -460,23 +465,10 @@ static bool spapr_drc_needed(void *opaque)
     }
 
     /*
-     * If there is dev plugged in, we need to migrate the DRC state when
-     * it is different from cold-plugged state
-     */
-    switch (spapr_drc_type(drc)) {
-    case SPAPR_DR_CONNECTOR_TYPE_PCI:
-    case SPAPR_DR_CONNECTOR_TYPE_CPU:
-    case SPAPR_DR_CONNECTOR_TYPE_LMB:
-        rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) &&
-               (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) &&
-               drc->configured && !drc->awaiting_release);
-        break;
-    case SPAPR_DR_CONNECTOR_TYPE_PHB:
-    case SPAPR_DR_CONNECTOR_TYPE_VIO:
-    default:
-        g_assert_not_reached();
-    }
-    return rc;
+     * We need to migrate the state if it's not equal to the expected
+     * long-term state, which is the same as the coldplugged initial
+     * state */
+    return (drc->state != drck->ready_state);
 }
 
 static const VMStateDescription vmstate_spapr_drc = {
@@ -485,12 +477,7 @@ static const VMStateDescription vmstate_spapr_drc = {
     .minimum_version_id = 1,
     .needed = spapr_drc_needed,
     .fields  = (VMStateField []) {
-        VMSTATE_UINT32(isolation_state, sPAPRDRConnector),
-        VMSTATE_UINT32(allocation_state, sPAPRDRConnector),
-        VMSTATE_UINT32(dr_indicator, sPAPRDRConnector),
-        VMSTATE_BOOL(configured, sPAPRDRConnector),
-        VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
-        VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
+        VMSTATE_UINT32(state, sPAPRDRConnector),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -559,46 +546,96 @@ sPAPRDRConnector *spapr_dr_connector_new(Object *owner, const char *type,
     object_property_set_bool(OBJECT(drc), true, "realized", NULL);
     g_free(prop_name);
 
-    /* PCI slot always start in a USABLE state, and stay there */
-    if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) {
-        drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
-    }
-
     return drc;
 }
 
 static void spapr_dr_connector_instance_init(Object *obj)
 {
     sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj);
+    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 
     object_property_add_uint32_ptr(obj, "id", &drc->id, NULL);
     object_property_add(obj, "index", "uint32", prop_get_index,
                         NULL, NULL, NULL, NULL);
     object_property_add(obj, "fdt", "struct", prop_get_fdt,
                         NULL, NULL, NULL, NULL);
+    drc->state = drck->empty_state;
 }
 
 static void spapr_dr_connector_class_init(ObjectClass *k, void *data)
 {
     DeviceClass *dk = DEVICE_CLASS(k);
-    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k);
 
     dk->realize = realize;
     dk->unrealize = unrealize;
-    drck->release_pending = release_pending;
     /*
      * Reason: it crashes FIXME find and document the real reason
      */
     dk->user_creatable = false;
 }
 
+/* Migrate the indicator only when it differs from what reset would set */
+static bool drc_physical_needed(void *opaque)
+{
+    sPAPRDRCPhysical *drcp = (sPAPRDRCPhysical *)opaque;
+    sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(drcp);
+    uint32_t expected = drc->dev ? SPAPR_DR_INDICATOR_ACTIVE
+                                 : SPAPR_DR_INDICATOR_INACTIVE;
+
+    /* ACTIVE with a device, INACTIVE without: nothing worth sending */
+    return drcp->dr_indicator != expected;
+}
+
+static const VMStateDescription vmstate_spapr_drc_physical = {
+    .name = "spapr_drc/physical",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = drc_physical_needed, /* omitted when indicator is default */
+    .fields  = (VMStateField []) {
+        VMSTATE_UINT32(dr_indicator, sPAPRDRCPhysical), /* only field sent */
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Reset hook: the indicator simply mirrors whether a device is attached */
+static void drc_physical_reset(void *opaque)
+{
+    sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(opaque);
+    sPAPRDRCPhysical *drcp = SPAPR_DRC_PHYSICAL(drc);
+    bool populated = drc->dev != NULL;
+
+    /* ACTIVE for a populated connector, INACTIVE for an empty one */
+    drcp->dr_indicator = populated ? SPAPR_DR_INDICATOR_ACTIVE
+                                   : SPAPR_DR_INDICATOR_INACTIVE;
+}
+
+static void realize_physical(DeviceState *d, Error **errp)
+{
+    sPAPRDRCPhysical *drcp = SPAPR_DRC_PHYSICAL(d);
+    Error *local_err = NULL;
+    /* Run the common DRC realize first; hand any error to the caller */
+    realize(d, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    /* Then register the physical-only vmstate section and reset handler */
+    vmstate_register(DEVICE(drcp), spapr_drc_index(SPAPR_DR_CONNECTOR(drcp)),
+                     &vmstate_spapr_drc_physical, drcp);
+    qemu_register_reset(drc_physical_reset, drcp);
+}
+
 static void spapr_drc_physical_class_init(ObjectClass *k, void *data)
 {
+    DeviceClass *dk = DEVICE_CLASS(k);
     sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k);
 
+    dk->realize = realize_physical;
     drck->dr_entity_sense = physical_entity_sense;
     drck->isolate = drc_isolate_physical;
     drck->unisolate = drc_unisolate_physical;
+    drck->ready_state = SPAPR_DRC_STATE_PHYSICAL_CONFIGURED;
+    drck->empty_state = SPAPR_DRC_STATE_PHYSICAL_POWERON;
 }
 
 static void spapr_drc_logical_class_init(ObjectClass *k, void *data)
@@ -608,6 +645,8 @@ static void spapr_drc_logical_class_init(ObjectClass *k, void *data)
     drck->dr_entity_sense = logical_entity_sense;
     drck->isolate = drc_isolate_logical;
     drck->unisolate = drc_unisolate_logical;
+    drck->ready_state = SPAPR_DRC_STATE_LOGICAL_CONFIGURED;
+    drck->empty_state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE;
 }
 
 static void spapr_drc_cpu_class_init(ObjectClass *k, void *data)
@@ -653,7 +692,7 @@ static const TypeInfo spapr_dr_connector_info = {
 static const TypeInfo spapr_drc_physical_info = {
     .name          = TYPE_SPAPR_DRC_PHYSICAL,
     .parent        = TYPE_SPAPR_DR_CONNECTOR,
-    .instance_size = sizeof(sPAPRDRConnector),
+    .instance_size = sizeof(sPAPRDRCPhysical),
     .class_init    = spapr_drc_physical_class_init,
     .abstract      = true,
 };
@@ -661,7 +700,6 @@ static const TypeInfo spapr_drc_physical_info = {
 static const TypeInfo spapr_drc_logical_info = {
     .name          = TYPE_SPAPR_DRC_LOGICAL,
     .parent        = TYPE_SPAPR_DR_CONNECTOR,
-    .instance_size = sizeof(sPAPRDRConnector),
     .class_init    = spapr_drc_logical_class_init,
     .abstract      = true,
 };
@@ -669,21 +707,18 @@ static const TypeInfo spapr_drc_logical_info = {
 static const TypeInfo spapr_drc_cpu_info = {
     .name          = TYPE_SPAPR_DRC_CPU,
     .parent        = TYPE_SPAPR_DRC_LOGICAL,
-    .instance_size = sizeof(sPAPRDRConnector),
     .class_init    = spapr_drc_cpu_class_init,
 };
 
 static const TypeInfo spapr_drc_pci_info = {
     .name          = TYPE_SPAPR_DRC_PCI,
     .parent        = TYPE_SPAPR_DRC_PHYSICAL,
-    .instance_size = sizeof(sPAPRDRConnector),
     .class_init    = spapr_drc_pci_class_init,
 };
 
 static const TypeInfo spapr_drc_lmb_info = {
     .name          = TYPE_SPAPR_DRC_LMB,
     .parent        = TYPE_SPAPR_DRC_LOGICAL,
-    .instance_size = sizeof(sPAPRDRConnector),
     .class_init    = spapr_drc_lmb_class_init,
 };
 
@@ -896,12 +931,18 @@ static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state)
 {
     sPAPRDRConnector *drc = spapr_drc_by_index(idx);
 
-    if (!drc) {
-        return RTAS_OUT_PARAM_ERROR;
+    if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_PHYSICAL)) {
+        return RTAS_OUT_NO_SUCH_INDICATOR;
+    }
+    if ((state != SPAPR_DR_INDICATOR_INACTIVE)
+        && (state != SPAPR_DR_INDICATOR_ACTIVE)
+        && (state != SPAPR_DR_INDICATOR_IDENTIFY)
+        && (state != SPAPR_DR_INDICATOR_ACTION)) {
+        return RTAS_OUT_PARAM_ERROR; /* bad state parameter */
     }
 
     trace_spapr_drc_set_dr_indicator(idx, state);
-    drc->dr_indicator = state;
+    SPAPR_DRC_PHYSICAL(drc)->dr_indicator = state;
     return RTAS_OUT_SUCCESS;
 }
 
@@ -1011,7 +1052,7 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
     uint64_t wa_offset;
     uint32_t drc_index;
     sPAPRDRConnector *drc;
-    sPAPRConfigureConnectorState *ccs;
+    sPAPRDRConnectorClass *drck;
     sPAPRDRCCResponse resp = SPAPR_DR_CC_RESPONSE_CONTINUE;
     int rc;
 
@@ -1030,18 +1071,16 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
         goto out;
     }
 
-    if (!drc->fdt) {
-        trace_spapr_rtas_ibm_configure_connector_missing_fdt(drc_index);
+    if ((drc->state != SPAPR_DRC_STATE_LOGICAL_UNISOLATE)
+        && (drc->state != SPAPR_DRC_STATE_PHYSICAL_UNISOLATE)) {
+        /* Need to unisolate the device before configuring */
         rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE;
         goto out;
     }
 
-    ccs = drc->ccs;
-    if (!ccs) {
-        ccs = g_new0(sPAPRConfigureConnectorState, 1);
-        ccs->fdt_offset = drc->fdt_start_offset;
-        drc->ccs = ccs;
-    }
+    g_assert(drc->fdt);
+
+    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 
     do {
         uint32_t tag;
@@ -1049,12 +1088,12 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
         const struct fdt_property *prop;
         int fdt_offset_next, prop_len;
 
-        tag = fdt_next_tag(drc->fdt, ccs->fdt_offset, &fdt_offset_next);
+        tag = fdt_next_tag(drc->fdt, drc->ccs_offset, &fdt_offset_next);
 
         switch (tag) {
         case FDT_BEGIN_NODE:
-            ccs->fdt_depth++;
-            name = fdt_get_name(drc->fdt, ccs->fdt_offset, NULL);
+            drc->ccs_depth++;
+            name = fdt_get_name(drc->fdt, drc->ccs_offset, NULL);
 
             /* provide the name of the next OF node */
             wa_offset = CC_VAL_DATA_OFFSET;
@@ -1063,30 +1102,22 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
             resp = SPAPR_DR_CC_RESPONSE_NEXT_CHILD;
             break;
         case FDT_END_NODE:
-            ccs->fdt_depth--;
-            if (ccs->fdt_depth == 0) {
-                sPAPRDRIsolationState state = drc->isolation_state;
+            drc->ccs_depth--;
+            if (drc->ccs_depth == 0) {
                 uint32_t drc_index = spapr_drc_index(drc);
-                /* done sending the device tree, don't need to track
-                 * the state anymore
-                 */
+
+                /* done sending the device tree, move to configured state */
                 trace_spapr_drc_set_configured(drc_index);
-                if (state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) {
-                    drc->configured = true;
-                } else {
-                    /* guest should be not configuring an isolated device */
-                    trace_spapr_drc_set_configured_skipping(drc_index);
-                }
-                g_free(ccs);
-                drc->ccs = NULL;
-                ccs = NULL;
+                drc->state = drck->ready_state;
+                drc->ccs_offset = -1;
+                drc->ccs_depth = -1;
                 resp = SPAPR_DR_CC_RESPONSE_SUCCESS;
             } else {
                 resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT;
             }
             break;
         case FDT_PROP:
-            prop = fdt_get_property_by_offset(drc->fdt, ccs->fdt_offset,
+            prop = fdt_get_property_by_offset(drc->fdt, drc->ccs_offset,
                                               &prop_len);
             name = fdt_string(drc->fdt, fdt32_to_cpu(prop->nameoff));
 
@@ -1111,8 +1142,8 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
             /* keep seeking for an actionable tag */
             break;
         }
-        if (ccs) {
-            ccs->fdt_offset = fdt_offset_next;
+        if (drc->ccs_offset >= 0) {
+            drc->ccs_offset = fdt_offset_next;
         }
     } while (resp == SPAPR_DR_CC_RESPONSE_CONTINUE);
 
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 587a3dacb2..f952b78237 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -42,8 +42,6 @@
 #include "hw/ppc/spapr_ovec.h"
 #include <libfdt.h>
 
-struct rtas_error_log {
-    uint32_t summary;
 #define RTAS_LOG_VERSION_MASK                   0xff000000
 #define   RTAS_LOG_VERSION_6                    0x06000000
 #define RTAS_LOG_SEVERITY_MASK                  0x00e00000
@@ -85,6 +83,9 @@ struct rtas_error_log {
 #define   RTAS_LOG_TYPE_ECC_CORR                0x0000000a
 #define   RTAS_LOG_TYPE_EPOW                    0x00000040
 #define   RTAS_LOG_TYPE_HOTPLUG                 0x000000e5
+
+struct rtas_error_log {
+    uint32_t summary;
     uint32_t extended_length;
 } QEMU_PACKED;
 
@@ -166,8 +167,7 @@ struct rtas_event_log_v6_epow {
     uint64_t reason_code;
 } QEMU_PACKED;
 
-struct epow_log_full {
-    struct rtas_error_log hdr;
+struct epow_extended_log {
     struct rtas_event_log_v6 v6hdr;
     struct rtas_event_log_v6_maina maina;
     struct rtas_event_log_v6_mainb mainb;
@@ -205,8 +205,7 @@ struct rtas_event_log_v6_hp {
     union drc_identifier drc_id;
 } QEMU_PACKED;
 
-struct hp_log_full {
-    struct rtas_error_log hdr;
+struct hp_extended_log {
     struct rtas_event_log_v6 v6hdr;
     struct rtas_event_log_v6_maina maina;
     struct rtas_event_log_v6_mainb mainb;
@@ -341,25 +340,26 @@ static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type)
     return source->irq;
 }
 
-static void rtas_event_log_queue(int log_type, void *data)
+static uint32_t spapr_event_log_entry_type(sPAPREventLogEntry *entry)
 {
-    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
-    sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1);
+    return entry->summary & RTAS_LOG_TYPE_MASK;
+}
 
-    g_assert(data);
-    entry->log_type = log_type;
-    entry->data = data;
+static void rtas_event_log_queue(sPAPRMachineState *spapr,
+                                 sPAPREventLogEntry *entry)
+{
     QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next);
 }
 
-static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask)
+static sPAPREventLogEntry *rtas_event_log_dequeue(sPAPRMachineState *spapr,
+                                                  uint32_t event_mask)
 {
-    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = NULL;
 
     QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
         const sPAPREventSource *source =
-            rtas_event_log_to_source(spapr, entry->log_type);
+            rtas_event_log_to_source(spapr,
+                                     spapr_event_log_entry_type(entry));
 
         if (source->mask & event_mask) {
             break;
@@ -380,7 +380,8 @@ static bool rtas_event_log_contains(uint32_t event_mask)
 
     QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
         const sPAPREventSource *source =
-            rtas_event_log_to_source(spapr, entry->log_type);
+            rtas_event_log_to_source(spapr,
+                                     spapr_event_log_entry_type(entry));
 
         if (source->mask & event_mask) {
             return true;
@@ -428,27 +429,28 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina,
 static void spapr_powerdown_req(Notifier *n, void *opaque)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
-    struct rtas_error_log *hdr;
+    sPAPREventLogEntry *entry;
     struct rtas_event_log_v6 *v6hdr;
     struct rtas_event_log_v6_maina *maina;
     struct rtas_event_log_v6_mainb *mainb;
     struct rtas_event_log_v6_epow *epow;
-    struct epow_log_full *new_epow;
+    struct epow_extended_log *new_epow;
 
+    entry = g_new(sPAPREventLogEntry, 1);
     new_epow = g_malloc0(sizeof(*new_epow));
-    hdr = &new_epow->hdr;
+    entry->extended_log = new_epow;
+
     v6hdr = &new_epow->v6hdr;
     maina = &new_epow->maina;
     mainb = &new_epow->mainb;
     epow = &new_epow->epow;
 
-    hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
-                               | RTAS_LOG_SEVERITY_EVENT
-                               | RTAS_LOG_DISPOSITION_NOT_RECOVERED
-                               | RTAS_LOG_OPTIONAL_PART_PRESENT
-                               | RTAS_LOG_TYPE_EPOW);
-    hdr->extended_length = cpu_to_be32(sizeof(*new_epow)
-                                       - sizeof(new_epow->hdr));
+    entry->summary = RTAS_LOG_VERSION_6
+                       | RTAS_LOG_SEVERITY_EVENT
+                       | RTAS_LOG_DISPOSITION_NOT_RECOVERED
+                       | RTAS_LOG_OPTIONAL_PART_PRESENT
+                       | RTAS_LOG_TYPE_EPOW;
+    entry->extended_length = sizeof(*new_epow);
 
     spapr_init_v6hdr(v6hdr);
     spapr_init_maina(maina, 3 /* Main-A, Main-B and EPOW */);
@@ -468,7 +470,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
     epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
     epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
 
-    rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow);
+    rtas_event_log_queue(spapr, entry);
 
     qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
                                  rtas_event_log_to_irq(spapr,
@@ -480,28 +482,29 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
                                     union drc_identifier *drc_id)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
-    struct hp_log_full *new_hp;
-    struct rtas_error_log *hdr;
+    sPAPREventLogEntry *entry;
+    struct hp_extended_log *new_hp;
     struct rtas_event_log_v6 *v6hdr;
     struct rtas_event_log_v6_maina *maina;
     struct rtas_event_log_v6_mainb *mainb;
     struct rtas_event_log_v6_hp *hp;
 
-    new_hp = g_malloc0(sizeof(struct hp_log_full));
-    hdr = &new_hp->hdr;
+    entry = g_new(sPAPREventLogEntry, 1);
+    new_hp = g_malloc0(sizeof(struct hp_extended_log));
+    entry->extended_log = new_hp;
+
     v6hdr = &new_hp->v6hdr;
     maina = &new_hp->maina;
     mainb = &new_hp->mainb;
     hp = &new_hp->hp;
 
-    hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
-                               | RTAS_LOG_SEVERITY_EVENT
-                               | RTAS_LOG_DISPOSITION_NOT_RECOVERED
-                               | RTAS_LOG_OPTIONAL_PART_PRESENT
-                               | RTAS_LOG_INITIATOR_HOTPLUG
-                               | RTAS_LOG_TYPE_HOTPLUG);
-    hdr->extended_length = cpu_to_be32(sizeof(*new_hp)
-                                       - sizeof(new_hp->hdr));
+    entry->summary = RTAS_LOG_VERSION_6
+        | RTAS_LOG_SEVERITY_EVENT
+        | RTAS_LOG_DISPOSITION_NOT_RECOVERED
+        | RTAS_LOG_OPTIONAL_PART_PRESENT
+        | RTAS_LOG_INITIATOR_HOTPLUG
+        | RTAS_LOG_TYPE_HOTPLUG;
+    entry->extended_length = sizeof(*new_hp);
 
     spapr_init_v6hdr(v6hdr);
     spapr_init_maina(maina, 3 /* Main-A, Main-B, HP */);
@@ -551,7 +554,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
             cpu_to_be32(drc_id->count_indexed.index);
     }
 
-    rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp);
+    rtas_event_log_queue(spapr, entry);
 
     qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
                                  rtas_event_log_to_irq(spapr,
@@ -628,7 +631,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     uint32_t mask, buf, len, event_len;
     uint64_t xinfo;
     sPAPREventLogEntry *event;
-    struct rtas_error_log *hdr;
+    struct rtas_error_log header;
     int i;
 
     if ((nargs < 6) || (nargs > 7) || nret != 1) {
@@ -644,21 +647,24 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
         xinfo |= (uint64_t)rtas_ld(args, 6) << 32;
     }
 
-    event = rtas_event_log_dequeue(mask);
+    event = rtas_event_log_dequeue(spapr, mask);
     if (!event) {
         goto out_no_events;
     }
 
-    hdr = event->data;
-    event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr);
+    event_len = event->extended_length + sizeof(header);
 
     if (event_len < len) {
         len = event_len;
     }
 
-    cpu_physical_memory_write(buf, event->data, len);
+    header.summary = cpu_to_be32(event->summary);
+    header.extended_length = cpu_to_be32(event->extended_length);
+    cpu_physical_memory_write(buf, &header, sizeof(header));
+    cpu_physical_memory_write(buf + sizeof(header), event->extended_log,
+                              event->extended_length);
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
-    g_free(event->data);
+    g_free(event->extended_log);
     g_free(event);
 
     /* according to PAPR+, the IRQ must be left asserted, or re-asserted, if
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 8624ce8d5b..72ea5a8247 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -3,6 +3,7 @@
 #include "sysemu/hw_accel.h"
 #include "sysemu/sysemu.h"
 #include "qemu/log.h"
+#include "qemu/error-report.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "helper_regs.h"
@@ -354,6 +355,401 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     return H_SUCCESS;
 }
 
+struct sPAPRPendingHPT {
+    /* These fields are read-only after initialization */
+    int shift;            /* log2 of the requested HPT size in bytes */
+    QemuThread thread;    /* detached thread doing the preparation */
+
+    /* These fields are protected by the BQL */
+    bool complete;        /* set by the thread once preparation is done */
+
+    /* These fields are private to the preparation thread if
+     * !complete, otherwise protected by the BQL */
+    int ret;              /* H_* status of the preparation */
+    void *hpt;            /* the new, zeroed table (NULL if none) */
+};
+
+static void free_pending_hpt(sPAPRPendingHPT *pending)
+{
+    if (pending->hpt) {   /* NULL if never allocated or already adopted */
+        qemu_vfree(pending->hpt);
+    }
+
+    g_free(pending);      /* the descriptor itself is always released */
+}
+
+static void *hpt_prepare_thread(void *opaque)
+{
+    sPAPRPendingHPT *pending = opaque;
+    size_t size = 1ULL << pending->shift;   /* table size in bytes */
+
+    pending->hpt = qemu_memalign(size, size);   /* aligned to its own size */
+    if (pending->hpt) {
+        memset(pending->hpt, 0, size);
+        pending->ret = H_SUCCESS;
+    } else {
+        pending->ret = H_NO_MEM;
+    }
+
+    qemu_mutex_lock_iothread();   /* result is published under the lock */
+
+    if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) {
+        /* Still the machine's current request: mark it ready to go */
+        pending->complete = true;
+    } else {
+        /* We've been cancelled or replaced, clean ourselves up */
+        free_pending_hpt(pending);
+    }
+
+    qemu_mutex_unlock_iothread();
+    return NULL;
+}
+
+/* Drop any pending HPT preparation.  Must be called with BQL held */
+static void cancel_hpt_prepare(sPAPRMachineState *spapr)
+{
+    sPAPRPendingHPT *pending = spapr->pending_hpt;
+
+    /* Let the thread know it's cancelled (it compares this pointer) */
+    spapr->pending_hpt = NULL;
+
+    if (!pending) {
+        /* Nothing to do */
+        return;
+    }
+
+    if (!pending->complete) {
+        /* thread is still working and will clean itself up */
+        return;
+    }
+
+    free_pending_hpt(pending);   /* thread already finished: free for it */
+}
+
+/* Translate a return code from the KVM ioctl()s implementing resize HPT
+ * into the PAPR hypercall return code handed back to the guest */
+static target_ulong resize_hpt_convert_rc(int ret)
+{
+    if (ret <= 0) {    /* zero or an errno: fixed mapping */
+        switch (ret) {
+        case 0:
+            return H_SUCCESS;
+        case -EPERM:
+            return H_AUTHORITY;
+        case -EINVAL:
+            return H_PARAMETER;
+        case -ENXIO:
+            return H_CLOSED;
+        case -ENOSPC:
+            return H_PTEG_FULL;
+        case -EBUSY:
+            return H_BUSY;
+        case -ENOMEM:
+            return H_NO_MEM;
+        default:
+            return H_HARDWARE;
+        }
+    } else if (ret < 10) {    /* positive: bucket by order of magnitude */
+        return H_LONG_BUSY_ORDER_1_MSEC;
+    } else if (ret < 100) {
+        return H_LONG_BUSY_ORDER_10_MSEC;
+    } else if (ret < 1000) {
+        return H_LONG_BUSY_ORDER_100_MSEC;
+    } else if (ret < 10000) {
+        return H_LONG_BUSY_ORDER_1_SEC;
+    } else if (ret < 100000) {
+        return H_LONG_BUSY_ORDER_10_SEC;
+    } else {
+        return H_LONG_BUSY_ORDER_100_SEC;
+    }
+}
+
+static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
+                                         sPAPRMachineState *spapr,
+                                         target_ulong opcode,
+                                         target_ulong *args)
+{
+    target_ulong flags = args[0];   /* must be zero */
+    int shift = args[1];            /* log2 of new HPT size; 0 cancels */
+    sPAPRPendingHPT *pending = spapr->pending_hpt;
+    uint64_t current_ram_size = MACHINE(spapr)->ram_size;
+    int rc;
+
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
+        return H_AUTHORITY;
+    }
+
+    if (!spapr->htab_shift) {
+        /* Radix guest, no HPT */
+        return H_NOT_AVAILABLE;
+    }
+
+    trace_spapr_h_resize_hpt_prepare(flags, shift);
+
+    if (flags != 0) {
+        return H_PARAMETER;
+    }
+
+    if (shift && ((shift < 18) || (shift > 46))) {
+        return H_PARAMETER;
+    }
+
+    current_ram_size += pc_existing_dimms_capacity(&error_fatal);
+
+    /* The limit is based on base RAM plus existing DIMMs: the guest may
+     * go one order above what we'd normally give it (to stop a small
+     * guest claiming a huge chunk of resources in the HPT) */
+    if (shift > (spapr_hpt_shift_for_ramsize(current_ram_size) + 1)) {
+        return H_RESOURCE;
+    }
+
+    rc = kvmppc_resize_hpt_prepare(cpu, flags, shift);
+    if (rc != -ENOSYS) {
+        return resize_hpt_convert_rc(rc);
+    }
+
+    if (pending) {
+        /* something already in progress */
+        if (pending->shift == shift) {
+            /* and it's suitable */
+            if (pending->complete) {
+                return pending->ret;
+            } else {
+                return H_LONG_BUSY_ORDER_100_MSEC;
+            }
+        }
+
+        /* not suitable, cancel and replace */
+        cancel_hpt_prepare(spapr);
+    }
+
+    if (!shift) {
+        /* nothing to do */
+        return H_SUCCESS;
+    }
+
+    /* start new prepare: the table is set up on a detached thread, and
+     * a repeated call with the same shift reports how far it has got */
+    pending = g_new0(sPAPRPendingHPT, 1);
+    pending->shift = shift;
+    pending->ret = H_HARDWARE;
+
+    qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
+                       hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
+
+    spapr->pending_hpt = pending;
+
+    /* In theory we could estimate the time more accurately based on
+     * the new size, but there's not much point */
+    return H_LONG_BUSY_ORDER_100_MSEC;
+}
+
+/* Fetch (via ldq_p) the 64-bit word at the start of entry 'slot' in 'pteg' */
+static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
+{
+    uint8_t *group = (uint8_t *)htab + pteg * HASH_PTEG_SIZE_64;
+    uint8_t *hpte = group + slot * HASH_PTE_SIZE_64;
+
+    return ldq_p(hpte);
+}
+
+/* Store pte0 then pte1 into the two halves of entry 'slot' in 'pteg' */
+static void new_hpte_store(void *htab, uint64_t pteg, int slot,
+                           uint64_t pte0, uint64_t pte1)
+{
+    uint8_t *group = (uint8_t *)htab + pteg * HASH_PTEG_SIZE_64;
+    uint8_t *hpte = group + slot * HASH_PTE_SIZE_64;
+    uint8_t *second_half = hpte + HASH_PTE_SIZE_64 / 2;
+
+    stq_p(hpte, pte0);
+    stq_p(second_half, pte1);
+}
+
+static int rehash_hpte(PowerPCCPU *cpu,
+                       const ppc_hash_pte64_t *hptes,
+                       void *old_hpt, uint64_t oldsize,
+                       void *new_hpt, uint64_t newsize,
+                       uint64_t pteg, int slot)
+{
+    uint64_t old_hash_mask = (oldsize >> 7) - 1;   /* old PTEG count - 1 */
+    uint64_t new_hash_mask = (newsize >> 7) - 1;   /* new PTEG count - 1 */
+    target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
+    target_ulong pte1;
+    uint64_t avpn;
+    unsigned base_pg_shift;
+    uint64_t hash, new_pteg, replace_pte0;
+
+    /* Only valid, bolted entries are carried over; the rest are dropped */
+    if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
+        return H_SUCCESS;
+    }
+
+    pte1 = ppc_hash64_hpte1(cpu, hptes, slot);
+
+    base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
+    assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
+    avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);
+
+    if (pte0 & HPTE64_V_SECONDARY) {
+        pteg = ~pteg;
+    }
+
+    if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
+        uint64_t offset, vsid;
+
+        /* We only have 28 - 23 bits of offset in avpn */
+        offset = (avpn & 0x1f) << 23;
+        vsid = avpn >> 5;
+        /* We can find more bits from the pteg value */
+        if (base_pg_shift < 23) {
+            offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
+        }
+
+        hash = vsid ^ (offset >> base_pg_shift);
+    } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
+        uint64_t offset, vsid;
+
+        /* We only have 40 - 23 bits of seg_off in avpn */
+        offset = (avpn & 0x1ffff) << 23;
+        vsid = avpn >> 17;
+        if (base_pg_shift < 23) {
+            offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
+                << base_pg_shift;
+        }
+
+        hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
+    } else {
+        error_report("rehash_pte: Bad segment size in HPTE");
+        return H_HARDWARE;
+    }
+
+    new_pteg = hash & new_hash_mask;   /* group index in the new table */
+    if (pte0 & HPTE64_V_SECONDARY) {
+        assert(~pteg == (hash & old_hash_mask));
+        new_pteg = ~new_pteg;   /* NOTE(review): not re-masked - confirm */
+    } else {
+        assert(pteg == (hash & old_hash_mask));
+    }
+    assert((oldsize != newsize) || (pteg == new_pteg));
+    replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
+    /*
+     * Strictly speaking, we don't need all these tests, since we only
+     * ever rehash bolted HPTEs.  We might in future handle non-bolted
+     * HPTEs, though so make the logic correct for those cases as
+     * well.
+     */
+    if (replace_pte0 & HPTE64_V_VALID) {
+        assert(newsize < oldsize);
+        if (replace_pte0 & HPTE64_V_BOLTED) {
+            if (pte0 & HPTE64_V_BOLTED) {
+                /* Bolted collision, nothing we can do */
+                return H_PTEG_FULL;
+            } else {
+                /* Discard this hpte */
+                return H_SUCCESS;
+            }
+        }
+    }
+
+    new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
+    return H_SUCCESS;
+}
+
+static int rehash_hpt(PowerPCCPU *cpu,
+                      void *old_hpt, uint64_t oldsize,
+                      void *new_hpt, uint64_t newsize)
+{
+    uint64_t n_ptegs = oldsize >> 7;   /* groups in the old table */
+    uint64_t pteg;
+    int slot;
+    int rc;
+
+    for (pteg = 0; pteg < n_ptegs; pteg++) {
+        hwaddr ptex = pteg * HPTES_PER_GROUP;   /* first entry of the group */
+        const ppc_hash_pte64_t *hptes
+            = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+
+        if (!hptes) {
+            return H_HARDWARE;
+        }
+
+        for (slot = 0; slot < HPTES_PER_GROUP; slot++) {
+            rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize,
+                             pteg, slot);
+            if (rc != H_SUCCESS) {
+                /* first failure aborts the walk; unmap before returning */
+                ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+                return rc;
+            }
+        }
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+    }
+
+    return H_SUCCESS;
+}
+
+static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu,
+                                        sPAPRMachineState *spapr,
+                                        target_ulong opcode,
+                                        target_ulong *args)
+{
+    target_ulong flags = args[0];   /* must be zero */
+    target_ulong shift = args[1];   /* must match the prepared shift */
+    sPAPRPendingHPT *pending = spapr->pending_hpt;
+    int rc;
+    size_t newsize;
+
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
+        return H_AUTHORITY;
+    }
+
+    trace_spapr_h_resize_hpt_commit(flags, shift);
+
+    rc = kvmppc_resize_hpt_commit(cpu, flags, shift);
+    if (rc != -ENOSYS) {   /* anything but -ENOSYS is the final answer */
+        return resize_hpt_convert_rc(rc);
+    }
+
+    if (flags != 0) {
+        return H_PARAMETER;
+    }
+
+    if (!pending || (pending->shift != shift)) {
+        /* no matching prepare */
+        return H_CLOSED;
+    }
+
+    if (!pending->complete) {
+        /* prepare has not completed */
+        return H_BUSY;
+    }
+
+    /* Shouldn't have got past PREPARE without an HPT */
+    g_assert(spapr->htab_shift);
+
+    newsize = 1ULL << pending->shift;
+    rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
+                    pending->hpt, newsize);
+    if (rc == H_SUCCESS) {
+        /* Swap tables: free the old one and adopt the prepared one */
+        qemu_vfree(spapr->htab);
+        spapr->htab = pending->hpt;
+        spapr->htab_shift = pending->shift;
+        if (kvm_enabled()) {
+            /* For KVM PR, update the HPT pointer */
+            target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab
+                | (spapr->htab_shift - 18);
+            kvmppc_update_sdr1(sdr1);
+        }
+
+        pending->hpt = NULL; /* so it's not free()d */
+    }
+
+    /* Clean up: the request is consumed whether or not the rehash worked */
+    spapr->pending_hpt = NULL;
+    free_pending_hpt(pending);
+
+    return rc;
+}
+
 static target_ulong h_set_sprg0(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                 target_ulong opcode, target_ulong *args)
 {
@@ -1133,6 +1529,45 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300);
     spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300);
 
+    /*
+     * HPT resizing is a bit of a special case, because when enabled
+     * we assume an HPT guest will support it until it says it
+     * doesn't, instead of assuming it won't support it until it says
+     * it does.  Strictly speaking that approach could break for
+     * guests which don't make a CAS call, but those are so old we
+     * don't care about them.  Without that assumption we'd have to
+     * make at least a temporary allocation of an HPT sized for max
+     * memory, which could be impossibly difficult under KVM HV if
+     * maxram is large.
+     */
+    if (!guest_radix && !spapr_ovec_test(ov5_guest, OV5_HPT_RESIZE)) {
+        int maxshift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
+
+        if (spapr->resize_hpt == SPAPR_RESIZE_HPT_REQUIRED) {
+            error_report(
+                "h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required");
+            exit(1);
+        }
+
+        if (spapr->htab_shift < maxshift) {
+            CPUState *cs;
+
+            /* Guest doesn't know about HPT resizing, so we
+             * pre-emptively resize for the maximum permitted RAM.  At
+             * the point this is called, nothing should have been
+             * entered into the existing HPT */
+            spapr_reallocate_hpt(spapr, maxshift, &error_fatal);
+            CPU_FOREACH(cs) {
+                if (kvm_enabled()) {
+                    /* For KVM PR, update the HPT pointer */
+                    target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab
+                        | (spapr->htab_shift - 18);
+                    kvmppc_update_sdr1(sdr1);
+                }
+            }
+        }
+    }
+
     /* NOTE: there are actually a number of ov5 bits where input from the
      * guest is always zero, and the platform/QEMU enables them independently
      * of guest input. To model these properly we'd want some sort of mask,
@@ -1246,6 +1681,10 @@ static void hypercall_register_types(void)
     /* hcall-bulk */
     spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
 
+    /* hcall-hpt-resize */
+    spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare);
+    spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit);
+
     /* hcall-splpar */
     spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
     spapr_register_hypercall(H_CEDE, h_cede);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index a52dcf8ec0..6ecdf29d28 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1443,7 +1443,9 @@ static void spapr_pci_plug(HotplugHandler *plug_handler,
     /* If this is function 0, signal hotplug for all the device functions.
      * Otherwise defer sending the hotplug event.
      */
-    if (plugged_dev->hotplugged && PCI_FUNC(pdev->devfn) == 0) {
+    if (!spapr_drc_hotplugged(plugged_dev)) {
+        spapr_drc_reset(drc);
+    } else if (PCI_FUNC(pdev->devfn) == 0) {
         int i;
 
         for (i = 0; i < 8; i++) {
@@ -1474,9 +1476,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
 {
     sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
     PCIDevice *pdev = PCI_DEVICE(plugged_dev);
-    sPAPRDRConnectorClass *drck;
     sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
-    Error *local_err = NULL;
 
     if (!phb->dr_enabled) {
         error_setg(errp, QERR_BUS_NO_HOTPLUG,
@@ -1487,8 +1487,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
     g_assert(drc);
     g_assert(drc->dev == plugged_dev);
 
-    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-    if (!drck->release_pending(drc)) {
+    if (!spapr_drc_unplug_requested(drc)) {
         PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
         uint32_t slotnr = PCI_SLOT(pdev->devfn);
         sPAPRDRConnector *func_drc;
@@ -1504,7 +1503,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
                 func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
                 state = func_drck->dr_entity_sense(func_drc);
                 if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
-                    && !func_drck->release_pending(func_drc)) {
+                    && !spapr_drc_unplug_requested(func_drc)) {
                     error_setg(errp,
                                "PCI: slot %d, function %d still present. "
                                "Must unplug all non-0 functions first.",
@@ -1514,11 +1513,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
             }
         }
 
-        spapr_drc_detach(drc, DEVICE(pdev), &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
+        spapr_drc_detach(drc);
 
         /* if this isn't func 0, defer unplug event. otherwise signal removal
          * for all present functions
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
index 3e8e3cffde..0f7d9be4ef 100644
--- a/hw/ppc/trace-events
+++ b/hw/ppc/trace-events
@@ -16,6 +16,8 @@ spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes"
 # hw/ppc/spapr_hcall.c
 spapr_cas_pvr_try(uint32_t pvr) "%x"
 spapr_cas_pvr(uint32_t cur_pvr, bool explicit_match, uint32_t new_pvr) "current=%x, explicit_match=%u, new=%x"
+spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
+spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
 
 # hw/ppc/spapr_iommu.c
 spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64
@@ -46,8 +48,7 @@ spapr_drc_set_configured(uint32_t index) "drc: 0x%"PRIx32
 spapr_drc_set_configured_skipping(uint32_t index) "drc: 0x%"PRIx32", isolated device"
 spapr_drc_attach(uint32_t index) "drc: 0x%"PRIx32
 spapr_drc_detach(uint32_t index) "drc: 0x%"PRIx32
-spapr_drc_awaiting_isolated(uint32_t index) "drc: 0x%"PRIx32
-spapr_drc_awaiting_unusable(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_awaiting_quiesce(uint32_t index) "drc: 0x%"PRIx32
 spapr_drc_awaiting_allocation(uint32_t index) "drc: 0x%"PRIx32
 spapr_drc_reset(uint32_t index) "drc: 0x%"PRIx32
 spapr_drc_realize(uint32_t index) "drc: 0x%"PRIx32
diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index 83d6023894..9253dbbc64 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -273,7 +273,6 @@ static void assign_storage(SCLPDevice *sclp, SCCB *sccb)
              * instead of doing it via the ref count of the MemoryRegion. */
             object_ref(OBJECT(standby_ram));
             object_unparent(OBJECT(standby_ram));
-            vmstate_register_ram_global(standby_ram);
             memory_region_add_subregion(sysmem, offset, standby_ram);
         }
         /* The specified subregion is no longer in standby */
diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c
index e6fc74ed87..a0462adb97 100644
--- a/hw/sh4/r2d.c
+++ b/hw/sh4/r2d.c
@@ -260,7 +260,6 @@ static void r2d_init(MachineState *machine)
 
     /* Allocate memory space */
     memory_region_init_ram(sdram, NULL, "r2d.sdram", SDRAM_SIZE, &error_fatal);
-    vmstate_register_ram_global(sdram);
     memory_region_add_subregion(address_space_mem, SDRAM_BASE, sdram);
     /* Register peripherals */
     s = sh7750_init(cpu, address_space_mem);
diff --git a/hw/sh4/shix.c b/hw/sh4/shix.c
index fd00cc5ea2..e22eaf0c8f 100644
--- a/hw/sh4/shix.c
+++ b/hw/sh4/shix.c
@@ -64,16 +64,13 @@ static void shix_init(MachineState *machine)
 
     /* Allocate memory space */
     memory_region_init_ram(rom, NULL, "shix.rom", 0x4000, &error_fatal);
-    vmstate_register_ram_global(rom);
     memory_region_set_readonly(rom, true);
     memory_region_add_subregion(sysmem, 0x00000000, rom);
     memory_region_init_ram(&sdram[0], NULL, "shix.sdram1", 0x01000000,
                            &error_fatal);
-    vmstate_register_ram_global(&sdram[0]);
     memory_region_add_subregion(sysmem, 0x08000000, &sdram[0]);
     memory_region_init_ram(&sdram[1], NULL, "shix.sdram2", 0x01000000,
                            &error_fatal);
-    vmstate_register_ram_global(&sdram[1]);
     memory_region_add_subregion(sysmem, 0x0c000000, &sdram[1]);
 
     /* Load BIOS in 0 (and access it through P2, 0xA0000000) */
diff --git a/hw/sparc/leon3.c b/hw/sparc/leon3.c
index f415997649..d5ff188d9e 100644
--- a/hw/sparc/leon3.c
+++ b/hw/sparc/leon3.c
@@ -160,7 +160,6 @@ static void leon3_generic_hw_init(MachineState *machine)
     /* Allocate BIOS */
     prom_size = 8 * 1024 * 1024; /* 8Mb */
     memory_region_init_ram(prom, NULL, "Leon3.bios", prom_size, &error_fatal);
-    vmstate_register_ram_global(prom);
     memory_region_set_readonly(prom, true);
     memory_region_add_subregion(address_space_mem, 0x00000000, prom);
 
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 0faff4619f..89dd8a96c3 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -590,7 +590,7 @@ static void idreg_init1(Object *obj)
     IDRegState *s = MACIO_ID_REGISTER(obj);
     SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-    memory_region_init_ram(&s->mem, obj,
+    memory_region_init_ram_nomigrate(&s->mem, obj,
                            "sun4m.idreg", sizeof(idreg_data), &error_fatal);
     vmstate_register_ram_global(&s->mem);
     memory_region_set_readonly(&s->mem, true);
@@ -631,7 +631,7 @@ static void afx_init1(Object *obj)
     AFXState *s = TCX_AFX(obj);
     SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-    memory_region_init_ram(&s->mem, obj, "sun4m.afx", 4, &error_fatal);
+    memory_region_init_ram_nomigrate(&s->mem, obj, "sun4m.afx", 4, &error_fatal);
     vmstate_register_ram_global(&s->mem);
     sysbus_init_mmio(dev, &s->mem);
 }
@@ -698,7 +698,7 @@ static void prom_init1(Object *obj)
     PROMState *s = OPENPROM(obj);
     SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-    memory_region_init_ram(&s->prom, obj, "sun4m.prom", PROM_SIZE_MAX,
+    memory_region_init_ram_nomigrate(&s->prom, obj, "sun4m.prom", PROM_SIZE_MAX,
                            &error_fatal);
     vmstate_register_ram_global(&s->prom);
     memory_region_set_readonly(&s->prom, true);
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index 69f565db25..bbdb40c330 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -334,7 +334,7 @@ static void prom_init1(Object *obj)
     PROMState *s = OPENPROM(obj);
     SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-    memory_region_init_ram(&s->prom, obj, "sun4u.prom", PROM_SIZE_MAX,
+    memory_region_init_ram_nomigrate(&s->prom, obj, "sun4u.prom", PROM_SIZE_MAX,
                            &error_fatal);
     vmstate_register_ram_global(&s->prom);
     memory_region_set_readonly(&s->prom, true);
@@ -377,7 +377,7 @@ static void ram_realize(DeviceState *dev, Error **errp)
     RamDevice *d = SUN4U_RAM(dev);
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 
-    memory_region_init_ram(&d->ram, OBJECT(d), "sun4u.ram", d->size,
+    memory_region_init_ram_nomigrate(&d->ram, OBJECT(d), "sun4u.ram", d->size,
                            &error_fatal);
     vmstate_register_ram_global(&d->ram);
     sysbus_init_mmio(sbd, &d->ram);
diff --git a/hw/tricore/tricore_testboard.c b/hw/tricore/tricore_testboard.c
index 8910bf0f27..6c574231d5 100644
--- a/hw/tricore/tricore_testboard.c
+++ b/hw/tricore/tricore_testboard.c
@@ -80,24 +80,18 @@ static void tricore_testboard_init(MachineState *machine, int board_id)
         exit(1);
     }
     env = &cpu->env;
-    memory_region_init_ram(ext_cram, NULL, "powerlink_ext_c.ram", 2*1024*1024,
+    memory_region_init_ram(ext_cram, NULL, "powerlink_ext_c.ram",
+                           2 * 1024 * 1024, &error_fatal);
+    memory_region_init_ram(ext_dram, NULL, "powerlink_ext_d.ram",
+                           4 * 1024 * 1024, &error_fatal);
+    memory_region_init_ram(int_cram, NULL, "powerlink_int_c.ram", 48 * 1024,
                            &error_fatal);
-    vmstate_register_ram_global(ext_cram);
-    memory_region_init_ram(ext_dram, NULL, "powerlink_ext_d.ram", 4*1024*1024,
+    memory_region_init_ram(int_dram, NULL, "powerlink_int_d.ram", 48 * 1024,
                            &error_fatal);
-    vmstate_register_ram_global(ext_dram);
-    memory_region_init_ram(int_cram, NULL, "powerlink_int_c.ram", 48*1024,
-                           &error_fatal);
-    vmstate_register_ram_global(int_cram);
-    memory_region_init_ram(int_dram, NULL, "powerlink_int_d.ram", 48*1024,
-                           &error_fatal);
-    vmstate_register_ram_global(int_dram);
-    memory_region_init_ram(pcp_data, NULL, "powerlink_pcp_data.ram", 16*1024,
-                           &error_fatal);
-    vmstate_register_ram_global(pcp_data);
-    memory_region_init_ram(pcp_text, NULL, "powerlink_pcp_text.ram", 32*1024,
-                           &error_fatal);
-    vmstate_register_ram_global(pcp_text);
+    memory_region_init_ram(pcp_data, NULL, "powerlink_pcp_data.ram",
+                           16 * 1024, &error_fatal);
+    memory_region_init_ram(pcp_text, NULL, "powerlink_pcp_text.ram",
+                           32 * 1024, &error_fatal);
 
     memory_region_add_subregion(sysmem, 0x80000000, ext_cram);
     memory_region_add_subregion(sysmem, 0xa1000000, ext_dram);
diff --git a/hw/unicore32/puv3.c b/hw/unicore32/puv3.c
index 032078fd3e..e9d1a60b6f 100644
--- a/hw/unicore32/puv3.c
+++ b/hw/unicore32/puv3.c
@@ -80,7 +80,6 @@ static void puv3_board_init(CPUUniCore32State *env, ram_addr_t ram_size)
     /* SDRAM at address zero.  */
     memory_region_init_ram(ram_memory, NULL, "puv3.ram", ram_size,
                            &error_fatal);
-    vmstate_register_ram_global(ram_memory);
     memory_region_add_subregion(get_system_memory(), 0, ram_memory);
 }
 
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 73090e01ad..604912cb3e 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2483,6 +2483,11 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
                    NB_PORTS);
         return;
     }
+    if (s->maxframes < 8 || s->maxframes > 512) {
+        error_setg(errp, "maxframes %d out of range (8 .. 512)",
+                   s->maxframes);
+        return; /* reject the setting before the USB bus is created below */
+    }
 
     usb_bus_new(&s->bus, sizeof(s->bus), s->companion_enable ?
                 &ehci_bus_ops_companion : &ehci_bus_ops_standalone, dev);
diff --git a/hw/xtensa/sim.c b/hw/xtensa/sim.c
index 5521e9184a..249cd1e8c9 100644
--- a/hw/xtensa/sim.c
+++ b/hw/xtensa/sim.c
@@ -49,9 +49,7 @@ static void xtensa_create_memory_regions(const XtensaMemory *memory,
         g_string_printf(num_name, "%s%u", name, i);
         m = g_new(MemoryRegion, 1);
         memory_region_init_ram(m, NULL, num_name->str,
-                               memory->location[i].size,
-                               &error_fatal);
-        vmstate_register_ram_global(m);
+                               memory->location[i].size, &error_fatal);
         memory_region_add_subregion(get_system_memory(),
                                     memory->location[i].addr, m);
     }
diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c
index d5ac080d4a..635a4d4ec3 100644
--- a/hw/xtensa/xtfpga.c
+++ b/hw/xtensa/xtfpga.c
@@ -147,7 +147,7 @@ static void lx60_net_init(MemoryRegion *address_space,
             sysbus_mmio_get_region(s, 1));
 
     ram = g_malloc(sizeof(*ram));
-    memory_region_init_ram(ram, OBJECT(s), "open_eth.ram", 16384,
+    memory_region_init_ram_nomigrate(ram, OBJECT(s), "open_eth.ram", 16384,
                            &error_fatal);
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(address_space, buffers, ram);
@@ -251,7 +251,6 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine)
     ram = g_malloc(sizeof(*ram));
     memory_region_init_ram(ram, NULL, "lx60.dram", machine->ram_size,
                            &error_fatal);
-    vmstate_register_ram_global(ram);
     memory_region_add_subregion(system_memory, 0, ram);
 
     system_io = g_malloc(sizeof(*system_io));
@@ -294,7 +293,6 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine)
         rom = g_malloc(sizeof(*rom));
         memory_region_init_ram(rom, NULL, "lx60.sram", board->sram_size,
                                &error_fatal);
-        vmstate_register_ram_global(rom);
         memory_region_add_subregion(system_memory, 0xfe000000, rom);
 
         if (kernel_cmdline) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 669a2797fd..5c6b761d81 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -324,7 +324,7 @@ struct BlockDriver {
      * Drain and stop any internal sources of requests in the driver, and
      * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
      */
-    void (*bdrv_drain)(BlockDriverState *bs);
+    void coroutine_fn (*bdrv_co_drain)(BlockDriverState *bs);
 
     void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
                            Error **errp);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index b7966014fe..400dd4491b 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -420,8 +420,9 @@ void memory_region_init_io(MemoryRegion *mr,
                            uint64_t size);
 
 /**
- * memory_region_init_ram:  Initialize RAM memory region.  Accesses into the
- *                          region will modify memory directly.
+ * memory_region_init_ram_nomigrate:  Initialize RAM memory region.  Accesses
+ *                                    into the region will modify memory
+ *                                    directly.
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
@@ -429,12 +430,15 @@ void memory_region_init_io(MemoryRegion *mr,
  *        must be unique within any device
  * @size: size of the region.
  * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
-void memory_region_init_ram(MemoryRegion *mr,
-                            struct Object *owner,
-                            const char *name,
-                            uint64_t size,
-                            Error **errp);
+void memory_region_init_ram_nomigrate(MemoryRegion *mr,
+                                      struct Object *owner,
+                                      const char *name,
+                                      uint64_t size,
+                                      Error **errp);
 
 /**
  * memory_region_init_resizeable_ram:  Initialize memory region with resizeable
@@ -451,6 +455,9 @@ void memory_region_init_ram(MemoryRegion *mr,
  * @max_size: max size of the region.
  * @resized: callback to notify owner about used size change.
  * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_resizeable_ram(MemoryRegion *mr,
                                        struct Object *owner,
@@ -474,6 +481,9 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
  * @share: %true if memory must be mmaped with the MAP_SHARED flag
  * @path: the path in which to allocate the RAM.
  * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       struct Object *owner,
@@ -494,6 +504,9 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
  * @share: %true if memory must be mmaped with the MAP_SHARED flag
  * @fd: the fd to mmap.
  * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_from_fd(MemoryRegion *mr,
                                     struct Object *owner,
@@ -515,6 +528,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
  *        must be unique within any device
  * @size: size of the region.
  * @ptr: memory to be mapped; must contain at least @size bytes.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_ptr(MemoryRegion *mr,
                                 struct Object *owner,
@@ -539,6 +555,10 @@ void memory_region_init_ram_ptr(MemoryRegion *mr,
  * @name: the name of the region.
  * @size: size of the region.
  * @ptr: memory to be mapped; must contain at least @size bytes.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
+ * (For RAM device memory regions, migrating the contents rarely makes sense.)
  */
 void memory_region_init_ram_device_ptr(MemoryRegion *mr,
                                        struct Object *owner,
@@ -566,12 +586,16 @@ void memory_region_init_alias(MemoryRegion *mr,
                               uint64_t size);
 
 /**
- * memory_region_init_rom: Initialize a ROM memory region.
+ * memory_region_init_rom_nomigrate: Initialize a ROM memory region.
  *
- * This has the same effect as calling memory_region_init_ram()
+ * This has the same effect as calling memory_region_init_ram_nomigrate()
  * and then marking the resulting region read-only with
  * memory_region_set_readonly().
  *
+ * Note that this function does not do anything to cause the data in the
+ * RAM side of the memory region to be migrated; that is the responsibility
+ * of the caller.
+ *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
  * @name: Region name, becomes part of RAMBlock name used in migration stream
@@ -579,15 +603,19 @@ void memory_region_init_alias(MemoryRegion *mr,
  * @size: size of the region.
  * @errp: pointer to Error*, to store an error if it happens.
  */
-void memory_region_init_rom(MemoryRegion *mr,
-                            struct Object *owner,
-                            const char *name,
-                            uint64_t size,
-                            Error **errp);
+void memory_region_init_rom_nomigrate(MemoryRegion *mr,
+                                      struct Object *owner,
+                                      const char *name,
+                                      uint64_t size,
+                                      Error **errp);
 
 /**
- * memory_region_init_rom_device:  Initialize a ROM memory region.  Writes are
- *                                 handled via callbacks.
+ * memory_region_init_rom_device_nomigrate:  Initialize a ROM memory region.
+ *                                 Writes are handled via callbacks.
+ *
+ * Note that this function does not do anything to cause the data in the
+ * RAM side of the memory region to be migrated; that is the responsibility
+ * of the caller.
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
@@ -597,13 +625,13 @@ void memory_region_init_rom(MemoryRegion *mr,
  * @size: size of the region.
  * @errp: pointer to Error*, to store an error if it happens.
  */
-void memory_region_init_rom_device(MemoryRegion *mr,
-                                   struct Object *owner,
-                                   const MemoryRegionOps *ops,
-                                   void *opaque,
-                                   const char *name,
-                                   uint64_t size,
-                                   Error **errp);
+void memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
+                                             struct Object *owner,
+                                             const MemoryRegionOps *ops,
+                                             void *opaque,
+                                             const char *name,
+                                             uint64_t size,
+                                             Error **errp);
 
 /**
  * memory_region_init_reservation: Initialize a memory region that reserves
@@ -651,6 +679,94 @@ void memory_region_init_iommu(void *_iommu_mr,
                               uint64_t size);
 
 /**
+ * memory_region_init_ram - Initialize RAM memory region.  Accesses into the
+ *                          region will modify memory directly.
+ *
+ * @mr: the #MemoryRegion to be initialized
+ * @owner: the object that tracks the region's reference count (must be
+ *         TYPE_DEVICE or a subclass of TYPE_DEVICE, or NULL)
+ * @name: name of the memory region
+ * @size: size of the region in bytes
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * This function allocates RAM for a board model or device, and
+ * arranges for it to be migrated (by calling vmstate_register_ram()
+ * if @owner is a DeviceState, or vmstate_register_ram_global() if
+ * @owner is NULL).
+ *
+ * TODO: Currently we restrict @owner to being either NULL (for
+ * global RAM regions with no owner) or devices, so that we can
+ * give the RAM block a unique name for migration purposes.
+ * We should lift this restriction and allow arbitrary Objects.
+ * If you pass a non-NULL non-device @owner then we will assert.
+ */
+void memory_region_init_ram(MemoryRegion *mr,
+                            struct Object *owner,
+                            const char *name,
+                            uint64_t size,
+                            Error **errp);
+
+/**
+ * memory_region_init_rom: Initialize a ROM memory region.
+ *
+ * This has the same effect as calling memory_region_init_ram()
+ * and then marking the resulting region read-only with
+ * memory_region_set_readonly(). This includes arranging for the
+ * contents to be migrated.
+ *
+ * TODO: Currently we restrict @owner to being either NULL (for
+ * global RAM regions with no owner) or devices, so that we can
+ * give the RAM block a unique name for migration purposes.
+ * We should lift this restriction and allow arbitrary Objects.
+ * If you pass a non-NULL non-device @owner then we will assert.
+ *
+ * @mr: the #MemoryRegion to be initialized.
+ * @owner: the object that tracks the region's reference count
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
+ * @size: size of the region.
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+void memory_region_init_rom(MemoryRegion *mr,
+                            struct Object *owner,
+                            const char *name,
+                            uint64_t size,
+                            Error **errp);
+
+/**
+ * memory_region_init_rom_device:  Initialize a ROM memory region.
+ *                                 Writes are handled via callbacks.
+ *
+ * This function initializes a memory region backed by RAM for reads
+ * and callbacks for writes, and arranges for the RAM backing to
+ * be migrated (by calling vmstate_register_ram()
+ * if @owner is a DeviceState, or vmstate_register_ram_global() if
+ * @owner is NULL).
+ *
+ * TODO: Currently we restrict @owner to being either NULL (for
+ * global RAM regions with no owner) or devices, so that we can
+ * give the RAM block a unique name for migration purposes.
+ * We should lift this restriction and allow arbitrary Objects.
+ * If you pass a non-NULL non-device @owner then we will assert.
+ *
+ * @mr: the #MemoryRegion to be initialized.
+ * @owner: the object that tracks the region's reference count
+ * @ops: callbacks for write access handling (must not be NULL).
+ * @name: Region name, becomes part of RAMBlock name used in migration stream
+ *        must be unique within any device
+ * @size: size of the region.
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+void memory_region_init_rom_device(MemoryRegion *mr,
+                                   struct Object *owner,
+                                   const MemoryRegionOps *ops,
+                                   void *opaque,
+                                   const char *name,
+                                   uint64_t size,
+                                   Error **errp);
+
+
+/**
  * memory_region_owner: get a memory region's owner.
  *
  * @mr: the memory region being queried.
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 76ce0219ff..3363dd19fd 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -9,6 +9,35 @@
 #include "qom/object.h"
 #include "qom/cpu.h"
 
+/**
+ * memory_region_allocate_system_memory - Allocate a board's main memory
+ * @mr: the #MemoryRegion to be initialized
+ * @owner: the object that tracks the region's reference count
+ * @name: name of the memory region
+ * @ram_size: size of the region in bytes
+ *
+ * This function allocates the main memory for a board model, and
+ * initializes @mr appropriately. It also arranges for the memory
+ * to be migrated (by calling vmstate_register_ram_global()).
+ *
+ * Memory allocated via this function will be backed with the memory
+ * backend the user provided using "-mem-path" or "-numa node,memdev=..."
+ * if appropriate; this is typically used to cause host huge pages to be
+ * used. This function should therefore be called by a board exactly once,
+ * for the primary or largest RAM area it implements.
+ *
+ * For boards where the major RAM is split into two parts in the memory
+ * map, you can deal with this by calling memory_region_allocate_system_memory()
+ * once to get a MemoryRegion with enough RAM for both parts, and then
+ * creating alias MemoryRegions via memory_region_init_alias() which
+ * alias into different parts of the RAM MemoryRegion and can be mapped
+ * into the memory map in the appropriate places.
+ *
+ * Smaller pieces of memory (display RAM, static RAMs, etc) don't need
+ * to be backed via the -mem-path memory backend and can simply
+ * be created via memory_region_init_ram(), which also arranges
+ * for their contents to be migrated.
+ */
 void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
                                           const char *name,
                                           uint64_t ram_size);
diff --git a/include/hw/ppc/pnv_psi.h b/include/hw/ppc/pnv_psi.h
index 11d83e43f8..f6af5eae1f 100644
--- a/include/hw/ppc/pnv_psi.h
+++ b/include/hw/ppc/pnv_psi.h
@@ -28,8 +28,6 @@
 
 #define PSIHB_XSCOM_MAX         0x20
 
-typedef struct XICSState XICSState;
-
 typedef struct PnvPsi {
     SysBusDevice parent;
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 5f708eec23..2a303a705c 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -13,6 +13,7 @@ struct sPAPRPHBState;
 struct sPAPRNVRAM;
 typedef struct sPAPREventLogEntry sPAPREventLogEntry;
 typedef struct sPAPREventSource sPAPREventSource;
+typedef struct sPAPRPendingHPT sPAPRPendingHPT;
 
 #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
 #define SPAPR_ENTRY_POINT       0x100
@@ -42,6 +43,13 @@ typedef struct sPAPRMachineClass sPAPRMachineClass;
 #define SPAPR_MACHINE_CLASS(klass) \
     OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE)
 
+typedef enum {
+    SPAPR_RESIZE_HPT_DEFAULT = 0,  /* no explicit choice (TODO confirm) */
+    SPAPR_RESIZE_HPT_DISABLED,     /* H_RESIZE_HPT_COMMIT -> H_AUTHORITY */
+    SPAPR_RESIZE_HPT_ENABLED,      /* offered; guest may decline at CAS */
+    SPAPR_RESIZE_HPT_REQUIRED,     /* CAS exit(1)s if guest lacks support */
+} sPAPRResizeHPT;
+
 /**
  * sPAPRMachineClass:
  */
@@ -58,6 +66,7 @@ struct sPAPRMachineClass {
                           uint64_t *buid, hwaddr *pio, 
                           hwaddr *mmio32, hwaddr *mmio64,
                           unsigned n_dma, uint32_t *liobns, Error **errp);
+    sPAPRResizeHPT resize_hpt_default;
 };
 
 /**
@@ -73,9 +82,12 @@ struct sPAPRMachineState {
     ICSState *ics;
     sPAPRRTCState rtc;
 
+    sPAPRResizeHPT resize_hpt;
     void *htab;
     uint32_t htab_shift;
     uint64_t patb_entry; /* Process tbl registed in H_REGISTER_PROCESS_TABLE */
+    sPAPRPendingHPT *pending_hpt; /* in-progress resize */
+
     hwaddr rma_size;
     int vrma_adjust;
     ssize_t rtas_size;
@@ -367,6 +379,8 @@ struct sPAPRMachineState {
 #define H_XIRR_X                0x2FC
 #define H_RANDOM                0x300
 #define H_SET_MODE              0x31C
+#define H_RESIZE_HPT_PREPARE    0x36C
+#define H_RESIZE_HPT_COMMIT     0x370
 #define H_CLEAN_SLB             0x374
 #define H_INVALIDATE_PID        0x378
 #define H_REGISTER_PROC_TBL     0x37C
@@ -607,8 +621,9 @@ struct sPAPRTCETable {
 sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn);
 
 struct sPAPREventLogEntry {
-    int log_type;
-    void *data;
+    uint32_t summary;
+    uint32_t extended_length;
+    void *extended_log;
     QTAILQ_ENTRY(sPAPREventLogEntry) next;
 };
 
@@ -644,6 +659,9 @@ void spapr_hotplug_req_add_by_count_indexed(sPAPRDRConnectorType drc_type,
 void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
                                                uint32_t count, uint32_t index);
 void spapr_cpu_parse_features(sPAPRMachineState *spapr);
+int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
+void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
+                          Error **errp);
 
 /* CPU and LMB DRC release callbacks. */
 void spapr_core_release(DeviceState *dev);
@@ -684,4 +702,6 @@ int spapr_rng_populate_dt(void *fdt);
 
 void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg);
 
+#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
+
 #endif /* HW_SPAPR_H */
diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
index d15e9eb3b4..a7958d0a8d 100644
--- a/include/hw/ppc/spapr_drc.h
+++ b/include/hw/ppc/spapr_drc.h
@@ -15,6 +15,7 @@
 
 #include <libfdt.h>
 #include "qom/object.h"
+#include "sysemu/sysemu.h"
 #include "hw/qdev.h"
 
 #define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector"
@@ -32,7 +33,7 @@
 #define SPAPR_DRC_PHYSICAL_CLASS(klass) \
         OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, \
                            TYPE_SPAPR_DRC_PHYSICAL)
-#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \
+#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(sPAPRDRCPhysical, (obj), \
                                              TYPE_SPAPR_DRC_PHYSICAL)
 
 #define TYPE_SPAPR_DRC_LOGICAL "spapr-drc-logical"
@@ -172,11 +173,23 @@ typedef enum {
     SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE = -9003,
 } sPAPRDRCCResponse;
 
-/* rtas-configure-connector state */
-typedef struct sPAPRConfigureConnectorState {
-    int fdt_offset;
-    int fdt_depth;
-} sPAPRConfigureConnectorState;
+typedef enum {
+    /*
+     * Values come from Fig. 12 in LoPAPR section 13.4
+     *
+     * These are exposed in the migration stream, so don't change
+     * them.
+     */
+    SPAPR_DRC_STATE_INVALID             = 0,
+    SPAPR_DRC_STATE_LOGICAL_UNUSABLE    = 1,
+    SPAPR_DRC_STATE_LOGICAL_AVAILABLE   = 2,
+    SPAPR_DRC_STATE_LOGICAL_UNISOLATE   = 3,
+    SPAPR_DRC_STATE_LOGICAL_CONFIGURED  = 4,
+    SPAPR_DRC_STATE_PHYSICAL_AVAILABLE  = 5,
+    SPAPR_DRC_STATE_PHYSICAL_POWERON    = 6,
+    SPAPR_DRC_STATE_PHYSICAL_UNISOLATE  = 7,
+    SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8,
+} sPAPRDRCState;
 
 typedef struct sPAPRDRConnector {
     /*< private >*/
@@ -185,29 +198,25 @@ typedef struct sPAPRDRConnector {
     uint32_t id;
     Object *owner;
 
-    /* DR-indicator */
-    uint32_t dr_indicator;
+    uint32_t state;
 
-    /* sensor/indicator states */
-    uint32_t isolation_state;
-    uint32_t allocation_state;
-
-    /* configure-connector state */
-    void *fdt;
-    int fdt_start_offset;
-    bool configured;
-    sPAPRConfigureConnectorState *ccs;
-
-    bool awaiting_release;
-    bool awaiting_allocation;
+    /* RTAS ibm,configure-connector state */
+    /* (only valid in UNISOLATE state) */
+    int ccs_offset;
+    int ccs_depth;
 
     /* device pointer, via link property */
     DeviceState *dev;
+    bool unplug_requested;
+    void *fdt;
+    int fdt_start_offset;
 } sPAPRDRConnector;
 
 typedef struct sPAPRDRConnectorClass {
     /*< private >*/
     DeviceClass parent;
+    sPAPRDRCState empty_state;
+    sPAPRDRCState ready_state;
 
     /*< public >*/
     sPAPRDRConnectorTypeShift typeshift;
@@ -218,11 +227,23 @@ typedef struct sPAPRDRConnectorClass {
     uint32_t (*isolate)(sPAPRDRConnector *drc);
     uint32_t (*unisolate)(sPAPRDRConnector *drc);
     void (*release)(DeviceState *dev);
-
-    /* QEMU interfaces for managing hotplug operations */
-    bool (*release_pending)(sPAPRDRConnector *drc);
 } sPAPRDRConnectorClass;
 
+typedef struct sPAPRDRCPhysical {
+    /*< private >*/
+    sPAPRDRConnector parent;
+
+    /* DR-indicator */
+    uint32_t dr_indicator;
+} sPAPRDRCPhysical;
+
+static inline bool spapr_drc_hotplugged(DeviceState *dev)
+{
+    return dev->hotplugged && !runstate_check(RUN_STATE_INMIGRATE);
+}
+
+void spapr_drc_reset(sPAPRDRConnector *drc);
+
 uint32_t spapr_drc_index(sPAPRDRConnector *drc);
 sPAPRDRConnectorType spapr_drc_type(sPAPRDRConnector *drc);
 
@@ -235,6 +256,11 @@ int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner,
 
 void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
                       int fdt_start_offset, Error **errp);
-void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp);
+void spapr_drc_detach(sPAPRDRConnector *drc);
+
+static inline bool spapr_drc_unplug_requested(sPAPRDRConnector *drc)
+{
+    return drc->unplug_requested;
+}
 
 #endif /* HW_SPAPR_DRC_H */
diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
index 0b464e22e7..9edfa5ff75 100644
--- a/include/hw/ppc/spapr_ovec.h
+++ b/include/hw/ppc/spapr_ovec.h
@@ -50,6 +50,7 @@ typedef struct sPAPROptionVector sPAPROptionVector;
 #define OV5_DRCONF_MEMORY       OV_BIT(2, 2)
 #define OV5_FORM1_AFFINITY      OV_BIT(5, 0)
 #define OV5_HP_EVT              OV_BIT(6, 5)
+#define OV5_HPT_RESIZE          OV_BIT(6, 7)
 #define OV5_XIVE_EXPLOIT        OV_BIT(23, 7)
 
 /* ISA 3.00 MMU features: */
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index a4509bd977..9aff9a735e 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -229,6 +229,24 @@ void qemu_co_rwlock_init(CoRwlock *lock);
 void qemu_co_rwlock_rdlock(CoRwlock *lock);
 
 /**
+ * Upgrades a reader of the CoRwlock to a writer.  This is more efficient than
+ * @qemu_co_rwlock_unlock followed by a separate @qemu_co_rwlock_wrlock.
+ * However, if the lock cannot be upgraded immediately, control is transferred
+ * to the caller of the current coroutine.  Also, @qemu_co_rwlock_upgrade
+ * only overrides CoRwlock fairness if there are no concurrent readers, so
+ * another writer might run while @qemu_co_rwlock_upgrade blocks.
+ */
+void qemu_co_rwlock_upgrade(CoRwlock *lock);
+
+/**
+ * Downgrades a write-side critical section to a reader.  Downgrading with
+ * @qemu_co_rwlock_downgrade never blocks, unlike @qemu_co_rwlock_unlock
+ * followed by @qemu_co_rwlock_rdlock.  This makes it more efficient, but
+ * may also sometimes be necessary for correctness.
+ */
+void qemu_co_rwlock_downgrade(CoRwlock *lock);
+
+/**
  * Write Locks the mutex. If the lock cannot be taken immediately because
  * of a parallel reader, control is transferred to the caller of the current
  * coroutine.
diff --git a/memory.c b/memory.c
index 69f697c20e..a7bc70aac1 100644
--- a/memory.c
+++ b/memory.c
@@ -32,6 +32,7 @@
 #include "sysemu/sysemu.h"
 #include "hw/misc/mmio_interface.h"
 #include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
 
 //#define DEBUG_UNASSIGNED
 
@@ -1365,11 +1366,11 @@ void memory_region_init_io(MemoryRegion *mr,
     mr->terminates = true;
 }
 
-void memory_region_init_ram(MemoryRegion *mr,
-                            Object *owner,
-                            const char *name,
-                            uint64_t size,
-                            Error **errp)
+void memory_region_init_ram_nomigrate(MemoryRegion *mr,
+                                      Object *owner,
+                                      const char *name,
+                                      uint64_t size,
+                                      Error **errp)
 {
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
@@ -1473,11 +1474,11 @@ void memory_region_init_alias(MemoryRegion *mr,
     mr->alias_offset = offset;
 }
 
-void memory_region_init_rom(MemoryRegion *mr,
-                            struct Object *owner,
-                            const char *name,
-                            uint64_t size,
-                            Error **errp)
+void memory_region_init_rom_nomigrate(MemoryRegion *mr,
+                                      struct Object *owner,
+                                      const char *name,
+                                      uint64_t size,
+                                      Error **errp)
 {
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
@@ -1488,13 +1489,13 @@ void memory_region_init_rom(MemoryRegion *mr,
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
 }
 
-void memory_region_init_rom_device(MemoryRegion *mr,
-                                   Object *owner,
-                                   const MemoryRegionOps *ops,
-                                   void *opaque,
-                                   const char *name,
-                                   uint64_t size,
-                                   Error **errp)
+void memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
+                                             Object *owner,
+                                             const MemoryRegionOps *ops,
+                                             void *opaque,
+                                             const char *name,
+                                             uint64_t size,
+                                             Error **errp)
 {
     assert(ops);
     memory_region_init(mr, owner, name, size);
@@ -2848,6 +2849,81 @@ void mtree_info(fprintf_function mon_printf, void *f, bool flatview)
     }
 }
 
+void memory_region_init_ram(MemoryRegion *mr,
+                            struct Object *owner,
+                            const char *name,
+                            uint64_t size,
+                            Error **errp)
+{
+    DeviceState *owner_dev;
+    Error *err = NULL;
+
+    memory_region_init_ram_nomigrate(mr, owner, name, size, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    /* This will assert if owner is neither NULL nor a DeviceState.
+     * We only want the owner here for the purposes of defining a
+     * unique name for migration. TODO: Ideally we should implement
+     * a naming scheme for Objects which are not DeviceStates, in
+     * which case we can relax this restriction.
+     */
+    owner_dev = DEVICE(owner);
+    vmstate_register_ram(mr, owner_dev);
+}
+
+void memory_region_init_rom(MemoryRegion *mr,
+                            struct Object *owner,
+                            const char *name,
+                            uint64_t size,
+                            Error **errp)
+{
+    DeviceState *owner_dev;
+    Error *err = NULL;
+
+    memory_region_init_rom_nomigrate(mr, owner, name, size, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    /* This will assert if owner is neither NULL nor a DeviceState.
+     * We only want the owner here for the purposes of defining a
+     * unique name for migration. TODO: Ideally we should implement
+     * a naming scheme for Objects which are not DeviceStates, in
+     * which case we can relax this restriction.
+     */
+    owner_dev = DEVICE(owner);
+    vmstate_register_ram(mr, owner_dev);
+}
+
+void memory_region_init_rom_device(MemoryRegion *mr,
+                                   struct Object *owner,
+                                   const MemoryRegionOps *ops,
+                                   void *opaque,
+                                   const char *name,
+                                   uint64_t size,
+                                   Error **errp)
+{
+    DeviceState *owner_dev;
+    Error *err = NULL;
+
+    memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque,
+                                            name, size, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    /* This will assert if owner is neither NULL nor a DeviceState.
+     * We only want the owner here for the purposes of defining a
+     * unique name for migration. TODO: Ideally we should implement
+     * a naming scheme for Objects which are not DeviceStates, in
+     * which case we can relax this restriction.
+     */
+    owner_dev = DEVICE(owner);
+    vmstate_register_ram(mr, owner_dev);
+}
+
 static const TypeInfo memory_region_info = {
     .parent             = TYPE_OBJECT,
     .name               = TYPE_MEMORY_REGION,
diff --git a/numa.c b/numa.c
index b0e75f6268..e32af04cd2 100644
--- a/numa.c
+++ b/numa.c
@@ -542,14 +542,14 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
             /* Legacy behavior: if allocation failed, fall back to
              * regular RAM allocation.
              */
-            memory_region_init_ram(mr, owner, name, ram_size, &error_fatal);
+            memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal);
         }
 #else
         fprintf(stderr, "-mem-path not supported on this host\n");
         exit(1);
 #endif
     } else {
-        memory_region_init_ram(mr, owner, name, ram_size, &error_fatal);
+        memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal);
     }
     vmstate_register_ram_global(mr);
 }
diff --git a/pc-bios/efi-e1000.rom b/pc-bios/efi-e1000.rom
index 675992428d..4da9de33da 100644
--- a/pc-bios/efi-e1000.rom
+++ b/pc-bios/efi-e1000.rom
diff --git a/pc-bios/efi-e1000e.rom b/pc-bios/efi-e1000e.rom
index 145896c219..c2474a8fab 100644
--- a/pc-bios/efi-e1000e.rom
+++ b/pc-bios/efi-e1000e.rom
diff --git a/pc-bios/efi-eepro100.rom b/pc-bios/efi-eepro100.rom
index ff2793f974..7950faf7cd 100644
--- a/pc-bios/efi-eepro100.rom
+++ b/pc-bios/efi-eepro100.rom
diff --git a/pc-bios/efi-ne2k_pci.rom b/pc-bios/efi-ne2k_pci.rom
index c832ec017e..30edb1392a 100644
--- a/pc-bios/efi-ne2k_pci.rom
+++ b/pc-bios/efi-ne2k_pci.rom
diff --git a/pc-bios/efi-pcnet.rom b/pc-bios/efi-pcnet.rom
index 4d803d30bc..23057c5724 100644
--- a/pc-bios/efi-pcnet.rom
+++ b/pc-bios/efi-pcnet.rom
diff --git a/pc-bios/efi-rtl8139.rom b/pc-bios/efi-rtl8139.rom
index 83488cd54b..beb9301839 100644
--- a/pc-bios/efi-rtl8139.rom
+++ b/pc-bios/efi-rtl8139.rom
diff --git a/pc-bios/efi-virtio.rom b/pc-bios/efi-virtio.rom
index 3563776dbd..f4de5957ec 100644
--- a/pc-bios/efi-virtio.rom
+++ b/pc-bios/efi-virtio.rom
diff --git a/pc-bios/efi-vmxnet3.rom b/pc-bios/efi-vmxnet3.rom
index e22275253b..7501477ea6 100644
--- a/pc-bios/efi-vmxnet3.rom
+++ b/pc-bios/efi-vmxnet3.rom
diff --git a/pc-bios/keymaps/fr-ca b/pc-bios/keymaps/fr-ca
index b645208e42..030f56a78e 100644
--- a/pc-bios/keymaps/fr-ca
+++ b/pc-bios/keymaps/fr-ca
@@ -48,3 +48,5 @@ parenleft 0xa shift
 parenright 0xb shift
 underscore 0xc shift
 plus 0xd shift
+minus 0xc
+equal 0xd
diff --git a/pc-bios/u-boot.e500 b/pc-bios/u-boot.e500
index 6e547de6f9..25537f8fe3 100755
--- a/pc-bios/u-boot.e500
+++ b/pc-bios/u-boot.e500
diff --git a/qemu-options.hx b/qemu-options.hx
index 2cc70b9cfc..9bd6bf0eee 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1753,7 +1753,7 @@ spec but is traditional QEMU behavior.
 @item key-delay-ms
 
 Set keyboard delay, for key down and key up events, in milliseconds.
-Default is 1.  Keyboards are low-bandwidth devices, so this slowdown
+Default is 10.  Keyboards are low-bandwidth devices, so this slowdown
 can help the device and guest to keep up and not lose events in case
 events are arriving in bulk.  Possible causes for the latter are flaky
 network connections, or scripts for automated testing.
diff --git a/roms/ipxe b/roms/ipxe
-Subproject b991c67c1d91574ef22336cc3a5944d1e63230c
+Subproject 0600d3ae94f93efd10fc6b3c7420a9557a3a167
diff --git a/roms/u-boot b/roms/u-boot
-Subproject 2072e7262965bb48d7fffb1e283101e6ed8b21a
+Subproject d85ca029f257b53a96da6c2fb421e78a003a994
diff --git a/scripts/coccinelle/memory-region-init-ram.cocci b/scripts/coccinelle/memory-region-init-ram.cocci
new file mode 100644
index 0000000000..d290150872
--- /dev/null
+++ b/scripts/coccinelle/memory-region-init-ram.cocci
@@ -0,0 +1,38 @@
+// Replace by-hand memory_region_init_ram_nomigrate/vmstate_register_ram
+// code sequences with use of the new memory_region_init_ram function.
+// Similarly for the _rom and _rom_device functions.
+// We don't try to replace sequences with a non-NULL owner, because
+// there are none in the tree that can be automatically converted
+// (and only a handful that can be manually converted).
+@@
+expression MR;
+expression NAME;
+expression SIZE;
+expression ERRP;
+@@
+-memory_region_init_ram_nomigrate(MR, NULL, NAME, SIZE, ERRP);
++memory_region_init_ram(MR, NULL, NAME, SIZE, ERRP);
+ ...
+-vmstate_register_ram_global(MR);
+@@
+expression MR;
+expression NAME;
+expression SIZE;
+expression ERRP;
+@@
+-memory_region_init_rom_nomigrate(MR, NULL, NAME, SIZE, ERRP);
++memory_region_init_rom(MR, NULL, NAME, SIZE, ERRP);
+ ...
+-vmstate_register_ram_global(MR);
+@@
+expression MR;
+expression OPS;
+expression OPAQUE;
+expression NAME;
+expression SIZE;
+expression ERRP;
+@@
+-memory_region_init_rom_device_nomigrate(MR, NULL, OPS, OPAQUE, NAME, SIZE, ERRP);
++memory_region_init_rom_device(MR, NULL, OPS, OPAQUE, NAME, SIZE, ERRP);
+ ...
+-vmstate_register_ram_global(MR);
diff --git a/slirp/ip6.h b/slirp/ip6.h
index 0908855f0f..b1bea43b3c 100644
--- a/slirp/ip6.h
+++ b/slirp/ip6.h
@@ -57,9 +57,9 @@ static inline bool in6_equal_mach(const struct in6_addr *a,
                                   const struct in6_addr *b,
                                   int prefix_len)
 {
-    if (memcmp(&(a->s6_addr[(prefix_len + 7) / 8]),
-               &(b->s6_addr[(prefix_len + 7) / 8]),
-               16 - (prefix_len + 7) / 8) != 0) {
+    if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]),
+               &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]),
+               16 - DIV_ROUND_UP(prefix_len, 8)) != 0) {
         return 0;
     }
 
diff --git a/slirp/misc.c b/slirp/misc.c
index 88e9d94197..260187b6b6 100644
--- a/slirp/misc.c
+++ b/slirp/misc.c
@@ -112,7 +112,9 @@ fork_exec(struct socket *so, const char *ex, int do_pty)
 		    bind(s, (struct sockaddr *)&addr, addrlen) < 0 ||
 		    listen(s, 1) < 0) {
 			error_report("Error: inet socket: %s", strerror(errno));
-			closesocket(s);
+			if (s >= 0) {
+			    closesocket(s);
+			}
 
 			return 0;
 		}
diff --git a/slirp/sbuf.c b/slirp/sbuf.c
index 10119d3ad5..912f235f65 100644
--- a/slirp/sbuf.c
+++ b/slirp/sbuf.c
@@ -91,7 +91,7 @@ sbappend(struct socket *so, struct mbuf *m)
 	if (so->so_urgc) {
 		sbappendsb(&so->so_rcv, m);
 		m_free(m);
-		sosendoob(so);
+		(void)sosendoob(so);
 		return;
 	}
 
diff --git a/slirp/socket.c b/slirp/socket.c
index 3b49a69a93..ecec0295a9 100644
--- a/slirp/socket.c
+++ b/slirp/socket.c
@@ -345,33 +345,40 @@ sosendoob(struct socket *so)
 	if (sb->sb_rptr < sb->sb_wptr) {
 		/* We can send it directly */
 		n = slirp_send(so, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
-		so->so_urgc -= n;
-
-		DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
 	} else {
 		/*
 		 * Since there's no sendv or sendtov like writev,
 		 * we must copy all data to a linear buffer then
 		 * send it all
 		 */
+		uint32_t urgc = so->so_urgc;
 		len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
-		if (len > so->so_urgc) len = so->so_urgc;
+		if (len > urgc) {
+			len = urgc;
+		}
 		memcpy(buff, sb->sb_rptr, len);
-		so->so_urgc -= len;
-		if (so->so_urgc) {
+		urgc -= len;
+		if (urgc) {
 			n = sb->sb_wptr - sb->sb_data;
-			if (n > so->so_urgc) n = so->so_urgc;
+			if (n > urgc) {
+				n = urgc;
+			}
 			memcpy((buff + len), sb->sb_data, n);
-			so->so_urgc -= n;
 			len += n;
 		}
 		n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
+	}
+
 #ifdef DEBUG
-		if (n != len)
-		   DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
+	if (n != len) {
+		DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
+	}
 #endif
-		DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
+	if (n < 0) {
+		return n;
 	}
+	so->so_urgc -= n;
+	DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
 
 	sb->sb_cc -= n;
 	sb->sb_rptr += n;
@@ -397,7 +404,15 @@ sowrite(struct socket *so)
 	DEBUG_ARG("so = %p", so);
 
 	if (so->so_urgc) {
-		sosendoob(so);
+		uint32_t expected = so->so_urgc;
+		if ((int64_t)sosendoob(so) < (int64_t)expected) {
+			/* Treat an error return or a short write as fatal,
+			 * rather than continuing on and sending the urgent
+			 * data as if it were non-urgent and leaving the
+			 * so_urgc count wrong (int64_t keeps -1 negative).
+			 */
+			goto err_disconnected;
+		}
 		if (sb->sb_cc == 0)
 			return 0;
 	}
@@ -441,11 +456,7 @@ sowrite(struct socket *so)
 		return 0;
 
 	if (nn <= 0) {
-		DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
-			so->so_state, errno));
-		sofcantsendmore(so);
-		tcp_sockclosed(sototcpcb(so));
-		return -1;
+		goto err_disconnected;
 	}
 
 #ifndef HAVE_READV
@@ -472,6 +483,13 @@ sowrite(struct socket *so)
 		sofcantsendmore(so);
 
 	return nn;
+
+err_disconnected:
+	DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
+		    so->so_state, errno));
+	sofcantsendmore(so);
+	tcp_sockclosed(sototcpcb(so));
+	return -1;
 }
 
 /*
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index f7a7ea5858..85713795de 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -22,6 +22,7 @@
 #include <linux/kvm.h>
 
 #include "qemu-common.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "cpu.h"
 #include "cpu-models.h"
@@ -88,6 +89,7 @@ static int cap_fixup_hcalls;
 static int cap_htm;             /* Hardware transactional memory support */
 static int cap_mmu_radix;
 static int cap_mmu_hash_v3;
+static int cap_resize_hpt;
 
 static uint32_t debug_inst_opcode;
 
@@ -144,6 +146,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
+    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 
     if (!cap_interrupt_level) {
         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@@ -2709,3 +2712,76 @@ int kvmppc_enable_hwrng(void)
 
     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
 }
+
+void kvmppc_check_papr_resize_hpt(Error **errp)
+{
+    if (!kvm_enabled()) {
+        return; /* No KVM, we're good */
+    }
+
+    if (cap_resize_hpt) {
+        return; /* Kernel has explicit support, we're good */
+    }
+
+    /* Otherwise fallback on looking for PR KVM */
+    if (kvmppc_is_pr(kvm_state)) {
+        return;
+    }
+
+    error_setg(errp,
+               "Hash page table resizing not available with this KVM version");
+}
+
+int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
+{
+    CPUState *cs = CPU(cpu);
+    struct kvm_ppc_resize_hpt rhpt = {
+        .flags = flags,
+        .shift = shift,
+    };
+
+    if (!cap_resize_hpt) {
+        return -ENOSYS;
+    }
+
+    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
+}
+
+int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
+{
+    CPUState *cs = CPU(cpu);
+    struct kvm_ppc_resize_hpt rhpt = {
+        .flags = flags,
+        .shift = shift,
+    };
+
+    if (!cap_resize_hpt) {
+        return -ENOSYS;
+    }
+
+    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
+}
+
+static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
+{
+    target_ulong sdr1 = arg.target_ptr;
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+
+    /* This is just for the benefit of PR KVM */
+    cpu_synchronize_state(cs);
+    env->spr[SPR_SDR1] = sdr1;
+    if (kvmppc_put_books_sregs(cpu) < 0) {
+        error_report("Unable to update SDR1 in KVM");
+        exit(1);
+    }
+}
+
+void kvmppc_update_sdr1(target_ulong sdr1)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
+    }
+}
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index eab7c8fdb3..6bc6fb3e2d 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -63,6 +63,10 @@ bool kvmppc_has_cap_mmu_hash_v3(void);
 int kvmppc_enable_hwrng(void);
 int kvmppc_put_books_sregs(PowerPCCPU *cpu);
 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
+void kvmppc_check_papr_resize_hpt(Error **errp);
+int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift);
+int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift);
+void kvmppc_update_sdr1(target_ulong sdr1);
 
 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path);
 
@@ -297,6 +301,28 @@ static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
     return NULL;
 }
 
+static inline void kvmppc_check_papr_resize_hpt(Error **errp)
+{
+    return;
+}
+
+static inline int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu,
+                                            target_ulong flags, int shift)
+{
+    return -ENOSYS;
+}
+
+static inline int kvmppc_resize_hpt_commit(PowerPCCPU *cpu,
+                                           target_ulong flags, int shift)
+{
+    return -ENOSYS;
+}
+
+static inline void kvmppc_update_sdr1(target_ulong sdr1)
+{
+    abort();
+}
+
 #endif
 
 #ifndef CONFIG_KVM
diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h
index 54f1e37655..d297b97d37 100644
--- a/target/ppc/mmu-hash64.h
+++ b/target/ppc/mmu-hash64.h
@@ -63,11 +63,15 @@ void ppc_hash64_update_rmls(CPUPPCState *env);
 #define HASH_PTE_SIZE_64        16
 #define HASH_PTEG_SIZE_64       (HASH_PTE_SIZE_64 * HPTES_PER_GROUP)
 
+#define HPTE64_V_SSIZE          SLB_VSID_B
+#define HPTE64_V_SSIZE_256M     SLB_VSID_B_256M
+#define HPTE64_V_SSIZE_1T       SLB_VSID_B_1T
 #define HPTE64_V_SSIZE_SHIFT    62
 #define HPTE64_V_AVPN_SHIFT     7
 #define HPTE64_V_AVPN           0x3fffffffffffff80ULL
 #define HPTE64_V_AVPN_VAL(x)    (((x) & HPTE64_V_AVPN) >> HPTE64_V_AVPN_SHIFT)
 #define HPTE64_V_COMPARE(x, y)  (!(((x) ^ (y)) & 0xffffffffffffff83ULL))
+#define HPTE64_V_BOLTED         0x0000000000000010ULL
 #define HPTE64_V_LARGE          0x0000000000000004ULL
 #define HPTE64_V_SECONDARY      0x0000000000000002ULL
 #define HPTE64_V_VALID          0x0000000000000001ULL
diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
index ae25fafab9..b325c2cce6 100644
--- a/target/ppc/translate_init.c
+++ b/target/ppc/translate_init.c
@@ -9011,8 +9011,16 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
         /* By default we choose legacy mode and switch to new hash or radix
          * when a register process table hcall is made. So disable process
          * tables and guest translation shootdown by default
+         *
+         * Hot-plugged CPUs inherit from the guest radix setting under
+         * KVM but not under TCG. Update the default LPCR to keep new
+         * CPUs in sync when radix is enabled.
          */
-        lpcr->default_value &= ~(LPCR_UPRT | LPCR_GTSE);
+        if (ppc64_radix_guest(cpu)) {
+            lpcr->default_value |= LPCR_UPRT | LPCR_GTSE;
+        } else {
+            lpcr->default_value &= ~(LPCR_UPRT | LPCR_GTSE);
+        }
         lpcr->default_value |= LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE |
                                LPCR_OEE;
         break;
diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
index 037cb9e9e7..012a2fc1af 100644
--- a/tests/docker/Makefile.include
+++ b/tests/docker/Makefile.include
@@ -106,6 +106,8 @@ docker:
 	@echo '                         (default is 1)'
 	@echo '    DEBUG=1              Stop and drop to shell in the created container'
 	@echo '                         before running the command.'
+	@echo '    NETWORK=1            Enable virtual network interface with default backend.'
+	@echo '    NETWORK=$$BACKEND     Enable virtual network interface with $$BACKEND.'
 	@echo '    NOUSER               Define to disable adding current user to containers passwd.'
 	@echo '    NOCACHE=1            Ignore cache when build images.'
 	@echo '    EXECUTABLE=<path>    Include executable in image.'
@@ -132,7 +134,8 @@ docker-run: docker-qemu-src
 		$(SRC_PATH)/tests/docker/docker.py run 			\
 			$(if $(NOUSER),,-u $(shell id -u)) -t 		\
 			$(if $V,,--rm) 					\
-			$(if $(DEBUG),-i,--net=none) 			\
+			$(if $(DEBUG),-i,)				\
+			$(if $(NETWORK),$(if $(subst $(NETWORK),,1),--net=$(NETWORK)),--net=none) \
 			-e TARGET_LIST=$(TARGET_LIST) 			\
 			-e EXTRA_CONFIGURE_OPTS="$(EXTRA_CONFIGURE_OPTS)" \
 			-e V=$V -e J=$J -e DEBUG=$(DEBUG)		\
diff --git a/tests/docker/docker.py b/tests/docker/docker.py
index e707e5bcca..ee40ca04d9 100755
--- a/tests/docker/docker.py
+++ b/tests/docker/docker.py
@@ -112,13 +112,16 @@ class Docker(object):
         signal.signal(signal.SIGTERM, self._kill_instances)
         signal.signal(signal.SIGHUP, self._kill_instances)
 
-    def _do(self, cmd, quiet=True, infile=None, **kwargs):
+    def _do(self, cmd, quiet=True, **kwargs):
         if quiet:
             kwargs["stdout"] = DEVNULL
-        if infile:
-            kwargs["stdin"] = infile
         return subprocess.call(self._command + cmd, **kwargs)
 
+    def _do_check(self, cmd, quiet=True, **kwargs):
+        if quiet:
+            kwargs["stdout"] = DEVNULL
+        return subprocess.check_call(self._command + cmd, **kwargs)
+
     def _do_kill_instances(self, only_known, only_active=True):
         cmd = ["ps", "-q"]
         if not only_active:
@@ -177,14 +180,14 @@ class Docker(object):
                                     extra_files_cksum)))
         tmp_df.flush()
 
-        self._do(["build", "-t", tag, "-f", tmp_df.name] + argv + \
-                 [docker_dir],
-                 quiet=quiet)
+        self._do_check(["build", "-t", tag, "-f", tmp_df.name] + argv + \
+                       [docker_dir],
+                       quiet=quiet)
 
     def update_image(self, tag, tarball, quiet=True):
         "Update a tagged image using "
 
-        self._do(["build", "-t", tag, "-"], quiet=quiet, infile=tarball)
+        self._do_check(["build", "-t", tag, "-"], quiet=quiet, stdin=tarball)
 
     def image_matches_dockerfile(self, tag, dockerfile):
         try:
@@ -197,9 +200,9 @@ class Docker(object):
         label = uuid.uuid1().hex
         if not keep:
             self._instances.append(label)
-        ret = self._do(["run", "--label",
-                        "com.qemu.instance.uuid=" + label] + cmd,
-                       quiet=quiet)
+        ret = self._do_check(["run", "--label",
+                             "com.qemu.instance.uuid=" + label] + cmd,
+                             quiet=quiet)
         if not keep:
             self._instances.remove(label)
         return ret
diff --git a/ui/vnc.c b/ui/vnc.c
index 26136f5d29..eb91559b6b 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3808,7 +3808,7 @@ void vnc_display_open(const char *id, Error **errp)
     }
 
     lock_key_sync = qemu_opt_get_bool(opts, "lock-key-sync", true);
-    key_delay_ms = qemu_opt_get_number(opts, "key-delay-ms", 1);
+    key_delay_ms = qemu_opt_get_number(opts, "key-delay-ms", 10);
     sasl = qemu_opt_get_bool(opts, "sasl", false);
 #ifndef CONFIG_VNC_SASL
     if (sasl) {
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index b44b5d55eb..846ff9167f 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -402,6 +402,21 @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
     qemu_co_mutex_unlock(&lock->mutex);
 }
 
+void qemu_co_rwlock_downgrade(CoRwlock *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    /* lock->mutex critical section started in qemu_co_rwlock_wrlock or
+     * qemu_co_rwlock_upgrade.
+     */
+    assert(lock->reader == 0);
+    lock->reader++;
+    qemu_co_mutex_unlock(&lock->mutex);
+
+    /* The rest of the read-side critical section is run without the mutex.  */
+    self->locks_held++;
+}
+
 void qemu_co_rwlock_wrlock(CoRwlock *lock)
 {
     qemu_co_mutex_lock(&lock->mutex);
@@ -416,3 +431,23 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock)
      * There is no need to update self->locks_held.
      */
 }
+
+void qemu_co_rwlock_upgrade(CoRwlock *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    qemu_co_mutex_lock(&lock->mutex);
+    assert(lock->reader > 0);
+    lock->reader--;
+    lock->pending_writer++;
+    while (lock->reader) {
+        qemu_co_queue_wait(&lock->queue, &lock->mutex);
+    }
+    lock->pending_writer--;
+
+    /* The rest of the write-side critical section is run with
+     * the mutex taken, similar to qemu_co_rwlock_wrlock.  Do
+     * not account for the lock twice in self->locks_held.
+     */
+    self->locks_held--;
+}