From 924fe1293c3e7a3c787bbdfb351e7f168caee3e9 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 3 Jun 2014 11:21:01 +0200
Subject: aio: fix qemu_bh_schedule() bh->ctx race condition

qemu_bh_schedule() is supposed to be thread-safe at least the first time
it is called.  Unfortunately this is not quite true:

  bh->scheduled = 1;
  aio_notify(bh->ctx);

Since another thread may run the BH callback once it has been scheduled,
there is a race condition if the callback frees the BH before
aio_notify(bh->ctx) has a chance to run.

Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
---
 async.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/async.c b/async.c
index 6930185e64..5b6fe6b4cc 100644
--- a/async.c
+++ b/async.c
@@ -117,15 +117,21 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
 
 void qemu_bh_schedule(QEMUBH *bh)
 {
+    AioContext *ctx;
+
     if (bh->scheduled)
         return;
+    ctx = bh->ctx;
     bh->idle = 0;
-    /* Make sure that idle & any writes needed by the callback are done
-     * before the locations are read in the aio_bh_poll.
+    /* Make sure that:
+     * 1. idle & any writes needed by the callback are done before the
+     *    locations are read in the aio_bh_poll.
+     * 2. ctx is loaded before scheduled is set and the callback has a chance
+     *    to execute.
      */
-    smp_wmb();
+    smp_mb();
     bh->scheduled = 1;
-    aio_notify(bh->ctx);
+    aio_notify(ctx);
 }
 
 
-- 
cgit v1.2.1


From 2572b37a4751cc967582d7d04f21d9bf97187ae5 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:34 +0200
Subject: block: use BlockDriverState AioContext

Drop the assumption that we're using the main AioContext.  Convert
qemu_aio_wait() to aio_poll() and qemu_bh_new() to aio_bh_new() so the
BlockDriverState AioContext is used.

Note there is still one qemu_aio_wait() left in bdrv_create() but we do
not have a BlockDriverState there and only main loop code invokes this
function.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/block.c b/block.c
index 310ea89fce..05ec8e13e4 100644
--- a/block.c
+++ b/block.c
@@ -2775,10 +2775,12 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
         /* Fast-path if already in coroutine context */
         bdrv_rw_co_entry(&rwco);
     } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
         co = qemu_coroutine_create(bdrv_rw_co_entry);
         qemu_coroutine_enter(co, &rwco);
         while (rwco.ret == NOT_DONE) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
     return rwco.ret;
@@ -4025,10 +4027,12 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
         /* Fast-path if already in coroutine context */
         bdrv_get_block_status_co_entry(&data);
     } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
         co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
         qemu_coroutine_enter(co, &data);
         while (!data.done) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
     return data.ret;
@@ -4621,7 +4625,7 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
     acb->is_write = is_write;
     acb->qiov = qiov;
     acb->bounce = qemu_blockalign(bs, qiov->size);
-    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
 
     if (is_write) {
         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
@@ -4660,13 +4664,14 @@ typedef struct BlockDriverAIOCBCoroutine {
 
 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
 {
+    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
     BlockDriverAIOCBCoroutine *acb =
         container_of(blockacb, BlockDriverAIOCBCoroutine, common);
     bool done = false;
 
     acb->done = &done;
     while (!done) {
-        qemu_aio_wait();
+        aio_poll(aio_context, true);
     }
 }
 
@@ -4703,7 +4708,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
     }
 
-    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -4739,7 +4744,7 @@ static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
     BlockDriverState *bs = acb->common.bs;
 
     acb->req.error = bdrv_co_flush(bs);
-    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -4766,7 +4771,7 @@ static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
     BlockDriverState *bs = acb->common.bs;
 
     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
-    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -5006,10 +5011,12 @@ int bdrv_flush(BlockDriverState *bs)
         /* Fast-path if already in coroutine context */
         bdrv_flush_co_entry(&rwco);
     } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
         co = qemu_coroutine_create(bdrv_flush_co_entry);
         qemu_coroutine_enter(co, &rwco);
         while (rwco.ret == NOT_DONE) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
 
@@ -5119,10 +5126,12 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
         /* Fast-path if already in coroutine context */
         bdrv_discard_co_entry(&rwco);
     } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
         co = qemu_coroutine_create(bdrv_discard_co_entry);
         qemu_coroutine_enter(co, &rwco);
         while (rwco.ret == NOT_DONE) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
 
-- 
cgit v1.2.1


From ed78cda3de92056737364ab3cb748b16f5f17dea Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:35 +0200
Subject: block: acquire AioContext in bdrv_*_all()

bdrv_close_all(), bdrv_commit_all(), bdrv_flush_all(),
bdrv_invalidate_cache_all(), and bdrv_clear_incoming_migration_all() are
called by main loop code and touch all BlockDriverState instances.

Some BlockDriverState instances may be running in another AioContext.
Make sure to acquire the AioContext before closing the BlockDriverState.

This will protect against race conditions once virtio-blk data-plane is
using the BlockDriverState from another AioContext event loop.

Note that this patch does not convert bdrv_drain_all() yet since that
conversion is non-trivial.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 05ec8e13e4..7cccb84dab 100644
--- a/block.c
+++ b/block.c
@@ -1856,7 +1856,11 @@ void bdrv_close_all(void)
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         bdrv_close(bs);
+        aio_context_release(aio_context);
     }
 }
 
@@ -2352,12 +2356,17 @@ int bdrv_commit_all(void)
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         if (bs->drv && bs->backing_hd) {
             int ret = bdrv_commit(bs);
             if (ret < 0) {
+                aio_context_release(aio_context);
                 return ret;
             }
         }
+        aio_context_release(aio_context);
     }
     return 0;
 }
@@ -3833,10 +3842,15 @@ int bdrv_flush_all(void)
     int result = 0;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
-        int ret = bdrv_flush(bs);
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        int ret;
+
+        aio_context_acquire(aio_context);
+        ret = bdrv_flush(bs);
         if (ret < 0 && !result) {
             result = ret;
         }
+        aio_context_release(aio_context);
     }
 
     return result;
@@ -4982,7 +4996,11 @@ void bdrv_invalidate_cache_all(Error **errp)
     Error *local_err = NULL;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         bdrv_invalidate_cache(bs, &local_err);
+        aio_context_release(aio_context);
         if (local_err) {
             error_propagate(errp, local_err);
             return;
@@ -4995,7 +5013,11 @@ void bdrv_clear_incoming_migration_all(void)
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
+        aio_context_release(aio_context);
     }
 }
 
-- 
cgit v1.2.1


From 9b536adcbefb72090f43c9715ce042e37e47af73 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:36 +0200
Subject: block: acquire AioContext in bdrv_drain_all()

Modify bdrv_drain_all() to take into account that BlockDriverState
instances may be running in different AioContexts.

This patch changes the implementation of bdrv_drain_all() while
preserving the semantics.  Previously kicking throttled requests and
checking for pending requests were done across all BlockDriverState
instances in sequence.  Now we process each BlockDriverState in turn,
making sure to acquire and release its AioContext.

This prevents race conditions between the thread executing
bdrv_drain_all() and the thread running the AioContext.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/block.c b/block.c
index 7cccb84dab..87da79fbe8 100644
--- a/block.c
+++ b/block.c
@@ -1885,17 +1885,6 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
     return false;
 }
 
-static bool bdrv_requests_pending_all(void)
-{
-    BlockDriverState *bs;
-    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
-        if (bdrv_requests_pending(bs)) {
-            return true;
-        }
-    }
-    return false;
-}
-
 /*
  * Wait for pending requests to complete across all BlockDriverStates
  *
@@ -1915,12 +1904,20 @@ void bdrv_drain_all(void)
     BlockDriverState *bs;
 
     while (busy) {
+        busy = false;
+
         QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+            AioContext *aio_context = bdrv_get_aio_context(bs);
+            bool bs_busy;
+
+            aio_context_acquire(aio_context);
             bdrv_start_throttled_reqs(bs);
-        }
+            bs_busy = bdrv_requests_pending(bs);
+            bs_busy |= aio_poll(aio_context, bs_busy);
+            aio_context_release(aio_context);
 
-        busy = bdrv_requests_pending_all();
-        busy |= aio_poll(qemu_get_aio_context(), busy);
+            busy |= bs_busy;
+        }
     }
 }
 
-- 
cgit v1.2.1


From dcd042282d855edf70df90b7d61d33b515320b7a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:37 +0200
Subject: block: add bdrv_set_aio_context()

Up until now all BlockDriverState instances have used the QEMU main loop
for fd handlers, timers, and BHs.  This is not scalable on SMP guests
and hosts so we need to move to a model with multiple event loops on
different host CPUs.

bdrv_set_aio_context() assigns the AioContext event loop to use for a
particular BlockDriverState.  It first detaches the entire
BlockDriverState graph from the current AioContext and then attaches to
the new AioContext.

This function will be used by virtio-blk data-plane to assign a
BlockDriverState to its IOThread AioContext.  Make
bdrv_aio_set_context() public since data-plane should not include
block_int.h.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block.c                   | 57 +++++++++++++++++++++++++++++++++++++++++++++--
 include/block/block.h     | 11 +++++++++
 include/block/block_int.h | 36 ++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 87da79fbe8..1fd38159e2 100644
--- a/block.c
+++ b/block.c
@@ -363,6 +363,7 @@ BlockDriverState *bdrv_new(const char *device_name, Error **errp)
     qemu_co_queue_init(&bs->throttled_reqs[0]);
     qemu_co_queue_init(&bs->throttled_reqs[1]);
     bs->refcnt = 1;
+    bs->aio_context = qemu_get_aio_context();
 
     return bs;
 }
@@ -5661,8 +5662,60 @@ out:
 
 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
 {
-    /* Currently BlockDriverState always uses the main loop AioContext */
-    return qemu_get_aio_context();
+    return bs->aio_context;
+}
+
+void bdrv_detach_aio_context(BlockDriverState *bs)
+{
+    if (!bs->drv) {
+        return;
+    }
+
+    if (bs->drv->bdrv_detach_aio_context) {
+        bs->drv->bdrv_detach_aio_context(bs);
+    }
+    if (bs->file) {
+        bdrv_detach_aio_context(bs->file);
+    }
+    if (bs->backing_hd) {
+        bdrv_detach_aio_context(bs->backing_hd);
+    }
+
+    bs->aio_context = NULL;
+}
+
+void bdrv_attach_aio_context(BlockDriverState *bs,
+                             AioContext *new_context)
+{
+    if (!bs->drv) {
+        return;
+    }
+
+    bs->aio_context = new_context;
+
+    if (bs->backing_hd) {
+        bdrv_attach_aio_context(bs->backing_hd, new_context);
+    }
+    if (bs->file) {
+        bdrv_attach_aio_context(bs->file, new_context);
+    }
+    if (bs->drv->bdrv_attach_aio_context) {
+        bs->drv->bdrv_attach_aio_context(bs, new_context);
+    }
+}
+
+void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
+{
+    bdrv_drain_all(); /* ensure there are no in-flight requests */
+
+    bdrv_detach_aio_context(bs);
+
+    /* This function executes in the old AioContext so acquire the new one in
+     * case it runs in a different thread.
+     */
+    aio_context_acquire(new_context);
+    bdrv_attach_aio_context(bs, new_context);
+    aio_context_release(new_context);
 }
 
 void bdrv_add_before_write_notifier(BlockDriverState *bs,
diff --git a/include/block/block.h b/include/block/block.h
index faee3aa246..292754f9fe 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -574,4 +574,15 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
 int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
 
+/**
+ * bdrv_set_aio_context:
+ *
+ * Changes the #AioContext used for fd handlers, timers, and BHs by this
+ * BlockDriverState and all its children.
+ *
+ * This function must be called from the old #AioContext or with a lock held so
+ * the old #AioContext is not executing.
+ */
+void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f2e753f632..93ec86f3f7 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -247,6 +247,19 @@ struct BlockDriver {
      */
     int (*bdrv_has_zero_init)(BlockDriverState *bs);
 
+    /* Remove fd handlers, timers, and other event loop callbacks so the event
+     * loop is no longer in use.  Called with no in-flight requests and in
+     * depth-first traversal order with parents before child nodes.
+     */
+    void (*bdrv_detach_aio_context)(BlockDriverState *bs);
+
+    /* Add fd handlers, timers, and other event loop callbacks so I/O requests
+     * can be processed again.  Called with no in-flight requests and in
+     * depth-first traversal order with child nodes before parent nodes.
+     */
+    void (*bdrv_attach_aio_context)(BlockDriverState *bs,
+                                    AioContext *new_context);
+
     QLIST_ENTRY(BlockDriver) list;
 };
 
@@ -297,6 +310,8 @@ struct BlockDriverState {
     const BlockDevOps *dev_ops;
     void *dev_opaque;
 
+    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+
     char filename[1024];
     char backing_file[1024]; /* if non zero, the image is a diff of
                                 this file image */
@@ -396,6 +411,27 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs,
  */
 AioContext *bdrv_get_aio_context(BlockDriverState *bs);
 
+/**
+ * bdrv_detach_aio_context:
+ *
+ * May be called from .bdrv_detach_aio_context() to detach children from the
+ * current #AioContext.  This is only needed by block drivers that manage their
+ * own children.  Both ->file and ->backing_hd are automatically handled and
+ * block drivers should not call this function on them explicitly.
+ */
+void bdrv_detach_aio_context(BlockDriverState *bs);
+
+/**
+ * bdrv_attach_aio_context:
+ *
+ * May be called from .bdrv_attach_aio_context() to attach children to the new
+ * #AioContext.  This is only needed by block drivers that manage their own
+ * children.  Both ->file and ->backing_hd are automatically handled and block
+ * drivers should not call this function on them explicitly.
+ */
+void bdrv_attach_aio_context(BlockDriverState *bs,
+                             AioContext *new_context);
+
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
-- 
cgit v1.2.1


From 7e1efdf0a30faa2b3e560deafe16b8bd12c6f399 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:38 +0200
Subject: blkdebug: use BlockDriverState's AioContext

Drop the assumption that we're using the main AioContext.  Convert
qemu_bh_new() to aio_bh_new() so we use the BlockDriverState's
AioContext.

The .bdrv_detach_aio_context() and .bdrv_attach_aio_context() interfaces
are not needed since no fd handlers, timers, or BHs stay registered when
requests have been drained.

Cc: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/blkdebug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 380c736101..f51407de3f 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -471,7 +471,7 @@ static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
     acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
     acb->ret = -error;
 
-    bh = qemu_bh_new(error_callback_bh, acb);
+    bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb);
     acb->bh = bh;
     qemu_bh_schedule(bh);
 
-- 
cgit v1.2.1


From 9d94020bc65485fe03a4acabd8559b682b193d07 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:39 +0200
Subject: blkverify: implement .bdrv_detach/attach_aio_context()

Drop the assumption that we're using the main AioContext.  Convert
qemu_bh_new() to aio_bh_new() and qemu_aio_wait() to aio_poll() so we
use the BlockDriverState's AioContext.

Implement .bdrv_detach/attach_aio_context() interfaces to propagate
detach/attach to BDRVBlkverifyState->test_file.  The block layer takes
care of ->file and ->backing_hd but doesn't know about our ->test_file
BlockDriverState, which is also part of the graph.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/blkverify.c | 47 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/block/blkverify.c b/block/blkverify.c
index e1c31171c3..621b78593b 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -39,12 +39,13 @@ struct BlkverifyAIOCB {
 static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
+    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
     bool finished = false;
 
     /* Wait until request completes, invokes its callback, and frees itself */
     acb->finished = &finished;
     while (!finished) {
-        qemu_aio_wait();
+        aio_poll(aio_context, true);
     }
 }
 
@@ -228,7 +229,8 @@ static void blkverify_aio_cb(void *opaque, int ret)
             acb->verify(acb);
         }
 
-        acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
+        acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+                             blkverify_aio_bh, acb);
         qemu_bh_schedule(acb->bh);
         break;
     }
@@ -302,21 +304,40 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
     return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
 }
 
+/* Propagate AioContext changes to ->test_file */
+static void blkverify_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+
+    bdrv_detach_aio_context(s->test_file);
+}
+
+static void blkverify_attach_aio_context(BlockDriverState *bs,
+                                         AioContext *new_context)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+
+    bdrv_attach_aio_context(s->test_file, new_context);
+}
+
 static BlockDriver bdrv_blkverify = {
-    .format_name            = "blkverify",
-    .protocol_name          = "blkverify",
-    .instance_size          = sizeof(BDRVBlkverifyState),
+    .format_name                      = "blkverify",
+    .protocol_name                    = "blkverify",
+    .instance_size                    = sizeof(BDRVBlkverifyState),
+
+    .bdrv_parse_filename              = blkverify_parse_filename,
+    .bdrv_file_open                   = blkverify_open,
+    .bdrv_close                       = blkverify_close,
+    .bdrv_getlength                   = blkverify_getlength,
 
-    .bdrv_parse_filename    = blkverify_parse_filename,
-    .bdrv_file_open         = blkverify_open,
-    .bdrv_close             = blkverify_close,
-    .bdrv_getlength         = blkverify_getlength,
+    .bdrv_aio_readv                   = blkverify_aio_readv,
+    .bdrv_aio_writev                  = blkverify_aio_writev,
+    .bdrv_aio_flush                   = blkverify_aio_flush,
 
-    .bdrv_aio_readv         = blkverify_aio_readv,
-    .bdrv_aio_writev        = blkverify_aio_writev,
-    .bdrv_aio_flush         = blkverify_aio_flush,
+    .bdrv_attach_aio_context          = blkverify_attach_aio_context,
+    .bdrv_detach_aio_context          = blkverify_detach_aio_context,
 
-    .is_filter              = true,
+    .is_filter                        = true,
     .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
 };
 
-- 
cgit v1.2.1


From 63f0f45f2e89b60ff8245fec81328ddfde42a303 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:40 +0200
Subject: curl: implement .bdrv_detach/attach_aio_context()

The curl block driver uses fd handlers, timers, and BHs.  The fd
handlers and timers are managed on behalf of libcurl, which controls
them using callback functions that the block driver implements.

The simplest way to implement .bdrv_detach/attach_aio_context() is to
clean up libcurl in the old event loop and initialize it again in the
new event loop.  We do not need to keep track of anything since there
are no pending requests when the AioContext is changed.

Also make sure to use aio_set_fd_handler() instead of
qemu_aio_set_fd_handler() and aio_bh_new() instead of qemu_bh_new() so
the current AioContext is passed in.

Cc: Alexander Graf <agraf@suse.de>
Cc: Fam Zheng <famz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 block/curl.c | 192 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 115 insertions(+), 77 deletions(-)

diff --git a/block/curl.c b/block/curl.c
index f491b0ba4c..8c84141ced 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -110,6 +110,7 @@ typedef struct BDRVCURLState {
     size_t readahead_size;
     bool sslverify;
     bool accept_range;
+    AioContext *aio_context;
 } BDRVCURLState;
 
 static void curl_clean_state(CURLState *s);
@@ -134,25 +135,29 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 #endif
 
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
-                        void *s, void *sp)
+                        void *userp, void *sp)
 {
+    BDRVCURLState *s;
     CURLState *state = NULL;
     curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
     state->sock_fd = fd;
+    s = state->s;
 
     DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
     switch (action) {
         case CURL_POLL_IN:
-            qemu_aio_set_fd_handler(fd, curl_multi_read, NULL, state);
+            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
+                               NULL, state);
             break;
         case CURL_POLL_OUT:
-            qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
             break;
         case CURL_POLL_INOUT:
-            qemu_aio_set_fd_handler(fd, curl_multi_read, curl_multi_do, state);
+            aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
+                               curl_multi_do, state);
             break;
         case CURL_POLL_REMOVE:
-            qemu_aio_set_fd_handler(fd, NULL, NULL, NULL);
+            aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
             break;
     }
 
@@ -365,7 +370,7 @@ static CURLState *curl_init_state(BDRVCURLState *s)
             break;
         }
         if (!state) {
-            qemu_aio_wait();
+            aio_poll(state->s->aio_context, true);
         }
     } while(!state);
 
@@ -422,6 +427,51 @@ static void curl_parse_filename(const char *filename, QDict *options,
     qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
 }
 
+static void curl_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVCURLState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < CURL_NUM_STATES; i++) {
+        if (s->states[i].in_use) {
+            curl_clean_state(&s->states[i]);
+        }
+        if (s->states[i].curl) {
+            curl_easy_cleanup(s->states[i].curl);
+            s->states[i].curl = NULL;
+        }
+        if (s->states[i].orig_buf) {
+            g_free(s->states[i].orig_buf);
+            s->states[i].orig_buf = NULL;
+        }
+    }
+    if (s->multi) {
+        curl_multi_cleanup(s->multi);
+        s->multi = NULL;
+    }
+
+    timer_del(&s->timer);
+}
+
+static void curl_attach_aio_context(BlockDriverState *bs,
+                                    AioContext *new_context)
+{
+    BDRVCURLState *s = bs->opaque;
+
+    aio_timer_init(new_context, &s->timer,
+                   QEMU_CLOCK_REALTIME, SCALE_NS,
+                   curl_multi_timeout_do, s);
+
+    assert(!s->multi);
+    s->multi = curl_multi_init();
+    s->aio_context = new_context;
+    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
+#ifdef NEED_CURL_TIMER_CALLBACK
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
+#endif
+}
+
 static QemuOptsList runtime_opts = {
     .name = "curl",
     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -491,6 +541,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     DPRINTF("CURL: Opening %s\n", file);
+    s->aio_context = bdrv_get_aio_context(bs);
     s->url = g_strdup(file);
     state = curl_init_state(s);
     if (!state)
@@ -523,19 +574,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     curl_easy_cleanup(state->curl);
     state->curl = NULL;
 
-    aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
-                   QEMU_CLOCK_REALTIME, SCALE_NS,
-                   curl_multi_timeout_do, s);
-
-    // Now we know the file exists and its size, so let's
-    // initialize the multi interface!
-
-    s->multi = curl_multi_init();
-    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
-#ifdef NEED_CURL_TIMER_CALLBACK
-    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
-    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
-#endif
+    curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
     qemu_opts_del(opts);
     return 0;
@@ -630,7 +669,7 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
     acb->sector_num = sector_num;
     acb->nb_sectors = nb_sectors;
 
-    acb->bh = qemu_bh_new(curl_readv_bh_cb, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
     qemu_bh_schedule(acb->bh);
     return &acb->common;
 }
@@ -638,25 +677,9 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
 static void curl_close(BlockDriverState *bs)
 {
     BDRVCURLState *s = bs->opaque;
-    int i;
 
     DPRINTF("CURL: Close\n");
-    for (i=0; i<CURL_NUM_STATES; i++) {
-        if (s->states[i].in_use)
-            curl_clean_state(&s->states[i]);
-        if (s->states[i].curl) {
-            curl_easy_cleanup(s->states[i].curl);
-            s->states[i].curl = NULL;
-        }
-        if (s->states[i].orig_buf) {
-            g_free(s->states[i].orig_buf);
-            s->states[i].orig_buf = NULL;
-        }
-    }
-    if (s->multi)
-        curl_multi_cleanup(s->multi);
-
-    timer_del(&s->timer);
+    curl_detach_aio_context(bs);
 
     g_free(s->url);
 }
@@ -668,68 +691,83 @@ static int64_t curl_getlength(BlockDriverState *bs)
 }
 
 static BlockDriver bdrv_http = {
-    .format_name            = "http",
-    .protocol_name          = "http",
+    .format_name                = "http",
+    .protocol_name              = "http",
+
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .bdrv_aio_readv             = curl_aio_readv,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static BlockDriver bdrv_https = {
-    .format_name            = "https",
-    .protocol_name          = "https",
+    .format_name                = "https",
+    .protocol_name              = "https",
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_aio_readv             = curl_aio_readv,
+
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static BlockDriver bdrv_ftp = {
-    .format_name            = "ftp",
-    .protocol_name          = "ftp",
+    .format_name                = "ftp",
+    .protocol_name              = "ftp",
+
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .bdrv_aio_readv             = curl_aio_readv,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static BlockDriver bdrv_ftps = {
-    .format_name            = "ftps",
-    .protocol_name          = "ftps",
+    .format_name                = "ftps",
+    .protocol_name              = "ftps",
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_aio_readv             = curl_aio_readv,
+
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static BlockDriver bdrv_tftp = {
-    .format_name            = "tftp",
-    .protocol_name          = "tftp",
+    .format_name                = "tftp",
+    .protocol_name              = "tftp",
+
+    .instance_size              = sizeof(BDRVCURLState),
+    .bdrv_parse_filename        = curl_parse_filename,
+    .bdrv_file_open             = curl_open,
+    .bdrv_close                 = curl_close,
+    .bdrv_getlength             = curl_getlength,
 
-    .instance_size          = sizeof(BDRVCURLState),
-    .bdrv_parse_filename    = curl_parse_filename,
-    .bdrv_file_open         = curl_open,
-    .bdrv_close             = curl_close,
-    .bdrv_getlength         = curl_getlength,
+    .bdrv_aio_readv             = curl_aio_readv,
 
-    .bdrv_aio_readv         = curl_aio_readv,
+    .bdrv_detach_aio_context    = curl_detach_aio_context,
+    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 
 static void curl_block_init(void)
-- 
cgit v1.2.1


From 6ee50af24b1478452fbbfc1cd8d78227c0e91cde Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:41 +0200
Subject: gluster: use BlockDriverState's AioContext

Drop the assumption that we're using the main AioContext.  Use
aio_bh_new() instead of qemu_bh_new().

The .bdrv_detach_aio_context() and .bdrv_attach_aio_context() interfaces
are not needed since no fd handlers, timers, or BHs stay registered when
requests have been drained.

Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/gluster.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/gluster.c b/block/gluster.c
index d0726ec92c..114689e441 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -16,6 +16,7 @@ typedef struct GlusterAIOCB {
     int ret;
     QEMUBH *bh;
     Coroutine *coroutine;
+    AioContext *aio_context;
 } GlusterAIOCB;
 
 typedef struct BDRVGlusterState {
@@ -249,7 +250,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
         acb->ret = -EIO; /* Partial read/write - fail it */
     }
 
-    acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
+    acb->bh = aio_bh_new(acb->aio_context, qemu_gluster_complete_aio, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -436,6 +437,7 @@ static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
     acb->size = size;
     acb->ret = 0;
     acb->coroutine = qemu_coroutine_self();
+    acb->aio_context = bdrv_get_aio_context(bs);
 
     ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
     if (ret < 0) {
@@ -549,6 +551,7 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
     acb->size = size;
     acb->ret = 0;
     acb->coroutine = qemu_coroutine_self();
+    acb->aio_context = bdrv_get_aio_context(bs);
 
     if (write) {
         ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -605,6 +608,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
     acb->size = 0;
     acb->ret = 0;
     acb->coroutine = qemu_coroutine_self();
+    acb->aio_context = bdrv_get_aio_context(bs);
 
     ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
     if (ret < 0) {
@@ -633,6 +637,7 @@ static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
     acb->size = 0;
     acb->ret = 0;
     acb->coroutine = qemu_coroutine_self();
+    acb->aio_context = bdrv_get_aio_context(bs);
 
     ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
     if (ret < 0) {
-- 
cgit v1.2.1


From 80cf6257668ec78537b02af44e059bdc46c30416 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:42 +0200
Subject: iscsi: implement .bdrv_detach/attach_aio_context()

Drop the assumption that we're using the main AioContext for Linux
AIO.  Convert qemu_aio_set_fd_handler() to aio_set_fd_handler() and
timer_new_ms() to aio_timer_new().

The .bdrv_detach/attach_aio_context() interfaces also need to be
implemented to move the fd and timer from the old to the new AioContext.

Cc: Peter Lieven <pl@kamp.de>
Cc: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Peter Lieven <pl@kamp.de>
---
 block/iscsi.c | 80 +++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 56 insertions(+), 24 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 3892cc551e..877b877cf2 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -49,6 +49,7 @@
 
 typedef struct IscsiLun {
     struct iscsi_context *iscsi;
+    AioContext *aio_context;
     int lun;
     enum scsi_inquiry_peripheral_device_type type;
     int block_size;
@@ -73,6 +74,7 @@ typedef struct IscsiTask {
     struct scsi_task *task;
     Coroutine *co;
     QEMUBH *bh;
+    IscsiLun *iscsilun;
 } IscsiTask;
 
 typedef struct IscsiAIOCB {
@@ -133,7 +135,7 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
     if (acb->bh) {
         return;
     }
-    acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
+    acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -169,7 +171,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
 
 out:
     if (iTask->co) {
-        iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask);
+        iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
+                               iscsi_co_generic_bh_cb, iTask);
         qemu_bh_schedule(iTask->bh);
     }
 }
@@ -177,8 +180,9 @@ out:
 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
 {
     *iTask = (struct IscsiTask) {
-        .co         = qemu_coroutine_self(),
-        .retries    = ISCSI_CMD_RETRIES,
+        .co             = qemu_coroutine_self(),
+        .retries        = ISCSI_CMD_RETRIES,
+        .iscsilun       = iscsilun,
     };
 }
 
@@ -209,7 +213,7 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
                                      iscsi_abort_task_cb, acb);
 
     while (acb->status == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(iscsilun->aio_context, true);
     }
 }
 
@@ -232,10 +236,11 @@ iscsi_set_events(IscsiLun *iscsilun)
     ev = POLLIN;
     ev |= iscsi_which_events(iscsi);
     if (ev != iscsilun->events) {
-        qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
-                      iscsi_process_read,
-                      (ev & POLLOUT) ? iscsi_process_write : NULL,
-                      iscsilun);
+        aio_set_fd_handler(iscsilun->aio_context,
+                           iscsi_get_fd(iscsi),
+                           iscsi_process_read,
+                           (ev & POLLOUT) ? iscsi_process_write : NULL,
+                           iscsilun);
 
     }
 
@@ -791,7 +796,7 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
 
         while (status == -EINPROGRESS) {
-            qemu_aio_wait();
+            aio_poll(iscsilun->aio_context, true);
         }
 
         return 0;
@@ -1195,6 +1200,40 @@ fail_with_err:
     return NULL;
 }
 
+static void iscsi_detach_aio_context(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+
+    aio_set_fd_handler(iscsilun->aio_context,
+                       iscsi_get_fd(iscsilun->iscsi),
+                       NULL, NULL, NULL);
+    iscsilun->events = 0;
+
+    if (iscsilun->nop_timer) {
+        timer_del(iscsilun->nop_timer);
+        timer_free(iscsilun->nop_timer);
+        iscsilun->nop_timer = NULL;
+    }
+}
+
+static void iscsi_attach_aio_context(BlockDriverState *bs,
+                                     AioContext *new_context)
+{
+    IscsiLun *iscsilun = bs->opaque;
+
+    iscsilun->aio_context = new_context;
+    iscsi_set_events(iscsilun);
+
+#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
+    /* Set up a timer for sending out iSCSI NOPs */
+    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
+                                        QEMU_CLOCK_REALTIME, SCALE_MS,
+                                        iscsi_nop_timed_event, iscsilun);
+    timer_mod(iscsilun->nop_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
+#endif
+}
+
 /*
  * We support iscsi url's on the form
  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
@@ -1301,6 +1340,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     iscsilun->iscsi = iscsi;
+    iscsilun->aio_context = bdrv_get_aio_context(bs);
     iscsilun->lun   = iscsi_url->lun;
     iscsilun->has_write_same = true;
 
@@ -1374,11 +1414,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     scsi_free_scsi_task(task);
     task = NULL;
 
-#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
-    /* Set up a timer for sending out iSCSI NOPs */
-    iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
-    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
-#endif
+    iscsi_attach_aio_context(bs, iscsilun->aio_context);
 
     /* Guess the internal cluster (page) size of the iscsi target by the means
      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
@@ -1422,11 +1458,7 @@ static void iscsi_close(BlockDriverState *bs)
     IscsiLun *iscsilun = bs->opaque;
     struct iscsi_context *iscsi = iscsilun->iscsi;
 
-    if (iscsilun->nop_timer) {
-        timer_del(iscsilun->nop_timer);
-        timer_free(iscsilun->nop_timer);
-    }
-    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
+    iscsi_detach_aio_context(bs);
     iscsi_destroy_context(iscsi);
     g_free(iscsilun->zeroblock);
     g_free(iscsilun->allocationmap);
@@ -1530,10 +1562,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options,
     if (ret != 0) {
         goto out;
     }
-    if (iscsilun->nop_timer) {
-        timer_del(iscsilun->nop_timer);
-        timer_free(iscsilun->nop_timer);
-    }
+    iscsi_detach_aio_context(bs);
     if (iscsilun->type != TYPE_DISK) {
         ret = -ENODEV;
         goto out;
@@ -1604,6 +1633,9 @@ static BlockDriver bdrv_iscsi = {
     .bdrv_ioctl       = iscsi_ioctl,
     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
 #endif
+
+    .bdrv_detach_aio_context = iscsi_detach_aio_context,
+    .bdrv_attach_aio_context = iscsi_attach_aio_context,
 };
 
 static QemuOptsList qemu_iscsi_opts = {
-- 
cgit v1.2.1


From 69447cd8f361dbb6c752bd18b15322d27493ca91 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:43 +0200
Subject: nbd: implement .bdrv_detach/attach_aio_context()

Drop the assumption that we're using the main AioContext.  Convert
qemu_aio_set_fd_handler() calls to aio_set_fd_handler().

The .bdrv_detach/attach_aio_context() interfaces also need to be
implemented to move the socket fd handler from the old to the new
AioContext.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/nbd-client.c | 24 ++++++++++++---
 block/nbd-client.h |  4 +++
 block/nbd.c        | 87 +++++++++++++++++++++++++++++++++---------------------
 3 files changed, 78 insertions(+), 37 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 7d698cb619..6e1c97cad0 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -49,7 +49,7 @@ static void nbd_teardown_connection(NbdClientSession *client)
     shutdown(client->sock, 2);
     nbd_recv_coroutines_enter_all(client);
 
-    qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL);
+    nbd_client_session_detach_aio_context(client);
     closesocket(client->sock);
     client->sock = -1;
 }
@@ -103,11 +103,14 @@ static int nbd_co_send_request(NbdClientSession *s,
     struct nbd_request *request,
     QEMUIOVector *qiov, int offset)
 {
+    AioContext *aio_context;
     int rc, ret;
 
     qemu_co_mutex_lock(&s->send_mutex);
     s->send_coroutine = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
+    aio_context = bdrv_get_aio_context(s->bs);
+    aio_set_fd_handler(aio_context, s->sock,
+                       nbd_reply_ready, nbd_restart_write, s);
     if (qiov) {
         if (!s->is_unix) {
             socket_set_cork(s->sock, 1);
@@ -126,7 +129,7 @@ static int nbd_co_send_request(NbdClientSession *s,
     } else {
         rc = nbd_send_request(s->sock, request);
     }
-    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
+    aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, s);
     s->send_coroutine = NULL;
     qemu_co_mutex_unlock(&s->send_mutex);
     return rc;
@@ -335,6 +338,19 @@ int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
 
 }
 
+void nbd_client_session_detach_aio_context(NbdClientSession *client)
+{
+    aio_set_fd_handler(bdrv_get_aio_context(client->bs), client->sock,
+                       NULL, NULL, NULL);
+}
+
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+                                           AioContext *new_context)
+{
+    aio_set_fd_handler(new_context, client->sock,
+                       nbd_reply_ready, NULL, client);
+}
+
 void nbd_client_session_close(NbdClientSession *client)
 {
     struct nbd_request request = {
@@ -381,7 +397,7 @@ int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
     /* Now that we're connected, set the socket to be non-blocking and
      * kick the reply mechanism.  */
     qemu_set_nonblock(sock);
-    qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, client);
+    nbd_client_session_attach_aio_context(client, bdrv_get_aio_context(bs));
 
     logout("Established connection with NBD server\n");
     return 0;
diff --git a/block/nbd-client.h b/block/nbd-client.h
index f2a63378bb..cd478f3a98 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -47,4 +47,8 @@ int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
 int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
                                 int nb_sectors, QEMUIOVector *qiov);
 
+void nbd_client_session_detach_aio_context(NbdClientSession *client);
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+                                           AioContext *new_context);
+
 #endif /* NBD_CLIENT_H */
diff --git a/block/nbd.c b/block/nbd.c
index 613f2581ae..4eda0958d7 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -323,46 +323,67 @@ static int64_t nbd_getlength(BlockDriverState *bs)
     return s->client.size;
 }
 
+static void nbd_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    nbd_client_session_detach_aio_context(&s->client);
+}
+
+static void nbd_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    nbd_client_session_attach_aio_context(&s->client, new_context);
+}
+
 static BlockDriver bdrv_nbd = {
-    .format_name         = "nbd",
-    .protocol_name       = "nbd",
-    .instance_size       = sizeof(BDRVNBDState),
-    .bdrv_parse_filename = nbd_parse_filename,
-    .bdrv_file_open      = nbd_open,
-    .bdrv_co_readv       = nbd_co_readv,
-    .bdrv_co_writev      = nbd_co_writev,
-    .bdrv_close          = nbd_close,
-    .bdrv_co_flush_to_os = nbd_co_flush,
-    .bdrv_co_discard     = nbd_co_discard,
-    .bdrv_getlength      = nbd_getlength,
+    .format_name                = "nbd",
+    .protocol_name              = "nbd",
+    .instance_size              = sizeof(BDRVNBDState),
+    .bdrv_parse_filename        = nbd_parse_filename,
+    .bdrv_file_open             = nbd_open,
+    .bdrv_co_readv              = nbd_co_readv,
+    .bdrv_co_writev             = nbd_co_writev,
+    .bdrv_close                 = nbd_close,
+    .bdrv_co_flush_to_os        = nbd_co_flush,
+    .bdrv_co_discard            = nbd_co_discard,
+    .bdrv_getlength             = nbd_getlength,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
 };
 
 static BlockDriver bdrv_nbd_tcp = {
-    .format_name         = "nbd",
-    .protocol_name       = "nbd+tcp",
-    .instance_size       = sizeof(BDRVNBDState),
-    .bdrv_parse_filename = nbd_parse_filename,
-    .bdrv_file_open      = nbd_open,
-    .bdrv_co_readv       = nbd_co_readv,
-    .bdrv_co_writev      = nbd_co_writev,
-    .bdrv_close          = nbd_close,
-    .bdrv_co_flush_to_os = nbd_co_flush,
-    .bdrv_co_discard     = nbd_co_discard,
-    .bdrv_getlength      = nbd_getlength,
+    .format_name                = "nbd",
+    .protocol_name              = "nbd+tcp",
+    .instance_size              = sizeof(BDRVNBDState),
+    .bdrv_parse_filename        = nbd_parse_filename,
+    .bdrv_file_open             = nbd_open,
+    .bdrv_co_readv              = nbd_co_readv,
+    .bdrv_co_writev             = nbd_co_writev,
+    .bdrv_close                 = nbd_close,
+    .bdrv_co_flush_to_os        = nbd_co_flush,
+    .bdrv_co_discard            = nbd_co_discard,
+    .bdrv_getlength             = nbd_getlength,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
 };
 
 static BlockDriver bdrv_nbd_unix = {
-    .format_name         = "nbd",
-    .protocol_name       = "nbd+unix",
-    .instance_size       = sizeof(BDRVNBDState),
-    .bdrv_parse_filename = nbd_parse_filename,
-    .bdrv_file_open      = nbd_open,
-    .bdrv_co_readv       = nbd_co_readv,
-    .bdrv_co_writev      = nbd_co_writev,
-    .bdrv_close          = nbd_close,
-    .bdrv_co_flush_to_os = nbd_co_flush,
-    .bdrv_co_discard     = nbd_co_discard,
-    .bdrv_getlength      = nbd_getlength,
+    .format_name                = "nbd",
+    .protocol_name              = "nbd+unix",
+    .instance_size              = sizeof(BDRVNBDState),
+    .bdrv_parse_filename        = nbd_parse_filename,
+    .bdrv_file_open             = nbd_open,
+    .bdrv_co_readv              = nbd_co_readv,
+    .bdrv_co_writev             = nbd_co_writev,
+    .bdrv_close                 = nbd_close,
+    .bdrv_co_flush_to_os        = nbd_co_flush,
+    .bdrv_co_discard            = nbd_co_discard,
+    .bdrv_getlength             = nbd_getlength,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
 };
 
 static void bdrv_nbd_init(void)
-- 
cgit v1.2.1


From 471799d135a45b5a09f43bc103b5eb710548bb87 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:44 +0200
Subject: nfs: implement .bdrv_detach/attach_aio_context()

Drop the assumption that we're using the main AioContext.  The following
functions need to be converted:
 * qemu_bh_new() -> aio_bh_new()
 * qemu_aio_set_fd_handler() -> aio_set_fd_handler()
 * qemu_aio_wait() -> aio_poll()

The .bdrv_detach/attach_aio_context() interfaces also need to be
implemented to move the fd handler from the old to the new AioContext.

Cc: Peter Lieven <pl@kamp.de>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Peter Lieven <pl@kamp.de>
---
 block/nfs.c | 81 +++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 24 deletions(-)

diff --git a/block/nfs.c b/block/nfs.c
index 539bd951df..bd9177f3ae 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -40,6 +40,7 @@ typedef struct NFSClient {
     struct nfsfh *fh;
     int events;
     bool has_zero_init;
+    AioContext *aio_context;
 } NFSClient;
 
 typedef struct NFSRPC {
@@ -49,6 +50,7 @@ typedef struct NFSRPC {
     struct stat *st;
     Coroutine *co;
     QEMUBH *bh;
+    NFSClient *client;
 } NFSRPC;
 
 static void nfs_process_read(void *arg);
@@ -58,10 +60,11 @@ static void nfs_set_events(NFSClient *client)
 {
     int ev = nfs_which_events(client->context);
     if (ev != client->events) {
-        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
-                      (ev & POLLIN) ? nfs_process_read : NULL,
-                      (ev & POLLOUT) ? nfs_process_write : NULL,
-                      client);
+        aio_set_fd_handler(client->aio_context,
+                           nfs_get_fd(client->context),
+                           (ev & POLLIN) ? nfs_process_read : NULL,
+                           (ev & POLLOUT) ? nfs_process_write : NULL,
+                           client);
 
     }
     client->events = ev;
@@ -84,7 +87,8 @@ static void nfs_process_write(void *arg)
 static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
 {
     *task = (NFSRPC) {
-        .co         = qemu_coroutine_self(),
+        .co             = qemu_coroutine_self(),
+        .client         = client,
     };
 }
 
@@ -116,7 +120,8 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
         error_report("NFS Error: %s", nfs_get_error(nfs));
     }
     if (task->co) {
-        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
+        task->bh = aio_bh_new(task->client->aio_context,
+                              nfs_co_generic_bh_cb, task);
         qemu_bh_schedule(task->bh);
     }
 }
@@ -224,13 +229,34 @@ static QemuOptsList runtime_opts = {
     },
 };
 
+static void nfs_detach_aio_context(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+
+    aio_set_fd_handler(client->aio_context,
+                       nfs_get_fd(client->context),
+                       NULL, NULL, NULL);
+    client->events = 0;
+}
+
+static void nfs_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    NFSClient *client = bs->opaque;
+
+    client->aio_context = new_context;
+    nfs_set_events(client);
+}
+
 static void nfs_client_close(NFSClient *client)
 {
     if (client->context) {
         if (client->fh) {
             nfs_close(client->context, client->fh);
         }
-        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
+        aio_set_fd_handler(client->aio_context,
+                           nfs_get_fd(client->context),
+                           NULL, NULL, NULL);
         nfs_destroy_context(client->context);
     }
     memset(client, 0, sizeof(NFSClient));
@@ -345,6 +371,8 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
     QemuOpts *opts;
     Error *local_err = NULL;
 
+    client->aio_context = bdrv_get_aio_context(bs);
+
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
@@ -368,6 +396,8 @@ static int nfs_file_create(const char *url, QEMUOptionParameter *options,
     int64_t total_size = 0;
     NFSClient *client = g_malloc0(sizeof(NFSClient));
 
+    client->aio_context = qemu_get_aio_context();
+
     /* Read out options */
     while (options && options->name) {
         if (!strcmp(options->name, "size")) {
@@ -407,7 +437,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
 
     while (!task.complete) {
         nfs_set_events(client);
-        qemu_aio_wait();
+        aio_poll(client->aio_context, true);
     }
 
     return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
@@ -420,22 +450,25 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
 }
 
 static BlockDriver bdrv_nfs = {
-    .format_name     = "nfs",
-    .protocol_name   = "nfs",
-
-    .instance_size   = sizeof(NFSClient),
-    .bdrv_needs_filename = true,
-    .bdrv_has_zero_init = nfs_has_zero_init,
-    .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
-    .bdrv_truncate = nfs_file_truncate,
-
-    .bdrv_file_open  = nfs_file_open,
-    .bdrv_close      = nfs_file_close,
-    .bdrv_create     = nfs_file_create,
-
-    .bdrv_co_readv         = nfs_co_readv,
-    .bdrv_co_writev        = nfs_co_writev,
-    .bdrv_co_flush_to_disk = nfs_co_flush,
+    .format_name                    = "nfs",
+    .protocol_name                  = "nfs",
+
+    .instance_size                  = sizeof(NFSClient),
+    .bdrv_needs_filename            = true,
+    .bdrv_has_zero_init             = nfs_has_zero_init,
+    .bdrv_get_allocated_file_size   = nfs_get_allocated_file_size,
+    .bdrv_truncate                  = nfs_file_truncate,
+
+    .bdrv_file_open                 = nfs_file_open,
+    .bdrv_close                     = nfs_file_close,
+    .bdrv_create                    = nfs_file_create,
+
+    .bdrv_co_readv                  = nfs_co_readv,
+    .bdrv_co_writev                 = nfs_co_writev,
+    .bdrv_co_flush_to_disk          = nfs_co_flush,
+
+    .bdrv_detach_aio_context        = nfs_detach_aio_context,
+    .bdrv_attach_aio_context        = nfs_attach_aio_context,
 };
 
 static void nfs_block_init(void)
-- 
cgit v1.2.1


From a8c868c386d4cbdd0fc0a9423c6683880f6e75e4 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:45 +0200
Subject: qed: use BlockDriverState's AioContext

Drop the assumption that we're using the main AioContext.  Convert
qemu_bh_new() to aio_bh_new() and qemu_aio_wait() to aio_poll() so we're
using the BlockDriverState's AioContext.

Implement .bdrv_detach/attach_aio_context() interfaces to move the
QED_F_NEED_CHECK timer from the old AioContext to the new one.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed-table.c |  8 ++++----
 block/qed.c       | 35 +++++++++++++++++++++++++++++------
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/block/qed-table.c b/block/qed-table.c
index 76d2dcccf8..f61107a1cf 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -173,7 +173,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
     qed_read_table(s, s->header.l1_table_offset,
                    s->l1_table, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(s->bs), true);
     }
 
     return ret;
@@ -194,7 +194,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
 
     qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(s->bs), true);
     }
 
     return ret;
@@ -267,7 +267,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
 
     qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(s->bs), true);
     }
 
     return ret;
@@ -289,7 +289,7 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
 
     qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
     while (ret == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(s->bs), true);
     }
 
     return ret;
diff --git a/block/qed.c b/block/qed.c
index c130e42d0d..79f5bd392a 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -21,12 +21,13 @@
 static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
     bool finished = false;
 
     /* Wait for the request to finish */
     acb->finished = &finished;
     while (!finished) {
-        qemu_aio_wait();
+        aio_poll(aio_context, true);
     }
 }
 
@@ -373,6 +374,27 @@ static void bdrv_qed_rebind(BlockDriverState *bs)
     s->bs = bs;
 }
 
+static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVQEDState *s = bs->opaque;
+
+    qed_cancel_need_check_timer(s);
+    timer_free(s->need_check_timer);
+}
+
+static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
+                                        AioContext *new_context)
+{
+    BDRVQEDState *s = bs->opaque;
+
+    s->need_check_timer = aio_timer_new(new_context,
+                                        QEMU_CLOCK_VIRTUAL, SCALE_NS,
+                                        qed_need_check_timer_cb, s);
+    if (s->header.features & QED_F_NEED_CHECK) {
+        qed_start_need_check_timer(s);
+    }
+}
+
 static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
 {
@@ -496,8 +518,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }
 
-    s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                            qed_need_check_timer_cb, s);
+    bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
 out:
     if (ret) {
@@ -528,8 +549,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
 
-    qed_cancel_need_check_timer(s);
-    timer_free(s->need_check_timer);
+    bdrv_qed_detach_aio_context(bs);
 
     /* Ensure writes reach stable storage */
     bdrv_flush(bs->file);
@@ -919,7 +939,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
 
     /* Arrange for a bh to invoke the completion function */
     acb->bh_ret = ret;
-    acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+                         qed_aio_complete_bh, acb);
     qemu_bh_schedule(acb->bh);
 
     /* Start next allocating write request waiting behind this one.  Note that
@@ -1644,6 +1665,8 @@ static BlockDriver bdrv_qed = {
     .bdrv_change_backing_file = bdrv_qed_change_backing_file,
     .bdrv_invalidate_cache    = bdrv_qed_invalidate_cache,
     .bdrv_check               = bdrv_qed_check,
+    .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
+    .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
 };
 
 static void bdrv_qed_init(void)
-- 
cgit v1.2.1


From e3625d3d6ae8029fc20262ac3848421ec281446f Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:46 +0200
Subject: quorum: implement .bdrv_detach/attach_aio_context()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement .bdrv_detach/attach_aio_context() interfaces to propagate
detach/attach to BDRVQuorumState->bs[] children.  The block layer takes
care of ->file and ->backing_hd but doesn't know about our ->bs[]
BlockDriverStates, which is also part of the graph.

Reviewed-by: Benoît Canet <benoit.canet@irqsave.net>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/quorum.c | 48 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/block/quorum.c b/block/quorum.c
index ecec3a5407..426077a520 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -848,25 +848,49 @@ static void quorum_close(BlockDriverState *bs)
     g_free(s->bs);
 }
 
+static void quorum_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_detach_aio_context(s->bs[i]);
+    }
+}
+
+static void quorum_attach_aio_context(BlockDriverState *bs,
+                                      AioContext *new_context)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_attach_aio_context(s->bs[i], new_context);
+    }
+}
+
 static BlockDriver bdrv_quorum = {
-    .format_name        = "quorum",
-    .protocol_name      = "quorum",
+    .format_name                        = "quorum",
+    .protocol_name                      = "quorum",
+
+    .instance_size                      = sizeof(BDRVQuorumState),
 
-    .instance_size      = sizeof(BDRVQuorumState),
+    .bdrv_file_open                     = quorum_open,
+    .bdrv_close                         = quorum_close,
 
-    .bdrv_file_open     = quorum_open,
-    .bdrv_close         = quorum_close,
+    .bdrv_co_flush_to_disk              = quorum_co_flush,
 
-    .bdrv_co_flush_to_disk = quorum_co_flush,
+    .bdrv_getlength                     = quorum_getlength,
 
-    .bdrv_getlength     = quorum_getlength,
+    .bdrv_aio_readv                     = quorum_aio_readv,
+    .bdrv_aio_writev                    = quorum_aio_writev,
+    .bdrv_invalidate_cache              = quorum_invalidate_cache,
 
-    .bdrv_aio_readv     = quorum_aio_readv,
-    .bdrv_aio_writev    = quorum_aio_writev,
-    .bdrv_invalidate_cache = quorum_invalidate_cache,
+    .bdrv_detach_aio_context            = quorum_detach_aio_context,
+    .bdrv_attach_aio_context            = quorum_attach_aio_context,
 
-    .is_filter           = true,
-    .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
+    .is_filter                          = true,
+    .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
 };
 
 static void bdrv_quorum_init(void)
-- 
cgit v1.2.1


From c2f3426c9bba0195b15a919587f794b110a2dcfc Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:47 +0200
Subject: block/raw-posix: implement .bdrv_detach/attach_aio_context()

Drop the assumption that we're using the main AioContext for Linux AIO.
Convert the Linux AIO event notifier to use aio_set_event_notifier().

The .bdrv_detach/attach_aio_context() interfaces also need to be
implemented to move the event notifier handler from the old to the new
AioContext.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/linux-aio.c | 16 ++++++++++++++--
 block/raw-aio.h   |  2 ++
 block/raw-posix.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index 53434e2df5..7ff3897fec 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -177,6 +177,20 @@ out_free_aiocb:
     return NULL;
 }
 
+void laio_detach_aio_context(void *s_, AioContext *old_context)
+{
+    struct qemu_laio_state *s = s_;
+
+    aio_set_event_notifier(old_context, &s->e, NULL);
+}
+
+void laio_attach_aio_context(void *s_, AioContext *new_context)
+{
+    struct qemu_laio_state *s = s_;
+
+    aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+}
+
 void *laio_init(void)
 {
     struct qemu_laio_state *s;
@@ -190,8 +204,6 @@ void *laio_init(void)
         goto out_close_efd;
     }
 
-    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb);
-
     return s;
 
 out_close_efd:
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 7ad0a8a0a7..9a761eeade 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -37,6 +37,8 @@ void *laio_init(void);
 BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type);
+void laio_detach_aio_context(void *s, AioContext *old_context);
+void laio_attach_aio_context(void *s, AioContext *new_context);
 #endif
 
 #ifdef _WIN32
diff --git a/block/raw-posix.c b/block/raw-posix.c
index b7f0f2624b..cce10a45a8 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -307,6 +307,29 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
     }
 }
 
+static void raw_detach_aio_context(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+    BDRVRawState *s = bs->opaque;
+
+    if (s->use_aio) {
+        laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
+    }
+#endif
+}
+
+static void raw_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+#ifdef CONFIG_LINUX_AIO
+    BDRVRawState *s = bs->opaque;
+
+    if (s->use_aio) {
+        laio_attach_aio_context(s->aio_ctx, new_context);
+    }
+#endif
+}
+
 #ifdef CONFIG_LINUX_AIO
 static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
 {
@@ -447,6 +470,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
 #endif
 
+    raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
+
     ret = 0;
 fail:
     if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -1059,6 +1084,9 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
 static void raw_close(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
+
+    raw_detach_aio_context(bs);
+
     if (s->fd >= 0) {
         qemu_close(s->fd);
         s->fd = -1;
@@ -1478,6 +1506,9 @@ static BlockDriver bdrv_file = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     .create_options = raw_create_options,
 };
 
@@ -1878,6 +1909,9 @@ static BlockDriver bdrv_host_device = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     /* generic scsi device */
 #ifdef __linux__
     .bdrv_ioctl         = hdev_ioctl,
@@ -2020,6 +2054,9 @@ static BlockDriver bdrv_host_floppy = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     /* removable device support */
     .bdrv_is_inserted   = floppy_is_inserted,
     .bdrv_media_changed = floppy_media_changed,
@@ -2145,6 +2182,9 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     /* removable device support */
     .bdrv_is_inserted   = cdrom_is_inserted,
     .bdrv_eject         = cdrom_eject,
@@ -2276,6 +2316,9 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     /* removable device support */
     .bdrv_is_inserted   = cdrom_is_inserted,
     .bdrv_eject         = cdrom_eject,
-- 
cgit v1.2.1


From abd269b7cf1f084a067731acb8f3272c193cb5f0 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:48 +0200
Subject: block/linux-aio: fix memory and fd leak

Hot unplugging -drive aio=native,file=test.img,format=raw images leaves
the Linux AIO event notifier and struct qemu_laio_state allocated.
Luckily nothing will use the event notifier after the BlockDriverState
has been closed so the handler function is never called.

It's still worth fixing this resource leak.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/linux-aio.c | 8 ++++++++
 block/raw-aio.h   | 1 +
 block/raw-posix.c | 5 +++++
 3 files changed, 14 insertions(+)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index 7ff3897fec..f0a2c087b2 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -212,3 +212,11 @@ out_free_state:
     g_free(s);
     return NULL;
 }
+
+void laio_cleanup(void *s_)
+{
+    struct qemu_laio_state *s = s_;
+
+    event_notifier_cleanup(&s->e);
+    g_free(s);
+}
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 9a761eeade..55e0ccc6ed 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -34,6 +34,7 @@
 /* linux-aio.c - Linux native implementation */
 #ifdef CONFIG_LINUX_AIO
 void *laio_init(void);
+void laio_cleanup(void *s);
 BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index cce10a45a8..ffdb1763f8 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1087,6 +1087,11 @@ static void raw_close(BlockDriverState *bs)
 
     raw_detach_aio_context(bs);
 
+#ifdef CONFIG_LINUX_AIO
+    if (s->use_aio) {
+        laio_cleanup(s->aio_ctx);
+    }
+#endif
     if (s->fd >= 0) {
         qemu_close(s->fd);
         s->fd = -1;
-- 
cgit v1.2.1


From 99cc598924d9686e497fa9ddd7889d6be27f3084 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:49 +0200
Subject: block/raw-win32: create one QEMUWin32AIOState per BDRVRawState

Each QEMUWin32AIOState event notifier is associated with an AioContext.
Since BlockDriverState instances can use different AioContexts we cannot
continue to use a global QEMUWin32AIOState.

Let each BDRVRawState have its own QEMUWin32AIOState and free it when
BDRVRawState is closed.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/raw-aio.h   |  1 +
 block/raw-win32.c | 29 ++++++++++++++++-------------
 block/win32-aio.c |  8 ++++++++
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/block/raw-aio.h b/block/raw-aio.h
index 55e0ccc6ed..6269f3df86 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -45,6 +45,7 @@ void laio_attach_aio_context(void *s, AioContext *new_context);
 #ifdef _WIN32
 typedef struct QEMUWin32AIOState QEMUWin32AIOState;
 QEMUWin32AIOState *win32_aio_init(void);
+void win32_aio_cleanup(QEMUWin32AIOState *aio);
 int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
 BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
         QEMUWin32AIOState *aio, HANDLE hfile,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 064ea3123c..cf1e9cec4c 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -36,8 +36,6 @@
 #define FTYPE_CD     1
 #define FTYPE_HARDDISK 2
 
-static QEMUWin32AIOState *aio;
-
 typedef struct RawWin32AIOData {
     BlockDriverState *bs;
     HANDLE hfile;
@@ -300,15 +298,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
 
     raw_parse_flags(flags, &access_flags, &overlapped);
 
-    if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
-        aio = win32_aio_init();
-        if (aio == NULL) {
-            error_setg(errp, "Could not initialize AIO");
-            ret = -EINVAL;
-            goto fail;
-        }
-    }
-
     if (filename[0] && filename[1] == ':') {
         snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
     } else if (filename[0] == '\\' && filename[1] == '\\') {
@@ -335,13 +324,21 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     if (flags & BDRV_O_NATIVE_AIO) {
-        ret = win32_aio_attach(aio, s->hfile);
+        s->aio = win32_aio_init();
+        if (s->aio == NULL) {
+            CloseHandle(s->hfile);
+            error_setg(errp, "Could not initialize AIO");
+            ret = -EINVAL;
+            goto fail;
+        }
+
+        ret = win32_aio_attach(s->aio, s->hfile);
         if (ret < 0) {
+            win32_aio_cleanup(s->aio);
             CloseHandle(s->hfile);
             error_setg_errno(errp, -ret, "Could not enable AIO");
             goto fail;
         }
-        s->aio = aio;
     }
 
     raw_probe_alignment(bs);
@@ -389,6 +386,12 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
 static void raw_close(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
+
+    if (s->aio) {
+        win32_aio_cleanup(s->aio);
+        s->aio = NULL;
+    }
+
     CloseHandle(s->hfile);
     if (bs->open_flags & BDRV_O_TEMPORARY) {
         unlink(bs->filename);
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 5d1d199b61..b43b1667ee 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -204,3 +204,11 @@ out_free_state:
     g_free(s);
     return NULL;
 }
+
+void win32_aio_cleanup(QEMUWin32AIOState *aio)
+{
+    qemu_aio_set_event_notifier(&aio->e, NULL);
+    CloseHandle(aio->hIOCP);
+    event_notifier_cleanup(&aio->e);
+    g_free(aio);
+}
-- 
cgit v1.2.1


From 85ebd381fdae2d551cd6a0ffff47ab08734edf12 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:50 +0200
Subject: block/raw-win32: implement .bdrv_detach/attach_aio_context()

Drop the assumption that we're using the main AioContext for raw-win32.
Convert the aio-win32 code to support detach/attach and replace
qemu_aio_wait() with aio_poll().

The .bdrv_detach/attach_aio_context() interfaces move the aio-win32
event notifier from the old to the new AioContext.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/raw-aio.h   |  4 ++++
 block/raw-win32.c | 25 +++++++++++++++++++++++++
 block/win32-aio.c | 21 +++++++++++++++++----
 3 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/block/raw-aio.h b/block/raw-aio.h
index 6269f3df86..8cf084eeb5 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -51,6 +51,10 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
         QEMUWin32AIOState *aio, HANDLE hfile,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type);
+void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
+                                  AioContext *old_context);
+void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
+                                  AioContext *new_context);
 #endif
 
 #endif /* QEMU_RAW_AIO_H */
diff --git a/block/raw-win32.c b/block/raw-win32.c
index cf1e9cec4c..324e8187f5 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -200,6 +200,25 @@ static int set_sparse(int fd)
 				 NULL, 0, NULL, 0, &returned, NULL);
 }
 
+static void raw_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (s->aio) {
+        win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
+    }
+}
+
+static void raw_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (s->aio) {
+        win32_aio_attach_aio_context(s->aio, new_context);
+    }
+}
+
 static void raw_probe_alignment(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
@@ -339,6 +358,8 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
             error_setg_errno(errp, -ret, "Could not enable AIO");
             goto fail;
         }
+
+        win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
     }
 
     raw_probe_alignment(bs);
@@ -388,6 +409,7 @@ static void raw_close(BlockDriverState *bs)
     BDRVRawState *s = bs->opaque;
 
     if (s->aio) {
+        win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
         win32_aio_cleanup(s->aio);
         s->aio = NULL;
     }
@@ -687,6 +709,9 @@ static BlockDriver bdrv_host_device = {
     .bdrv_aio_writev    = raw_aio_writev,
     .bdrv_aio_flush     = raw_aio_flush,
 
+    .bdrv_detach_aio_context = raw_detach_aio_context,
+    .bdrv_attach_aio_context = raw_attach_aio_context,
+
     .bdrv_getlength      = raw_getlength,
     .has_variable_length = true,
 
diff --git a/block/win32-aio.c b/block/win32-aio.c
index b43b1667ee..8e417f70ae 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -40,6 +40,7 @@ struct QEMUWin32AIOState {
     HANDLE hIOCP;
     EventNotifier e;
     int count;
+    bool is_aio_context_attached;
 };
 
 typedef struct QEMUWin32AIOCB {
@@ -114,7 +115,7 @@ static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
      * wait for completion.
      */
     while (!HasOverlappedIoCompleted(&waiocb->ov)) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(blockacb->bs), true);
     }
 }
 
@@ -180,6 +181,20 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
     }
 }
 
+void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
+                                  AioContext *old_context)
+{
+    aio_set_event_notifier(old_context, &aio->e, NULL);
+    aio->is_aio_context_attached = false;
+}
+
+void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
+                                  AioContext *new_context)
+{
+    aio->is_aio_context_attached = true;
+    aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+}
+
 QEMUWin32AIOState *win32_aio_init(void)
 {
     QEMUWin32AIOState *s;
@@ -194,8 +209,6 @@ QEMUWin32AIOState *win32_aio_init(void)
         goto out_close_efd;
     }
 
-    qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb);
-
     return s;
 
 out_close_efd:
@@ -207,7 +220,7 @@ out_free_state:
 
 void win32_aio_cleanup(QEMUWin32AIOState *aio)
 {
-    qemu_aio_set_event_notifier(&aio->e, NULL);
+    assert(!aio->is_aio_context_attached);
     CloseHandle(aio->hIOCP);
     event_notifier_cleanup(&aio->e);
     g_free(aio);
-- 
cgit v1.2.1


From ea8001914924331549e9b876bcaeac2dc80db2b6 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:51 +0200
Subject: rbd: use BlockDriverState's AioContext

Drop the assumption that we're using the main AioContext.  Convert
qemu_bh_new() to aio_bh_new() and qemu_aio_wait() to aio_poll().

The .bdrv_detach_aio_context() and .bdrv_attach_aio_context() interfaces
are not needed since no fd handlers, timers, or BHs stay registered when
requests have been drained.

Cc: Josh Durgin <josh.durgin@inktank.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 block/rbd.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 09af48426e..a78760b7fe 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -555,7 +555,7 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
     acb->cancelled = 1;
 
     while (acb->status == -EINPROGRESS) {
-        qemu_aio_wait();
+        aio_poll(bdrv_get_aio_context(acb->common.bs), true);
     }
 
     qemu_aio_release(acb);
@@ -588,7 +588,8 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
     rcb->ret = rbd_aio_get_return_value(c);
     rbd_aio_release(c);
 
-    acb->bh = qemu_bh_new(rbd_finish_bh, rcb);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+                         rbd_finish_bh, rcb);
     qemu_bh_schedule(acb->bh);
 }
 
-- 
cgit v1.2.1


From 84390bed59f5a8ead499c4501f54c9b8a747c9d4 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:52 +0200
Subject: sheepdog: implement .bdrv_detach/attach_aio_context()

Drop the assumption that we're using the main AioContext.  Convert
qemu_aio_set_fd_handler() to aio_set_fd_handler() and qemu_aio_wait() to
aio_poll().

The .bdrv_detach/attach_aio_context() interfaces also need to be
implemented to move the socket fd handler from the old to the new
AioContext.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Acked-by: Liu Yuan <namei.unix@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/sheepdog.c | 118 +++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 80 insertions(+), 38 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 4ecbf5f498..9175cc232d 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -314,6 +314,7 @@ struct SheepdogAIOCB {
 
 typedef struct BDRVSheepdogState {
     BlockDriverState *bs;
+    AioContext *aio_context;
 
     SheepdogInode inode;
 
@@ -496,7 +497,7 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
             sd_finish_aiocb(acb);
             return;
         }
-        qemu_aio_wait();
+        aio_poll(s->aio_context, true);
     }
 }
 
@@ -578,6 +579,7 @@ static void restart_co_req(void *opaque)
 
 typedef struct SheepdogReqCo {
     int sockfd;
+    AioContext *aio_context;
     SheepdogReq *hdr;
     void *data;
     unsigned int *wlen;
@@ -598,14 +600,14 @@ static coroutine_fn void do_co_req(void *opaque)
     unsigned int *rlen = srco->rlen;
 
     co = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
 
     ret = send_co_req(sockfd, hdr, data, wlen);
     if (ret < 0) {
         goto out;
     }
 
-    qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
 
     ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
     if (ret != sizeof(*hdr)) {
@@ -630,18 +632,19 @@ static coroutine_fn void do_co_req(void *opaque)
 out:
     /* there is at most one request for this sockfd, so it is safe to
      * set each handler to NULL. */
-    qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL);
+    aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
 
     srco->ret = ret;
     srco->finished = true;
 }
 
-static int do_req(int sockfd, SheepdogReq *hdr, void *data,
-                  unsigned int *wlen, unsigned int *rlen)
+static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
+                  void *data, unsigned int *wlen, unsigned int *rlen)
 {
     Coroutine *co;
     SheepdogReqCo srco = {
         .sockfd = sockfd,
+        .aio_context = aio_context,
         .hdr = hdr,
         .data = data,
         .wlen = wlen,
@@ -656,7 +659,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
         co = qemu_coroutine_create(do_co_req);
         qemu_coroutine_enter(co, &srco);
         while (!srco.finished) {
-            qemu_aio_wait();
+            aio_poll(aio_context, true);
         }
     }
 
@@ -709,7 +712,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
     BDRVSheepdogState *s = opaque;
     AIOReq *aio_req, *next;
 
-    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
     close(s->fd);
     s->fd = -1;
 
@@ -922,7 +925,7 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
         return fd;
     }
 
-    qemu_aio_set_fd_handler(fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
     return fd;
 }
 
@@ -1092,7 +1095,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
     hdr.snapid = snapid;
     hdr.flags = SD_FLAG_CMD_WRITE;
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
     if (ret) {
         error_setg_errno(errp, -ret, "cannot get vdi info");
         goto out;
@@ -1173,7 +1176,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
 
     qemu_co_mutex_lock(&s->lock);
     s->co_send = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, s);
+    aio_set_fd_handler(s->aio_context, s->fd,
+                       co_read_response, co_write_request, s);
     socket_set_cork(s->fd, 1);
 
     /* send a header */
@@ -1191,12 +1195,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     }
 out:
     socket_set_cork(s->fd, 0);
-    qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
     s->co_send = NULL;
     qemu_co_mutex_unlock(&s->lock);
 }
 
-static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int read_write_object(int fd, AioContext *aio_context, char *buf,
+                             uint64_t oid, uint8_t copies,
                              unsigned int datalen, uint64_t offset,
                              bool write, bool create, uint32_t cache_flags)
 {
@@ -1229,7 +1234,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
     hdr.offset = offset;
     hdr.copies = copies;
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
     if (ret) {
         error_report("failed to send a request to the sheep");
         return ret;
@@ -1244,19 +1249,23 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
     }
 }
 
-static int read_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int read_object(int fd, AioContext *aio_context, char *buf,
+                       uint64_t oid, uint8_t copies,
                        unsigned int datalen, uint64_t offset,
                        uint32_t cache_flags)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, false,
+    return read_write_object(fd, aio_context, buf, oid, copies,
+                             datalen, offset, false,
                              false, cache_flags);
 }
 
-static int write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int write_object(int fd, AioContext *aio_context, char *buf,
+                        uint64_t oid, uint8_t copies,
                         unsigned int datalen, uint64_t offset, bool create,
                         uint32_t cache_flags)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, true,
+    return read_write_object(fd, aio_context, buf, oid, copies,
+                             datalen, offset, true,
                              create, cache_flags);
 }
 
@@ -1284,7 +1293,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
         goto out;
     }
 
-    ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid),
+    ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid),
                       s->inode.nr_copies, sizeof(*inode), 0, s->cache_flags);
     if (ret < 0) {
         goto out;
@@ -1359,6 +1368,22 @@ out:
     }
 }
 
+static void sd_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVSheepdogState *s = bs->opaque;
+
+    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+}
+
+static void sd_attach_aio_context(BlockDriverState *bs,
+                                  AioContext *new_context)
+{
+    BDRVSheepdogState *s = bs->opaque;
+
+    s->aio_context = new_context;
+    aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+}
+
 /* TODO Convert to fine grained options */
 static QemuOptsList runtime_opts = {
     .name = "sheepdog",
@@ -1387,6 +1412,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     const char *filename;
 
     s->bs = bs;
+    s->aio_context = bdrv_get_aio_context(bs);
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1448,8 +1474,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     buf = g_malloc(SD_INODE_SIZE);
-    ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
-                      s->cache_flags);
+    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+                      0, SD_INODE_SIZE, 0, s->cache_flags);
 
     closesocket(fd);
 
@@ -1469,7 +1495,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     g_free(buf);
     return 0;
 out:
-    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
     if (s->fd >= 0) {
         closesocket(s->fd);
     }
@@ -1512,7 +1538,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
     hdr.copy_policy = s->inode.copy_policy;
     hdr.copies = s->inode.nr_copies;
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
 
     closesocket(fd);
 
@@ -1766,7 +1792,8 @@ static void sd_close(BlockDriverState *bs)
     hdr.data_length = wlen;
     hdr.flags = SD_FLAG_CMD_WRITE;
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+                 s->name, &wlen, &rlen);
 
     closesocket(fd);
 
@@ -1775,7 +1802,7 @@ static void sd_close(BlockDriverState *bs)
         error_report("%s, %s", sd_strerror(rsp->result), s->name);
     }
 
-    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
     closesocket(s->fd);
     g_free(s->host_spec);
 }
@@ -1812,8 +1839,9 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
     /* we don't need to update entire object */
     datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
     s->inode.vdi_size = offset;
-    ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, false, s->cache_flags);
+    ret = write_object(fd, s->aio_context, (char *)&s->inode,
+                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
+                       datalen, 0, false, s->cache_flags);
     close(fd);
 
     if (ret < 0) {
@@ -1882,7 +1910,8 @@ static bool sd_delete(BDRVSheepdogState *s)
         return false;
     }
 
-    ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+                 s->name, &wlen, &rlen);
     closesocket(fd);
     if (ret) {
         return false;
@@ -1939,8 +1968,8 @@ static int sd_create_branch(BDRVSheepdogState *s)
         goto out;
     }
 
-    ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
-                      SD_INODE_SIZE, 0, s->cache_flags);
+    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+                      s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags);
 
     closesocket(fd);
 
@@ -2187,8 +2216,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
         goto cleanup;
     }
 
-    ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, false, s->cache_flags);
+    ret = write_object(fd, s->aio_context, (char *)&s->inode,
+                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
+                       datalen, 0, false, s->cache_flags);
     if (ret < 0) {
         error_report("failed to write snapshot's inode.");
         goto cleanup;
@@ -2203,8 +2233,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
         goto cleanup;
     }
 
-    ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
-                      s->inode.nr_copies, datalen, 0, s->cache_flags);
+    ret = read_object(fd, s->aio_context, (char *)inode,
+                      vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0,
+                      s->cache_flags);
 
     if (ret < 0) {
         error_report("failed to read new inode info. %s", strerror(errno));
@@ -2311,7 +2342,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
     req.opcode = SD_OP_READ_VDIS;
     req.data_length = max;
 
-    ret = do_req(fd, (SheepdogReq *)&req, vdi_inuse, &wlen, &rlen);
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&req,
+                 vdi_inuse, &wlen, &rlen);
 
     closesocket(fd);
     if (ret) {
@@ -2338,7 +2370,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
         }
 
         /* we don't need to read entire object */
-        ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
+        ret = read_object(fd, s->aio_context, (char *)&inode,
+                          vid_to_vdi_oid(vid),
                           0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
                           s->cache_flags);
 
@@ -2403,11 +2436,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
 
         create = (offset == 0);
         if (load) {
-            ret = read_object(fd, (char *)data, vmstate_oid,
+            ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid,
                               s->inode.nr_copies, data_len, offset,
                               s->cache_flags);
         } else {
-            ret = write_object(fd, (char *)data, vmstate_oid,
+            ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid,
                                s->inode.nr_copies, data_len, offset, create,
                                s->cache_flags);
         }
@@ -2580,6 +2613,9 @@ static BlockDriver bdrv_sheepdog = {
     .bdrv_save_vmstate  = sd_save_vmstate,
     .bdrv_load_vmstate  = sd_load_vmstate,
 
+    .bdrv_detach_aio_context = sd_detach_aio_context,
+    .bdrv_attach_aio_context = sd_attach_aio_context,
+
     .create_options = sd_create_options,
 };
 
@@ -2610,6 +2646,9 @@ static BlockDriver bdrv_sheepdog_tcp = {
     .bdrv_save_vmstate  = sd_save_vmstate,
     .bdrv_load_vmstate  = sd_load_vmstate,
 
+    .bdrv_detach_aio_context = sd_detach_aio_context,
+    .bdrv_attach_aio_context = sd_attach_aio_context,
+
     .create_options = sd_create_options,
 };
 
@@ -2640,6 +2679,9 @@ static BlockDriver bdrv_sheepdog_unix = {
     .bdrv_save_vmstate  = sd_save_vmstate,
     .bdrv_load_vmstate  = sd_load_vmstate,
 
+    .bdrv_detach_aio_context = sd_detach_aio_context,
+    .bdrv_attach_aio_context = sd_attach_aio_context,
+
     .create_options = sd_create_options,
 };
 
-- 
cgit v1.2.1


From 2af0b2005669ee3572a19499c51a499114a8ec50 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:53 +0200
Subject: ssh: use BlockDriverState's AioContext

Drop the assumption that we're using the main AioContext.  Use
bdrv_get_aio_context() to register fd handlers in the right AioContext
for this BlockDriverState.

The .bdrv_detach_aio_context() and .bdrv_attach_aio_context() interfaces
are not needed since no fd handlers, timers, or BHs stay registered when
requests have been drained.

For now this doesn't make much difference but will allow ssh to work in
IOThread instances in the future.

Acked-by: Richard W.M. Jones <rjones@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/ssh.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/block/ssh.c b/block/ssh.c
index b2129714bc..9779eac2bd 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -773,7 +773,7 @@ static void restart_coroutine(void *opaque)
     qemu_coroutine_enter(co, NULL);
 }
 
-static coroutine_fn void set_fd_handler(BDRVSSHState *s)
+static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
 {
     int r;
     IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -791,24 +791,26 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s)
     DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock,
             rd_handler, wr_handler);
 
-    qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, co);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+                       rd_handler, wr_handler, co);
 }
 
-static coroutine_fn void clear_fd_handler(BDRVSSHState *s)
+static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
+                                          BlockDriverState *bs)
 {
     DPRINTF("s->sock=%d", s->sock);
-    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
 }
 
 /* A non-blocking call returned EAGAIN, so yield, ensuring the
  * handlers are set up so that we'll be rescheduled when there is an
  * interesting event on the socket.
  */
-static coroutine_fn void co_yield(BDRVSSHState *s)
+static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
 {
-    set_fd_handler(s);
+    set_fd_handler(s, bs);
     qemu_coroutine_yield();
-    clear_fd_handler(s);
+    clear_fd_handler(s, bs);
 }
 
 /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
@@ -838,7 +840,7 @@ static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
     }
 }
 
-static coroutine_fn int ssh_read(BDRVSSHState *s,
+static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
                                  int64_t offset, size_t size,
                                  QEMUIOVector *qiov)
 {
@@ -871,7 +873,7 @@ static coroutine_fn int ssh_read(BDRVSSHState *s,
         DPRINTF("sftp_read returned %zd", r);
 
         if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
-            co_yield(s);
+            co_yield(s, bs);
             goto again;
         }
         if (r < 0) {
@@ -906,14 +908,14 @@ static coroutine_fn int ssh_co_readv(BlockDriverState *bs,
     int ret;
 
     qemu_co_mutex_lock(&s->lock);
-    ret = ssh_read(s, sector_num * BDRV_SECTOR_SIZE,
+    ret = ssh_read(s, bs, sector_num * BDRV_SECTOR_SIZE,
                    nb_sectors * BDRV_SECTOR_SIZE, qiov);
     qemu_co_mutex_unlock(&s->lock);
 
     return ret;
 }
 
-static int ssh_write(BDRVSSHState *s,
+static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
                      int64_t offset, size_t size,
                      QEMUIOVector *qiov)
 {
@@ -941,7 +943,7 @@ static int ssh_write(BDRVSSHState *s,
         DPRINTF("sftp_write returned %zd", r);
 
         if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
-            co_yield(s);
+            co_yield(s, bs);
             goto again;
         }
         if (r < 0) {
@@ -960,7 +962,7 @@ static int ssh_write(BDRVSSHState *s,
          */
         if (r == 0) {
             ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
-            co_yield(s);
+            co_yield(s, bs);
             goto again;
         }
 
@@ -988,7 +990,7 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
     int ret;
 
     qemu_co_mutex_lock(&s->lock);
-    ret = ssh_write(s, sector_num * BDRV_SECTOR_SIZE,
+    ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
                     nb_sectors * BDRV_SECTOR_SIZE, qiov);
     qemu_co_mutex_unlock(&s->lock);
 
@@ -1009,7 +1011,7 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
 
 #ifdef HAS_LIBSSH2_SFTP_FSYNC
 
-static coroutine_fn int ssh_flush(BDRVSSHState *s)
+static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
 {
     int r;
 
@@ -1017,7 +1019,7 @@ static coroutine_fn int ssh_flush(BDRVSSHState *s)
  again:
     r = libssh2_sftp_fsync(s->sftp_handle);
     if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
-        co_yield(s);
+        co_yield(s, bs);
         goto again;
     }
     if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
@@ -1039,7 +1041,7 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
     int ret;
 
     qemu_co_mutex_lock(&s->lock);
-    ret = ssh_flush(s);
+    ret = ssh_flush(s, bs);
     qemu_co_mutex_unlock(&s->lock);
 
     return ret;
-- 
cgit v1.2.1


From c75f3bdf46f74c9f1c199ace3d2b291e430dcda3 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:54 +0200
Subject: vmdk: implement .bdrv_detach/attach_aio_context()

Implement .bdrv_detach/attach_aio_context() interfaces to propagate
detach/attach to BDRVVmdkState->extents[].file.  The block layer takes
care of ->file and ->backing_hd but doesn't know about our extents
BlockDriverStates, which is also part of the graph.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/vmdk.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/block/vmdk.c b/block/vmdk.c
index 2b38f61fcd..b8a476278a 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2096,6 +2096,27 @@ static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
     return 0;
 }
 
+static void vmdk_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVVmdkState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_extents; i++) {
+        bdrv_detach_aio_context(s->extents[i].file);
+    }
+}
+
+static void vmdk_attach_aio_context(BlockDriverState *bs,
+                                    AioContext *new_context)
+{
+    BDRVVmdkState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_extents; i++) {
+        bdrv_attach_aio_context(s->extents[i].file, new_context);
+    }
+}
+
 static QEMUOptionParameter vmdk_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -2153,6 +2174,8 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_get_specific_info       = vmdk_get_specific_info,
     .bdrv_refresh_limits          = vmdk_refresh_limits,
     .bdrv_get_info                = vmdk_get_info,
+    .bdrv_detach_aio_context      = vmdk_detach_aio_context,
+    .bdrv_attach_aio_context      = vmdk_attach_aio_context,
 
     .create_options               = vmdk_create_options,
 };
-- 
cgit v1.2.1


From 580b6b2aa2fea42ee42921ef5812b40224ab1b49 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:55 +0200
Subject: dataplane: use the QEMU block layer for I/O

Stop using a custom Linux AIO request queue from ioq.h and instead use
the QEMU block layer for I/O.

This patch adjusts the VirtIOBlockRequest struct with fields needed for
bdrv_aio_readv()/bdrv_aio_writev().  ioq.h used struct iovec and struct
iocb, which we don't need directly anymore.

Modify dataplane start/stop to set the AioContext on the
BlockDriverState.  We also no longer need to get the raw-posix file
descriptor.  This means image formats are now supported with dataplane!

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 193 ++++++++++++++--------------------------
 1 file changed, 65 insertions(+), 128 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index e49c2536b1..75616da521 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -17,7 +17,6 @@
 #include "qemu/thread.h"
 #include "qemu/error-report.h"
 #include "hw/virtio/dataplane/vring.h"
-#include "ioq.h"
 #include "block/block.h"
 #include "hw/virtio/virtio-blk.h"
 #include "virtio-blk.h"
@@ -25,20 +24,14 @@
 #include "hw/virtio/virtio-bus.h"
 #include "qom/object_interfaces.h"
 
-enum {
-    SEG_MAX = 126,                  /* maximum number of I/O segments */
-    VRING_MAX = SEG_MAX + 2,        /* maximum number of vring descriptors */
-    REQ_MAX = VRING_MAX,            /* maximum number of requests in the vring,
-                                     * is VRING_MAX / 2 with traditional and
-                                     * VRING_MAX with indirect descriptors */
-};
-
 typedef struct {
-    struct iocb iocb;               /* Linux AIO control block */
+    VirtIOBlockDataPlane *s;
     QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
     VirtQueueElement *elem;         /* saved data from the virtqueue */
-    struct iovec *bounce_iov;       /* used if guest buffers are unaligned */
-    QEMUIOVector *read_qiov;        /* for read completion /w bounce buffer */
+    QEMUIOVector qiov;              /* original request iovecs */
+    struct iovec bounce_iov;        /* used if guest buffers are unaligned */
+    QEMUIOVector bounce_qiov;       /* bounce buffer iovecs */
+    bool read;                      /* read or write? */
 } VirtIOBlockRequest;
 
 struct VirtIOBlockDataPlane {
@@ -61,16 +54,8 @@ struct VirtIOBlockDataPlane {
     IOThread *iothread;
     IOThread internal_iothread_obj;
     AioContext *ctx;
-    EventNotifier io_notifier;      /* Linux AIO completion */
     EventNotifier host_notifier;    /* doorbell */
 
-    IOQueue ioqueue;                /* Linux AIO queue (should really be per
-                                       IOThread) */
-    VirtIOBlockRequest requests[REQ_MAX]; /* pool of requests, managed by the
-                                             queue */
-
-    unsigned int num_reqs;
-
     /* Operation blocker on BDS */
     Error *blocker;
 };
@@ -85,33 +70,28 @@ static void notify_guest(VirtIOBlockDataPlane *s)
     event_notifier_set(s->guest_notifier);
 }
 
-static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
+static void complete_rdwr(void *opaque, int ret)
 {
-    VirtIOBlockDataPlane *s = opaque;
-    VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+    VirtIOBlockRequest *req = opaque;
     struct virtio_blk_inhdr hdr;
     int len;
 
-    if (likely(ret >= 0)) {
+    if (likely(ret == 0)) {
         hdr.status = VIRTIO_BLK_S_OK;
-        len = ret;
+        len = req->qiov.size;
     } else {
         hdr.status = VIRTIO_BLK_S_IOERR;
         len = 0;
     }
 
-    trace_virtio_blk_data_plane_complete_request(s, req->elem->index, ret);
+    trace_virtio_blk_data_plane_complete_request(req->s, req->elem->index, ret);
 
-    if (req->read_qiov) {
-        assert(req->bounce_iov);
-        qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len);
-        qemu_iovec_destroy(req->read_qiov);
-        g_slice_free(QEMUIOVector, req->read_qiov);
+    if (req->read && req->bounce_iov.iov_base) {
+        qemu_iovec_from_buf(&req->qiov, 0, req->bounce_iov.iov_base, len);
     }
 
-    if (req->bounce_iov) {
-        qemu_vfree(req->bounce_iov->iov_base);
-        g_slice_free(struct iovec, req->bounce_iov);
+    if (req->bounce_iov.iov_base) {
+        qemu_vfree(req->bounce_iov.iov_base);
     }
 
     qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
@@ -122,9 +102,9 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
      * written to, but for virtio-blk it seems to be the number of bytes
      * transferred plus the status bytes.
      */
-    vring_push(&s->vring, req->elem, len + sizeof(hdr));
-    req->elem = NULL;
-    s->num_reqs--;
+    vring_push(&req->s->vring, req->elem, len + sizeof(hdr));
+    notify_guest(req->s);
+    g_slice_free(VirtIOBlockRequest, req);
 }
 
 static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
@@ -155,51 +135,52 @@ static void do_get_id_cmd(VirtIOBlockDataPlane *s,
     complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
 }
 
-static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
-                       struct iovec *iov, unsigned iov_cnt,
-                       long long offset, VirtQueueElement *elem,
-                       QEMUIOVector *inhdr)
+static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
+                        struct iovec *iov, unsigned iov_cnt,
+                        int64_t sector_num, VirtQueueElement *elem,
+                        QEMUIOVector *inhdr)
 {
-    struct iocb *iocb;
-    QEMUIOVector qiov;
-    struct iovec *bounce_iov = NULL;
-    QEMUIOVector *read_qiov = NULL;
-
-    qemu_iovec_init_external(&qiov, iov, iov_cnt);
-    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
-        void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
-
-        if (read) {
-            /* Need to copy back from bounce buffer on completion */
-            read_qiov = g_slice_new(QEMUIOVector);
-            qemu_iovec_init(read_qiov, iov_cnt);
-            qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
-        } else {
-            qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
+    VirtIOBlockRequest *req = g_slice_new0(VirtIOBlockRequest);
+    QEMUIOVector *qiov;
+    int nb_sectors;
+
+    /* Fill in virtio block metadata needed for completion */
+    req->s = s;
+    req->elem = elem;
+    req->inhdr = inhdr;
+    req->read = read;
+    qemu_iovec_init_external(&req->qiov, iov, iov_cnt);
+
+    qiov = &req->qiov;
+
+    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, qiov)) {
+        void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov->size);
+
+        /* Populate bounce buffer with data for writes */
+        if (!read) {
+            qemu_iovec_to_buf(qiov, 0, bounce_buffer, qiov->size);
         }
 
         /* Redirect I/O to aligned bounce buffer */
-        bounce_iov = g_slice_new(struct iovec);
-        bounce_iov->iov_base = bounce_buffer;
-        bounce_iov->iov_len = qiov.size;
-        iov = bounce_iov;
-        iov_cnt = 1;
+        req->bounce_iov.iov_base = bounce_buffer;
+        req->bounce_iov.iov_len = qiov->size;
+        qemu_iovec_init_external(&req->bounce_qiov, &req->bounce_iov, 1);
+        qiov = &req->bounce_qiov;
     }
 
-    iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
+    nb_sectors = qiov->size / BDRV_SECTOR_SIZE;
 
-    /* Fill in virtio block metadata needed for completion */
-    VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
-    req->elem = elem;
-    req->inhdr = inhdr;
-    req->bounce_iov = bounce_iov;
-    req->read_qiov = read_qiov;
-    return 0;
+    if (read) {
+        bdrv_aio_readv(s->blk->conf.bs, sector_num, qiov, nb_sectors,
+                       complete_rdwr, req);
+    } else {
+        bdrv_aio_writev(s->blk->conf.bs, sector_num, qiov, nb_sectors,
+                        complete_rdwr, req);
+    }
 }
 
-static int process_request(IOQueue *ioq, VirtQueueElement *elem)
+static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
 {
-    VirtIOBlockDataPlane *s = container_of(ioq, VirtIOBlockDataPlane, ioqueue);
     struct iovec *iov = elem->out_sg;
     struct iovec *in_iov = elem->in_sg;
     unsigned out_num = elem->out_num;
@@ -234,11 +215,15 @@ static int process_request(IOQueue *ioq, VirtQueueElement *elem)
 
     switch (outhdr.type) {
     case VIRTIO_BLK_T_IN:
-        do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, elem, inhdr);
+        do_rdwr_cmd(s, true, in_iov, in_num,
+                    outhdr.sector * 512 / BDRV_SECTOR_SIZE,
+                    elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_OUT:
-        do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, elem, inhdr);
+        do_rdwr_cmd(s, false, iov, out_num,
+                    outhdr.sector * 512 / BDRV_SECTOR_SIZE,
+                    elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_SCSI_CMD:
@@ -274,7 +259,6 @@ static void handle_notify(EventNotifier *e)
 
     VirtQueueElement *elem;
     int ret;
-    unsigned int num_queued;
 
     event_notifier_test_and_clear(&s->host_notifier);
     for (;;) {
@@ -291,7 +275,7 @@ static void handle_notify(EventNotifier *e)
             trace_virtio_blk_data_plane_process_request(s, elem->out_num,
                                                         elem->in_num, elem->index);
 
-            if (process_request(&s->ioqueue, elem) < 0) {
+            if (process_request(s, elem) < 0) {
                 vring_set_broken(&s->vring);
                 vring_free_element(elem);
                 ret = -EFAULT;
@@ -306,44 +290,10 @@ static void handle_notify(EventNotifier *e)
             if (vring_enable_notification(s->vdev, &s->vring)) {
                 break;
             }
-        } else { /* ret == -ENOBUFS or fatal error, iovecs[] is depleted */
-            /* Since there are no iovecs[] left, stop processing for now.  Do
-             * not re-enable guest->host notifies since the I/O completion
-             * handler knows to check for more vring descriptors anyway.
-             */
+        } else { /* fatal error */
             break;
         }
     }
-
-    num_queued = ioq_num_queued(&s->ioqueue);
-    if (num_queued > 0) {
-        s->num_reqs += num_queued;
-
-        int rc = ioq_submit(&s->ioqueue);
-        if (unlikely(rc < 0)) {
-            fprintf(stderr, "ioq_submit failed %d\n", rc);
-            exit(1);
-        }
-    }
-}
-
-static void handle_io(EventNotifier *e)
-{
-    VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
-                                           io_notifier);
-
-    event_notifier_test_and_clear(&s->io_notifier);
-    if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
-        notify_guest(s);
-    }
-
-    /* If there were more requests than iovecs, the vring will not be empty yet
-     * so check again.  There should now be enough resources to process more
-     * requests.
-     */
-    if (unlikely(vring_more_avail(&s->vring))) {
-        handle_notify(&s->host_notifier);
-    }
 }
 
 /* Context: QEMU global mutex held */
@@ -437,7 +387,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
     VirtQueue *vq;
-    int i;
 
     if (s->started) {
         return;
@@ -470,24 +419,18 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
     }
     s->host_notifier = *virtio_queue_get_host_notifier(vq);
 
-    /* Set up ioqueue */
-    ioq_init(&s->ioqueue, s->fd, REQ_MAX);
-    for (i = 0; i < ARRAY_SIZE(s->requests); i++) {
-        ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
-    }
-    s->io_notifier = *ioq_get_notifier(&s->ioqueue);
-
     s->starting = false;
     s->started = true;
     trace_virtio_blk_data_plane_start(s);
 
+    bdrv_set_aio_context(s->blk->conf.bs, s->ctx);
+
     /* Kick right away to begin processing requests already in vring */
     event_notifier_set(virtio_queue_get_host_notifier(vq));
 
     /* Get this show started by hooking up our callbacks */
     aio_context_acquire(s->ctx);
     aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
-    aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io);
     aio_context_release(s->ctx);
 }
 
@@ -507,13 +450,8 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
     /* Stop notifications for new requests from guest */
     aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);
 
-    /* Complete pending requests */
-    while (s->num_reqs > 0) {
-        aio_poll(s->ctx, true);
-    }
-
-    /* Stop ioq callbacks (there are no pending requests left) */
-    aio_set_event_notifier(s->ctx, &s->io_notifier, NULL);
+    /* Drain and switch bs back to the QEMU main loop */
+    bdrv_set_aio_context(s->blk->conf.bs, qemu_get_aio_context());
 
     aio_context_release(s->ctx);
 
@@ -522,7 +460,6 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
      */
     vring_teardown(&s->vring, s->vdev, 0);
 
-    ioq_cleanup(&s->ioqueue);
     k->set_host_notifier(qbus->parent, 0, false);
 
     /* Clean up guest notifier (irq) */
-- 
cgit v1.2.1


From 0d6ecec2526d26ada9f4e44d27686a40ab9c087b Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:56 +0200
Subject: dataplane: delete IOQueue since it is no longer used

This custom Linux AIO request queue is no longer used by virtio-blk
data-plane.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/Makefile.objs |   2 +-
 hw/block/dataplane/ioq.c         | 117 ---------------------------------------
 hw/block/dataplane/ioq.h         |  57 -------------------
 3 files changed, 1 insertion(+), 175 deletions(-)
 delete mode 100644 hw/block/dataplane/ioq.c
 delete mode 100644 hw/block/dataplane/ioq.h

diff --git a/hw/block/dataplane/Makefile.objs b/hw/block/dataplane/Makefile.objs
index 9da2eb82ba..e786f66421 100644
--- a/hw/block/dataplane/Makefile.objs
+++ b/hw/block/dataplane/Makefile.objs
@@ -1 +1 @@
-obj-y += ioq.o virtio-blk.o
+obj-y += virtio-blk.o
diff --git a/hw/block/dataplane/ioq.c b/hw/block/dataplane/ioq.c
deleted file mode 100644
index f709f87ed6..0000000000
--- a/hw/block/dataplane/ioq.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Linux AIO request queue
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "ioq.h"
-
-void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs)
-{
-    int rc;
-
-    ioq->fd = fd;
-    ioq->max_reqs = max_reqs;
-
-    memset(&ioq->io_ctx, 0, sizeof ioq->io_ctx);
-    rc = io_setup(max_reqs, &ioq->io_ctx);
-    if (rc != 0) {
-        fprintf(stderr, "ioq io_setup failed %d\n", rc);
-        exit(1);
-    }
-
-    rc = event_notifier_init(&ioq->io_notifier, 0);
-    if (rc != 0) {
-        fprintf(stderr, "ioq io event notifier creation failed %d\n", rc);
-        exit(1);
-    }
-
-    ioq->freelist = g_malloc0(sizeof ioq->freelist[0] * max_reqs);
-    ioq->freelist_idx = 0;
-
-    ioq->queue = g_malloc0(sizeof ioq->queue[0] * max_reqs);
-    ioq->queue_idx = 0;
-}
-
-void ioq_cleanup(IOQueue *ioq)
-{
-    g_free(ioq->freelist);
-    g_free(ioq->queue);
-
-    event_notifier_cleanup(&ioq->io_notifier);
-    io_destroy(ioq->io_ctx);
-}
-
-EventNotifier *ioq_get_notifier(IOQueue *ioq)
-{
-    return &ioq->io_notifier;
-}
-
-struct iocb *ioq_get_iocb(IOQueue *ioq)
-{
-    /* Underflow cannot happen since ioq is sized for max_reqs */
-    assert(ioq->freelist_idx != 0);
-
-    struct iocb *iocb = ioq->freelist[--ioq->freelist_idx];
-    ioq->queue[ioq->queue_idx++] = iocb;
-    return iocb;
-}
-
-void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb)
-{
-    /* Overflow cannot happen since ioq is sized for max_reqs */
-    assert(ioq->freelist_idx != ioq->max_reqs);
-
-    ioq->freelist[ioq->freelist_idx++] = iocb;
-}
-
-struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
-                      unsigned int count, long long offset)
-{
-    struct iocb *iocb = ioq_get_iocb(ioq);
-
-    if (read) {
-        io_prep_preadv(iocb, ioq->fd, iov, count, offset);
-    } else {
-        io_prep_pwritev(iocb, ioq->fd, iov, count, offset);
-    }
-    io_set_eventfd(iocb, event_notifier_get_fd(&ioq->io_notifier));
-    return iocb;
-}
-
-int ioq_submit(IOQueue *ioq)
-{
-    int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
-    ioq->queue_idx = 0; /* reset */
-    return rc;
-}
-
-int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
-                       void *opaque)
-{
-    struct io_event events[ioq->max_reqs];
-    int nevents, i;
-
-    do {
-        nevents = io_getevents(ioq->io_ctx, 0, ioq->max_reqs, events, NULL);
-    } while (nevents < 0 && errno == EINTR);
-    if (nevents < 0) {
-        return nevents;
-    }
-
-    for (i = 0; i < nevents; i++) {
-        ssize_t ret = ((uint64_t)events[i].res2 << 32) | events[i].res;
-
-        completion(events[i].obj, ret, opaque);
-        ioq_put_iocb(ioq, events[i].obj);
-    }
-    return nevents;
-}
diff --git a/hw/block/dataplane/ioq.h b/hw/block/dataplane/ioq.h
deleted file mode 100644
index b49b5de7f4..0000000000
--- a/hw/block/dataplane/ioq.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Linux AIO request queue
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef IOQ_H
-#define IOQ_H
-
-#include <libaio.h>
-#include "qemu/event_notifier.h"
-
-typedef struct {
-    int fd;                         /* file descriptor */
-    unsigned int max_reqs;          /* max length of freelist and queue */
-
-    io_context_t io_ctx;            /* Linux AIO context */
-    EventNotifier io_notifier;      /* Linux AIO eventfd */
-
-    /* Requests can complete in any order so a free list is necessary to manage
-     * available iocbs.
-     */
-    struct iocb **freelist;         /* free iocbs */
-    unsigned int freelist_idx;
-
-    /* Multiple requests are queued up before submitting them all in one go */
-    struct iocb **queue;            /* queued iocbs */
-    unsigned int queue_idx;
-} IOQueue;
-
-void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs);
-void ioq_cleanup(IOQueue *ioq);
-EventNotifier *ioq_get_notifier(IOQueue *ioq);
-struct iocb *ioq_get_iocb(IOQueue *ioq);
-void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb);
-struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
-                      unsigned int count, long long offset);
-int ioq_submit(IOQueue *ioq);
-
-static inline unsigned int ioq_num_queued(IOQueue *ioq)
-{
-    return ioq->queue_idx;
-}
-
-typedef void IOQueueCompletion(struct iocb *iocb, ssize_t ret, void *opaque);
-int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
-                       void *opaque);
-
-#endif /* IOQ_H */
-- 
cgit v1.2.1


From 8d242f0ed5908333035dd5a8a94c06d7fe3b765c Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:57 +0200
Subject: dataplane: implement async flush

Stop using the raw-posix file descriptor for synchronous
qemu_fdatasync().  Use bdrv_aio_flush() instead and drop the
VirtIOBlockDataPlane->fd field.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 43 ++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 75616da521..c058c739a6 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -40,7 +40,6 @@ struct VirtIOBlockDataPlane {
     bool stopping;
 
     VirtIOBlkConf *blk;
-    int fd;                         /* image file descriptor */
 
     VirtIODevice *vdev;
     Vring vring;                    /* virtqueue vring */
@@ -179,6 +178,32 @@ static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
     }
 }
 
+static void complete_flush(void *opaque, int ret)
+{
+    VirtIOBlockRequest *req = opaque;
+    unsigned char status;
+
+    if (ret == 0) {
+        status = VIRTIO_BLK_S_OK;
+    } else {
+        status = VIRTIO_BLK_S_IOERR;
+    }
+
+    complete_request_early(req->s, req->elem, req->inhdr, status);
+    g_slice_free(VirtIOBlockRequest, req);
+}
+
+static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
+                         QEMUIOVector *inhdr)
+{
+    VirtIOBlockRequest *req = g_slice_new(VirtIOBlockRequest);
+    req->s = s;
+    req->elem = elem;
+    req->inhdr = inhdr;
+
+    bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
+}
+
 static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
 {
     struct iovec *iov = elem->out_sg;
@@ -232,12 +257,7 @@ static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
         return 0;
 
     case VIRTIO_BLK_T_FLUSH:
-        /* TODO fdsync not supported by Linux AIO, do it synchronously here! */
-        if (qemu_fdatasync(s->fd) < 0) {
-            complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_IOERR);
-        } else {
-            complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
-        }
+        do_flush_cmd(s, elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_GET_ID:
@@ -302,7 +322,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
                                   Error **errp)
 {
     VirtIOBlockDataPlane *s;
-    int fd;
     Error *local_err = NULL;
 
     *dataplane = NULL;
@@ -333,16 +352,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
         return;
     }
 
-    fd = raw_get_aio_fd(blk->conf.bs);
-    if (fd < 0) {
-        error_setg(errp, "drive is incompatible with x-data-plane, "
-                         "use format=raw,cache=none,aio=native");
-        return;
-    }
-
     s = g_new0(VirtIOBlockDataPlane, 1);
     s->vdev = vdev;
-    s->fd = fd;
     s->blk = blk;
 
     if (blk->iothread) {
-- 
cgit v1.2.1


From 76ef2cf5493a215efc351f48ae7094d6c183fcac Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 8 May 2014 16:34:58 +0200
Subject: raw-posix: drop raw_get_aio_fd() since it is no longer used

virtio-blk data-plane now uses the QEMU block layer for I/O.  We do not
need raw_get_aio_fd() anymore.  It was a layering violation anyway, so
let's get rid of it.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/raw-posix.c     | 34 ----------------------------------
 include/block/block.h |  9 ---------
 2 files changed, 43 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index ffdb1763f8..c2b30be3d3 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -2331,40 +2331,6 @@ static BlockDriver bdrv_host_cdrom = {
 };
 #endif /* __FreeBSD__ */
 
-#ifdef CONFIG_LINUX_AIO
-/**
- * Return the file descriptor for Linux AIO
- *
- * This function is a layering violation and should be removed when it becomes
- * possible to call the block layer outside the global mutex.  It allows the
- * caller to hijack the file descriptor so I/O can be performed outside the
- * block layer.
- */
-int raw_get_aio_fd(BlockDriverState *bs)
-{
-    BDRVRawState *s;
-
-    if (!bs->drv) {
-        return -ENOMEDIUM;
-    }
-
-    if (bs->drv == bdrv_find_format("raw")) {
-        bs = bs->file;
-    }
-
-    /* raw-posix has several protocols so just check for raw_aio_readv */
-    if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
-        return -ENOTSUP;
-    }
-
-    s = bs->opaque;
-    if (!s->use_aio) {
-        return -ENOTSUP;
-    }
-    return s->fd;
-}
-#endif /* CONFIG_LINUX_AIO */
-
 static void bdrv_file_init(void)
 {
     /*
diff --git a/include/block/block.h b/include/block/block.h
index 292754f9fe..29ac56da2c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -481,15 +481,6 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
 bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
 
-#ifdef CONFIG_LINUX_AIO
-int raw_get_aio_fd(BlockDriverState *bs);
-#else
-static inline int raw_get_aio_fd(BlockDriverState *bs)
-{
-    return -ENOTSUP;
-}
-#endif
-
 enum BlockAcctType {
     BDRV_ACCT_READ,
     BDRV_ACCT_WRITE,
-- 
cgit v1.2.1


From db519cba8713fb49aa5233e8debe61dccdd3a57f Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Thu, 15 May 2014 19:22:05 +0800
Subject: block: Move declaration of bdrv_get_aio_context to block.h

block_int.h is for block layer and block drivers, other code shouldn't
include it. But similar to bdrv_set_aio_context, bdrv_get_aio_context
should also be accessible from outside of block layer.

Move it.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/block.h     | 7 +++++++
 include/block/block_int.h | 7 -------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 29ac56da2c..7d86e29cf4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -565,6 +565,13 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
 int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
 
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
 /**
  * bdrv_set_aio_context:
  *
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 93ec86f3f7..8d58334c1d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -404,13 +404,6 @@ void bdrv_set_io_limits(BlockDriverState *bs,
 void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                     NotifierWithReturn *notifier);
 
-/**
- * bdrv_get_aio_context:
- *
- * Returns: the currently bound #AioContext
- */
-AioContext *bdrv_get_aio_context(BlockDriverState *bs);
-
 /**
  * bdrv_detach_aio_context:
  *
-- 
cgit v1.2.1


From 6d7e73d62fa32813b6f6a3575db2e9b5e0d43387 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Thu, 15 May 2014 19:22:06 +0800
Subject: virtio-blk: Allow config-wce in dataplane

Dataplane now uses block layer. Protect bdrv_set_enable_write_cache with
aio_context_acquire and aio_context_release, so we can enable config-wce
to allow guest to modify the write cache online.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 6 ------
 hw/block/virtio-blk.c           | 8 +++++++-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index c058c739a6..217992b666 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -336,12 +336,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
         return;
     }
 
-    if (blk->config_wce) {
-        error_setg(errp, "device is incompatible with x-data-plane, "
-                         "use config-wce=off");
-        return;
-    }
-
     /* If dataplane is (re-)enabled while the guest is running there could be
      * block jobs that can conflict.
      */
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 8a568e5edb..5e9433d9ba 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -523,7 +523,10 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
     struct virtio_blk_config blkcfg;
 
     memcpy(&blkcfg, config, sizeof(blkcfg));
+
+    aio_context_acquire(bdrv_get_aio_context(s->bs));
     bdrv_set_enable_write_cache(s->bs, blkcfg.wce != 0);
+    aio_context_release(bdrv_get_aio_context(s->bs));
 }
 
 static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
@@ -582,7 +585,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
      * s->bs would erroneously be placed in writethrough mode.
      */
     if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) {
-        bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
+        aio_context_acquire(bdrv_get_aio_context(s->bs));
+        bdrv_set_enable_write_cache(s->bs,
+                                    !!(features & (1 << VIRTIO_BLK_F_WCE)));
+        aio_context_release(bdrv_get_aio_context(s->bs));
     }
 }
 
-- 
cgit v1.2.1


From 5a05cbeeaaa2ec463d48c0026e8e6be243ea0bab Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Thu, 22 May 2014 16:22:42 +0800
Subject: virtio-blk: Factor out virtio_blk_handle_scsi_req from
 virtio_blk_handle_scsi

The common logic to process a scsi request in a VirtQueueElement is
extracted to a function to share with dataplane.

This makes VirtIOBlockReq.scsi unused, so drop it.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/virtio-blk.c          | 75 +++++++++++++++++++++++-------------------
 include/hw/virtio/virtio-blk.h |  3 ++
 2 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 5e9433d9ba..0b1446e2c4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,7 +33,6 @@ typedef struct VirtIOBlockReq
     VirtQueueElement elem;
     struct virtio_blk_inhdr *in;
     struct virtio_blk_outhdr *out;
-    struct virtio_scsi_inhdr *scsi;
     QEMUIOVector qiov;
     struct VirtIOBlockReq *next;
     BlockAcctCookie acct;
@@ -125,13 +124,15 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
     return req;
 }
 
-static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
+                               VirtQueueElement *elem)
 {
+    int status = VIRTIO_BLK_S_OK;
+    struct virtio_scsi_inhdr *scsi = NULL;
 #ifdef __linux__
-    int ret;
     int i;
+    struct sg_io_hdr hdr;
 #endif
-    int status = VIRTIO_BLK_S_OK;
 
     /*
      * We require at least one output segment each for the virtio_blk_outhdr
@@ -140,19 +141,18 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
      * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
      * and the sense buffer pointer in the input segments.
      */
-    if (req->elem.out_num < 2 || req->elem.in_num < 3) {
-        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-        g_free(req);
-        return;
+    if (elem->out_num < 2 || elem->in_num < 3) {
+        status = VIRTIO_BLK_S_IOERR;
+        goto fail;
     }
 
     /*
      * The scsi inhdr is placed in the second-to-last input segment, just
      * before the regular inhdr.
      */
-    req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
+    scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
 
-    if (!req->dev->blk.scsi) {
+    if (!blk->blk.scsi) {
         status = VIRTIO_BLK_S_UNSUPP;
         goto fail;
     }
@@ -160,43 +160,42 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
     /*
      * No support for bidirection commands yet.
      */
-    if (req->elem.out_num > 2 && req->elem.in_num > 3) {
+    if (elem->out_num > 2 && elem->in_num > 3) {
         status = VIRTIO_BLK_S_UNSUPP;
         goto fail;
     }
 
 #ifdef __linux__
-    struct sg_io_hdr hdr;
     memset(&hdr, 0, sizeof(struct sg_io_hdr));
     hdr.interface_id = 'S';
-    hdr.cmd_len = req->elem.out_sg[1].iov_len;
-    hdr.cmdp = req->elem.out_sg[1].iov_base;
+    hdr.cmd_len = elem->out_sg[1].iov_len;
+    hdr.cmdp = elem->out_sg[1].iov_base;
     hdr.dxfer_len = 0;
 
-    if (req->elem.out_num > 2) {
+    if (elem->out_num > 2) {
         /*
          * If there are more than the minimally required 2 output segments
          * there is write payload starting from the third iovec.
          */
         hdr.dxfer_direction = SG_DXFER_TO_DEV;
-        hdr.iovec_count = req->elem.out_num - 2;
+        hdr.iovec_count = elem->out_num - 2;
 
         for (i = 0; i < hdr.iovec_count; i++)
-            hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
+            hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
 
-        hdr.dxferp = req->elem.out_sg + 2;
+        hdr.dxferp = elem->out_sg + 2;
 
-    } else if (req->elem.in_num > 3) {
+    } else if (elem->in_num > 3) {
         /*
          * If we have more than 3 input segments the guest wants to actually
          * read data.
          */
         hdr.dxfer_direction = SG_DXFER_FROM_DEV;
-        hdr.iovec_count = req->elem.in_num - 3;
+        hdr.iovec_count = elem->in_num - 3;
         for (i = 0; i < hdr.iovec_count; i++)
-            hdr.dxfer_len += req->elem.in_sg[i].iov_len;
+            hdr.dxfer_len += elem->in_sg[i].iov_len;
 
-        hdr.dxferp = req->elem.in_sg;
+        hdr.dxferp = elem->in_sg;
     } else {
         /*
          * Some SCSI commands don't actually transfer any data.
@@ -204,11 +203,11 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
         hdr.dxfer_direction = SG_DXFER_NONE;
     }
 
-    hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
-    hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
+    hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
+    hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
 
-    ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
-    if (ret) {
+    status = bdrv_ioctl(blk->bs, SG_IO, &hdr);
+    if (status) {
         status = VIRTIO_BLK_S_UNSUPP;
         goto fail;
     }
@@ -224,23 +223,31 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
         hdr.status = CHECK_CONDITION;
     }
 
-    stl_p(&req->scsi->errors,
+    stl_p(&scsi->errors,
           hdr.status | (hdr.msg_status << 8) |
           (hdr.host_status << 16) | (hdr.driver_status << 24));
-    stl_p(&req->scsi->residual, hdr.resid);
-    stl_p(&req->scsi->sense_len, hdr.sb_len_wr);
-    stl_p(&req->scsi->data_len, hdr.dxfer_len);
+    stl_p(&scsi->residual, hdr.resid);
+    stl_p(&scsi->sense_len, hdr.sb_len_wr);
+    stl_p(&scsi->data_len, hdr.dxfer_len);
 
-    virtio_blk_req_complete(req, status);
-    g_free(req);
-    return;
+    return status;
 #else
     abort();
 #endif
 
 fail:
     /* Just put anything nonzero so that the ioctl fails in the guest.  */
-    stl_p(&req->scsi->errors, 255);
+    if (scsi) {
+        stl_p(&scsi->errors, 255);
+    }
+    return status;
+}
+
+static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+{
+    int status;
+
+    status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
     virtio_blk_req_complete(req, status);
     g_free(req);
 }
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index e4c41ff2ef..4bc9b549ad 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -155,4 +155,7 @@ typedef struct VirtIOBlock {
 
 void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk);
 
+int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
+                               VirtQueueElement *elem);
+
 #endif
-- 
cgit v1.2.1


From c9f87b20b996b1005b646281195d61412ba2b844 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Thu, 22 May 2014 16:22:43 +0800
Subject: dataplane: Support VIRTIO_BLK_T_SCSI_CMD

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 217992b666..c10b7b70fb 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -204,6 +204,15 @@ static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
     bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
 }
 
+static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
+                        QEMUIOVector *inhdr)
+{
+    int status;
+
+    status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem);
+    complete_request_early(s, elem, inhdr, status);
+}
+
 static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
 {
     struct iovec *iov = elem->out_sg;
@@ -252,8 +261,7 @@ static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
         return 0;
 
     case VIRTIO_BLK_T_SCSI_CMD:
-        /* TODO support SCSI commands */
-        complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_UNSUPP);
+        do_scsi_cmd(s, elem, inhdr);
         return 0;
 
     case VIRTIO_BLK_T_FLUSH:
@@ -330,12 +338,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
         return;
     }
 
-    if (blk->scsi) {
-        error_setg(errp,
-                   "device is incompatible with x-data-plane, use scsi=off");
-        return;
-    }
-
     /* If dataplane is (re-)enabled while the guest is running there could be
      * block jobs that can conflict.
      */
-- 
cgit v1.2.1


From 13af91ebf08d463d3b025cd396d4d11caceac02d Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Wed, 14 May 2014 16:22:45 +0200
Subject: throttle: add throttle_detach/attach_aio_context()

Block I/O throttling uses timers and currently always adds them to the
main loop.  Throttling will break if bdrv_set_aio_context() is used to
move a BlockDriverState to a different AioContext.

This patch adds throttle_detach/attach_aio_context() interfaces so the
throttling timers and uses them to move timers to the new AioContext.
Note that bdrv_set_aio_context() already drains all requests so we're
sure no throttled requests are pending.

The test cases need to be updated since the throttle_init() interface
has changed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
---
 block.c                 |  7 +++++++
 include/qemu/throttle.h | 10 ++++++++++
 tests/test-throttle.c   | 25 ++++++++++++++++++++-----
 util/throttle.c         | 27 +++++++++++++++++++++++----
 4 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/block.c b/block.c
index 1fd38159e2..17f763db79 100644
--- a/block.c
+++ b/block.c
@@ -179,6 +179,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs)
 {
     assert(!bs->io_limits_enabled);
     throttle_init(&bs->throttle_state,
+                  bdrv_get_aio_context(bs),
                   QEMU_CLOCK_VIRTUAL,
                   bdrv_throttle_read_timer_cb,
                   bdrv_throttle_write_timer_cb,
@@ -5671,6 +5672,9 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
         return;
     }
 
+    if (bs->io_limits_enabled) {
+        throttle_detach_aio_context(&bs->throttle_state);
+    }
     if (bs->drv->bdrv_detach_aio_context) {
         bs->drv->bdrv_detach_aio_context(bs);
     }
@@ -5702,6 +5706,9 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
     if (bs->drv->bdrv_attach_aio_context) {
         bs->drv->bdrv_attach_aio_context(bs, new_context);
     }
+    if (bs->io_limits_enabled) {
+        throttle_attach_aio_context(&bs->throttle_state, new_context);
+    }
 }
 
 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
index ab29b0b918..b890613a9c 100644
--- a/include/qemu/throttle.h
+++ b/include/qemu/throttle.h
@@ -67,6 +67,11 @@ typedef struct ThrottleState {
     int64_t previous_leak;    /* timestamp of the last leak done */
     QEMUTimer * timers[2];    /* timers used to do the throttling */
     QEMUClockType clock_type; /* the clock used */
+
+    /* Callbacks */
+    QEMUTimerCB *read_timer_cb;
+    QEMUTimerCB *write_timer_cb;
+    void *timer_opaque;
 } ThrottleState;
 
 /* operations on single leaky buckets */
@@ -82,6 +87,7 @@ bool throttle_compute_timer(ThrottleState *ts,
 
 /* init/destroy cycle */
 void throttle_init(ThrottleState *ts,
+                   AioContext *aio_context,
                    QEMUClockType clock_type,
                    void (read_timer)(void *),
                    void (write_timer)(void *),
@@ -89,6 +95,10 @@ void throttle_init(ThrottleState *ts,
 
 void throttle_destroy(ThrottleState *ts);
 
+void throttle_detach_aio_context(ThrottleState *ts);
+
+void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context);
+
 bool throttle_have_timer(ThrottleState *ts);
 
 /* configuration */
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 1d4ffd3603..5fa5000124 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -12,8 +12,10 @@
 
 #include <glib.h>
 #include <math.h>
+#include "block/aio.h"
 #include "qemu/throttle.h"
 
+AioContext     *ctx;
 LeakyBucket    bkt;
 ThrottleConfig cfg;
 ThrottleState  ts;
@@ -104,7 +106,8 @@ static void test_init(void)
     memset(&ts, 1, sizeof(ts));
 
     /* init the structure */
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
 
     /* check initialized fields */
     g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL);
@@ -126,7 +129,8 @@ static void test_init(void)
 static void test_destroy(void)
 {
     int i;
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
     throttle_destroy(&ts);
     for (i = 0; i < 2; i++) {
         g_assert(!ts.timers[i]);
@@ -165,7 +169,8 @@ static void test_config_functions(void)
 
     orig_cfg.op_size = 1;
 
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
     /* structure reset by throttle_init previous_leak should be null */
     g_assert(!ts.previous_leak);
     throttle_config(&ts, &orig_cfg);
@@ -324,7 +329,8 @@ static void test_have_timer(void)
     g_assert(!throttle_have_timer(&ts));
 
     /* init the structure */
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
 
     /* timer set by init should return true */
     g_assert(throttle_have_timer(&ts));
@@ -357,7 +363,8 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
 
     cfg.op_size = op_size;
 
-    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
     throttle_config(&ts, &cfg);
 
     /* account a read */
@@ -461,7 +468,15 @@ static void test_accounting(void)
 
 int main(int argc, char **argv)
 {
+    GSource *src;
+
     init_clocks();
+
+    ctx = aio_context_new();
+    src = aio_get_g_source(ctx);
+    g_source_attach(src, NULL);
+    g_source_unref(src);
+
     do {} while (g_main_context_iteration(NULL, false));
 
     /* tests in the same order as the header function declarations */
diff --git a/util/throttle.c b/util/throttle.c
index 02e6f15587..f976ac7de5 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -22,6 +22,7 @@
 
 #include "qemu/throttle.h"
 #include "qemu/timer.h"
+#include "block/aio.h"
 
 /* This function make a bucket leak
  *
@@ -157,8 +158,18 @@ bool throttle_compute_timer(ThrottleState *ts,
     return false;
 }
 
+/* Add timers to event loop */
+void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context)
+{
+    ts->timers[0] = aio_timer_new(new_context, ts->clock_type, SCALE_NS,
+                                  ts->read_timer_cb, ts->timer_opaque);
+    ts->timers[1] = aio_timer_new(new_context, ts->clock_type, SCALE_NS,
+                                  ts->write_timer_cb, ts->timer_opaque);
+}
+
 /* To be called first on the ThrottleState */
 void throttle_init(ThrottleState *ts,
+                   AioContext *aio_context,
                    QEMUClockType clock_type,
                    QEMUTimerCB *read_timer_cb,
                    QEMUTimerCB *write_timer_cb,
@@ -167,8 +178,10 @@ void throttle_init(ThrottleState *ts,
     memset(ts, 0, sizeof(ThrottleState));
 
     ts->clock_type = clock_type;
-    ts->timers[0] = timer_new_ns(clock_type, read_timer_cb, timer_opaque);
-    ts->timers[1] = timer_new_ns(clock_type, write_timer_cb, timer_opaque);
+    ts->read_timer_cb = read_timer_cb;
+    ts->write_timer_cb = write_timer_cb;
+    ts->timer_opaque = timer_opaque;
+    throttle_attach_aio_context(ts, aio_context);
 }
 
 /* destroy a timer */
@@ -181,8 +194,8 @@ static void throttle_timer_destroy(QEMUTimer **timer)
     *timer = NULL;
 }
 
-/* To be called last on the ThrottleState */
-void throttle_destroy(ThrottleState *ts)
+/* Remove timers from event loop */
+void throttle_detach_aio_context(ThrottleState *ts)
 {
     int i;
 
@@ -191,6 +204,12 @@ void throttle_destroy(ThrottleState *ts)
     }
 }
 
+/* To be called last on the ThrottleState */
+void throttle_destroy(ThrottleState *ts)
+{
+    throttle_detach_aio_context(ts);
+}
+
 /* is any throttling timer configured */
 bool throttle_have_timer(ThrottleState *ts)
 {
-- 
cgit v1.2.1


From 22524f726210f38298dfb1cefa30257133ad6b8c Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Wed, 14 May 2014 16:22:46 +0200
Subject: throttle: add detach/attach test case

Add a test case that checks the timer is really removed/added by the
detach/attach functions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
---
 tests/test-throttle.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 5fa5000124..3de6ab80e0 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -338,6 +338,29 @@ static void test_have_timer(void)
     throttle_destroy(&ts);
 }
 
+static void test_detach_attach(void)
+{
+    /* zero the structure */
+    memset(&ts, 0, sizeof(ts));
+
+    /* init the structure */
+    throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL,
+                  read_timer_cb, write_timer_cb, &ts);
+
+    /* timer set by init should return true */
+    g_assert(throttle_have_timer(&ts));
+
+    /* timer should no longer exist after detaching */
+    throttle_detach_aio_context(&ts);
+    g_assert(!throttle_have_timer(&ts));
+
+    /* timer should exist again after attaching */
+    throttle_attach_aio_context(&ts, ctx);
+    g_assert(throttle_have_timer(&ts));
+
+    throttle_destroy(&ts);
+}
+
 static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
                 int size,                   /* size of the operation to do */
                 double avg,                 /* io limit */
@@ -486,6 +509,7 @@ int main(int argc, char **argv)
     g_test_add_func("/throttle/init",               test_init);
     g_test_add_func("/throttle/destroy",            test_destroy);
     g_test_add_func("/throttle/have_timer",         test_have_timer);
+    g_test_add_func("/throttle/detach_attach",      test_detach_attach);
     g_test_add_func("/throttle/config/enabled",     test_enabled);
     g_test_add_func("/throttle/config/conflicting", test_conflicting_config);
     g_test_add_func("/throttle/config/is_valid",    test_is_valid);
-- 
cgit v1.2.1


From b15446fdbf4ac2b29f6ee5080630a80715abfc20 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Wed, 14 May 2014 16:22:47 +0200
Subject: blockdev: acquire AioContext in block_set_io_throttle

The block_set_io_throttle QMP and HMP commands modify I/O throttling
limits for block devices.

Acquire the BlockDriverState's AioContext to protect against race
conditions with an IOThread that is running I/O for this device.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
---
 blockdev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index 9b5261b765..4cbcc56b5e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1703,6 +1703,7 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
 {
     ThrottleConfig cfg;
     BlockDriverState *bs;
+    AioContext *aio_context;
 
     bs = bdrv_find(device);
     if (!bs) {
@@ -1746,6 +1747,9 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
         return;
     }
 
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     if (!bs->io_limits_enabled && throttle_enabled(&cfg)) {
         bdrv_io_limits_enable(bs);
     } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) {
@@ -1755,6 +1759,8 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
     if (bs->io_limits_enabled) {
         bdrv_set_io_limits(bs, &cfg);
     }
+
+    aio_context_release(aio_context);
 }
 
 int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
-- 
cgit v1.2.1


From 1ac362cdbd799eb8165e2e3bb5cd5aa38b1baae3 Mon Sep 17 00:00:00 2001
From: chai wen <chaiw.fnst@cn.fujitsu.com>
Date: Wed, 4 Jun 2014 11:47:37 +0800
Subject: block: fix wrong order in live block migration setup

The function init_blk_migration is better to be called before
set_dirty_tracking as the reasons below.

If we want to track dirty blocks via dirty_maps on a BlockDriverState
when doing live block-migration, its correspoding 'BlkMigDevState' should be
added to block_mig_state.bmds_list first for subsequent processing.
Otherwise set_dirty_tracking will do nothing on an empty list than allocating
dirty_bitmaps for them. And bdrv_get_dirty_count will access the
bmds->dirty_maps directly, then there would be a segfault triggered.

If the set_dirty_tracking fails, qemu_savevm_state_cancel will handle
the cleanup of init_blk_migration automatically.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: chai wen <chaiw.fnst@cn.fujitsu.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block-migration.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 16562709c8..25a03889f4 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -629,6 +629,7 @@ static int block_save_setup(QEMUFile *f, void *opaque)
             block_mig_state.submitted, block_mig_state.transferred);
 
     qemu_mutex_lock_iothread();
+    init_blk_migration(f);
 
     /* start track dirty blocks */
     ret = set_dirty_tracking();
@@ -638,8 +639,6 @@ static int block_save_setup(QEMUFile *f, void *opaque)
         return ret;
     }
 
-    init_blk_migration(f);
-
     qemu_mutex_unlock_iothread();
 
     ret = flush_blks(f);
-- 
cgit v1.2.1


From d6635c4dbbcfca7f5bd379dd970617fc3430428f Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Mon, 2 Jun 2014 22:15:21 +0200
Subject: qemu-img: Document check exit codes

The exit code 63 (check not supported by image format) was not even
documented in the comment above the check command in the source code;
add it, as it does indeed seem useful.

Also, document all of check's exit codes in the manpage.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reported-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qemu-img.c    |  9 +++++----
 qemu-img.texi | 23 +++++++++++++++++++++++
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index b3d2bc6f02..aa89ba21fd 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -580,10 +580,11 @@ static int collect_image_check(BlockDriverState *bs,
 /*
  * Checks an image for consistency. Exit codes:
  *
- * 0 - Check completed, image is good
- * 1 - Check not completed because of internal errors
- * 2 - Check completed, image is corrupted
- * 3 - Check completed, image has leaked clusters, but is good otherwise
+ *  0 - Check completed, image is good
+ *  1 - Check not completed because of internal errors
+ *  2 - Check completed, image is corrupted
+ *  3 - Check completed, image has leaked clusters, but is good otherwise
+ * 63 - Checks are not supported by the image format
  */
 static int img_check(int argc, char **argv)
 {
diff --git a/qemu-img.texi b/qemu-img.texi
index f84590ebf0..c68b54148a 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -126,6 +126,29 @@ wrong fix or hiding corruption that has already occurred.
 Only the formats @code{qcow2}, @code{qed} and @code{vdi} support
 consistency checks.
 
+In case the image does not have any inconsistencies, check exits with @code{0}.
+Other exit codes indicate the kind of inconsistency found or if another error
+occurred. The following table summarizes all exit codes of the check subcommand:
+
+@table @option
+
+@item 0
+Check completed, the image is (now) consistent
+@item 1
+Check not completed because of internal errors
+@item 2
+Check completed, image is corrupted
+@item 3
+Check completed, image has leaked clusters, but is not corrupted
+@item 63
+Checks are not supported by the image format
+
+@end table
+
+If @code{-r} is specified, exit codes representing the image state refer to the
+state after (the attempt at) repairing it. That is, a successful @code{-r all}
+will yield the exit code 0, independently of the image state before.
+
 @item create [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
 
 Create the new disk image @var{filename} of size @var{size} and format
-- 
cgit v1.2.1


From 405a27640b33c31ccef4001b3f3936b8c9d2218f Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 5 Jun 2014 16:19:26 +0200
Subject: rbd: Fix leaks in rbd_start_aio() error path

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/rbd.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/block/rbd.c b/block/rbd.c
index a78760b7fe..93639f783c 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -685,13 +685,16 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     }
 
     if (r < 0) {
-        goto failed;
+        goto failed_completion;
     }
 
     return &acb->common;
 
+failed_completion:
+    rbd_aio_release(c);
 failed:
     g_free(rcb);
+    qemu_vfree(acb->bounce);
     qemu_aio_release(acb);
     return NULL;
 }
-- 
cgit v1.2.1


From b544c1aba8681c2fe5d6715fbd37cf6caf1bc7bb Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
Date: Fri, 6 Jun 2014 13:35:11 +0900
Subject: sheepdog: fix vdi object update after live snapshot

sheepdog driver should decide a write request is COW or not based on inode
object which is active when the write request is issued.

Example of wrong inode update path in the previous driver:
1. drier issues an ordinal write request to an existing object
2. user creates a snapshot of the VDI before the write request is completed
3. the respones for the request is RDONLY, because the VDI is already a snapshot
4. the driver reload an inode object of the new active VDI, then issues a write
   request again
5. the second write request can be completed
6. driver decide the request is COW or not with the below conditional branch:
   	  if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
7. the ID of the written object and VID of the new active VDI is different, so
   the driver updates data_vdi_id[idx] and writes inode object
8. the existing object cannot be seen by the new active VDI, it results object
   leaking

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Liu Yuan <namei.unix@gmail.com>
Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/sheepdog.c | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 9175cc232d..5f7e025b61 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -282,6 +282,7 @@ typedef struct AIOReq {
     unsigned int data_len;
     uint8_t flags;
     uint32_t id;
+    bool create;
 
     QLIST_ENTRY(AIOReq) aio_siblings;
 } AIOReq;
@@ -405,7 +406,7 @@ static const char * sd_strerror(int err)
 
 static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
                                     uint64_t oid, unsigned int data_len,
-                                    uint64_t offset, uint8_t flags,
+                                    uint64_t offset, uint8_t flags, bool create,
                                     uint64_t base_oid, unsigned int iov_offset)
 {
     AIOReq *aio_req;
@@ -419,6 +420,7 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
     aio_req->data_len = data_len;
     aio_req->flags = flags;
     aio_req->id = s->aioreq_seq_num++;
+    aio_req->create = create;
 
     acb->nr_pending++;
     return aio_req;
@@ -667,8 +669,8 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
 }
 
 static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                           struct iovec *iov, int niov, bool create,
-                           enum AIOCBState aiocb_type);
+                                         struct iovec *iov, int niov,
+                                         enum AIOCBState aiocb_type);
 static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
 static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
 static int get_sheep_fd(BDRVSheepdogState *s, Error **errp);
@@ -701,7 +703,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
         /* move aio_req from pending list to inflight one */
         QLIST_REMOVE(aio_req, aio_siblings);
         QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false,
+        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
                         acb->aiocb_type);
     }
 }
@@ -800,7 +802,7 @@ static void coroutine_fn aio_read_response(void *opaque)
         }
         idx = data_oid_to_idx(aio_req->oid);
 
-        if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
+        if (aio_req->create) {
             /*
              * If the object is newly created one, we need to update
              * the vdi object (metadata object).  min_dirty_data_idx
@@ -1120,8 +1122,8 @@ out:
 }
 
 static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                           struct iovec *iov, int niov, bool create,
-                           enum AIOCBState aiocb_type)
+                                         struct iovec *iov, int niov,
+                                         enum AIOCBState aiocb_type)
 {
     int nr_copies = s->inode.nr_copies;
     SheepdogObjReq hdr;
@@ -1132,6 +1134,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     uint64_t offset = aio_req->offset;
     uint8_t flags = aio_req->flags;
     uint64_t old_oid = aio_req->base_oid;
+    bool create = aio_req->create;
 
     if (!nr_copies) {
         error_report("bug");
@@ -1324,6 +1327,7 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
             DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid);
             aio_req->flags = 0;
             aio_req->base_oid = 0;
+            aio_req->create = false;
             QLIST_REMOVE(aio_req, aio_siblings);
             QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
             return true;
@@ -1336,7 +1340,8 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
 static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
 {
     SheepdogAIOCB *acb = aio_req->aiocb;
-    bool create = false;
+
+    aio_req->create = false;
 
     /* check whether this request becomes a CoW one */
     if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) {
@@ -1354,17 +1359,17 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
             aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
             aio_req->flags |= SD_FLAG_CMD_COW;
         }
-        create = true;
+        aio_req->create = true;
     }
 out:
     if (is_data_obj(aio_req->oid)) {
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
                         acb->aiocb_type);
     } else {
         struct iovec iov;
         iov.iov_base = &s->inode;
         iov.iov_len = sizeof(s->inode);
-        add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+        add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
     }
 }
 
@@ -1877,9 +1882,9 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
         iov.iov_base = &s->inode;
         iov.iov_len = sizeof(s->inode);
         aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
-                                data_len, offset, 0, 0, offset);
+                                data_len, offset, 0, false, 0, offset);
         QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-        add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+        add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
 
         acb->aio_done_func = sd_finish_aiocb;
         acb->aiocb_type = AIOCB_WRITE_UDATA;
@@ -2078,7 +2083,8 @@ static int coroutine_fn sd_co_rw_vector(void *p)
             DPRINTF("new oid %" PRIx64 "\n", oid);
         }
 
-        aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
+        aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
+                                old_oid, done);
         QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
 
         if (create) {
@@ -2087,7 +2093,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
             }
         }
 
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
                         acb->aiocb_type);
     done:
         offset = 0;
@@ -2167,9 +2173,9 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
     acb->aio_done_func = sd_finish_aiocb;
 
     aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
-                            0, 0, 0, 0, 0);
+                            0, 0, 0, false, 0, 0);
     QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-    add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type);
+    add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
 
     qemu_coroutine_yield();
     return acb->ret;
-- 
cgit v1.2.1


From 5d039baba411ef90b53150ae394575b25c9bfb74 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
Date: Fri, 6 Jun 2014 13:35:12 +0900
Subject: sheepdog: reload only header in a case of live snapshot

sheepdog driver doesn't need to read data_vdi_id[] when a live snapshot is
created.

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Liu Yuan <namei.unix@gmail.com>
Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/sheepdog.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 5f7e025b61..1fa19399f0 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -200,6 +200,8 @@ typedef struct SheepdogInode {
     uint32_t data_vdi_id[MAX_DATA_OBJS];
 } SheepdogInode;
 
+#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id)
+
 /*
  * 64 bit FNV-1a non-zero initial basis
  */
@@ -1287,7 +1289,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
         return -EIO;
     }
 
-    inode = g_malloc(sizeof(s->inode));
+    inode = g_malloc(SD_INODE_HEADER_SIZE);
 
     ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err);
     if (ret) {
@@ -1297,13 +1299,14 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
     }
 
     ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid),
-                      s->inode.nr_copies, sizeof(*inode), 0, s->cache_flags);
+                      s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0,
+                      s->cache_flags);
     if (ret < 0) {
         goto out;
     }
 
     if (inode->vdi_id != s->inode.vdi_id) {
-        memcpy(&s->inode, inode, sizeof(s->inode));
+        memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE);
     }
 
 out:
-- 
cgit v1.2.1


From d34bda716a7da18ee6f239ce81240ce063ac844e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Canet?= <benoit.canet@irqsave.net>
Date: Thu, 5 Jun 2014 13:45:29 +0200
Subject: qapi: Extract qapi/common.json definitions

Signed-off-by: Benoit Canet <benoit@irqsave.net>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi-schema.json | 87 ++----------------------------------------------------
 qapi/common.json | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 85 deletions(-)
 create mode 100644 qapi/common.json

diff --git a/qapi-schema.json b/qapi-schema.json
index 7bc33ea717..cc71b27822 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2,32 +2,8 @@
 #
 # QAPI Schema
 
-##
-# @ErrorClass
-#
-# QEMU error classes
-#
-# @GenericError: this is used for errors that don't require a specific error
-#                class. This should be the default case for most errors
-#
-# @CommandNotFound: the requested command has not been found
-#
-# @DeviceEncrypted: the requested operation can't be fulfilled because the
-#                   selected device is encrypted
-#
-# @DeviceNotActive: a device has failed to be become active
-#
-# @DeviceNotFound: the requested device has not been found
-#
-# @KVMMissingCap: the requested operation can't be fulfilled because a
-#                 required KVM capability is missing
-#
-# Since: 1.2
-##
-{ 'enum': 'ErrorClass',
-  'data': [ 'GenericError', 'CommandNotFound', 'DeviceEncrypted',
-            'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] }
-
+# QAPI common definitions
+{ 'include': 'qapi/common.json' }
 
 ##
 # LostTickPolicy:
@@ -133,43 +109,6 @@
 ##
 { 'command': 'query-name', 'returns': 'NameInfo' }
 
-##
-# @VersionInfo:
-#
-# A description of QEMU's version.
-#
-# @qemu.major:  The major version of QEMU
-#
-# @qemu.minor:  The minor version of QEMU
-#
-# @qemu.micro:  The micro version of QEMU.  By current convention, a micro
-#               version of 50 signifies a development branch.  A micro version
-#               greater than or equal to 90 signifies a release candidate for
-#               the next minor version.  A micro version of less than 50
-#               signifies a stable release.
-#
-# @package:     QEMU will always set this field to an empty string.  Downstream
-#               versions of QEMU should set this to a non-empty string.  The
-#               exact format depends on the downstream however it highly
-#               recommended that a unique name is used.
-#
-# Since: 0.14.0
-##
-{ 'type': 'VersionInfo',
-  'data': {'qemu': {'major': 'int', 'minor': 'int', 'micro': 'int'},
-           'package': 'str'} }
-
-##
-# @query-version:
-#
-# Returns the current version of QEMU.
-#
-# Returns:  A @VersionInfo object describing the current version of QEMU.
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-version', 'returns': 'VersionInfo' }
-
 ##
 # @KvmInfo:
 #
@@ -583,28 +522,6 @@
   'data': {'device': 'str', 'size': 'int', '*format': 'DataFormat'},
   'returns': 'str' }
 
-##
-# @CommandInfo:
-#
-# Information about a QMP command
-#
-# @name: The command name
-#
-# Since: 0.14.0
-##
-{ 'type': 'CommandInfo', 'data': {'name': 'str'} }
-
-##
-# @query-commands:
-#
-# Return a list of supported QMP commands by this server
-#
-# Returns: A list of @CommandInfo for all supported commands
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-commands', 'returns': ['CommandInfo'] }
-
 ##
 # @EventInfo:
 #
diff --git a/qapi/common.json b/qapi/common.json
new file mode 100644
index 0000000000..4e9a21f2f6
--- /dev/null
+++ b/qapi/common.json
@@ -0,0 +1,89 @@
+# -*- Mode: Python -*-
+#
+# QAPI common definitions
+
+##
+# @ErrorClass
+#
+# QEMU error classes
+#
+# @GenericError: this is used for errors that don't require a specific error
+#                class. This should be the default case for most errors
+#
+# @CommandNotFound: the requested command has not been found
+#
+# @DeviceEncrypted: the requested operation can't be fulfilled because the
+#                   selected device is encrypted
+#
+# @DeviceNotActive: a device has failed to be become active
+#
+# @DeviceNotFound: the requested device has not been found
+#
+# @KVMMissingCap: the requested operation can't be fulfilled because a
+#                 required KVM capability is missing
+#
+# Since: 1.2
+##
+{ 'enum': 'ErrorClass',
+  'data': [ 'GenericError', 'CommandNotFound', 'DeviceEncrypted',
+            'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] }
+
+##
+# @VersionInfo:
+#
+# A description of QEMU's version.
+#
+# @qemu.major:  The major version of QEMU
+#
+# @qemu.minor:  The minor version of QEMU
+#
+# @qemu.micro:  The micro version of QEMU.  By current convention, a micro
+#               version of 50 signifies a development branch.  A micro version
+#               greater than or equal to 90 signifies a release candidate for
+#               the next minor version.  A micro version of less than 50
+#               signifies a stable release.
+#
+# @package:     QEMU will always set this field to an empty string.  Downstream
+#               versions of QEMU should set this to a non-empty string.  The
+#               exact format depends on the downstream however it highly
+#               recommended that a unique name is used.
+#
+# Since: 0.14.0
+##
+{ 'type': 'VersionInfo',
+  'data': {'qemu': {'major': 'int', 'minor': 'int', 'micro': 'int'},
+           'package': 'str'} }
+
+##
+# @query-version:
+#
+# Returns the current version of QEMU.
+#
+# Returns:  A @VersionInfo object describing the current version of QEMU.
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-version', 'returns': 'VersionInfo' }
+
+##
+# @CommandInfo:
+#
+# Information about a QMP command
+#
+# @name: The command name
+#
+# Since: 0.14.0
+##
+{ 'type': 'CommandInfo', 'data': {'name': 'str'} }
+
+##
+# @query-commands:
+#
+# Return a list of supported QMP commands by this server
+#
+# Returns: A list of @CommandInfo for all supported commands
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-commands', 'returns': ['CommandInfo'] }
+
-- 
cgit v1.2.1


From 5db15096d047f0ec92bd5f1680343ea7acbed4e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Canet?= <benoit.canet@irqsave.net>
Date: Thu, 5 Jun 2014 13:45:30 +0200
Subject: qapi: create two block related json modules

qapi/block-core.json contains block definitions unrelated to emulation.

qapi/block.json is a superset of the previous and contains definitions related
to emulation.

The purpose of these extractions is to be able to hook qapi/block-core.json
generated code on qemu-nbd.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi-schema.json     | 3 +++
 qapi/block-core.json | 6 ++++++
 qapi/block.json      | 7 +++++++
 3 files changed, 16 insertions(+)
 create mode 100644 qapi/block-core.json
 create mode 100644 qapi/block.json

diff --git a/qapi-schema.json b/qapi-schema.json
index cc71b27822..7d47f4dff4 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -5,6 +5,9 @@
 # QAPI common definitions
 { 'include': 'qapi/common.json' }
 
+# QAPI block definitions
+{ 'include': 'qapi/block.json' }
+
 ##
 # LostTickPolicy:
 #
diff --git a/qapi/block-core.json b/qapi/block-core.json
new file mode 100644
index 0000000000..11bd8c2b3d
--- /dev/null
+++ b/qapi/block-core.json
@@ -0,0 +1,6 @@
+# -*- Mode: Python -*-
+#
+# QAPI block core definitions (vm unrelated)
+
+# QAPI common definitions
+{ 'include': 'common.json' }
diff --git a/qapi/block.json b/qapi/block.json
new file mode 100644
index 0000000000..e2b882f120
--- /dev/null
+++ b/qapi/block.json
@@ -0,0 +1,7 @@
+# -*- Mode: Python -*-
+#
+# QAPI block definitions (vm related)
+
+# QAPI block core definitions
+{ 'include': 'block-core.json' }
+
-- 
cgit v1.2.1


From 1ad166b612c2aca517430d88f4a3d60f96bb6756 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Canet?= <benoit.canet@irqsave.net>
Date: Thu, 5 Jun 2014 13:45:31 +0200
Subject: qapi: Extract qapi/block-core.json definitions

Signed-off-by: Benoit Canet <benoit@irqsave.net
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi-schema.json     | 1428 +-------------------------------------------------
 qapi/block-core.json | 1406 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1417 insertions(+), 1417 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 7d47f4dff4..c888d23c9a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -183,179 +183,6 @@
             'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
             'guest-panicked' ] }
 
-##
-# @SnapshotInfo
-#
-# @id: unique snapshot id
-#
-# @name: user chosen name
-#
-# @vm-state-size: size of the VM state
-#
-# @date-sec: UTC date of the snapshot in seconds
-#
-# @date-nsec: fractional part in nano seconds to be used with date-sec
-#
-# @vm-clock-sec: VM clock relative to boot in seconds
-#
-# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec
-#
-# Since: 1.3
-#
-##
-
-{ 'type': 'SnapshotInfo',
-  'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int',
-            'date-sec': 'int', 'date-nsec': 'int',
-            'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } }
-
-##
-# @ImageInfoSpecificQCow2:
-#
-# @compat: compatibility level
-#
-# @lazy-refcounts: #optional on or off; only valid for compat >= 1.1
-#
-# Since: 1.7
-##
-{ 'type': 'ImageInfoSpecificQCow2',
-  'data': {
-      'compat': 'str',
-      '*lazy-refcounts': 'bool'
-  } }
-
-##
-# @ImageInfoSpecificVmdk:
-#
-# @create-type: The create type of VMDK image
-#
-# @cid: Content id of image
-#
-# @parent-cid: Parent VMDK image's cid
-#
-# @extents: List of extent files
-#
-# Since: 1.7
-##
-{ 'type': 'ImageInfoSpecificVmdk',
-  'data': {
-      'create-type': 'str',
-      'cid': 'int',
-      'parent-cid': 'int',
-      'extents': ['ImageInfo']
-  } }
-
-##
-# @ImageInfoSpecific:
-#
-# A discriminated record of image format specific information structures.
-#
-# Since: 1.7
-##
-
-{ 'union': 'ImageInfoSpecific',
-  'data': {
-      'qcow2': 'ImageInfoSpecificQCow2',
-      'vmdk': 'ImageInfoSpecificVmdk'
-  } }
-
-##
-# @ImageInfo:
-#
-# Information about a QEMU image file
-#
-# @filename: name of the image file
-#
-# @format: format of the image file
-#
-# @virtual-size: maximum capacity in bytes of the image
-#
-# @actual-size: #optional actual size on disk in bytes of the image
-#
-# @dirty-flag: #optional true if image is not cleanly closed
-#
-# @cluster-size: #optional size of a cluster in bytes
-#
-# @encrypted: #optional true if the image is encrypted
-#
-# @compressed: #optional true if the image is compressed (Since 1.7)
-#
-# @backing-filename: #optional name of the backing file
-#
-# @full-backing-filename: #optional full path of the backing file
-#
-# @backing-filename-format: #optional the format of the backing file
-#
-# @snapshots: #optional list of VM snapshots
-#
-# @backing-image: #optional info of the backing image (since 1.6)
-#
-# @format-specific: #optional structure supplying additional format-specific
-# information (since 1.7)
-#
-# Since: 1.3
-#
-##
-
-{ 'type': 'ImageInfo',
-  'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
-           '*actual-size': 'int', 'virtual-size': 'int',
-           '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
-           '*backing-filename': 'str', '*full-backing-filename': 'str',
-           '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
-           '*backing-image': 'ImageInfo',
-           '*format-specific': 'ImageInfoSpecific' } }
-
-##
-# @ImageCheck:
-#
-# Information about a QEMU image file check
-#
-# @filename: name of the image file checked
-#
-# @format: format of the image file checked
-#
-# @check-errors: number of unexpected errors occurred during check
-#
-# @image-end-offset: #optional offset (in bytes) where the image ends, this
-#                    field is present if the driver for the image format
-#                    supports it
-#
-# @corruptions: #optional number of corruptions found during the check if any
-#
-# @leaks: #optional number of leaks found during the check if any
-#
-# @corruptions-fixed: #optional number of corruptions fixed during the check
-#                     if any
-#
-# @leaks-fixed: #optional number of leaks fixed during the check if any
-#
-# @total-clusters: #optional total number of clusters, this field is present
-#                  if the driver for the image format supports it
-#
-# @allocated-clusters: #optional total number of allocated clusters, this
-#                      field is present if the driver for the image format
-#                      supports it
-#
-# @fragmented-clusters: #optional total number of fragmented clusters, this
-#                       field is present if the driver for the image format
-#                       supports it
-#
-# @compressed-clusters: #optional total number of compressed clusters, this
-#                       field is present if the driver for the image format
-#                       supports it
-#
-# Since: 1.4
-#
-##
-
-{ 'type': 'ImageCheck',
-  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
-           '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
-           '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
-           '*total-clusters': 'int', '*allocated-clusters': 'int',
-           '*fragmented-clusters': 'int', '*compressed-clusters': 'int' } }
-
 ##
 # @StatusInfo:
 #
@@ -836,252 +663,6 @@
 ##
 { 'command': 'query-iothreads', 'returns': ['IOThreadInfo'] }
 
-##
-# @BlockDeviceInfo:
-#
-# Information about the backing device for a block device.
-#
-# @file: the filename of the backing device
-#
-# @node-name: #optional the name of the block driver node (Since 2.0)
-#
-# @ro: true if the backing device was open read-only
-#
-# @drv: the name of the block format used to open the backing device. As of
-#       0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
-#       'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-#       'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
-#       'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
-#
-# @backing_file: #optional the name of the backing file (for copy-on-write)
-#
-# @backing_file_depth: number of files in the backing file chain (since: 1.2)
-#
-# @encrypted: true if the backing device is encrypted
-#
-# @encryption_key_missing: true if the backing device is encrypted but an
-#                          valid encryption key is missing
-#
-# @detect_zeroes: detect and optimize zero writes (Since 2.1)
-#
-# @bps: total throughput limit in bytes per second is specified
-#
-# @bps_rd: read throughput limit in bytes per second is specified
-#
-# @bps_wr: write throughput limit in bytes per second is specified
-#
-# @iops: total I/O operations per second is specified
-#
-# @iops_rd: read I/O operations per second is specified
-#
-# @iops_wr: write I/O operations per second is specified
-#
-# @image: the info of image used (since: 1.6)
-#
-# @bps_max: #optional total max in bytes (Since 1.7)
-#
-# @bps_rd_max: #optional read max in bytes (Since 1.7)
-#
-# @bps_wr_max: #optional write max in bytes (Since 1.7)
-#
-# @iops_max: #optional total I/O operations max (Since 1.7)
-#
-# @iops_rd_max: #optional read I/O operations max (Since 1.7)
-#
-# @iops_wr_max: #optional write I/O operations max (Since 1.7)
-#
-# @iops_size: #optional an I/O size in bytes (Since 1.7)
-#
-# Since: 0.14.0
-#
-##
-{ 'type': 'BlockDeviceInfo',
-  'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
-            '*backing_file': 'str', 'backing_file_depth': 'int',
-            'encrypted': 'bool', 'encryption_key_missing': 'bool',
-            'detect_zeroes': 'BlockdevDetectZeroesOptions',
-            'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
-            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
-            'image': 'ImageInfo',
-            '*bps_max': 'int', '*bps_rd_max': 'int',
-            '*bps_wr_max': 'int', '*iops_max': 'int',
-            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
-            '*iops_size': 'int' } }
-
-##
-# @BlockDeviceIoStatus:
-#
-# An enumeration of block device I/O status.
-#
-# @ok: The last I/O operation has succeeded
-#
-# @failed: The last I/O operation has failed
-#
-# @nospace: The last I/O operation has failed due to a no-space condition
-#
-# Since: 1.0
-##
-{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
-
-##
-# @BlockDeviceMapEntry:
-#
-# Entry in the metadata map of the device (returned by "qemu-img map")
-#
-# @start: Offset in the image of the first byte described by this entry
-#         (in bytes)
-#
-# @length: Length of the range described by this entry (in bytes)
-#
-# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.)
-#         before reaching one for which the range is allocated.  The value is
-#         in the range 0 to the depth of the image chain - 1.
-#
-# @zero: the sectors in this range read as zeros
-#
-# @data: reading the image will actually read data from a file (in particular,
-#        if @offset is present this means that the sectors are not simply
-#        preallocated, but contain actual data in raw format)
-#
-# @offset: if present, the image file stores the data for this range in
-#          raw format at the given offset.
-#
-# Since 1.7
-##
-{ 'type': 'BlockDeviceMapEntry',
-  'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool',
-            'data': 'bool', '*offset': 'int' } }
-
-##
-# @BlockDirtyInfo:
-#
-# Block dirty bitmap information.
-#
-# @count: number of dirty bytes according to the dirty bitmap
-#
-# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
-#
-# Since: 1.3
-##
-{ 'type': 'BlockDirtyInfo',
-  'data': {'count': 'int', 'granularity': 'int'} }
-
-##
-# @BlockInfo:
-#
-# Block device information.  This structure describes a virtual device and
-# the backing device associated with it.
-#
-# @device: The device name associated with the virtual device.
-#
-# @type: This field is returned only for compatibility reasons, it should
-#        not be used (always returns 'unknown')
-#
-# @removable: True if the device supports removable media.
-#
-# @locked: True if the guest has locked this device from having its media
-#          removed
-#
-# @tray_open: #optional True if the device has a tray and it is open
-#             (only present if removable is true)
-#
-# @dirty-bitmaps: #optional dirty bitmaps information (only present if the
-#                 driver has one or more dirty bitmaps) (Since 2.0)
-#
-# @io-status: #optional @BlockDeviceIoStatus. Only present if the device
-#             supports it and the VM is configured to stop on errors
-#
-# @inserted: #optional @BlockDeviceInfo describing the device if media is
-#            present
-#
-# Since:  0.14.0
-##
-{ 'type': 'BlockInfo',
-  'data': {'device': 'str', 'type': 'str', 'removable': 'bool',
-           'locked': 'bool', '*inserted': 'BlockDeviceInfo',
-           '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
-           '*dirty-bitmaps': ['BlockDirtyInfo'] } }
-
-##
-# @query-block:
-#
-# Get a list of BlockInfo for all virtual block devices.
-#
-# Returns: a list of @BlockInfo describing each virtual block device
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-block', 'returns': ['BlockInfo'] }
-
-##
-# @BlockDeviceStats:
-#
-# Statistics of a virtual block device or a block backing device.
-#
-# @rd_bytes:      The number of bytes read by the device.
-#
-# @wr_bytes:      The number of bytes written by the device.
-#
-# @rd_operations: The number of read operations performed by the device.
-#
-# @wr_operations: The number of write operations performed by the device.
-#
-# @flush_operations: The number of cache flush operations performed by the
-#                    device (since 0.15.0)
-#
-# @flush_total_time_ns: Total time spend on cache flushes in nano-seconds
-#                       (since 0.15.0).
-#
-# @wr_total_time_ns: Total time spend on writes in nano-seconds (since 0.15.0).
-#
-# @rd_total_time_ns: Total_time_spend on reads in nano-seconds (since 0.15.0).
-#
-# @wr_highest_offset: The offset after the greatest byte written to the
-#                     device.  The intended use of this information is for
-#                     growable sparse files (like qcow2) that are used on top
-#                     of a physical device.
-#
-# Since: 0.14.0
-##
-{ 'type': 'BlockDeviceStats',
-  'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int',
-           'wr_operations': 'int', 'flush_operations': 'int',
-           'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int',
-           'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } }
-
-##
-# @BlockStats:
-#
-# Statistics of a virtual block device or a block backing device.
-#
-# @device: #optional If the stats are for a virtual block device, the name
-#          corresponding to the virtual block device.
-#
-# @stats:  A @BlockDeviceStats for the device.
-#
-# @parent: #optional This describes the file block device if it has one.
-#
-# @backing: #optional This describes the backing block device if it has one.
-#           (Since 2.0)
-#
-# Since: 0.14.0
-##
-{ 'type': 'BlockStats',
-  'data': {'*device': 'str', 'stats': 'BlockDeviceStats',
-           '*parent': 'BlockStats',
-           '*backing': 'BlockStats'} }
-
-##
-# @query-blockstats:
-#
-# Query the @BlockStats for all virtual block devices.
-#
-# Returns: A list of @BlockStats for each virtual block devices.
-#
-# Since: 0.14.0
-##
-{ 'command': 'query-blockstats', 'returns': ['BlockStats'] }
-
 ##
 # @VncClientInfo:
 #
@@ -1420,105 +1001,6 @@
 ##
 { 'command': 'query-pci', 'returns': ['PciInfo'] }
 
-##
-# @BlockdevOnError:
-#
-# An enumeration of possible behaviors for errors on I/O operations.
-# The exact meaning depends on whether the I/O was initiated by a guest
-# or by a block job
-#
-# @report: for guest operations, report the error to the guest;
-#          for jobs, cancel the job
-#
-# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
-#          or BLOCK_JOB_ERROR)
-#
-# @enospc: same as @stop on ENOSPC, same as @report otherwise.
-#
-# @stop: for guest operations, stop the virtual machine;
-#        for jobs, pause the job
-#
-# Since: 1.3
-##
-{ 'enum': 'BlockdevOnError',
-  'data': ['report', 'ignore', 'enospc', 'stop'] }
-
-##
-# @MirrorSyncMode:
-#
-# An enumeration of possible behaviors for the initial synchronization
-# phase of storage mirroring.
-#
-# @top: copies data in the topmost image to the destination
-#
-# @full: copies data from all images to the destination
-#
-# @none: only copy data written from now on
-#
-# Since: 1.3
-##
-{ 'enum': 'MirrorSyncMode',
-  'data': ['top', 'full', 'none'] }
-
-##
-# @BlockJobType:
-#
-# Type of a block job.
-#
-# @commit: block commit job type, see "block-commit"
-#
-# @stream: block stream job type, see "block-stream"
-#
-# @mirror: drive mirror job type, see "drive-mirror"
-#
-# @backup: drive backup job type, see "drive-backup"
-#
-# Since: 1.7
-##
-{ 'enum': 'BlockJobType',
-  'data': ['commit', 'stream', 'mirror', 'backup'] }
-
-##
-# @BlockJobInfo:
-#
-# Information about a long-running block device operation.
-#
-# @type: the job type ('stream' for image streaming)
-#
-# @device: the block device name
-#
-# @len: the maximum progress value
-#
-# @busy: false if the job is known to be in a quiescent state, with
-#        no pending I/O.  Since 1.3.
-#
-# @paused: whether the job is paused or, if @busy is true, will
-#          pause itself as soon as possible.  Since 1.3.
-#
-# @offset: the current progress value
-#
-# @speed: the rate limit, bytes per second
-#
-# @io-status: the status of the job (since 1.3)
-#
-# Since: 1.1
-##
-{ 'type': 'BlockJobInfo',
-  'data': {'type': 'str', 'device': 'str', 'len': 'int',
-           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
-           'io-status': 'BlockDeviceIoStatus'} }
-
-##
-# @query-block-jobs:
-#
-# Return information about long-running block device operations.
-#
-# Returns: a list of @BlockJobInfo for each active block job
-#
-# Since: 1.1
-##
-{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
-
 ##
 # @quit:
 #
@@ -1698,43 +1180,6 @@
 ##
 { 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
 
-##
-# @block_passwd:
-#
-# This command sets the password of a block device that has not been open
-# with a password and requires one.
-#
-# The two cases where this can happen are a block device is created through
-# QEMU's initial command line or a block device is changed through the legacy
-# @change interface.
-#
-# In the event that the block device is created through the initial command
-# line, the VM will start in the stopped state regardless of whether '-S' is
-# used.  The intention is for a management tool to query the block devices to
-# determine which ones are encrypted, set the passwords with this command, and
-# then start the guest with the @cont command.
-#
-# Either @device or @node-name must be set but not both.
-#
-# @device: #optional the name of the block backend device to set the password on
-#
-# @node-name: #optional graph node name to set the password on (Since 2.0)
-#
-# @password: the password to use for the device
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#          If @device is not encrypted, DeviceNotEncrypted
-#
-# Notes:  Not all block formats support encryption and some that do are not
-#         able to validate that a password is correct.  Disk corruption may
-#         occur if an invalid password is specified.
-#
-# Since: 0.14.0
-##
-{ 'command': 'block_passwd', 'data': {'*device': 'str',
-                                      '*node-name': 'str', 'password': 'str'} }
-
 ##
 # @balloon:
 #
@@ -1755,68 +1200,6 @@
 ##
 { 'command': 'balloon', 'data': {'value': 'int'} }
 
-##
-# @block_resize
-#
-# Resize a block image while a guest is running.
-#
-# Either @device or @node-name must be set but not both.
-#
-# @device: #optional the name of the device to get the image resized
-#
-# @node-name: #optional graph node name to get the image resized (Since 2.0)
-#
-# @size:  new image size in bytes
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#
-# Since: 0.14.0
-##
-{ 'command': 'block_resize', 'data': { '*device': 'str',
-                                       '*node-name': 'str',
-                                       'size': 'int' }}
-
-##
-# @NewImageMode
-#
-# An enumeration that tells QEMU how to set the backing file path in
-# a new image file.
-#
-# @existing: QEMU should look for an existing image file.
-#
-# @absolute-paths: QEMU should create a new image with absolute paths
-# for the backing file. If there is no backing file available, the new
-# image will not be backed either.
-#
-# Since: 1.1
-##
-{ 'enum': 'NewImageMode',
-  'data': [ 'existing', 'absolute-paths' ] }
-
-##
-# @BlockdevSnapshot
-#
-# Either @device or @node-name must be set but not both.
-#
-# @device: #optional the name of the device to generate the snapshot from.
-#
-# @node-name: #optional graph node name to generate the snapshot from (Since 2.0)
-#
-# @snapshot-file: the target of the new image. A new file will be created.
-#
-# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0)
-#
-# @format: #optional the format of the snapshot image, default is 'qcow2'.
-#
-# @mode: #optional whether and how QEMU should create a new image, default is
-#        'absolute-paths'.
-##
-{ 'type': 'BlockdevSnapshot',
-  'data': { '*device': 'str', '*node-name': 'str',
-            'snapshot-file': 'str', '*snapshot-node-name': 'str',
-            '*format': 'str', '*mode': 'NewImageMode' } }
-
 ##
 # @BlockdevSnapshotInternal
 #
@@ -1833,48 +1216,6 @@
 { 'type': 'BlockdevSnapshotInternal',
   'data': { 'device': 'str', 'name': 'str' } }
 
-##
-# @DriveBackup
-#
-# @device: the name of the device which should be copied.
-#
-# @target: the target of the new image. If the file exists, or if it
-#          is a device, the existing file/device will be used as the new
-#          destination.  If it does not exist, a new file will be created.
-#
-# @format: #optional the format of the new destination, default is to
-#          probe if @mode is 'existing', else the format of the source
-#
-# @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
-#
-# @mode: #optional whether and how QEMU should create a new image, default is
-#        'absolute-paths'.
-#
-# @speed: #optional the maximum speed, in bytes per second
-#
-# @on-source-error: #optional the action to take on an error on the source,
-#                   default 'report'.  'stop' and 'enospc' can only be used
-#                   if the block device supports io-status (see BlockInfo).
-#
-# @on-target-error: #optional the action to take on an error on the target,
-#                   default 'report' (no limitations, since this applies to
-#                   a different block device than @device).
-#
-# Note that @on-source-error and @on-target-error only affect background I/O.
-# If an error occurs during a guest write request, the device's rerror/werror
-# actions will be used.
-#
-# Since: 1.6
-##
-{ 'type': 'DriveBackup',
-  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
-            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int',
-            '*on-source-error': 'BlockdevOnError',
-            '*on-target-error': 'BlockdevOnError' } }
-
 ##
 # @Abort
 #
@@ -1921,21 +1262,6 @@
 { 'command': 'transaction',
   'data': { 'actions': [ 'TransactionAction' ] } }
 
-##
-# @blockdev-snapshot-sync
-#
-# Generates a synchronous snapshot of a block device.
-#
-# For the arguments, see the documentation of BlockdevSnapshot.
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#
-# Since 0.14.0
-##
-{ 'command': 'blockdev-snapshot-sync',
-  'data': 'BlockdevSnapshot' }
-
 ##
 # @blockdev-snapshot-internal-sync
 #
@@ -1994,145 +1320,22 @@
 #
 # Returns: the output of the command as a string
 #
-# Since: 0.14.0
-#
-# Notes: This command only exists as a stop-gap.  Its use is highly
-#        discouraged.  The semantics of this command are not guaranteed.
-#
-#        Known limitations:
-#
-#        o This command is stateless, this means that commands that depend
-#          on state information (such as getfd) might not work
-#
-#       o Commands that prompt the user for data (eg. 'cont' when the block
-#         device is encrypted) don't currently work
-##
-{ 'command': 'human-monitor-command',
-  'data': {'command-line': 'str', '*cpu-index': 'int'},
-  'returns': 'str' }
-
-##
-# @block-commit
-#
-# Live commit of data from overlay image nodes into backing nodes - i.e.,
-# writes data between 'top' and 'base' into 'base'.
-#
-# @device:  the name of the device
-#
-# @base:   #optional The file name of the backing image to write data into.
-#                    If not specified, this is the deepest backing image
-#
-# @top:              The file name of the backing image within the image chain,
-#                    which contains the topmost data to be committed down.
-#
-#                    If top == base, that is an error.
-#                    If top == active, the job will not be completed by itself,
-#                    user needs to complete the job with the block-job-complete
-#                    command after getting the ready event. (Since 2.0)
-#
-#                    If the base image is smaller than top, then the base image
-#                    will be resized to be the same size as top.  If top is
-#                    smaller than the base image, the base will not be
-#                    truncated.  If you want the base image size to match the
-#                    size of the smaller top, you can safely truncate it
-#                    yourself once the commit operation successfully completes.
-#
-#
-# @speed:  #optional the maximum speed, in bytes per second
-#
-# Returns: Nothing on success
-#          If commit or stream is already active on this device, DeviceInUse
-#          If @device does not exist, DeviceNotFound
-#          If image commit is not supported by this device, NotSupported
-#          If @base or @top is invalid, a generic error is returned
-#          If @speed is invalid, InvalidParameter
-#
-# Since: 1.3
-#
-##
-{ 'command': 'block-commit',
-  'data': { 'device': 'str', '*base': 'str', 'top': 'str',
-            '*speed': 'int' } }
-
-##
-# @drive-backup
-#
-# Start a point-in-time copy of a block device to a new destination.  The
-# status of ongoing drive-backup operations can be checked with
-# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
-# The operation can be stopped before it has completed using the
-# block-job-cancel command.
-#
-# For the arguments, see the documentation of DriveBackup.
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#
-# Since 1.6
-##
-{ 'command': 'drive-backup', 'data': 'DriveBackup' }
-
-##
-# @query-named-block-nodes
-#
-# Get the named block driver list
-#
-# Returns: the list of BlockDeviceInfo
-#
-# Since 2.0
-##
-{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] }
-
-##
-# @drive-mirror
-#
-# Start mirroring a block device's writes to a new destination.
-#
-# @device:  the name of the device whose writes should be mirrored.
-#
-# @target: the target of the new image. If the file exists, or if it
-#          is a device, the existing file/device will be used as the new
-#          destination.  If it does not exist, a new file will be created.
-#
-# @format: #optional the format of the new destination, default is to
-#          probe if @mode is 'existing', else the format of the source
-#
-# @mode: #optional whether and how QEMU should create a new image, default is
-#        'absolute-paths'.
-#
-# @speed:  #optional the maximum speed, in bytes per second
-#
-# @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
-#
-# @granularity: #optional granularity of the dirty bitmap, default is 64K
-#               if the image format doesn't have clusters, 4K if the clusters
-#               are smaller than that, else the cluster size.  Must be a
-#               power of 2 between 512 and 64M (since 1.4).
-#
-# @buf-size: #optional maximum amount of data in flight from source to
-#            target (since 1.4).
+# Since: 0.14.0
 #
-# @on-source-error: #optional the action to take on an error on the source,
-#                   default 'report'.  'stop' and 'enospc' can only be used
-#                   if the block device supports io-status (see BlockInfo).
+# Notes: This command only exists as a stop-gap.  Its use is highly
+#        discouraged.  The semantics of this command are not guaranteed.
 #
-# @on-target-error: #optional the action to take on an error on the target,
-#                   default 'report' (no limitations, since this applies to
-#                   a different block device than @device).
+#        Known limitations:
 #
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
+#        o This command is stateless, this means that commands that depend
+#          on state information (such as getfd) might not work
 #
-# Since 1.3
+#       o Commands that prompt the user for data (eg. 'cont' when the block
+#         device is encrypted) don't currently work
 ##
-{ 'command': 'drive-mirror',
-  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
-            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int', '*granularity': 'uint32',
-            '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
-            '*on-target-error': 'BlockdevOnError' } }
+{ 'command': 'human-monitor-command',
+  'data': {'command-line': 'str', '*cpu-index': 'int'},
+  'returns': 'str' }
 
 ##
 # @migrate_cancel
@@ -2417,211 +1620,6 @@
 { 'command': 'change',
   'data': {'device': 'str', 'target': 'str', '*arg': 'str'} }
 
-##
-# @block_set_io_throttle:
-#
-# Change I/O throttle limits for a block drive.
-#
-# @device: The name of the device
-#
-# @bps: total throughput limit in bytes per second
-#
-# @bps_rd: read throughput limit in bytes per second
-#
-# @bps_wr: write throughput limit in bytes per second
-#
-# @iops: total I/O operations per second
-#
-# @ops_rd: read I/O operations per second
-#
-# @iops_wr: write I/O operations per second
-#
-# @bps_max: #optional total max in bytes (Since 1.7)
-#
-# @bps_rd_max: #optional read max in bytes (Since 1.7)
-#
-# @bps_wr_max: #optional write max in bytes (Since 1.7)
-#
-# @iops_max: #optional total I/O operations max (Since 1.7)
-#
-# @iops_rd_max: #optional read I/O operations max (Since 1.7)
-#
-# @iops_wr_max: #optional write I/O operations max (Since 1.7)
-#
-# @iops_size: #optional an I/O size in bytes (Since 1.7)
-#
-# Returns: Nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#
-# Since: 1.1
-##
-{ 'command': 'block_set_io_throttle',
-  'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
-            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
-            '*bps_max': 'int', '*bps_rd_max': 'int',
-            '*bps_wr_max': 'int', '*iops_max': 'int',
-            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
-            '*iops_size': 'int' } }
-
-##
-# @block-stream:
-#
-# Copy data from a backing file into a block device.
-#
-# The block streaming operation is performed in the background until the entire
-# backing file has been copied.  This command returns immediately once streaming
-# has started.  The status of ongoing block streaming operations can be checked
-# with query-block-jobs.  The operation can be stopped before it has completed
-# using the block-job-cancel command.
-#
-# If a base file is specified then sectors are not copied from that base file and
-# its backing chain.  When streaming completes the image file will have the base
-# file as its backing file.  This can be used to stream a subset of the backing
-# file chain instead of flattening the entire image.
-#
-# On successful completion the image file is updated to drop the backing file
-# and the BLOCK_JOB_COMPLETED event is emitted.
-#
-# @device: the device name
-#
-# @base:   #optional the common backing file name
-#
-# @speed:  #optional the maximum speed, in bytes per second
-#
-# @on-error: #optional the action to take on an error (default report).
-#            'stop' and 'enospc' can only be used if the block device
-#            supports io-status (see BlockInfo).  Since 1.3.
-#
-# Returns: Nothing on success
-#          If @device does not exist, DeviceNotFound
-#
-# Since: 1.1
-##
-{ 'command': 'block-stream',
-  'data': { 'device': 'str', '*base': 'str', '*speed': 'int',
-            '*on-error': 'BlockdevOnError' } }
-
-##
-# @block-job-set-speed:
-#
-# Set maximum speed for a background block operation.
-#
-# This command can only be issued when there is an active block job.
-#
-# Throttling can be disabled by setting the speed to 0.
-#
-# @device: the device name
-#
-# @speed:  the maximum speed, in bytes per second, or 0 for unlimited.
-#          Defaults to 0.
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.1
-##
-{ 'command': 'block-job-set-speed',
-  'data': { 'device': 'str', 'speed': 'int' } }
-
-##
-# @block-job-cancel:
-#
-# Stop an active background block operation.
-#
-# This command returns immediately after marking the active background block
-# operation for cancellation.  It is an error to call this command if no
-# operation is in progress.
-#
-# The operation will cancel as soon as possible and then emit the
-# BLOCK_JOB_CANCELLED event.  Before that happens the job is still visible when
-# enumerated using query-block-jobs.
-#
-# For streaming, the image file retains its backing file unless the streaming
-# operation happens to complete just as it is being cancelled.  A new streaming
-# operation can be started at a later time to finish copying all data from the
-# backing file.
-#
-# @device: the device name
-#
-# @force: #optional whether to allow cancellation of a paused job (default
-#         false).  Since 1.3.
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.1
-##
-{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
-
-##
-# @block-job-pause:
-#
-# Pause an active background block operation.
-#
-# This command returns immediately after marking the active background block
-# operation for pausing.  It is an error to call this command if no
-# operation is in progress.  Pausing an already paused job has no cumulative
-# effect; a single block-job-resume command will resume the job.
-#
-# The operation will pause as soon as possible.  No event is emitted when
-# the operation is actually paused.  Cancelling a paused job automatically
-# resumes it.
-#
-# @device: the device name
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.3
-##
-{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
-
-##
-# @block-job-resume:
-#
-# Resume an active background block operation.
-#
-# This command returns immediately after resuming a paused background block
-# operation.  It is an error to call this command if no operation is in
-# progress.  Resuming an already running job is not an error.
-#
-# This command also clears the error status of the job.
-#
-# @device: the device name
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.3
-##
-{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
-
-##
-# @block-job-complete:
-#
-# Manually trigger completion of an active background block operation.  This
-# is supported for drive mirroring, where it also switches the device to
-# write to the target path only.  The ability to complete is signaled with
-# a BLOCK_JOB_READY event.
-#
-# This command completes an active background block operation synchronously.
-# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
-# is not defined.  Note that if an I/O error occurs during the processing of
-# this command: 1) the command itself will fail; 2) the error will be processed
-# according to the rerror/werror arguments that were specified when starting
-# the operation.
-#
-# A cancelled or paused job cannot be completed.
-#
-# @device: the device name
-#
-# Returns: Nothing on success
-#          If no background operation is active on this device, DeviceNotActive
-#
-# Since: 1.3
-##
-{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
-
 ##
 # @ObjectTypeInfo:
 #
@@ -4163,410 +3161,6 @@
 { 'command': 'query-rx-filter', 'data': { '*name': 'str' },
   'returns': ['RxFilterInfo'] }
 
-
-##
-# @BlockdevDiscardOptions
-#
-# Determines how to handle discard requests.
-#
-# @ignore:      Ignore the request
-# @unmap:       Forward as an unmap request
-#
-# Since: 1.7
-##
-{ 'enum': 'BlockdevDiscardOptions',
-  'data': [ 'ignore', 'unmap' ] }
-
-##
-# @BlockdevDetectZeroesOptions
-#
-# Describes the operation mode for the automatic conversion of plain
-# zero writes by the OS to driver specific optimized zero write commands.
-#
-# @off:      Disabled (default)
-# @on:       Enabled
-# @unmap:    Enabled and even try to unmap blocks if possible. This requires
-#            also that @BlockdevDiscardOptions is set to unmap for this device.
-#
-# Since: 2.1
-##
-{ 'enum': 'BlockdevDetectZeroesOptions',
-  'data': [ 'off', 'on', 'unmap' ] }
-
-##
-# @BlockdevAioOptions
-#
-# Selects the AIO backend to handle I/O requests
-#
-# @threads:     Use qemu's thread pool
-# @native:      Use native AIO backend (only Linux and Windows)
-#
-# Since: 1.7
-##
-{ 'enum': 'BlockdevAioOptions',
-  'data': [ 'threads', 'native' ] }
-
-##
-# @BlockdevCacheOptions
-#
-# Includes cache-related options for block devices
-#
-# @writeback:   #optional enables writeback mode for any caches (default: true)
-# @direct:      #optional enables use of O_DIRECT (bypass the host page cache;
-#               default: false)
-# @no-flush:    #optional ignore any flush requests for the device (default:
-#               false)
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevCacheOptions',
-  'data': { '*writeback': 'bool',
-            '*direct': 'bool',
-            '*no-flush': 'bool' } }
-
-##
-# @BlockdevDriver
-#
-# Drivers that are supported in block device operations.
-#
-# @host_device, @host_cdrom, @host_floppy: Since 2.1
-#
-# Since: 2.0
-##
-{ 'enum': 'BlockdevDriver',
-  'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy',
-            'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
-            'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
-            'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
-
-##
-# @BlockdevOptionsBase
-#
-# Options that are available for all block devices, independent of the block
-# driver.
-#
-# @driver:        block driver name
-# @id:            #optional id by which the new block device can be referred to.
-#                 This is a required option on the top level of blockdev-add, and
-#                 currently not allowed on any other level.
-# @node-name:     #optional the name of a block driver state node (Since 2.0)
-# @discard:       #optional discard-related options (default: ignore)
-# @cache:         #optional cache-related options
-# @aio:           #optional AIO backend (default: threads)
-# @rerror:        #optional how to handle read errors on the device
-#                 (default: report)
-# @werror:        #optional how to handle write errors on the device
-#                 (default: enospc)
-# @read-only:     #optional whether the block device should be read-only
-#                 (default: false)
-# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
-#                 (default: off)
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsBase',
-  'data': { 'driver': 'BlockdevDriver',
-            '*id': 'str',
-            '*node-name': 'str',
-            '*discard': 'BlockdevDiscardOptions',
-            '*cache': 'BlockdevCacheOptions',
-            '*aio': 'BlockdevAioOptions',
-            '*rerror': 'BlockdevOnError',
-            '*werror': 'BlockdevOnError',
-            '*read-only': 'bool',
-            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
-
-##
-# @BlockdevOptionsFile
-#
-# Driver specific block device options for the file backend and similar
-# protocols.
-#
-# @filename:    path to the image file
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsFile',
-  'data': { 'filename': 'str' } }
-
-##
-# @BlockdevOptionsVVFAT
-#
-# Driver specific block device options for the vvfat protocol.
-#
-# @dir:         directory to be exported as FAT image
-# @fat-type:    #optional FAT type: 12, 16 or 32
-# @floppy:      #optional whether to export a floppy image (true) or
-#               partitioned hard disk (false; default)
-# @rw:          #optional whether to allow write operations (default: false)
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsVVFAT',
-  'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool',
-            '*rw': 'bool' } }
-
-##
-# @BlockdevOptionsGenericFormat
-#
-# Driver specific block device options for image format that have no option
-# besides their data source.
-#
-# @file:        reference to or definition of the data source block device
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsGenericFormat',
-  'data': { 'file': 'BlockdevRef' } }
-
-##
-# @BlockdevOptionsGenericCOWFormat
-#
-# Driver specific block device options for image format that have no option
-# besides their data source and an optional backing file.
-#
-# @backing:     #optional reference to or definition of the backing file block
-#               device (if missing, taken from the image file content). It is
-#               allowed to pass an empty string here in order to disable the
-#               default backing file.
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsGenericCOWFormat',
-  'base': 'BlockdevOptionsGenericFormat',
-  'data': { '*backing': 'BlockdevRef' } }
-
-##
-# @BlockdevOptionsQcow2
-#
-# Driver specific block device options for qcow2.
-#
-# @lazy-refcounts:        #optional whether to enable the lazy refcounts
-#                         feature (default is taken from the image file)
-#
-# @pass-discard-request:  #optional whether discard requests to the qcow2
-#                         device should be forwarded to the data source
-#
-# @pass-discard-snapshot: #optional whether discard requests for the data source
-#                         should be issued when a snapshot operation (e.g.
-#                         deleting a snapshot) frees clusters in the qcow2 file
-#
-# @pass-discard-other:    #optional whether discard requests for the data source
-#                         should be issued on other occasions where a cluster
-#                         gets freed
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevOptionsQcow2',
-  'base': 'BlockdevOptionsGenericCOWFormat',
-  'data': { '*lazy-refcounts': 'bool',
-            '*pass-discard-request': 'bool',
-            '*pass-discard-snapshot': 'bool',
-            '*pass-discard-other': 'bool' } }
-
-##
-# @BlkdebugEvent
-#
-# Trigger events supported by blkdebug.
-##
-{ 'enum': 'BlkdebugEvent',
-  'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table',
-            'l1_grow.activate_table', 'l2_load', 'l2_update',
-            'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write',
-            'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio',
-            'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read',
-            'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update',
-            'refblock_load', 'refblock_update', 'refblock_update_part',
-            'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write',
-            'refblock_alloc.write_blocks', 'refblock_alloc.write_table',
-            'refblock_alloc.switch_table', 'cluster_alloc',
-            'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
-            'flush_to_disk' ] }
-
-##
-# @BlkdebugInjectErrorOptions
-#
-# Describes a single error injection for blkdebug.
-#
-# @event:       trigger event
-#
-# @state:       #optional the state identifier blkdebug needs to be in to
-#               actually trigger the event; defaults to "any"
-#
-# @errno:       #optional error identifier (errno) to be returned; defaults to
-#               EIO
-#
-# @sector:      #optional specifies the sector index which has to be affected
-#               in order to actually trigger the event; defaults to "any
-#               sector"
-#
-# @once:        #optional disables further events after this one has been
-#               triggered; defaults to false
-#
-# @immediately: #optional fail immediately; defaults to false
-#
-# Since: 2.0
-##
-{ 'type': 'BlkdebugInjectErrorOptions',
-  'data': { 'event': 'BlkdebugEvent',
-            '*state': 'int',
-            '*errno': 'int',
-            '*sector': 'int',
-            '*once': 'bool',
-            '*immediately': 'bool' } }
-
-##
-# @BlkdebugSetStateOptions
-#
-# Describes a single state-change event for blkdebug.
-#
-# @event:       trigger event
-#
-# @state:       #optional the current state identifier blkdebug needs to be in;
-#               defaults to "any"
-#
-# @new_state:   the state identifier blkdebug is supposed to assume if
-#               this event is triggered
-#
-# Since: 2.0
-##
-{ 'type': 'BlkdebugSetStateOptions',
-  'data': { 'event': 'BlkdebugEvent',
-            '*state': 'int',
-            'new_state': 'int' } }
-
-##
-# @BlockdevOptionsBlkdebug
-#
-# Driver specific block device options for blkdebug.
-#
-# @image:           underlying raw block device (or image file)
-#
-# @config:          #optional filename of the configuration file
-#
-# @align:           #optional required alignment for requests in bytes
-#
-# @inject-error:    #optional array of error injection descriptions
-#
-# @set-state:       #optional array of state-change descriptions
-#
-# Since: 2.0
-##
-{ 'type': 'BlockdevOptionsBlkdebug',
-  'data': { 'image': 'BlockdevRef',
-            '*config': 'str',
-            '*align': 'int',
-            '*inject-error': ['BlkdebugInjectErrorOptions'],
-            '*set-state': ['BlkdebugSetStateOptions'] } }
-
-##
-# @BlockdevOptionsBlkverify
-#
-# Driver specific block device options for blkverify.
-#
-# @test:    block device to be tested
-#
-# @raw:     raw image used for verification
-#
-# Since: 2.0
-##
-{ 'type': 'BlockdevOptionsBlkverify',
-  'data': { 'test': 'BlockdevRef',
-            'raw': 'BlockdevRef' } }
-
-##
-# @BlockdevOptionsQuorum
-#
-# Driver specific block device options for Quorum
-#
-# @blkverify:      #optional true if the driver must print content mismatch
-#                  set to false by default
-#
-# @children:       the children block devices to use
-#
-# @vote-threshold: the vote limit under which a read will fail
-#
-# Since: 2.0
-##
-{ 'type': 'BlockdevOptionsQuorum',
-  'data': { '*blkverify': 'bool',
-            'children': [ 'BlockdevRef' ],
-            'vote-threshold': 'int' } }
-
-##
-# @BlockdevOptions
-#
-# Options for creating a block device.
-#
-# Since: 1.7
-##
-{ 'union': 'BlockdevOptions',
-  'base': 'BlockdevOptionsBase',
-  'discriminator': 'driver',
-  'data': {
-      'file':       'BlockdevOptionsFile',
-      'host_device':'BlockdevOptionsFile',
-      'host_cdrom': 'BlockdevOptionsFile',
-      'host_floppy':'BlockdevOptionsFile',
-      'http':       'BlockdevOptionsFile',
-      'https':      'BlockdevOptionsFile',
-      'ftp':        'BlockdevOptionsFile',
-      'ftps':       'BlockdevOptionsFile',
-      'tftp':       'BlockdevOptionsFile',
-# TODO gluster: Wait for structured options
-# TODO iscsi: Wait for structured options
-# TODO nbd: Should take InetSocketAddress for 'host'?
-# TODO nfs: Wait for structured options
-# TODO rbd: Wait for structured options
-# TODO sheepdog: Wait for structured options
-# TODO ssh: Should take InetSocketAddress for 'host'?
-      'vvfat':      'BlockdevOptionsVVFAT',
-      'blkdebug':   'BlockdevOptionsBlkdebug',
-      'blkverify':  'BlockdevOptionsBlkverify',
-      'bochs':      'BlockdevOptionsGenericFormat',
-      'cloop':      'BlockdevOptionsGenericFormat',
-      'cow':        'BlockdevOptionsGenericCOWFormat',
-      'dmg':        'BlockdevOptionsGenericFormat',
-      'parallels':  'BlockdevOptionsGenericFormat',
-      'qcow':       'BlockdevOptionsGenericCOWFormat',
-      'qcow2':      'BlockdevOptionsQcow2',
-      'qed':        'BlockdevOptionsGenericCOWFormat',
-      'raw':        'BlockdevOptionsGenericFormat',
-      'vdi':        'BlockdevOptionsGenericFormat',
-      'vhdx':       'BlockdevOptionsGenericFormat',
-      'vmdk':       'BlockdevOptionsGenericCOWFormat',
-      'vpc':        'BlockdevOptionsGenericFormat',
-      'quorum':     'BlockdevOptionsQuorum'
-  } }
-
-##
-# @BlockdevRef
-#
-# Reference to a block device.
-#
-# @definition:      defines a new block device inline
-# @reference:       references the ID of an existing block device. An
-#                   empty string means that no block device should be
-#                   referenced.
-#
-# Since: 1.7
-##
-{ 'union': 'BlockdevRef',
-  'discriminator': {},
-  'data': { 'definition': 'BlockdevOptions',
-            'reference': 'str' } }
-
-##
-# @blockdev-add:
-#
-# Creates a new block device.
-#
-# @options: block device options for the new device
-#
-# Since: 1.7
-##
-{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } }
-
 ##
 # @InputButton
 #
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 11bd8c2b3d..7215e48130 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4,3 +4,1409 @@
 
 # QAPI common definitions
 { 'include': 'common.json' }
+
+##
+# @SnapshotInfo
+#
+# @id: unique snapshot id
+#
+# @name: user chosen name
+#
+# @vm-state-size: size of the VM state
+#
+# @date-sec: UTC date of the snapshot in seconds
+#
+# @date-nsec: fractional part in nano seconds to be used with date-sec
+#
+# @vm-clock-sec: VM clock relative to boot in seconds
+#
+# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec
+#
+# Since: 1.3
+#
+##
+
+{ 'type': 'SnapshotInfo',
+  'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int',
+            'date-sec': 'int', 'date-nsec': 'int',
+            'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } }
+
+##
+# @ImageInfoSpecificQCow2:
+#
+# @compat: compatibility level
+#
+# @lazy-refcounts: #optional on or off; only valid for compat >= 1.1
+#
+# Since: 1.7
+##
+{ 'type': 'ImageInfoSpecificQCow2',
+  'data': {
+      'compat': 'str',
+      '*lazy-refcounts': 'bool'
+  } }
+
+##
+# @ImageInfoSpecificVmdk:
+#
+# @create-type: The create type of VMDK image
+#
+# @cid: Content id of image
+#
+# @parent-cid: Parent VMDK image's cid
+#
+# @extents: List of extent files
+#
+# Since: 1.7
+##
+{ 'type': 'ImageInfoSpecificVmdk',
+  'data': {
+      'create-type': 'str',
+      'cid': 'int',
+      'parent-cid': 'int',
+      'extents': ['ImageInfo']
+  } }
+
+##
+# @ImageInfoSpecific:
+#
+# A discriminated record of image format specific information structures.
+#
+# Since: 1.7
+##
+
+{ 'union': 'ImageInfoSpecific',
+  'data': {
+      'qcow2': 'ImageInfoSpecificQCow2',
+      'vmdk': 'ImageInfoSpecificVmdk'
+  } }
+
+##
+# @ImageInfo:
+#
+# Information about a QEMU image file
+#
+# @filename: name of the image file
+#
+# @format: format of the image file
+#
+# @virtual-size: maximum capacity in bytes of the image
+#
+# @actual-size: #optional actual size on disk in bytes of the image
+#
+# @dirty-flag: #optional true if image is not cleanly closed
+#
+# @cluster-size: #optional size of a cluster in bytes
+#
+# @encrypted: #optional true if the image is encrypted
+#
+# @compressed: #optional true if the image is compressed (Since 1.7)
+#
+# @backing-filename: #optional name of the backing file
+#
+# @full-backing-filename: #optional full path of the backing file
+#
+# @backing-filename-format: #optional the format of the backing file
+#
+# @snapshots: #optional list of VM snapshots
+#
+# @backing-image: #optional info of the backing image (since 1.6)
+#
+# @format-specific: #optional structure supplying additional format-specific
+# information (since 1.7)
+#
+# Since: 1.3
+#
+##
+
+{ 'type': 'ImageInfo',
+  'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
+           '*actual-size': 'int', 'virtual-size': 'int',
+           '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
+           '*backing-filename': 'str', '*full-backing-filename': 'str',
+           '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
+           '*backing-image': 'ImageInfo',
+           '*format-specific': 'ImageInfoSpecific' } }
+
+##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors occurred during check
+#
+# @image-end-offset: #optional offset (in bytes) where the image ends, this
+#                    field is present if the driver for the image format
+#                    supports it
+#
+# @corruptions: #optional number of corruptions found during the check if any
+#
+# @leaks: #optional number of leaks found during the check if any
+#
+# @corruptions-fixed: #optional number of corruptions fixed during the check
+#                     if any
+#
+# @leaks-fixed: #optional number of leaks fixed during the check if any
+#
+# @total-clusters: #optional total number of clusters, this field is present
+#                  if the driver for the image format supports it
+#
+# @allocated-clusters: #optional total number of allocated clusters, this
+#                      field is present if the driver for the image format
+#                      supports it
+#
+# @fragmented-clusters: #optional total number of fragmented clusters, this
+#                       field is present if the driver for the image format
+#                       supports it
+#
+# @compressed-clusters: #optional total number of compressed clusters, this
+#                       field is present if the driver for the image format
+#                       supports it
+#
+# Since: 1.4
+#
+##
+
+{ 'type': 'ImageCheck',
+  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+           '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+           '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+           '*total-clusters': 'int', '*allocated-clusters': 'int',
+           '*fragmented-clusters': 'int', '*compressed-clusters': 'int' } }
+
+##
+# @BlockDeviceInfo:
+#
+# Information about the backing device for a block device.
+#
+# @file: the filename of the backing device
+#
+# @node-name: #optional the name of the block driver node (Since 2.0)
+#
+# @ro: true if the backing device was open read-only
+#
+# @drv: the name of the block format used to open the backing device. As of
+#       0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
+#       'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
+#       'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
+#       'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
+#
+# @backing_file: #optional the name of the backing file (for copy-on-write)
+#
+# @backing_file_depth: number of files in the backing file chain (since: 1.2)
+#
+# @encrypted: true if the backing device is encrypted
+#
+# @encryption_key_missing: true if the backing device is encrypted but an
+#                          valid encryption key is missing
+#
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
+# @bps: total throughput limit in bytes per second is specified
+#
+# @bps_rd: read throughput limit in bytes per second is specified
+#
+# @bps_wr: write throughput limit in bytes per second is specified
+#
+# @iops: total I/O operations per second is specified
+#
+# @iops_rd: read I/O operations per second is specified
+#
+# @iops_wr: write I/O operations per second is specified
+#
+# @image: the info of image used (since: 1.6)
+#
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
+# Since: 0.14.0
+#
+##
+{ 'type': 'BlockDeviceInfo',
+  'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
+            '*backing_file': 'str', 'backing_file_depth': 'int',
+            'encrypted': 'bool', 'encryption_key_missing': 'bool',
+            'detect_zeroes': 'BlockdevDetectZeroesOptions',
+            'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
+            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+            'image': 'ImageInfo',
+            '*bps_max': 'int', '*bps_rd_max': 'int',
+            '*bps_wr_max': 'int', '*iops_max': 'int',
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+            '*iops_size': 'int' } }
+
+##
+# @BlockDeviceIoStatus:
+#
+# An enumeration of block device I/O status.
+#
+# @ok: The last I/O operation has succeeded
+#
+# @failed: The last I/O operation has failed
+#
+# @nospace: The last I/O operation has failed due to a no-space condition
+#
+# Since: 1.0
+##
+{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
+
+##
+# @BlockDeviceMapEntry:
+#
+# Entry in the metadata map of the device (returned by "qemu-img map")
+#
+# @start: Offset in the image of the first byte described by this entry
+#         (in bytes)
+#
+# @length: Length of the range described by this entry (in bytes)
+#
+# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.)
+#         before reaching one for which the range is allocated.  The value is
+#         in the range 0 to the depth of the image chain - 1.
+#
+# @zero: the sectors in this range read as zeros
+#
+# @data: reading the image will actually read data from a file (in particular,
+#        if @offset is present this means that the sectors are not simply
+#        preallocated, but contain actual data in raw format)
+#
+# @offset: if present, the image file stores the data for this range in
+#          raw format at the given offset.
+#
+# Since 1.7
+##
+{ 'type': 'BlockDeviceMapEntry',
+  'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool',
+            'data': 'bool', '*offset': 'int' } }
+
+##
+# @BlockDirtyInfo:
+#
+# Block dirty bitmap information.
+#
+# @count: number of dirty bytes according to the dirty bitmap
+#
+# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
+#
+# Since: 1.3
+##
+{ 'type': 'BlockDirtyInfo',
+  'data': {'count': 'int', 'granularity': 'int'} }
+
+##
+# @BlockInfo:
+#
+# Block device information.  This structure describes a virtual device and
+# the backing device associated with it.
+#
+# @device: The device name associated with the virtual device.
+#
+# @type: This field is returned only for compatibility reasons, it should
+#        not be used (always returns 'unknown')
+#
+# @removable: True if the device supports removable media.
+#
+# @locked: True if the guest has locked this device from having its media
+#          removed
+#
+# @tray_open: #optional True if the device has a tray and it is open
+#             (only present if removable is true)
+#
+# @dirty-bitmaps: #optional dirty bitmaps information (only present if the
+#                 driver has one or more dirty bitmaps) (Since 2.0)
+#
+# @io-status: #optional @BlockDeviceIoStatus. Only present if the device
+#             supports it and the VM is configured to stop on errors
+#
+# @inserted: #optional @BlockDeviceInfo describing the device if media is
+#            present
+#
+# Since:  0.14.0
+##
+{ 'type': 'BlockInfo',
+  'data': {'device': 'str', 'type': 'str', 'removable': 'bool',
+           'locked': 'bool', '*inserted': 'BlockDeviceInfo',
+           '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
+           '*dirty-bitmaps': ['BlockDirtyInfo'] } }
+
+##
+# @query-block:
+#
+# Get a list of BlockInfo for all virtual block devices.
+#
+# Returns: a list of @BlockInfo describing each virtual block device
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-block', 'returns': ['BlockInfo'] }
+
+##
+# @BlockDeviceStats:
+#
+# Statistics of a virtual block device or a block backing device.
+#
+# @rd_bytes:      The number of bytes read by the device.
+#
+# @wr_bytes:      The number of bytes written by the device.
+#
+# @rd_operations: The number of read operations performed by the device.
+#
+# @wr_operations: The number of write operations performed by the device.
+#
+# @flush_operations: The number of cache flush operations performed by the
+#                    device (since 0.15.0)
+#
+# @flush_total_time_ns: Total time spend on cache flushes in nano-seconds
+#                       (since 0.15.0).
+#
+# @wr_total_time_ns: Total time spend on writes in nano-seconds (since 0.15.0).
+#
+# @rd_total_time_ns: Total_time_spend on reads in nano-seconds (since 0.15.0).
+#
+# @wr_highest_offset: The offset after the greatest byte written to the
+#                     device.  The intended use of this information is for
+#                     growable sparse files (like qcow2) that are used on top
+#                     of a physical device.
+#
+# Since: 0.14.0
+##
+{ 'type': 'BlockDeviceStats',
+  'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'rd_operations': 'int',
+           'wr_operations': 'int', 'flush_operations': 'int',
+           'flush_total_time_ns': 'int', 'wr_total_time_ns': 'int',
+           'rd_total_time_ns': 'int', 'wr_highest_offset': 'int' } }
+
+##
+# @BlockStats:
+#
+# Statistics of a virtual block device or a block backing device.
+#
+# @device: #optional If the stats are for a virtual block device, the name
+#          corresponding to the virtual block device.
+#
+# @stats:  A @BlockDeviceStats for the device.
+#
+# @parent: #optional This describes the file block device if it has one.
+#
+# @backing: #optional This describes the backing block device if it has one.
+#           (Since 2.0)
+#
+# Since: 0.14.0
+##
+{ 'type': 'BlockStats',
+  'data': {'*device': 'str', 'stats': 'BlockDeviceStats',
+           '*parent': 'BlockStats',
+           '*backing': 'BlockStats'} }
+
+##
+# @query-blockstats:
+#
+# Query the @BlockStats for all virtual block devices.
+#
+# Returns: A list of @BlockStats for each virtual block devices.
+#
+# Since: 0.14.0
+##
+{ 'command': 'query-blockstats', 'returns': ['BlockStats'] }
+
+##
+# @BlockdevOnError:
+#
+# An enumeration of possible behaviors for errors on I/O operations.
+# The exact meaning depends on whether the I/O was initiated by a guest
+# or by a block job
+#
+# @report: for guest operations, report the error to the guest;
+#          for jobs, cancel the job
+#
+# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
+#          or BLOCK_JOB_ERROR)
+#
+# @enospc: same as @stop on ENOSPC, same as @report otherwise.
+#
+# @stop: for guest operations, stop the virtual machine;
+#        for jobs, pause the job
+#
+# Since: 1.3
+##
+{ 'enum': 'BlockdevOnError',
+  'data': ['report', 'ignore', 'enospc', 'stop'] }
+
+##
+# @MirrorSyncMode:
+#
+# An enumeration of possible behaviors for the initial synchronization
+# phase of storage mirroring.
+#
+# @top: copies data in the topmost image to the destination
+#
+# @full: copies data from all images to the destination
+#
+# @none: only copy data written from now on
+#
+# Since: 1.3
+##
+{ 'enum': 'MirrorSyncMode',
+  'data': ['top', 'full', 'none'] }
+
+##
+# @BlockJobType:
+#
+# Type of a block job.
+#
+# @commit: block commit job type, see "block-commit"
+#
+# @stream: block stream job type, see "block-stream"
+#
+# @mirror: drive mirror job type, see "drive-mirror"
+#
+# @backup: drive backup job type, see "drive-backup"
+#
+# Since: 1.7
+##
+{ 'enum': 'BlockJobType',
+  'data': ['commit', 'stream', 'mirror', 'backup'] }
+
+##
+# @BlockJobInfo:
+#
+# Information about a long-running block device operation.
+#
+# @type: the job type ('stream' for image streaming)
+#
+# @device: the block device name
+#
+# @len: the maximum progress value
+#
+# @busy: false if the job is known to be in a quiescent state, with
+#        no pending I/O.  Since 1.3.
+#
+# @paused: whether the job is paused or, if @busy is true, will
+#          pause itself as soon as possible.  Since 1.3.
+#
+# @offset: the current progress value
+#
+# @speed: the rate limit, bytes per second
+#
+# @io-status: the status of the job (since 1.3)
+#
+# Since: 1.1
+##
+{ 'type': 'BlockJobInfo',
+  'data': {'type': 'str', 'device': 'str', 'len': 'int',
+           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
+           'io-status': 'BlockDeviceIoStatus'} }
+
+##
+# @query-block-jobs:
+#
+# Return information about long-running block device operations.
+#
+# Returns: a list of @BlockJobInfo for each active block job
+#
+# Since: 1.1
+##
+{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
+
+##
+# @block_passwd:
+#
+# This command sets the password of a block device that has not been open
+# with a password and requires one.
+#
+# The two cases where this can happen are a block device is created through
+# QEMU's initial command line or a block device is changed through the legacy
+# @change interface.
+#
+# In the event that the block device is created through the initial command
+# line, the VM will start in the stopped state regardless of whether '-S' is
+# used.  The intention is for a management tool to query the block devices to
+# determine which ones are encrypted, set the passwords with this command, and
+# then start the guest with the @cont command.
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the block backend device to set the password on
+#
+# @node-name: #optional graph node name to set the password on (Since 2.0)
+#
+# @password: the password to use for the device
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#          If @device is not encrypted, DeviceNotEncrypted
+#
+# Notes:  Not all block formats support encryption and some that do are not
+#         able to validate that a password is correct.  Disk corruption may
+#         occur if an invalid password is specified.
+#
+# Since: 0.14.0
+##
+{ 'command': 'block_passwd', 'data': {'*device': 'str',
+                                      '*node-name': 'str', 'password': 'str'} }
+
+##
+# @block_resize
+#
+# Resize a block image while a guest is running.
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the device to get the image resized
+#
+# @node-name: #optional graph node name to get the image resized (Since 2.0)
+#
+# @size:  new image size in bytes
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since: 0.14.0
+##
+{ 'command': 'block_resize', 'data': { '*device': 'str',
+                                       '*node-name': 'str',
+                                       'size': 'int' }}
+
+##
+# @NewImageMode
+#
+# An enumeration that tells QEMU how to set the backing file path in
+# a new image file.
+#
+# @existing: QEMU should look for an existing image file.
+#
+# @absolute-paths: QEMU should create a new image with absolute paths
+# for the backing file. If there is no backing file available, the new
+# image will not be backed either.
+#
+# Since: 1.1
+##
+{ 'enum': 'NewImageMode',
+  'data': [ 'existing', 'absolute-paths' ] }
+
+##
+# @BlockdevSnapshot
+#
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the device to generate the snapshot from.
+#
+# @node-name: #optional graph node name to generate the snapshot from (Since 2.0)
+#
+# @snapshot-file: the target of the new image. A new file will be created.
+#
+# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0)
+#
+# @format: #optional the format of the snapshot image, default is 'qcow2'.
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+##
+{ 'type': 'BlockdevSnapshot',
+  'data': { '*device': 'str', '*node-name': 'str',
+            'snapshot-file': 'str', '*snapshot-node-name': 'str',
+            '*format': 'str', '*mode': 'NewImageMode' } }
+
+##
+# @DriveBackup
+#
+# @device: the name of the device which should be copied.
+#
+# @target: the target of the new image. If the file exists, or if it
+#          is a device, the existing file/device will be used as the new
+#          destination.  If it does not exist, a new file will be created.
+#
+# @format: #optional the format of the new destination, default is to
+#          probe if @mode is 'existing', else the format of the source
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, or
+#        only new I/O).
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+#
+# @speed: #optional the maximum speed, in bytes per second
+#
+# @on-source-error: #optional the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: #optional the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+#
+# Note that @on-source-error and @on-target-error only affect background I/O.
+# If an error occurs during a guest write request, the device's rerror/werror
+# actions will be used.
+#
+# Since: 1.6
+##
+{ 'type': 'DriveBackup',
+  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
+            '*speed': 'int',
+            '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError' } }
+
+##
+# @blockdev-snapshot-sync
+#
+# Generates a synchronous snapshot of a block device.
+#
+# For the arguments, see the documentation of BlockdevSnapshot.
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since 0.14.0
+##
+{ 'command': 'blockdev-snapshot-sync',
+  'data': 'BlockdevSnapshot' }
+
+##
+# @block-commit
+#
+# Live commit of data from overlay image nodes into backing nodes - i.e.,
+# writes data between 'top' and 'base' into 'base'.
+#
+# @device:  the name of the device
+#
+# @base:   #optional The file name of the backing image to write data into.
+#                    If not specified, this is the deepest backing image
+#
+# @top:              The file name of the backing image within the image chain,
+#                    which contains the topmost data to be committed down.
+#
+#                    If top == base, that is an error.
+#                    If top == active, the job will not be completed by itself,
+#                    user needs to complete the job with the block-job-complete
+#                    command after getting the ready event. (Since 2.0)
+#
+#                    If the base image is smaller than top, then the base image
+#                    will be resized to be the same size as top.  If top is
+#                    smaller than the base image, the base will not be
+#                    truncated.  If you want the base image size to match the
+#                    size of the smaller top, you can safely truncate it
+#                    yourself once the commit operation successfully completes.
+#
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# Returns: Nothing on success
+#          If commit or stream is already active on this device, DeviceInUse
+#          If @device does not exist, DeviceNotFound
+#          If image commit is not supported by this device, NotSupported
+#          If @base or @top is invalid, a generic error is returned
+#          If @speed is invalid, InvalidParameter
+#
+# Since: 1.3
+#
+##
+{ 'command': 'block-commit',
+  'data': { 'device': 'str', '*base': 'str', 'top': 'str',
+            '*speed': 'int' } }
+
+##
+# @drive-backup
+#
+# Start a point-in-time copy of a block device to a new destination.  The
+# status of ongoing drive-backup operations can be checked with
+# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
+# The operation can be stopped before it has completed using the
+# block-job-cancel command.
+#
+# For the arguments, see the documentation of DriveBackup.
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since 1.6
+##
+{ 'command': 'drive-backup', 'data': 'DriveBackup' }
+
+##
+# @query-named-block-nodes
+#
+# Get the named block driver list
+#
+# Returns: the list of BlockDeviceInfo
+#
+# Since 2.0
+##
+{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] }
+
+##
+# @drive-mirror
+#
+# Start mirroring a block device's writes to a new destination.
+#
+# @device:  the name of the device whose writes should be mirrored.
+#
+# @target: the target of the new image. If the file exists, or if it
+#          is a device, the existing file/device will be used as the new
+#          destination.  If it does not exist, a new file will be created.
+#
+# @format: #optional the format of the new destination, default is to
+#          probe if @mode is 'existing', else the format of the source
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, or
+#        only new I/O).
+#
+# @granularity: #optional granularity of the dirty bitmap, default is 64K
+#               if the image format doesn't have clusters, 4K if the clusters
+#               are smaller than that, else the cluster size.  Must be a
+#               power of 2 between 512 and 64M (since 1.4).
+#
+# @buf-size: #optional maximum amount of data in flight from source to
+#            target (since 1.4).
+#
+# @on-source-error: #optional the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: #optional the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since 1.3
+##
+{ 'command': 'drive-mirror',
+  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
+            '*speed': 'int', '*granularity': 'uint32',
+            '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError' } }
+
+##
+# @block_set_io_throttle:
+#
+# Change I/O throttle limits for a block drive.
+#
+# @device: The name of the device
+#
+# @bps: total throughput limit in bytes per second
+#
+# @bps_rd: read throughput limit in bytes per second
+#
+# @bps_wr: write throughput limit in bytes per second
+#
+# @iops: total I/O operations per second
+#
+# @ops_rd: read I/O operations per second
+#
+# @iops_wr: write I/O operations per second
+#
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
+# Returns: Nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since: 1.1
+##
+{ 'command': 'block_set_io_throttle',
+  'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
+            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+            '*bps_max': 'int', '*bps_rd_max': 'int',
+            '*bps_wr_max': 'int', '*iops_max': 'int',
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+            '*iops_size': 'int' } }
+
+##
+# @block-stream:
+#
+# Copy data from a backing file into a block device.
+#
+# The block streaming operation is performed in the background until the entire
+# backing file has been copied.  This command returns immediately once streaming
+# has started.  The status of ongoing block streaming operations can be checked
+# with query-block-jobs.  The operation can be stopped before it has completed
+# using the block-job-cancel command.
+#
+# If a base file is specified then sectors are not copied from that base file and
+# its backing chain.  When streaming completes the image file will have the base
+# file as its backing file.  This can be used to stream a subset of the backing
+# file chain instead of flattening the entire image.
+#
+# On successful completion the image file is updated to drop the backing file
+# and the BLOCK_JOB_COMPLETED event is emitted.
+#
+# @device: the device name
+#
+# @base:   #optional the common backing file name
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# @on-error: #optional the action to take on an error (default report).
+#            'stop' and 'enospc' can only be used if the block device
+#            supports io-status (see BlockInfo).  Since 1.3.
+#
+# Returns: Nothing on success
+#          If @device does not exist, DeviceNotFound
+#
+# Since: 1.1
+##
+{ 'command': 'block-stream',
+  'data': { 'device': 'str', '*base': 'str', '*speed': 'int',
+            '*on-error': 'BlockdevOnError' } }
+
+##
+# @block-job-set-speed:
+#
+# Set maximum speed for a background block operation.
+#
+# This command can only be issued when there is an active block job.
+#
+# Throttling can be disabled by setting the speed to 0.
+#
+# @device: the device name
+#
+# @speed:  the maximum speed, in bytes per second, or 0 for unlimited.
+#          Defaults to 0.
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.1
+##
+{ 'command': 'block-job-set-speed',
+  'data': { 'device': 'str', 'speed': 'int' } }
+
+##
+# @block-job-cancel:
+#
+# Stop an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for cancellation.  It is an error to call this command if no
+# operation is in progress.
+#
+# The operation will cancel as soon as possible and then emit the
+# BLOCK_JOB_CANCELLED event.  Before that happens the job is still visible when
+# enumerated using query-block-jobs.
+#
+# For streaming, the image file retains its backing file unless the streaming
+# operation happens to complete just as it is being cancelled.  A new streaming
+# operation can be started at a later time to finish copying all data from the
+# backing file.
+#
+# @device: the device name
+#
+# @force: #optional whether to allow cancellation of a paused job (default
+#         false).  Since 1.3.
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.1
+##
+{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
+
+##
+# @block-job-pause:
+#
+# Pause an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for pausing.  It is an error to call this command if no
+# operation is in progress.  Pausing an already paused job has no cumulative
+# effect; a single block-job-resume command will resume the job.
+#
+# The operation will pause as soon as possible.  No event is emitted when
+# the operation is actually paused.  Cancelling a paused job automatically
+# resumes it.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
+
+##
+# @block-job-resume:
+#
+# Resume an active background block operation.
+#
+# This command returns immediately after resuming a paused background block
+# operation.  It is an error to call this command if no operation is in
+# progress.  Resuming an already running job is not an error.
+#
+# This command also clears the error status of the job.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
+
+##
+# @block-job-complete:
+#
+# Manually trigger completion of an active background block operation.  This
+# is supported for drive mirroring, where it also switches the device to
+# write to the target path only.  The ability to complete is signaled with
+# a BLOCK_JOB_READY event.
+#
+# This command completes an active background block operation synchronously.
+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
+# is not defined.  Note that if an I/O error occurs during the processing of
+# this command: 1) the command itself will fail; 2) the error will be processed
+# according to the rerror/werror arguments that were specified when starting
+# the operation.
+#
+# A cancelled or paused job cannot be completed.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
+
+##
+# @BlockdevDiscardOptions
+#
+# Determines how to handle discard requests.
+#
+# @ignore:      Ignore the request
+# @unmap:       Forward as an unmap request
+#
+# Since: 1.7
+##
+{ 'enum': 'BlockdevDiscardOptions',
+  'data': [ 'ignore', 'unmap' ] }
+
+##
+# @BlockdevDetectZeroesOptions
+#
+# Describes the operation mode for the automatic conversion of plain
+# zero writes by the OS to driver specific optimized zero write commands.
+#
+# @off:      Disabled (default)
+# @on:       Enabled
+# @unmap:    Enabled and even try to unmap blocks if possible. This requires
+#            also that @BlockdevDiscardOptions is set to unmap for this device.
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+  'data': [ 'off', 'on', 'unmap' ] }
+
+##
+# @BlockdevAioOptions
+#
+# Selects the AIO backend to handle I/O requests
+#
+# @threads:     Use qemu's thread pool
+# @native:      Use native AIO backend (only Linux and Windows)
+#
+# Since: 1.7
+##
+{ 'enum': 'BlockdevAioOptions',
+  'data': [ 'threads', 'native' ] }
+
+##
+# @BlockdevCacheOptions
+#
+# Includes cache-related options for block devices
+#
+# @writeback:   #optional enables writeback mode for any caches (default: true)
+# @direct:      #optional enables use of O_DIRECT (bypass the host page cache;
+#               default: false)
+# @no-flush:    #optional ignore any flush requests for the device (default:
+#               false)
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevCacheOptions',
+  'data': { '*writeback': 'bool',
+            '*direct': 'bool',
+            '*no-flush': 'bool' } }
+
+##
+# @BlockdevDriver
+#
+# Drivers that are supported in block device operations.
+#
+# @host_device, @host_cdrom, @host_floppy: Since 2.1
+#
+# Since: 2.0
+##
+{ 'enum': 'BlockdevDriver',
+  'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy',
+            'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
+            'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
+            'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
+
+##
+# @BlockdevOptionsBase
+#
+# Options that are available for all block devices, independent of the block
+# driver.
+#
+# @driver:        block driver name
+# @id:            #optional id by which the new block device can be referred to.
+#                 This is a required option on the top level of blockdev-add, and
+#                 currently not allowed on any other level.
+# @node-name:     #optional the name of a block driver state node (Since 2.0)
+# @discard:       #optional discard-related options (default: ignore)
+# @cache:         #optional cache-related options
+# @aio:           #optional AIO backend (default: threads)
+# @rerror:        #optional how to handle read errors on the device
+#                 (default: report)
+# @werror:        #optional how to handle write errors on the device
+#                 (default: enospc)
+# @read-only:     #optional whether the block device should be read-only
+#                 (default: false)
+# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
+#                 (default: off)
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsBase',
+  'data': { 'driver': 'BlockdevDriver',
+            '*id': 'str',
+            '*node-name': 'str',
+            '*discard': 'BlockdevDiscardOptions',
+            '*cache': 'BlockdevCacheOptions',
+            '*aio': 'BlockdevAioOptions',
+            '*rerror': 'BlockdevOnError',
+            '*werror': 'BlockdevOnError',
+            '*read-only': 'bool',
+            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
+
+##
+# @BlockdevOptionsFile
+#
+# Driver specific block device options for the file backend and similar
+# protocols.
+#
+# @filename:    path to the image file
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsFile',
+  'data': { 'filename': 'str' } }
+
+##
+# @BlockdevOptionsVVFAT
+#
+# Driver specific block device options for the vvfat protocol.
+#
+# @dir:         directory to be exported as FAT image
+# @fat-type:    #optional FAT type: 12, 16 or 32
+# @floppy:      #optional whether to export a floppy image (true) or
+#               partitioned hard disk (false; default)
+# @rw:          #optional whether to allow write operations (default: false)
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsVVFAT',
+  'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool',
+            '*rw': 'bool' } }
+
+##
+# @BlockdevOptionsGenericFormat
+#
+# Driver specific block device options for image format that have no option
+# besides their data source.
+#
+# @file:        reference to or definition of the data source block device
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsGenericFormat',
+  'data': { 'file': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsGenericCOWFormat
+#
+# Driver specific block device options for image format that have no option
+# besides their data source and an optional backing file.
+#
+# @backing:     #optional reference to or definition of the backing file block
+#               device (if missing, taken from the image file content). It is
+#               allowed to pass an empty string here in order to disable the
+#               default backing file.
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsGenericCOWFormat',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*backing': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsQcow2
+#
+# Driver specific block device options for qcow2.
+#
+# @lazy-refcounts:        #optional whether to enable the lazy refcounts
+#                         feature (default is taken from the image file)
+#
+# @pass-discard-request:  #optional whether discard requests to the qcow2
+#                         device should be forwarded to the data source
+#
+# @pass-discard-snapshot: #optional whether discard requests for the data source
+#                         should be issued when a snapshot operation (e.g.
+#                         deleting a snapshot) frees clusters in the qcow2 file
+#
+# @pass-discard-other:    #optional whether discard requests for the data source
+#                         should be issued on other occasions where a cluster
+#                         gets freed
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevOptionsQcow2',
+  'base': 'BlockdevOptionsGenericCOWFormat',
+  'data': { '*lazy-refcounts': 'bool',
+            '*pass-discard-request': 'bool',
+            '*pass-discard-snapshot': 'bool',
+            '*pass-discard-other': 'bool' } }
+
+##
+# @BlkdebugEvent
+#
+# Trigger events supported by blkdebug.
+##
+{ 'enum': 'BlkdebugEvent',
+  'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table',
+            'l1_grow.activate_table', 'l2_load', 'l2_update',
+            'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write',
+            'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio',
+            'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read',
+            'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update',
+            'refblock_load', 'refblock_update', 'refblock_update_part',
+            'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write',
+            'refblock_alloc.write_blocks', 'refblock_alloc.write_table',
+            'refblock_alloc.switch_table', 'cluster_alloc',
+            'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
+            'flush_to_disk' ] }
+
+##
+# @BlkdebugInjectErrorOptions
+#
+# Describes a single error injection for blkdebug.
+#
+# @event:       trigger event
+#
+# @state:       #optional the state identifier blkdebug needs to be in to
+#               actually trigger the event; defaults to "any"
+#
+# @errno:       #optional error identifier (errno) to be returned; defaults to
+#               EIO
+#
+# @sector:      #optional specifies the sector index which has to be affected
+#               in order to actually trigger the event; defaults to "any
+#               sector"
+#
+# @once:        #optional disables further events after this one has been
+#               triggered; defaults to false
+#
+# @immediately: #optional fail immediately; defaults to false
+#
+# Since: 2.0
+##
+{ 'type': 'BlkdebugInjectErrorOptions',
+  'data': { 'event': 'BlkdebugEvent',
+            '*state': 'int',
+            '*errno': 'int',
+            '*sector': 'int',
+            '*once': 'bool',
+            '*immediately': 'bool' } }
+
+##
+# @BlkdebugSetStateOptions
+#
+# Describes a single state-change event for blkdebug.
+#
+# @event:       trigger event
+#
+# @state:       #optional the current state identifier blkdebug needs to be in;
+#               defaults to "any"
+#
+# @new_state:   the state identifier blkdebug is supposed to assume if
+#               this event is triggered
+#
+# Since: 2.0
+##
+{ 'type': 'BlkdebugSetStateOptions',
+  'data': { 'event': 'BlkdebugEvent',
+            '*state': 'int',
+            'new_state': 'int' } }
+
+##
+# @BlockdevOptionsBlkdebug
+#
+# Driver specific block device options for blkdebug.
+#
+# @image:           underlying raw block device (or image file)
+#
+# @config:          #optional filename of the configuration file
+#
+# @align:           #optional required alignment for requests in bytes
+#
+# @inject-error:    #optional array of error injection descriptions
+#
+# @set-state:       #optional array of state-change descriptions
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsBlkdebug',
+  'data': { 'image': 'BlockdevRef',
+            '*config': 'str',
+            '*align': 'int',
+            '*inject-error': ['BlkdebugInjectErrorOptions'],
+            '*set-state': ['BlkdebugSetStateOptions'] } }
+
+##
+# @BlockdevOptionsBlkverify
+#
+# Driver specific block device options for blkverify.
+#
+# @test:    block device to be tested
+#
+# @raw:     raw image used for verification
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsBlkverify',
+  'data': { 'test': 'BlockdevRef',
+            'raw': 'BlockdevRef' } }
+
+##
+# @BlockdevOptionsQuorum
+#
+# Driver specific block device options for Quorum
+#
+# @blkverify:      #optional true if the driver must print content mismatch
+#                  set to false by default
+#
+# @children:       the children block devices to use
+#
+# @vote-threshold: the vote limit under which a read will fail
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsQuorum',
+  'data': { '*blkverify': 'bool',
+            'children': [ 'BlockdevRef' ],
+            'vote-threshold': 'int' } }
+
+##
+# @BlockdevOptions
+#
+# Options for creating a block device.
+#
+# Since: 1.7
+##
+{ 'union': 'BlockdevOptions',
+  'base': 'BlockdevOptionsBase',
+  'discriminator': 'driver',
+  'data': {
+      'file':       'BlockdevOptionsFile',
+      'host_device':'BlockdevOptionsFile',
+      'host_cdrom': 'BlockdevOptionsFile',
+      'host_floppy':'BlockdevOptionsFile',
+      'http':       'BlockdevOptionsFile',
+      'https':      'BlockdevOptionsFile',
+      'ftp':        'BlockdevOptionsFile',
+      'ftps':       'BlockdevOptionsFile',
+      'tftp':       'BlockdevOptionsFile',
+# TODO gluster: Wait for structured options
+# TODO iscsi: Wait for structured options
+# TODO nbd: Should take InetSocketAddress for 'host'?
+# TODO nfs: Wait for structured options
+# TODO rbd: Wait for structured options
+# TODO sheepdog: Wait for structured options
+# TODO ssh: Should take InetSocketAddress for 'host'?
+      'vvfat':      'BlockdevOptionsVVFAT',
+      'blkdebug':   'BlockdevOptionsBlkdebug',
+      'blkverify':  'BlockdevOptionsBlkverify',
+      'bochs':      'BlockdevOptionsGenericFormat',
+      'cloop':      'BlockdevOptionsGenericFormat',
+      'cow':        'BlockdevOptionsGenericCOWFormat',
+      'dmg':        'BlockdevOptionsGenericFormat',
+      'parallels':  'BlockdevOptionsGenericFormat',
+      'qcow':       'BlockdevOptionsGenericCOWFormat',
+      'qcow2':      'BlockdevOptionsQcow2',
+      'qed':        'BlockdevOptionsGenericCOWFormat',
+      'raw':        'BlockdevOptionsGenericFormat',
+      'vdi':        'BlockdevOptionsGenericFormat',
+      'vhdx':       'BlockdevOptionsGenericFormat',
+      'vmdk':       'BlockdevOptionsGenericCOWFormat',
+      'vpc':        'BlockdevOptionsGenericFormat',
+      'quorum':     'BlockdevOptionsQuorum'
+  } }
+
+##
+# @BlockdevRef
+#
+# Reference to a block device.
+#
+# @definition:      defines a new block device inline
+# @reference:       references the ID of an existing block device. An
+#                   empty string means that no block device should be
+#                   referenced.
+#
+# Since: 1.7
+##
+{ 'union': 'BlockdevRef',
+  'discriminator': {},
+  'data': { 'definition': 'BlockdevOptions',
+            'reference': 'str' } }
+
+##
+# @blockdev-add:
+#
+# Creates a new block device.
+#
+# @options: block device options for the new device
+#
+# Since: 1.7
+##
+{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } }
+
-- 
cgit v1.2.1


From 2e95fa17196aa12e7e522925ad420000162153d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Canet?= <benoit.canet@irqsave.net>
Date: Thu, 5 Jun 2014 13:45:32 +0200
Subject: qapi: Extract qapi/block.json definitions

Signed-off-by: Benoit Canet <benoit@irqsave.net>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi-schema.json | 159 -------------------------------------------------------
 qapi/block.json  | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+), 159 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index c888d23c9a..14b498b442 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -32,40 +32,6 @@
 { 'enum': 'LostTickPolicy',
   'data': ['discard', 'delay', 'merge', 'slew' ] }
 
-##
-# BiosAtaTranslation:
-#
-# Policy that BIOS should use to interpret cylinder/head/sector
-# addresses.  Note that Bochs BIOS and SeaBIOS will not actually
-# translate logical CHS to physical; instead, they will use logical
-# block addressing.
-#
-# @auto: If cylinder/heads/sizes are passed, choose between none and LBA
-#        depending on the size of the disk.  If they are not passed,
-#        choose none if QEMU can guess that the disk had 16 or fewer
-#        heads, large if QEMU can guess that the disk had 131072 or
-#        fewer tracks across all heads (i.e. cylinders*heads<131072),
-#        otherwise LBA.
-#
-# @none: The physical disk geometry is equal to the logical geometry.
-#
-# @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255
-#       heads (if fewer than 255 are enough to cover the whole disk
-#       with 1024 cylinders/head).  The number of cylinders/head is
-#       then computed based on the number of sectors and heads.
-#
-# @large: The number of cylinders per head is scaled down to 1024
-#         by correspondingly scaling up the number of heads.
-#
-# @rechs: Same as @large, but first convert a 16-head geometry to
-#         15-head, by proportionally scaling up the number of
-#         cylinders/head.
-#
-# Since: 2.0
-##
-{ 'enum': 'BiosAtaTranslation',
-  'data': ['auto', 'none', 'lba', 'large', 'rechs']}
-
 # @add_client
 #
 # Allow client connections for VNC, Spice and socket based
@@ -1200,22 +1166,6 @@
 ##
 { 'command': 'balloon', 'data': {'value': 'int'} }
 
-##
-# @BlockdevSnapshotInternal
-#
-# @device: the name of the device to generate the snapshot from
-#
-# @name: the name of the internal snapshot to be created
-#
-# Notes: In transaction, if @name is empty, or any snapshot matching @name
-#        exists, the operation will fail. Only some image formats support it,
-#        for example, qcow2, rbd, and sheepdog.
-#
-# Since: 1.7
-##
-{ 'type': 'BlockdevSnapshotInternal',
-  'data': { 'device': 'str', 'name': 'str' } }
-
 ##
 # @Abort
 #
@@ -1262,53 +1212,6 @@
 { 'command': 'transaction',
   'data': { 'actions': [ 'TransactionAction' ] } }
 
-##
-# @blockdev-snapshot-internal-sync
-#
-# Synchronously take an internal snapshot of a block device, when the format
-# of the image used supports it.
-#
-# For the arguments, see the documentation of BlockdevSnapshotInternal.
-#
-# Returns: nothing on success
-#          If @device is not a valid block device, DeviceNotFound
-#          If any snapshot matching @name exists, or @name is empty,
-#          GenericError
-#          If the format of the image used does not support it,
-#          BlockFormatFeatureNotSupported
-#
-# Since 1.7
-##
-{ 'command': 'blockdev-snapshot-internal-sync',
-  'data': 'BlockdevSnapshotInternal' }
-
-##
-# @blockdev-snapshot-delete-internal-sync
-#
-# Synchronously delete an internal snapshot of a block device, when the format
-# of the image used support it. The snapshot is identified by name or id or
-# both. One of the name or id is required. Return SnapshotInfo for the
-# successfully deleted snapshot.
-#
-# @device: the name of the device to delete the snapshot from
-#
-# @id: optional the snapshot's ID to be deleted
-#
-# @name: optional the snapshot's name to be deleted
-#
-# Returns: SnapshotInfo on success
-#          If @device is not a valid block device, DeviceNotFound
-#          If snapshot not found, GenericError
-#          If the format of the image used does not support it,
-#          BlockFormatFeatureNotSupported
-#          If @id and @name are both not specified, GenericError
-#
-# Since 1.7
-##
-{ 'command': 'blockdev-snapshot-delete-internal-sync',
-  'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
-  'returns': 'SnapshotInfo' }
-
 ##
 # @human-monitor-command:
 #
@@ -1552,25 +1455,6 @@
 ##
 { 'command': 'expire_password', 'data': {'protocol': 'str', 'time': 'str'} }
 
-##
-# @eject:
-#
-# Ejects a device from a removable drive.
-#
-# @device:  The name of the device
-#
-# @force:   @optional If true, eject regardless of whether the drive is locked.
-#           If not specified, the default value is false.
-#
-# Returns:  Nothing on success
-#           If @device is not a valid block device, DeviceNotFound
-#
-# Notes:    Ejecting a device will no media results in success
-#
-# Since: 0.14.0
-##
-{ 'command': 'eject', 'data': {'device': 'str', '*force': 'bool'} }
-
 ##
 # @change-vnc-password:
 #
@@ -2578,49 +2462,6 @@
 ##
 { 'command': 'screendump', 'data': {'filename': 'str'} }
 
-##
-# @nbd-server-start:
-#
-# Start an NBD server listening on the given host and port.  Block
-# devices can then be exported using @nbd-server-add.  The NBD
-# server will present them as named exports; for example, another
-# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
-#
-# @addr: Address on which to listen.
-#
-# Returns: error if the server is already running.
-#
-# Since: 1.3.0
-##
-{ 'command': 'nbd-server-start',
-  'data': { 'addr': 'SocketAddress' } }
-
-##
-# @nbd-server-add:
-#
-# Export a device to QEMU's embedded NBD server.
-#
-# @device: Block device to be exported
-#
-# @writable: Whether clients should be able to write to the device via the
-#     NBD connection (default false). #optional
-#
-# Returns: error if the device is already marked for export.
-#
-# Since: 1.3.0
-##
-{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} }
-
-##
-# @nbd-server-stop:
-#
-# Stop QEMU's embedded NBD server, and unregister all devices previously
-# added via @nbd-server-add.
-#
-# Since: 1.3.0
-##
-{ 'command': 'nbd-server-stop' }
-
 ##
 # @ChardevFile:
 #
diff --git a/qapi/block.json b/qapi/block.json
index e2b882f120..61c463ab05 100644
--- a/qapi/block.json
+++ b/qapi/block.json
@@ -5,3 +5,162 @@
 # QAPI block core definitions
 { 'include': 'block-core.json' }
 
+##
+# BiosAtaTranslation:
+#
+# Policy that BIOS should use to interpret cylinder/head/sector
+# addresses.  Note that Bochs BIOS and SeaBIOS will not actually
+# translate logical CHS to physical; instead, they will use logical
+# block addressing.
+#
+# @auto: If cylinder/heads/sizes are passed, choose between none and LBA
+#        depending on the size of the disk.  If they are not passed,
+#        choose none if QEMU can guess that the disk had 16 or fewer
+#        heads, large if QEMU can guess that the disk had 131072 or
+#        fewer tracks across all heads (i.e. cylinders*heads<131072),
+#        otherwise LBA.
+#
+# @none: The physical disk geometry is equal to the logical geometry.
+#
+# @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255
+#       heads (if fewer than 255 are enough to cover the whole disk
+#       with 1024 cylinders/head).  The number of cylinders/head is
+#       then computed based on the number of sectors and heads.
+#
+# @large: The number of cylinders per head is scaled down to 1024
+#         by correspondingly scaling up the number of heads.
+#
+# @rechs: Same as @large, but first convert a 16-head geometry to
+#         15-head, by proportionally scaling up the number of
+#         cylinders/head.
+#
+# Since: 2.0
+##
+{ 'enum': 'BiosAtaTranslation',
+  'data': ['auto', 'none', 'lba', 'large', 'rechs']}
+
+##
+# @BlockdevSnapshotInternal
+#
+# @device: the name of the device to generate the snapshot from
+#
+# @name: the name of the internal snapshot to be created
+#
+# Notes: In transaction, if @name is empty, or any snapshot matching @name
+#        exists, the operation will fail. Only some image formats support it,
+#        for example, qcow2, rbd, and sheepdog.
+#
+# Since: 1.7
+##
+{ 'type': 'BlockdevSnapshotInternal',
+  'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @blockdev-snapshot-internal-sync
+#
+# Synchronously take an internal snapshot of a block device, when the format
+# of the image used supports it.
+#
+# For the arguments, see the documentation of BlockdevSnapshotInternal.
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#          If any snapshot matching @name exists, or @name is empty,
+#          GenericError
+#          If the format of the image used does not support it,
+#          BlockFormatFeatureNotSupported
+#
+# Since 1.7
+##
+{ 'command': 'blockdev-snapshot-internal-sync',
+  'data': 'BlockdevSnapshotInternal' }
+
+##
+# @blockdev-snapshot-delete-internal-sync
+#
+# Synchronously delete an internal snapshot of a block device, when the format
+# of the image used support it. The snapshot is identified by name or id or
+# both. One of the name or id is required. Return SnapshotInfo for the
+# successfully deleted snapshot.
+#
+# @device: the name of the device to delete the snapshot from
+#
+# @id: optional the snapshot's ID to be deleted
+#
+# @name: optional the snapshot's name to be deleted
+#
+# Returns: SnapshotInfo on success
+#          If @device is not a valid block device, DeviceNotFound
+#          If snapshot not found, GenericError
+#          If the format of the image used does not support it,
+#          BlockFormatFeatureNotSupported
+#          If @id and @name are both not specified, GenericError
+#
+# Since 1.7
+##
+{ 'command': 'blockdev-snapshot-delete-internal-sync',
+  'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
+  'returns': 'SnapshotInfo' }
+
+##
+# @eject:
+#
+# Ejects a device from a removable drive.
+#
+# @device:  The name of the device
+#
+# @force:   @optional If true, eject regardless of whether the drive is locked.
+#           If not specified, the default value is false.
+#
+# Returns:  Nothing on success
+#           If @device is not a valid block device, DeviceNotFound
+#
+# Notes:    Ejecting a device will no media results in success
+#
+# Since: 0.14.0
+##
+{ 'command': 'eject', 'data': {'device': 'str', '*force': 'bool'} }
+
+##
+# @nbd-server-start:
+#
+# Start an NBD server listening on the given host and port.  Block
+# devices can then be exported using @nbd-server-add.  The NBD
+# server will present them as named exports; for example, another
+# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
+#
+# @addr: Address on which to listen.
+#
+# Returns: error if the server is already running.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-start',
+  'data': { 'addr': 'SocketAddress' } }
+
+##
+# @nbd-server-add:
+#
+# Export a device to QEMU's embedded NBD server.
+#
+# @device: Block device to be exported
+#
+# @writable: Whether clients should be able to write to the device via the
+#     NBD connection (default false). #optional
+#
+# Returns: error if the device is already marked for export.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} }
+
+##
+# @nbd-server-stop:
+#
+# Stop QEMU's embedded NBD server, and unregister all devices previously
+# added via @nbd-server-add.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-stop' }
+
-- 
cgit v1.2.1