author     Paolo Bonzini <pbonzini@redhat.com>   2012-03-30 13:17:11 +0200
committer  Kevin Wolf <kwolf@redhat.com>         2012-04-05 14:54:40 +0200
commit     3e914655f268f627ef004a8f1ea0355311b5aca6 (patch)
tree       d58c2d239b697aa5f7b87fd5074447317dbd8b80
parent     12bde0eed6b740787bca2c998a838b20c556d0ec (diff)
download   qemu-3e914655f268f627ef004a8f1ea0355311b5aca6.tar.gz
block: fix streaming/closing race
Streaming can issue I/O while qcow2_close is running. This causes the L2 caches to become very confused or, alternatively, could cause a segfault when the streaming coroutine is reentered after closing its block device. The fix is to cancel streaming jobs when closing their underlying device.

The cancellation must be synchronous; on the other hand, qemu_aio_wait will not restart a coroutine that is sleeping in co_sleep. So add a flag saying whether streaming has in-flight I/O. If the busy flag is false, the coroutine is quiescent and, when cancelled, will not issue any new I/O.

This protects streaming against closing, but not against deleting. We have a reference count protecting us against concurrent deletion, but I still added an assertion to ensure nothing bad happens.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
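As an illustration of the mechanism described above, here is a minimal, self-contained C sketch of the synchronous-cancellation loop. ToyJob, toy_aio_wait() and cancel_sync() are invented stand-ins for BlockJob, qemu_aio_wait() and the new block_job_cancel_sync(); they model the control flow only, not QEMU's actual implementation.

/* Sketch only: ToyJob, toy_aio_wait() and cancel_sync() are invented
 * stand-ins for BlockJob, qemu_aio_wait() and block_job_cancel_sync(),
 * meant to show the control flow, not QEMU's real code. */
#include <stdbool.h>
#include <stdio.h>

typedef struct ToyJob {
    bool cancelled;   /* set by the canceller, polled by the job */
    bool busy;        /* true while the job has I/O in flight */
    bool done;        /* stand-in for bs->job becoming NULL */
} ToyJob;

/* Stand-in for one iteration of qemu_aio_wait(): pretend the main loop
 * dispatched a completion and the job coroutine reacted to cancellation. */
static void toy_aio_wait(ToyJob *job)
{
    if (job->cancelled) {
        job->busy = false;   /* the job stops issuing new I/O ... */
        job->done = true;    /* ... and completes */
    }
}

/* Mirrors the shape of the new block_job_cancel_sync(): request
 * cancellation, then spin the event loop until the job is gone or
 * quiescent (busy == false). */
static void cancel_sync(ToyJob *job)
{
    job->cancelled = true;            /* like block_job_cancel() */
    while (!job->done && job->busy) {
        toy_aio_wait(job);            /* like qemu_aio_wait() */
    }
}

int main(void)
{
    ToyJob job = { .busy = true };
    cancel_sync(&job);
    printf("cancelled=%d busy=%d done=%d\n", job.cancelled, job.busy, job.done);
    return 0;
}

The loop condition mirrors the patch: once busy is false the job is sleeping and, being cancelled, will not issue new I/O, so the caller can safely continue tearing down the device.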
-rw-r--r--  block.c         | 16
-rw-r--r--  block/stream.c  |  6
-rw-r--r--  block_int.h     |  2
3 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/block.c b/block.c
index 0344673bd7..16e14fa7d5 100644
--- a/block.c
+++ b/block.c
@@ -813,6 +813,9 @@ unlink_and_fail:
 void bdrv_close(BlockDriverState *bs)
 {
     if (bs->drv) {
+        if (bs->job) {
+            block_job_cancel_sync(bs->job);
+        }
         if (bs == bs_snapshots) {
             bs_snapshots = NULL;
         }
@@ -966,6 +969,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
 void bdrv_delete(BlockDriverState *bs)
 {
     assert(!bs->dev);
+    assert(!bs->job);
+    assert(!bs->in_use);
 
     /* remove from list, if necessary */
     bdrv_make_anon(bs);
@@ -4095,3 +4100,14 @@ bool block_job_is_cancelled(BlockJob *job)
 {
     return job->cancelled;
 }
+
+void block_job_cancel_sync(BlockJob *job)
+{
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+    block_job_cancel(job);
+    while (bs->job != NULL && bs->job->busy) {
+        qemu_aio_wait();
+    }
+}
diff --git a/block/stream.c b/block/stream.c
index d1b3986a8a..f186bfd4f5 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -175,7 +175,7 @@ retry:
             break;
         }
-
+        s->common.busy = true;
         if (base) {
             ret = is_allocated_base(bs, base, sector_num,
                                     STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
@@ -189,6 +189,7 @@ retry:
         if (s->common.speed) {
             uint64_t delay_ns = ratelimit_calculate_delay(&s->limit, n);
             if (delay_ns > 0) {
+                s->common.busy = false;
                 co_sleep_ns(rt_clock, delay_ns);
 
                 /* Recheck cancellation and that sectors are unallocated */
@@ -208,6 +209,7 @@ retry:
         /* Note that even when no rate limit is applied we need to yield
          * with no pending I/O here so that qemu_aio_flush() returns.
          */
+        s->common.busy = false;
         co_sleep_ns(rt_clock, 0);
     }
@@ -215,7 +217,7 @@ retry:
         bdrv_disable_copy_on_read(bs);
     }
-    if (sector_num == end && ret == 0) {
+    if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
         const char *base_id = NULL;
         if (base) {
             base_id = s->backing_file_id;
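The block/stream.c hunks above are the job side of that handshake: busy is set before issuing I/O and cleared only right before sleeping in co_sleep_ns(), so a sleeping job is guaranteed quiescent. Below is a hedged, self-contained sketch of that discipline; job_iteration() and yield_ns() are invented placeholders for the streaming I/O and co_sleep_ns(), not QEMU code.

/* Sketch only: shows the busy-flag discipline followed by the job coroutine.
 * job_iteration() and yield_ns() are invented placeholders; in stream.c the
 * real work is the copy-on-read I/O and the sleep is co_sleep_ns(). */
#include <stdbool.h>

typedef struct ToyJob {
    bool cancelled;
    bool busy;
} ToyJob;

static bool job_iteration(ToyJob *job)   /* placeholder for one chunk of I/O */
{
    (void)job;
    return false;                        /* pretend we are done after one pass */
}

static void yield_ns(long ns)            /* placeholder for co_sleep_ns() */
{
    (void)ns;
}

static void job_loop(ToyJob *job)
{
    bool more = true;

    while (more && !job->cancelled) {
        job->busy = true;       /* I/O may be in flight: canceller must wait */
        more = job_iteration(job);
        job->busy = false;      /* quiescent: a synchronous cancel may return */
        yield_ns(0);            /* sleep/yield; recheck cancelled on wake-up */
    }
}

int main(void)
{
    ToyJob job = { 0 };
    job_loop(&job);
    return 0;
}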
diff --git a/block_int.h b/block_int.h
index 22c86a53fe..a96aabdd87 100644
--- a/block_int.h
+++ b/block_int.h
@@ -83,6 +83,7 @@ struct BlockJob {
     const BlockJobType *job_type;
     BlockDriverState *bs;
     bool cancelled;
+    bool busy;
 
     /* These fields are published by the query-block-jobs QMP API */
     int64_t offset;
@@ -311,6 +312,7 @@ void block_job_complete(BlockJob *job, int ret);
 int block_job_set_speed(BlockJob *job, int64_t value);
 void block_job_cancel(BlockJob *job);
 bool block_job_is_cancelled(BlockJob *job);
+void block_job_cancel_sync(BlockJob *job);
 
 int stream_start(BlockDriverState *bs, BlockDriverState *base,
                  const char *base_id, BlockDriverCompletionFunc *cb,