From ce83ee57f6fa69280a0330f1b49ef75b0ef0742a Mon Sep 17 00:00:00 2001 From: Evgeny Yakovlev Date: Wed, 17 Aug 2016 21:06:53 +0300 Subject: block: fix deadlock in bdrv_co_flush The following commit commit 3ff2f67a7c24183fcbcfe1332e5223ac6f96438c Author: Evgeny Yakovlev Date: Mon Jul 18 22:39:52 2016 +0300 block: ignore flush requests when storage is clean has introduced a regression. There is a problem that it is still possible for 2 requests to execute in non sequential fashion and sometimes this results in a deadlock when bdrv_drain_one/all are called for BDS with such stalled requests. 1. Current flushed_gen and flush_started_gen is 1. 2. Request 1 enters bdrv_co_flush to with write_gen 1 (i.e. the same as flushed_gen). It gets past flushed_gen != flush_started_gen and sets flush_started_gen to 1 (again, the same it was before). 3. Request 1 yields somewhere before exiting bdrv_co_flush 4. Request 2 enters bdrv_co_flush with write_gen 2. It gets past flushed_gen != flush_started_gen and sets flush_started_gen to 2. 5. Request 2 runs to completion and sets flushed_gen to 2 6. Request 1 is resumed, runs to completion and sets flushed_gen to 1. However flush_started_gen is now 2. From here on out flushed_gen is always != to flush_started_gen and all further requests will wait on flush_queue. This change replaces flush_started_gen with an explicitly tracked active flush request. Signed-off-by: Evgeny Yakovlev Signed-off-by: Denis V. Lunev Message-id: 1471457214-3994-2-git-send-email-den@openvz.org CC: Stefan Hajnoczi CC: Fam Zheng CC: Kevin Wolf CC: Max Reitz Signed-off-by: Stefan Hajnoczi --- block/io.c | 5 +++-- include/block/block_int.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/block/io.c b/block/io.c index d5493ba349..9c04086286 100644 --- a/block/io.c +++ b/block/io.c @@ -2283,11 +2283,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) int current_gen = bs->write_gen; /* Wait until any previous flushes are completed */ - while (bs->flush_started_gen != bs->flushed_gen) { + while (bs->active_flush_req != NULL) { qemu_co_queue_wait(&bs->flush_queue); } - bs->flush_started_gen = current_gen; + bs->active_flush_req = &req; /* Write back all layers by calling one driver function */ if (bs->drv->bdrv_co_flush) { @@ -2357,6 +2357,7 @@ flush_parent: out: /* Notify any pending flushes that we have completed */ bs->flushed_gen = current_gen; + bs->active_flush_req = NULL; qemu_co_queue_restart_all(&bs->flush_queue); tracked_request_end(&req); diff --git a/include/block/block_int.h b/include/block/block_int.h index 47665be81e..1e939de4fe 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -443,8 +443,8 @@ struct BlockDriverState { note this is a reference count */ CoQueue flush_queue; /* Serializing flush queue */ + BdrvTrackedRequest *active_flush_req; /* Flush request in flight */ unsigned int write_gen; /* Current data generation */ - unsigned int flush_started_gen; /* Generation for which flush has started */ unsigned int flushed_gen; /* Flushed write generation */ BlockDriver *drv; /* NULL means no media */ -- cgit v1.2.1 From 156af3ac98da24f0155eed18ec546157436d6b2e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 17 Aug 2016 21:06:54 +0300 Subject: block: fix possible reorder of flush operations This patch reduce CPU usage of flush operations a bit. When we have one flush completed we should kick only next operation. We should not start all pending operations in the hope that they will go back to wait on wait_queue. Also there is a technical possibility that requests will get reordered with the previous approach. After wakeup all requests are removed from the wait queue. They become active and they are processed one-by-one adding to the wait queue in the same order. Though new flush can arrive while all requests are not put into the queue. Signed-off-by: Denis V. Lunev Tested-by: Evgeny Yakovlev Signed-off-by: Evgeny Yakovlev Message-id: 1471457214-3994-3-git-send-email-den@openvz.org CC: Stefan Hajnoczi CC: Fam Zheng CC: Kevin Wolf CC: Max Reitz Signed-off-by: Stefan Hajnoczi --- block/io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/io.c b/block/io.c index 9c04086286..420944d80d 100644 --- a/block/io.c +++ b/block/io.c @@ -2358,7 +2358,8 @@ out: /* Notify any pending flushes that we have completed */ bs->flushed_gen = current_gen; bs->active_flush_req = NULL; - qemu_co_queue_restart_all(&bs->flush_queue); + /* Return value is ignored - it's ok if wait queue is empty */ + qemu_co_queue_next(&bs->flush_queue); tracked_request_end(&req); return ret; -- cgit v1.2.1