From e84a0dd5a75d8e89bec05fac432941988191df32 Mon Sep 17 00:00:00 2001 From: Xu Tian Date: Sun, 9 Oct 2016 17:17:27 +0800 Subject: block: failed qemu-img command should return non-zero exit code If the backing file cannot be opened when doing qemu-img rebase, the variable 'ret' was not assigned a non-zero value, and the qemu-img process terminated with exit code zero. Fix this. Signed-off-by: Xu Tian Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- qemu-img.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qemu-img.c b/qemu-img.c index 67e851248a..ab395a9b1a 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2956,6 +2956,7 @@ static int img_rebase(int argc, char **argv) error_reportf_err(local_err, "Could not open old backing file '%s': ", backing_name); + ret = -1; goto out; } @@ -2973,6 +2974,7 @@ static int img_rebase(int argc, char **argv) error_reportf_err(local_err, "Could not open new backing file '%s': ", out_baseimg); + ret = -1; goto out; } } -- cgit v1.2.1 From 170f4b2e5cf43895399f57ca4287f26c713be654 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 28 Sep 2016 15:04:21 +0800 Subject: qcow2: Support BDRV_REQ_MAY_UNMAP Handling this is similar to what is done to the L2 entry in the case of compressed clusters. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 9 +++++---- block/qcow2.c | 3 ++- block/qcow2.h | 3 ++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 61d1ffd223..928c1e298d 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1558,7 +1558,7 @@ fail: * clusters. */ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, - uint64_t nb_clusters) + uint64_t nb_clusters, int flags) { BDRVQcow2State *s = bs->opaque; uint64_t *l2_table; @@ -1582,7 +1582,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, /* Update L2 entries */ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); - if (old_offset & QCOW_OFLAG_COMPRESSED) { + if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); } else { @@ -1595,7 +1595,8 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, return nb_clusters; } -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, + int flags) { BDRVQcow2State *s = bs->opaque; uint64_t nb_clusters; @@ -1612,7 +1613,7 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) s->cache_discards = true; while (nb_clusters > 0) { - ret = zero_single_l2(bs, offset, nb_clusters); + ret = zero_single_l2(bs, offset, nb_clusters, flags); if (ret < 0) { goto fail; } diff --git a/block/qcow2.c b/block/qcow2.c index e11c7c9d16..6d5689a23c 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1155,6 +1155,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, /* Initialise locks */ qemu_co_mutex_init(&s->lock); + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; /* Repair image if dirty */ if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && @@ -2477,7 +2478,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count); /* Whatever is left can use real zero clusters */ - ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS); + ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags); qemu_co_mutex_unlock(&s->lock); return ret; diff --git a/block/qcow2.h b/block/qcow2.h index 9ce5a37d3a..92203a8b8c 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -547,7 +547,8 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, enum qcow2_discard_type type, bool full_discard); -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, + int flags); int qcow2_expand_zero_clusters(BlockDriverState *bs, BlockDriverAmendStatusCB *status_cb, -- cgit v1.2.1 From 0153d2f50bc2ad3f41810d838fcf66acbf10f07a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 7 Oct 2016 17:05:04 +0200 Subject: block: Remove "options" indirection from blockdev-add Now that QAPI supports boxed types, we can have unions at the top level of a command, so let's put our real options directly there for blockdev-add instead of having a single "options" dict that contains the real arguments. blockdev-add is still experimental and we already made substantial changes to the API recently, so we're free to make changes like this one, too. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Markus Armbruster Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Max Reitz --- docs/qmp-commands.txt | 84 ++++++++++++++++------------------- qapi/block-core.json | 4 +- tests/qemu-iotests/041 | 11 +++-- tests/qemu-iotests/067 | 12 +++-- tests/qemu-iotests/071 | 118 +++++++++++++++++++++---------------------------- tests/qemu-iotests/081 | 52 ++++++++++------------ tests/qemu-iotests/085 | 9 ++-- tests/qemu-iotests/087 | 76 +++++++++++++------------------ tests/qemu-iotests/117 | 12 ++--- tests/qemu-iotests/118 | 42 +++++++++--------- tests/qemu-iotests/124 | 20 ++++----- tests/qemu-iotests/139 | 10 ++--- tests/qemu-iotests/141 | 13 +++--- tests/qemu-iotests/155 | 10 ++--- 14 files changed, 214 insertions(+), 259 deletions(-) diff --git a/docs/qmp-commands.txt b/docs/qmp-commands.txt index 3220fb1075..284576d795 100644 --- a/docs/qmp-commands.txt +++ b/docs/qmp-commands.txt @@ -1090,11 +1090,11 @@ Arguments: Example: -> { "execute": "blockdev-add", - "arguments": { "options": { "driver": "qcow2", - "node-name": "node1534", - "file": { "driver": "file", - "filename": "hd1.qcow2" }, - "backing": "" } } } + "arguments": { "driver": "qcow2", + "node-name": "node1534", + "file": { "driver": "file", + "filename": "hd1.qcow2" }, + "backing": "" } } <- { "return": {} } @@ -3130,41 +3130,37 @@ This command is still a work in progress. It doesn't support all block drivers among other things. Stay away from it unless you want to help with its development. -Arguments: - -- "options": block driver options +For the arguments, see the QAPI schema documentation of BlockdevOptions. Example (1): -> { "execute": "blockdev-add", - "arguments": { "options" : { "driver": "qcow2", - "file": { "driver": "file", - "filename": "test.qcow2" } } } } + "arguments": { "driver": "qcow2", + "file": { "driver": "file", + "filename": "test.qcow2" } } } <- { "return": {} } Example (2): -> { "execute": "blockdev-add", "arguments": { - "options": { - "driver": "qcow2", - "node-name": "my_disk", - "discard": "unmap", - "cache": { - "direct": true, - "writeback": true - }, - "file": { - "driver": "file", - "filename": "/tmp/test.qcow2" - }, - "backing": { - "driver": "raw", - "file": { - "driver": "file", - "filename": "/dev/fdset/4" - } - } + "driver": "qcow2", + "node-name": "my_disk", + "discard": "unmap", + "cache": { + "direct": true, + "writeback": true + }, + "file": { + "driver": "file", + "filename": "/tmp/test.qcow2" + }, + "backing": { + "driver": "raw", + "file": { + "driver": "file", + "filename": "/dev/fdset/4" + } } } } @@ -3191,13 +3187,11 @@ Example: -> { "execute": "blockdev-add", "arguments": { - "options": { - "driver": "qcow2", - "node-name": "node0", - "file": { - "driver": "file", - "filename": "test.qcow2" - } + "driver": "qcow2", + "node-name": "node0", + "file": { + "driver": "file", + "filename": "test.qcow2" } } } @@ -3342,10 +3336,10 @@ Arguments: Example: -> { "execute": "blockdev-add", - "arguments": { "options": { "node-name": "node0", - "driver": "raw", - "file": { "driver": "file", - "filename": "fedora.iso" } } } } + "arguments": { { "node-name": "node0", + "driver": "raw", + "file": { "driver": "file", + "filename": "fedora.iso" } } } <- { "return": {} } @@ -3383,10 +3377,10 @@ Example: Add a new node to a quorum -> { "execute": "blockdev-add", - "arguments": { "options": { "driver": "raw", - "node-name": "new_node", - "file": { "driver": "file", - "filename": "test.raw" } } } } + "arguments": { "driver": "raw", + "node-name": "new_node", + "file": { "driver": "file", + "filename": "test.raw" } } } <- { "return": {} } -> { "execute": "x-blockdev-change", "arguments": { "parent": "disk1", diff --git a/qapi/block-core.json b/qapi/block-core.json index 1b7aa1befd..c59047bc94 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2312,11 +2312,11 @@ # block drivers among other things. Stay away from it unless you want # to help with its development. # -# @options: block device options for the new device +# For the arguments, see the documentation of BlockdevOptions. # # Since: 1.7 ## -{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } } +{ 'command': 'blockdev-add', 'data': 'BlockdevOptions', 'boxed': true } ## # @x-blockdev-del: diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index d1e1ad8bd2..30e628f0f7 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -194,10 +194,9 @@ class TestSingleBlockdev(TestSingleDrive): def setUp(self): TestSingleDrive.setUp(self) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img) - args = {'options': - {'driver': iotests.imgfmt, - 'node-name': self.qmp_target, - 'file': { 'filename': target_img, 'driver': 'file' } } } + args = {'driver': iotests.imgfmt, + 'node-name': self.qmp_target, + 'file': { 'filename': target_img, 'driver': 'file' } } result = self.vm.qmp("blockdev-add", **args) self.assert_qmp(result, 'return', {}) @@ -782,8 +781,8 @@ class TestRepairQuorum(iotests.QMPTestCase): self.vm.launch() #assemble the quorum block device from the individual files - args = { "options" : { "driver": "quorum", "node-name": "quorum0", - "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } } + args = { "driver": "quorum", "node-name": "quorum0", + "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } if self.has_quorum(): result = self.vm.qmp("blockdev-add", **args) self.assert_qmp(result, 'return', {}) diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067 index a12125bd46..38d23fce6b 100755 --- a/tests/qemu-iotests/067 +++ b/tests/qemu-iotests/067 @@ -119,13 +119,11 @@ run_qemu < Date: Tue, 11 Oct 2016 16:12:35 +0200 Subject: block: improve error handling in raw_open Make raw_open for POSIX more consistent in handling errors by setting the error object also when qemu_open fails. The error object was set generally set in case of errors, but I guess this case was overlooked. Do the same for win32. Signed-off-by: Halil Pasic Reviewed-by: Sascha Silbe Tested-by: Marc Hartmayer (POSIX only) Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/raw-posix.c | 1 + block/raw-win32.c | 1 + 2 files changed, 2 insertions(+) diff --git a/block/raw-posix.c b/block/raw-posix.c index 166e9d1ad5..f481e57f78 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -443,6 +443,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, fd = qemu_open(filename, s->open_flags, 0644); if (fd < 0) { ret = -errno; + error_setg_errno(errp, errno, "Could not open '%s'", filename); if (ret == -EROFS) { ret = -EACCES; } diff --git a/block/raw-win32.c b/block/raw-win32.c index 734bb105bd..800fabdd72 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -373,6 +373,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, if (s->hfile == INVALID_HANDLE_VALUE) { int err = GetLastError(); + error_setg_win32(errp, err, "Could not open '%s'", filename); if (err == ERROR_ACCESS_DENIED) { ret = -EACCES; } else { -- cgit v1.2.1 From 3ac2f2f765a0bc18dfb18c38a522e6123289ffc1 Mon Sep 17 00:00:00 2001 From: Pino Toscano Date: Tue, 18 Oct 2016 12:37:42 +0200 Subject: qapi: fix memory leak in bdrv_image_info_specific_dump The 'obj' result of the visitor was not properly freed, like done in other places doing a similar job. Signed-off-by: Pino Toscano Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/qapi.c b/block/qapi.c index 6f947e3e66..50d30907a2 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -698,6 +698,7 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f, assert(qobject_type(obj) == QTYPE_QDICT); data = qdict_get(qobject_to_qdict(obj), "data"); dump_qobject(func_fprintf, f, 1, data); + qobject_decref(obj); visit_free(v); } -- cgit v1.2.1 From 6bf77e1c2dc24da1bade16e8a9a637f3b127314d Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Mon, 17 Oct 2016 18:46:02 +0300 Subject: throttle: Correct access to wrong BlockBackendPublic structures In 27ccdd52598290f0f8b58be56e235aff7aebfaf3 the throttling fields were moved from BlockDriverState to BlockBackend. However in a few cases the code started using throttling fields from the active BlockBackend instead of the round-robin token, making the algorithm behave incorrectly. This can cause starvation if there's a throttling group with several drives but only one of them has I/O. Cc: qemu-stable@nongnu.org Reported-by: Paolo Bonzini Signed-off-by: Alberto Garcia Reviewed-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/throttle-groups.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/block/throttle-groups.c b/block/throttle-groups.c index 59545e287e..17b2efb7c7 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -168,6 +168,22 @@ static BlockBackend *throttle_group_next_blk(BlockBackend *blk) return blk_by_public(next); } +/* + * Return whether a BlockBackend has pending requests. + * + * This assumes that tg->lock is held. + * + * @blk: the BlockBackend + * @is_write: the type of operation (read/write) + * @ret: whether the BlockBackend has pending requests. + */ +static inline bool blk_has_pending_reqs(BlockBackend *blk, + bool is_write) +{ + const BlockBackendPublic *blkp = blk_get_public(blk); + return blkp->pending_reqs[is_write]; +} + /* Return the next BlockBackend in the round-robin sequence with pending I/O * requests. * @@ -188,7 +204,7 @@ static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write) /* get next bs round in round robin style */ token = throttle_group_next_blk(token); - while (token != start && !blkp->pending_reqs[is_write]) { + while (token != start && !blk_has_pending_reqs(token, is_write)) { token = throttle_group_next_blk(token); } @@ -196,10 +212,13 @@ static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write) * then decide the token is the current bs because chances are * the current bs get the current request queued. */ - if (token == start && !blkp->pending_reqs[is_write]) { + if (token == start && !blk_has_pending_reqs(token, is_write)) { token = blk; } + /* Either we return the original BB, or one with pending requests */ + assert(token == blk || blk_has_pending_reqs(token, is_write)); + return token; } @@ -257,7 +276,7 @@ static void schedule_next_request(BlockBackend *blk, bool is_write) /* Check if there's any pending request to schedule next */ token = next_throttle_token(blk, is_write); - if (!blkp->pending_reqs[is_write]) { + if (!blk_has_pending_reqs(token, is_write)) { return; } @@ -271,7 +290,7 @@ static void schedule_next_request(BlockBackend *blk, bool is_write) qemu_co_queue_next(&blkp->throttled_reqs[is_write])) { token = blk; } else { - ThrottleTimers *tt = &blkp->throttle_timers; + ThrottleTimers *tt = &blk_get_public(token)->throttle_timers; int64_t now = qemu_clock_get_ns(tt->clock_type); timer_mod(tt->timers[is_write], now + 1); tg->any_timer_armed[is_write] = true; -- cgit v1.2.1 From a26ddb43963e77aeebc2a4f011d27b2d9c017f21 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Mon, 17 Oct 2016 18:46:03 +0300 Subject: qemu-iotests: Test I/O in a single drive from a throttling group iotest 093 contains a test that creates a throttling group with several drives and performs I/O in all of them. This patch adds a new test that creates a similar setup but only performs I/O in one of the drives at the same time. This is useful to test that the round robin algorithm is behaving properly in these scenarios, and is specifically written using the regression introduced in 27ccdd52598290f0f8b58be56e as an example. Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- tests/qemu-iotests/093 | 33 ++++++++++++++++++++++++++++----- tests/qemu-iotests/093.out | 4 ++-- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/tests/qemu-iotests/093 b/tests/qemu-iotests/093 index ffcb271b36..2ed393a548 100755 --- a/tests/qemu-iotests/093 +++ b/tests/qemu-iotests/093 @@ -53,7 +53,7 @@ class ThrottleTestCase(iotests.QMPTestCase): result = self.vm.qmp("block_set_io_throttle", conv_keys=False, **params) self.assert_qmp(result, 'return', {}) - def do_test_throttle(self, ndrives, seconds, params): + def do_test_throttle(self, ndrives, seconds, params, first_drive = 0): def check_limit(limit, num): # IO throttling algorithm is discrete, allow 10% error so the test # is more robust @@ -85,12 +85,14 @@ class ThrottleTestCase(iotests.QMPTestCase): # Send I/O requests to all drives for i in range(rd_nr): for drive in range(0, ndrives): - self.vm.hmp_qemu_io("drive%d" % drive, "aio_read %d %d" % + idx = first_drive + drive + self.vm.hmp_qemu_io("drive%d" % idx, "aio_read %d %d" % (i * rq_size, rq_size)) for i in range(wr_nr): for drive in range(0, ndrives): - self.vm.hmp_qemu_io("drive%d" % drive, "aio_write %d %d" % + idx = first_drive + drive + self.vm.hmp_qemu_io("drive%d" % idx, "aio_write %d %d" % (i * rq_size, rq_size)) # We'll store the I/O stats for each drive in these arrays @@ -105,15 +107,17 @@ class ThrottleTestCase(iotests.QMPTestCase): # Read the stats before advancing the clock for i in range(0, ndrives): + idx = first_drive + i start_rd_bytes[i], start_rd_iops[i], start_wr_bytes[i], \ - start_wr_iops[i] = self.blockstats('drive%d' % i) + start_wr_iops[i] = self.blockstats('drive%d' % idx) self.vm.qtest("clock_step %d" % ns) # Read the stats after advancing the clock for i in range(0, ndrives): + idx = first_drive + i end_rd_bytes[i], end_rd_iops[i], end_wr_bytes[i], \ - end_wr_iops[i] = self.blockstats('drive%d' % i) + end_wr_iops[i] = self.blockstats('drive%d' % idx) # Check that the I/O is within the limits and evenly distributed for i in range(0, ndrives): @@ -129,6 +133,7 @@ class ThrottleTestCase(iotests.QMPTestCase): self.assertTrue(check_limit(params['iops_rd'], rd_iops)) self.assertTrue(check_limit(params['iops_wr'], wr_iops)) + # Connect N drives to a VM and test I/O in all of them def test_all(self): params = {"bps": 4096, "bps_rd": 4096, @@ -146,6 +151,24 @@ class ThrottleTestCase(iotests.QMPTestCase): self.configure_throttle(ndrives, limits) self.do_test_throttle(ndrives, 5, limits) + # Connect N drives to a VM and test I/O in just one of them a time + def test_one(self): + params = {"bps": 4096, + "bps_rd": 4096, + "bps_wr": 4096, + "iops": 10, + "iops_rd": 10, + "iops_wr": 10, + } + # Repeat the test for each one of the drives + for drive in range(0, self.max_drives): + # Pick each out of all possible params and test + for tk in params: + limits = dict([(k, 0) for k in params]) + limits[tk] = params[tk] * self.max_drives + self.configure_throttle(self.max_drives, limits) + self.do_test_throttle(1, 5, limits, drive) + def test_burst(self): params = {"bps": 4096, "bps_rd": 4096, diff --git a/tests/qemu-iotests/093.out b/tests/qemu-iotests/093.out index 914e3737bd..2f7d3902f2 100644 --- a/tests/qemu-iotests/093.out +++ b/tests/qemu-iotests/093.out @@ -1,5 +1,5 @@ -..... +....... ---------------------------------------------------------------------- -Ran 5 tests +Ran 7 tests OK -- cgit v1.2.1 From ffb31e1da7d3e044649717c4ebcb1768b5b44299 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 28 Sep 2016 22:46:42 +0200 Subject: qemu-nbd: Add --fork option Using the --fork option, one can make qemu-nbd fork the worker process. The original process will exit on error of the worker or once the worker enters the main loop. Suggested-by: Sascha Silbe Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- qemu-nbd.c | 17 ++++++++++++++++- qemu-nbd.texi | 2 ++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index cca4a983b7..b757dc7621 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -48,6 +48,7 @@ #define QEMU_NBD_OPT_OBJECT 260 #define QEMU_NBD_OPT_TLSCREDS 261 #define QEMU_NBD_OPT_IMAGE_OPTS 262 +#define QEMU_NBD_OPT_FORK 263 #define MBR_SIZE 512 @@ -92,6 +93,8 @@ static void usage(const char *name) " passwords and/or encryption keys\n" " -T, --trace [[enable=]][,events=][,file=]\n" " specify tracing options\n" +" --fork fork off the server process and exit the parent\n" +" once the server is running\n" #ifdef __linux__ "Kernel NBD client support:\n" " -c, --connect=DEV connect FILE to the local NBD device DEV\n" @@ -503,6 +506,7 @@ int main(int argc, char **argv) { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS }, { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS }, { "trace", required_argument, NULL, 'T' }, + { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK }, { NULL, 0, NULL, 0 } }; int ch; @@ -524,6 +528,8 @@ int main(int argc, char **argv) bool imageOpts = false; bool writethrough = true; char *trace_file = NULL; + bool fork_process = false; + int old_stderr = -1; /* The client thread uses SIGTERM to interrupt the server. A signal * handler ensures that "qemu-nbd -v -c" exits with a nice status code. @@ -715,6 +721,9 @@ int main(int argc, char **argv) g_free(trace_file); trace_file = trace_opt_parse(optarg); break; + case QEMU_NBD_OPT_FORK: + fork_process = true; + break; } } @@ -774,7 +783,7 @@ int main(int argc, char **argv) return 0; } - if (device && !verbose) { + if ((device && !verbose) || fork_process) { int stderr_fd[2]; pid_t pid; int ret; @@ -797,6 +806,7 @@ int main(int argc, char **argv) ret = qemu_daemon(1, 0); /* Temporarily redirect stderr to the parent's pipe... */ + old_stderr = dup(STDERR_FILENO); dup2(stderr_fd[1], STDERR_FILENO); if (ret < 0) { error_report("Failed to daemonize: %s", strerror(errno)); @@ -960,6 +970,11 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + if (fork_process) { + dup2(old_stderr, STDERR_FILENO); + close(old_stderr); + } + state = RUNNING; do { main_loop_wait(false); diff --git a/qemu-nbd.texi b/qemu-nbd.texi index 91ebf04b5b..b7a9c6d02f 100644 --- a/qemu-nbd.texi +++ b/qemu-nbd.texi @@ -86,6 +86,8 @@ the new style NBD protocol negotiation Enable mandatory TLS encryption for the server by setting the ID of the TLS credentials object previously created with the --object option. +@item --fork +Fork off the server process and exit the parent once the server is running. @item -v, --verbose Display extra debugging information @item -h, --help -- cgit v1.2.1 From 668b4406314828c8b22ffe5951c3014380d778f1 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 28 Sep 2016 22:46:43 +0200 Subject: iotests: Remove raciness from 162 With qemu-nbd's new --fork option, we no longer need to launch it the hacky way. Suggested-by: Sascha Silbe Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/162 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/qemu-iotests/162 b/tests/qemu-iotests/162 index 0b43ea3395..c7e65936ca 100755 --- a/tests/qemu-iotests/162 +++ b/tests/qemu-iotests/162 @@ -51,8 +51,7 @@ $QEMU_IMG info 'json:{"driver": "nbd", "host": "does.not.exist.example.com", "po # This is a test for NBD's bdrv_refresh_filename() implementation: It expects # either host or path to be set, but it must not assume that they are set to # strings in the options QDict -$QEMU_NBD -k "$PWD/42" -f raw null-co:// & -sleep 0.5 +$QEMU_NBD -k "$PWD/42" -f raw --fork null-co:// $QEMU_IMG info 'json:{"driver": "nbd", "path": 42}' | grep '^image' rm -f 42 -- cgit v1.2.1 From 12ac9d9e903e1a23d59aeb18d5eb50ddcf9ec975 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 28 Sep 2016 22:46:44 +0200 Subject: iotests: Do not rely on unavailable domains in 162 There are some (mostly ISP-specific) name servers who will redirect non-existing domains to special hosts. In this case, we will get a different error message when trying to connect to such a host, which breaks test 162. 162 needed this specific error message so it can confirm that qemu was indeed trying to connect to the user-specified port. However, we can also confirm this by setting up a local NBD server on exactly that port; so we can fix the issue by doing just that. Reported-by: Stefan Hajnoczi Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/162 | 19 +++++++++++++++---- tests/qemu-iotests/162.out | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/qemu-iotests/162 b/tests/qemu-iotests/162 index c7e65936ca..f8eecb325b 100755 --- a/tests/qemu-iotests/162 +++ b/tests/qemu-iotests/162 @@ -43,10 +43,21 @@ echo '=== NBD ===' $QEMU_IMG info 'json:{"driver": "nbd", "host": 42}' # And this should not treat @port as if it had not been specified -# (We cannot use localhost with an invalid port here, but we need to use a -# non-existing domain, because otherwise the error message will not contain -# the port) -$QEMU_IMG info 'json:{"driver": "nbd", "host": "does.not.exist.example.com", "port": 42}' +# (We need to set up a server here, because the error message for "Connection +# refused" does not contain the destination port) + +# Launching qemu-nbd is done in a loop: We try to set up an NBD server on some +# random port and continue until success, i.e. until we have found a port that +# is not in use yet. +while true; do + port=$((RANDOM + 32768)) + if $QEMU_NBD -p $port -f raw --fork null-co:// 2> /dev/null; then + break + fi +done + +$QEMU_IMG info "json:{'driver': 'nbd', 'host': 'localhost', 'port': $port}" \ + | grep '^image' | sed -e "s/$port/PORT/" # This is a test for NBD's bdrv_refresh_filename() implementation: It expects # either host or path to be set, but it must not assume that they are set to diff --git a/tests/qemu-iotests/162.out b/tests/qemu-iotests/162.out index 9bba72353a..3c5be2c569 100644 --- a/tests/qemu-iotests/162.out +++ b/tests/qemu-iotests/162.out @@ -2,7 +2,7 @@ QA output created by 162 === NBD === qemu-img: Could not open 'json:{"driver": "nbd", "host": 42}': Failed to connect socket: Invalid argument -qemu-img: Could not open 'json:{"driver": "nbd", "host": "does.not.exist.example.com", "port": 42}': address resolution failed for does.not.exist.example.com:42: Name or service not known +image: nbd://localhost:PORT image: nbd+unix://?socket=42 === SSH === -- cgit v1.2.1 From 86ec252c198a62b3a1da3ac62277adf917d985cf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Oct 2016 18:35:26 +0200 Subject: quorum: change child_iter to children_read This simplifies a bit the code by using the usual C "inclusive start, exclusive end" pattern for ranges. Signed-off-by: Paolo Bonzini Message-id: 1475685327-22767-2-git-send-email-pbonzini@redhat.com Reviewed-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Max Reitz --- block/quorum.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 9cf876fb34..435296eef6 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -130,7 +130,7 @@ struct QuorumAIOCB { bool is_read; int vote_ret; - int child_iter; /* which child to read in fifo pattern */ + int children_read; /* how many children have been read from */ }; static bool quorum_vote(QuorumAIOCB *acb); @@ -165,8 +165,7 @@ static void quorum_aio_finalize(QuorumAIOCB *acb) acb->common.cb(acb->common.opaque, ret); if (acb->is_read) { - /* on the quorum case acb->child_iter == s->num_children - 1 */ - for (i = 0; i <= acb->child_iter; i++) { + for (i = 0; i < acb->children_read; i++) { qemu_vfree(acb->qcrs[i].buf); qemu_iovec_destroy(&acb->qcrs[i].qiov); } @@ -301,14 +300,13 @@ static void quorum_aio_cb(void *opaque, int ret) if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) { /* We try to read next child in FIFO order if we fail to read */ - if (ret < 0 && (acb->child_iter + 1) < s->num_children) { - acb->child_iter++; + if (ret < 0 && acb->children_read < s->num_children) { read_fifo_child(acb); return; } if (ret == 0) { - quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->child_iter].qiov); + quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->children_read - 1].qiov); } acb->vote_ret = ret; quorum_aio_finalize(acb); @@ -653,6 +651,7 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) BDRVQuorumState *s = acb->common.bs->opaque; int i; + acb->children_read = s->num_children; for (i = 0; i < s->num_children; i++) { acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size); qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov); @@ -671,16 +670,14 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->common.bs->opaque; + int n = acb->children_read++; - acb->qcrs[acb->child_iter].buf = - qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size); - qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov); - qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov, - acb->qcrs[acb->child_iter].buf); - acb->qcrs[acb->child_iter].aiocb = - bdrv_aio_readv(s->children[acb->child_iter], acb->sector_num, - &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[acb->child_iter]); + acb->qcrs[n].buf = qemu_blockalign(s->children[n]->bs, acb->qiov->size); + qemu_iovec_init(&acb->qcrs[n].qiov, acb->qiov->niov); + qemu_iovec_clone(&acb->qcrs[n].qiov, acb->qiov, acb->qcrs[n].buf); + acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num, + &acb->qcrs[n].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[n]); return &acb->common; } @@ -696,13 +693,12 @@ static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs, QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors, cb, opaque); acb->is_read = true; + acb->children_read = 0; if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { - acb->child_iter = s->num_children - 1; return read_quorum_children(acb); } - acb->child_iter = 0; return read_fifo_child(acb); } -- cgit v1.2.1 From 1ba7e159787d7c96a8783584395b670c22bba4ef Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Oct 2016 18:35:27 +0200 Subject: quorum: do not allocate multiple iovecs for FIFO strategy In FIFO mode there are no parallel reads, hence there is no need to allocate separate buffers and clone the iovecs. The two cases of quorum_aio_cb are now even more different, and most of quorum_aio_finalize is only needed in one of them, so split them in separate functions. Signed-off-by: Paolo Bonzini Message-id: 1475685327-22767-3-git-send-email-pbonzini@redhat.com Reviewed-by: Max Reitz Reviewed-by: Alberto Garcia Signed-off-by: Max Reitz --- block/quorum.c | 79 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 435296eef6..d122299352 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -156,21 +156,7 @@ static AIOCBInfo quorum_aiocb_info = { static void quorum_aio_finalize(QuorumAIOCB *acb) { - int i, ret = 0; - - if (acb->vote_ret) { - ret = acb->vote_ret; - } - - acb->common.cb(acb->common.opaque, ret); - - if (acb->is_read) { - for (i = 0; i < acb->children_read; i++) { - qemu_vfree(acb->qcrs[i].buf); - qemu_iovec_destroy(&acb->qcrs[i].qiov); - } - } - + acb->common.cb(acb->common.opaque, acb->vote_ret); g_free(acb->qcrs); qemu_aio_unref(acb); } @@ -282,38 +268,52 @@ static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) } } -static void quorum_aio_cb(void *opaque, int ret) +static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret) +{ + QuorumAIOCB *acb = sacb->parent; + QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; + quorum_report_bad(type, acb->sector_num, acb->nb_sectors, + sacb->aiocb->bs->node_name, ret); +} + +static void quorum_fifo_aio_cb(void *opaque, int ret) { QuorumChildRequest *sacb = opaque; QuorumAIOCB *acb = sacb->parent; BDRVQuorumState *s = acb->common.bs->opaque; - bool rewrite = false; - if (ret == 0) { - acb->success_count++; - } else { - QuorumOpType type; - type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; - quorum_report_bad(type, acb->sector_num, acb->nb_sectors, - sacb->aiocb->bs->node_name, ret); - } + assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO); + + if (ret < 0) { + quorum_report_bad_acb(sacb, ret); - if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) { /* We try to read next child in FIFO order if we fail to read */ - if (ret < 0 && acb->children_read < s->num_children) { + if (acb->children_read < s->num_children) { read_fifo_child(acb); return; } - - if (ret == 0) { - quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->children_read - 1].qiov); - } - acb->vote_ret = ret; - quorum_aio_finalize(acb); - return; } + acb->vote_ret = ret; + + /* FIXME: rewrite failed children if acb->children_read > 1? */ + quorum_aio_finalize(acb); +} + +static void quorum_aio_cb(void *opaque, int ret) +{ + QuorumChildRequest *sacb = opaque; + QuorumAIOCB *acb = sacb->parent; + BDRVQuorumState *s = acb->common.bs->opaque; + bool rewrite = false; + int i; + sacb->ret = ret; + if (ret == 0) { + acb->success_count++; + } else { + quorum_report_bad_acb(sacb, ret); + } acb->count++; assert(acb->count <= s->num_children); assert(acb->success_count <= s->num_children); @@ -324,6 +324,10 @@ static void quorum_aio_cb(void *opaque, int ret) /* Do the vote on read */ if (acb->is_read) { rewrite = quorum_vote(acb); + for (i = 0; i < s->num_children; i++) { + qemu_vfree(acb->qcrs[i].buf); + qemu_iovec_destroy(&acb->qcrs[i].qiov); + } } else { quorum_has_too_much_io_failed(acb); } @@ -672,12 +676,9 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) BDRVQuorumState *s = acb->common.bs->opaque; int n = acb->children_read++; - acb->qcrs[n].buf = qemu_blockalign(s->children[n]->bs, acb->qiov->size); - qemu_iovec_init(&acb->qcrs[n].qiov, acb->qiov->niov); - qemu_iovec_clone(&acb->qcrs[n].qiov, acb->qiov, acb->qcrs[n].buf); acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num, - &acb->qcrs[n].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[n]); + acb->qiov, acb->nb_sectors, + quorum_fifo_aio_cb, &acb->qcrs[n]); return &acb->common; } -- cgit v1.2.1 From dc162c8e4f088b08575460cca35b042d58c141aa Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:21 -0400 Subject: block: Hide HBitmap in block dirty bitmap interface HBitmap is an implementation detail of block dirty bitmap that should be hidden from users. Introduce a BdrvDirtyBitmapIter to encapsulate the underlying HBitmapIter. A small difference in the interface is, before, an HBitmapIter is initialized in place, now the new BdrvDirtyBitmapIter must be dynamically allocated because the structure definition is in block/dirty-bitmap.c. Two current users are converted too. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-2-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/backup.c | 14 ++++++++------ block/dirty-bitmap.c | 39 +++++++++++++++++++++++++++++++++------ block/mirror.c | 24 +++++++++++++----------- include/block/dirty-bitmap.h | 7 +++++-- include/qemu/typedefs.h | 1 + 5 files changed, 60 insertions(+), 25 deletions(-) diff --git a/block/backup.c b/block/backup.c index 582bd0f7ee..02dbe48035 100644 --- a/block/backup.c +++ b/block/backup.c @@ -372,14 +372,14 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) int64_t end; int64_t last_cluster = -1; int64_t sectors_per_cluster = cluster_size_sectors(job); - HBitmapIter hbi; + BdrvDirtyBitmapIter *dbi; granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap); clusters_per_iter = MAX((granularity / job->cluster_size), 1); - bdrv_dirty_iter_init(job->sync_bitmap, &hbi); + dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0); /* Find the next dirty sector(s) */ - while ((sector = hbitmap_iter_next(&hbi)) != -1) { + while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { cluster = sector / sectors_per_cluster; /* Fake progress updates for any clusters we skipped */ @@ -391,7 +391,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) for (end = cluster + clusters_per_iter; cluster < end; cluster++) { do { if (yield_and_check(job)) { - return ret; + goto out; } ret = backup_do_cow(job, cluster * sectors_per_cluster, sectors_per_cluster, &error_is_read, @@ -399,7 +399,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) if ((ret < 0) && backup_error_action(job, error_is_read, -ret) == BLOCK_ERROR_ACTION_REPORT) { - return ret; + goto out; } } while (ret < 0); } @@ -407,7 +407,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) /* If the bitmap granularity is smaller than the backup granularity, * we need to advance the iterator pointer to the next cluster. */ if (granularity < job->cluster_size) { - bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster); + bdrv_set_dirty_iter(dbi, cluster * sectors_per_cluster); } last_cluster = cluster - 1; @@ -419,6 +419,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) job->common.offset += ((end - last_cluster - 1) * job->cluster_size); } +out: + bdrv_dirty_iter_free(dbi); return ret; } diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index f2bfdcfdea..c572dfa941 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -42,9 +42,15 @@ struct BdrvDirtyBitmap { char *name; /* Optional non-empty unique ID */ int64_t size; /* Size of the bitmap (Number of sectors) */ bool disabled; /* Bitmap is read-only */ + int active_iterators; /* How many iterators are active */ QLIST_ENTRY(BdrvDirtyBitmap) list; }; +struct BdrvDirtyBitmapIter { + HBitmapIter hbi; + BdrvDirtyBitmap *bitmap; +}; + BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) { BdrvDirtyBitmap *bm; @@ -212,6 +218,7 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); + assert(!bitmap->active_iterators); hbitmap_truncate(bitmap->bitmap, size); bitmap->size = size; } @@ -224,6 +231,7 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bm, *next; QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { + assert(!bm->active_iterators); assert(!bdrv_dirty_bitmap_frozen(bm)); QLIST_REMOVE(bm, list); hbitmap_free(bm->bitmap); @@ -320,9 +328,29 @@ uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, + uint64_t first_sector) +{ + BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); + hbitmap_iter_init(&iter->hbi, bitmap->bitmap, first_sector); + iter->bitmap = bitmap; + bitmap->active_iterators++; + return iter; +} + +void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) +{ + if (!iter) { + return; + } + assert(iter->bitmap->active_iterators > 0); + iter->bitmap->active_iterators--; + g_free(iter); +} + +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) { - hbitmap_iter_init(hbi, bitmap->bitmap, 0); + return hbitmap_iter_next(&iter->hbi); } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, @@ -373,12 +401,11 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, } /** - * Advance an HBitmapIter to an arbitrary offset. + * Advance a BdrvDirtyBitmapIter to an arbitrary offset. */ -void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num) { - assert(hbi->hb); - hbitmap_iter_init(hbi, hbi->hb, offset); + hbitmap_iter_init(&iter->hbi, iter->hbi.hb, sector_num); } int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) diff --git a/block/mirror.c b/block/mirror.c index f9d1fecaa0..a433e6848c 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -55,7 +55,7 @@ typedef struct MirrorBlockJob { int64_t bdev_length; unsigned long *cow_bitmap; BdrvDirtyBitmap *dirty_bitmap; - HBitmapIter hbi; + BdrvDirtyBitmapIter *dbi; uint8_t *buf; QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; int buf_free_count; @@ -330,10 +330,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT, MAX_IO_SECTORS); - sector_num = hbitmap_iter_next(&s->hbi); + sector_num = bdrv_dirty_iter_next(s->dbi); if (sector_num < 0) { - bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); - sector_num = hbitmap_iter_next(&s->hbi); + bdrv_set_dirty_iter(s->dbi, 0); + sector_num = bdrv_dirty_iter_next(s->dbi); trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); assert(sector_num >= 0); } @@ -349,7 +349,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) /* Find the number of consective dirty chunks following the first dirty * one, and wait for in flight requests in them. */ while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) { - int64_t hbitmap_next; + int64_t next_dirty; int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk; int64_t next_chunk = next_sector / sectors_per_chunk; if (next_sector >= end || @@ -360,13 +360,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) break; } - hbitmap_next = hbitmap_iter_next(&s->hbi); - if (hbitmap_next > next_sector || hbitmap_next < 0) { + next_dirty = bdrv_dirty_iter_next(s->dbi); + if (next_dirty > next_sector || next_dirty < 0) { /* The bitmap iterator's cache is stale, refresh it */ - bdrv_set_dirty_iter(&s->hbi, next_sector); - hbitmap_next = hbitmap_iter_next(&s->hbi); + bdrv_set_dirty_iter(s->dbi, next_sector); + next_dirty = bdrv_dirty_iter_next(s->dbi); } - assert(hbitmap_next == next_sector); + assert(next_dirty == next_sector); nb_chunks++; } @@ -679,7 +679,8 @@ static void coroutine_fn mirror_run(void *opaque) } } - bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); + assert(!s->dbi); + s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap, 0); for (;;) { uint64_t delay_ns = 0; int64_t cnt, delta; @@ -793,6 +794,7 @@ immediate_exit: qemu_vfree(s->buf); g_free(s->cow_bitmap); g_free(s->in_flight_bitmap); + bdrv_dirty_iter_free(s->dbi); bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); data = g_malloc(sizeof(*data)); diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index ee3388f90d..0ef927d6ce 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -36,8 +36,11 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); -void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, + uint64_t first_sector); +void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index b113fcf156..1b8c30a7a0 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -11,6 +11,7 @@ typedef struct AioContext AioContext; typedef struct AllwinnerAHCIState AllwinnerAHCIState; typedef struct AudioState AudioState; typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; +typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter; typedef struct BlockBackend BlockBackend; typedef struct BlockBackendRootState BlockBackendRootState; typedef struct BlockDriverState BlockDriverState; -- cgit v1.2.1 From 07ac4cdb570c9cf6aed0338414f4f09fee0b549c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:22 -0400 Subject: HBitmap: Introduce "meta" bitmap to track bit changes Upon each bit toggle, the corresponding bit in the meta bitmap will be set. Signed-off-by: Fam Zheng [Amended text inline. --js] Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-3-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- include/qemu/hbitmap.h | 21 +++++++++++++++ util/hbitmap.c | 69 +++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index 8ab721e5aa..17259191b6 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -178,6 +178,27 @@ void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first); */ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi); +/* hbitmap_create_meta: + * Create a "meta" hbitmap to track dirtiness of the bits in this HBitmap. + * The caller owns the created bitmap and must call hbitmap_free_meta(hb) to + * free it. + * + * Currently, we only guarantee that if a bit in the hbitmap is changed it + * will be reflected in the meta bitmap, but we do not yet guarantee the + * opposite. + * + * @hb: The HBitmap to operate on. + * @chunk_size: How many bits in @hb does one bit in the meta track. + */ +HBitmap *hbitmap_create_meta(HBitmap *hb, int chunk_size); + +/* hbitmap_free_meta: + * Free the meta bitmap of @hb. + * + * @hb: The HBitmap whose meta bitmap should be freed. + */ +void hbitmap_free_meta(HBitmap *hb); + /** * hbitmap_iter_next: * @hbi: HBitmapIter to operate on. diff --git a/util/hbitmap.c b/util/hbitmap.c index 99fd2ba37b..f303975371 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -78,6 +78,9 @@ struct HBitmap { */ int granularity; + /* A meta dirty bitmap to track the dirtiness of bits in this HBitmap. */ + HBitmap *meta; + /* A number of progressively less coarse bitmaps (i.e. level 0 is the * coarsest). Each bit in level N represents a word in level N+1 that * has a set bit, except the last level where each bit represents the @@ -209,25 +212,27 @@ static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last) } /* Setting starts at the last layer and propagates up if an element - * changes from zero to non-zero. + * changes. */ static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last) { unsigned long mask; - bool changed; + unsigned long old; assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); assert(start <= last); mask = 2UL << (last & (BITS_PER_LONG - 1)); mask -= 1UL << (start & (BITS_PER_LONG - 1)); - changed = (*elem == 0); + old = *elem; *elem |= mask; - return changed; + return old != *elem; } -/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ -static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... + * Returns true if at least one bit is changed. */ +static bool hb_set_between(HBitmap *hb, int level, uint64_t start, + uint64_t last) { size_t pos = start >> BITS_PER_LEVEL; size_t lastpos = last >> BITS_PER_LEVEL; @@ -256,23 +261,28 @@ static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last if (level > 0 && changed) { hb_set_between(hb, level - 1, pos, lastpos); } + return changed; } void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) { /* Compute range in the last layer. */ + uint64_t first, n; uint64_t last = start + count - 1; trace_hbitmap_set(hb, start, count, start >> hb->granularity, last >> hb->granularity); - start >>= hb->granularity; + first = start >> hb->granularity; last >>= hb->granularity; - count = last - start + 1; assert(last < hb->size); + n = last - first + 1; - hb->count += count - hb_count_between(hb, start, last); - hb_set_between(hb, HBITMAP_LEVELS - 1, start, last); + hb->count += n - hb_count_between(hb, first, last); + if (hb_set_between(hb, HBITMAP_LEVELS - 1, first, last) && + hb->meta) { + hbitmap_set(hb->meta, start, count); + } } /* Resetting works the other way round: propagate up if the new @@ -293,8 +303,10 @@ static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t l return blanked; } -/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ -static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... + * Returns true if at least one bit is changed. */ +static bool hb_reset_between(HBitmap *hb, int level, uint64_t start, + uint64_t last) { size_t pos = start >> BITS_PER_LEVEL; size_t lastpos = last >> BITS_PER_LEVEL; @@ -337,22 +349,29 @@ static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t la if (level > 0 && changed) { hb_reset_between(hb, level - 1, pos, lastpos); } + + return changed; + } void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) { /* Compute range in the last layer. */ + uint64_t first; uint64_t last = start + count - 1; trace_hbitmap_reset(hb, start, count, start >> hb->granularity, last >> hb->granularity); - start >>= hb->granularity; + first = start >> hb->granularity; last >>= hb->granularity; assert(last < hb->size); - hb->count -= hb_count_between(hb, start, last); - hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last); + hb->count -= hb_count_between(hb, first, last); + if (hb_reset_between(hb, HBITMAP_LEVELS - 1, first, last) && + hb->meta) { + hbitmap_set(hb->meta, start, count); + } } void hbitmap_reset_all(HBitmap *hb) @@ -381,6 +400,7 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item) void hbitmap_free(HBitmap *hb) { unsigned i; + assert(!hb->meta); for (i = HBITMAP_LEVELS; i-- > 0; ) { g_free(hb->levels[i]); } @@ -458,6 +478,9 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size) (size - old) * sizeof(*hb->levels[i])); } } + if (hb->meta) { + hbitmap_truncate(hb->meta, hb->size << hb->granularity); + } } @@ -493,3 +516,19 @@ bool hbitmap_merge(HBitmap *a, const HBitmap *b) return true; } + +HBitmap *hbitmap_create_meta(HBitmap *hb, int chunk_size) +{ + assert(!(chunk_size & (chunk_size - 1))); + assert(!hb->meta); + hb->meta = hbitmap_alloc(hb->size << hb->granularity, + hb->granularity + ctz32(chunk_size)); + return hb->meta; +} + +void hbitmap_free_meta(HBitmap *hb) +{ + assert(hb->meta); + hbitmap_free(hb->meta); + hb->meta = NULL; +} -- cgit v1.2.1 From 4b62818a4f3075fce1e9335d91beef737a8fc9d3 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:23 -0400 Subject: tests: Add test code for meta bitmap Signed-off-by: Fam Zheng Reviewed-by: John Snow Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-4-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- tests/test-hbitmap.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c index c0e9895fb9..e3abde1aae 100644 --- a/tests/test-hbitmap.c +++ b/tests/test-hbitmap.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qemu/hbitmap.h" +#include "block/block.h" #define LOG_BITS_PER_LONG (BITS_PER_LONG == 32 ? 5 : 6) @@ -20,6 +21,7 @@ typedef struct TestHBitmapData { HBitmap *hb; + HBitmap *meta; unsigned long *bits; size_t size; size_t old_size; @@ -91,6 +93,14 @@ static void hbitmap_test_init(TestHBitmapData *data, } } +static void hbitmap_test_init_meta(TestHBitmapData *data, + uint64_t size, int granularity, + int meta_chunk) +{ + hbitmap_test_init(data, size, granularity); + data->meta = hbitmap_create_meta(data->hb, meta_chunk); +} + static inline size_t hbitmap_test_array_size(size_t bits) { size_t n = DIV_ROUND_UP(bits, BITS_PER_LONG); @@ -133,6 +143,9 @@ static void hbitmap_test_teardown(TestHBitmapData *data, const void *unused) { if (data->hb) { + if (data->meta) { + hbitmap_free_meta(data->hb); + } hbitmap_free(data->hb); data->hb = NULL; } @@ -634,6 +647,103 @@ static void test_hbitmap_truncate_shrink_large(TestHBitmapData *data, hbitmap_test_truncate(data, size, -diff, 0); } +static void hbitmap_check_meta(TestHBitmapData *data, + int64_t start, int count) +{ + int64_t i; + + for (i = 0; i < data->size; i++) { + if (i >= start && i < start + count) { + g_assert(hbitmap_get(data->meta, i)); + } else { + g_assert(!hbitmap_get(data->meta, i)); + } + } +} + +static void hbitmap_test_meta(TestHBitmapData *data, + int64_t start, int count, + int64_t check_start, int check_count) +{ + hbitmap_reset_all(data->hb); + hbitmap_reset_all(data->meta); + + /* Test "unset" -> "unset" will not update meta. */ + hbitmap_reset(data->hb, start, count); + hbitmap_check_meta(data, 0, 0); + + /* Test "unset" -> "set" will update meta */ + hbitmap_set(data->hb, start, count); + hbitmap_check_meta(data, check_start, check_count); + + /* Test "set" -> "set" will not update meta */ + hbitmap_reset_all(data->meta); + hbitmap_set(data->hb, start, count); + hbitmap_check_meta(data, 0, 0); + + /* Test "set" -> "unset" will update meta */ + hbitmap_reset_all(data->meta); + hbitmap_reset(data->hb, start, count); + hbitmap_check_meta(data, check_start, check_count); +} + +static void hbitmap_test_meta_do(TestHBitmapData *data, int chunk_size) +{ + uint64_t size = chunk_size * 100; + hbitmap_test_init_meta(data, size, 0, chunk_size); + + hbitmap_test_meta(data, 0, 1, 0, chunk_size); + hbitmap_test_meta(data, 0, chunk_size, 0, chunk_size); + hbitmap_test_meta(data, chunk_size - 1, 1, 0, chunk_size); + hbitmap_test_meta(data, chunk_size - 1, 2, 0, chunk_size * 2); + hbitmap_test_meta(data, chunk_size - 1, chunk_size + 1, 0, chunk_size * 2); + hbitmap_test_meta(data, chunk_size - 1, chunk_size + 2, 0, chunk_size * 3); + hbitmap_test_meta(data, 7 * chunk_size - 1, chunk_size + 2, + 6 * chunk_size, chunk_size * 3); + hbitmap_test_meta(data, size - 1, 1, size - chunk_size, chunk_size); + hbitmap_test_meta(data, 0, size, 0, size); +} + +static void test_hbitmap_meta_byte(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_meta_do(data, BITS_PER_BYTE); +} + +static void test_hbitmap_meta_word(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_meta_do(data, BITS_PER_LONG); +} + +static void test_hbitmap_meta_sector(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_meta_do(data, BDRV_SECTOR_SIZE * BITS_PER_BYTE); +} + +/** + * Create an HBitmap and test set/unset. + */ +static void test_hbitmap_meta_one(TestHBitmapData *data, const void *unused) +{ + int i; + int64_t offsets[] = { + 0, 1, L1 - 1, L1, L1 + 1, L2 - 1, L2, L2 + 1, L3 - 1, L3, L3 + 1 + }; + + hbitmap_test_init_meta(data, L3 * 2, 0, 1); + for (i = 0; i < ARRAY_SIZE(offsets); i++) { + hbitmap_test_meta(data, offsets[i], 1, offsets[i], 1); + hbitmap_test_meta(data, offsets[i], L1, offsets[i], L1); + hbitmap_test_meta(data, offsets[i], L2, offsets[i], L2); + } +} + +static void test_hbitmap_meta_zero(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_init_meta(data, 0, 0, 1); + + hbitmap_check_meta(data, 0, 0); +} + static void hbitmap_test_add(const char *testpath, void (*test_func)(TestHBitmapData *data, const void *user_data)) { @@ -683,6 +793,12 @@ int main(int argc, char **argv) test_hbitmap_truncate_grow_large); hbitmap_test_add("/hbitmap/truncate/shrink/large", test_hbitmap_truncate_shrink_large); + + hbitmap_test_add("/hbitmap/meta/zero", test_hbitmap_meta_zero); + hbitmap_test_add("/hbitmap/meta/one", test_hbitmap_meta_one); + hbitmap_test_add("/hbitmap/meta/byte", test_hbitmap_meta_byte); + hbitmap_test_add("/hbitmap/meta/word", test_hbitmap_meta_word); + hbitmap_test_add("/hbitmap/meta/sector", test_hbitmap_meta_sector); g_test_run(); return 0; -- cgit v1.2.1 From fb933437de327a9a322d4c2473d0c23aa8d9a267 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:24 -0400 Subject: block: Support meta dirty bitmap The added group of operations enables tracking of the changed bits in the dirty bitmap. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-5-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ include/block/dirty-bitmap.h | 9 ++++++++ 2 files changed, 61 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index c572dfa941..9c6febbcb7 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -38,6 +38,7 @@ */ struct BdrvDirtyBitmap { HBitmap *bitmap; /* Dirty sector bitmap implementation */ + HBitmap *meta; /* Meta dirty bitmap */ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ char *name; /* Optional non-empty unique ID */ int64_t size; /* Size of the bitmap (Number of sectors) */ @@ -103,6 +104,56 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, return bitmap; } +/* bdrv_create_meta_dirty_bitmap + * + * Create a meta dirty bitmap that tracks the changes of bits in @bitmap. I.e. + * when a dirty status bit in @bitmap is changed (either from reset to set or + * the other way around), its respective meta dirty bitmap bit will be marked + * dirty as well. + * + * @bitmap: the block dirty bitmap for which to create a meta dirty bitmap. + * @chunk_size: how many bytes of bitmap data does each bit in the meta bitmap + * track. + */ +void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int chunk_size) +{ + assert(!bitmap->meta); + bitmap->meta = hbitmap_create_meta(bitmap->bitmap, + chunk_size * BITS_PER_BYTE); +} + +void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(bitmap->meta); + hbitmap_free_meta(bitmap->bitmap); + bitmap->meta = NULL; +} + +int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors) +{ + uint64_t i; + int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta); + + /* To optimize: we can make hbitmap to internally check the range in a + * coarse level, or at least do it word by word. */ + for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) { + if (hbitmap_get(bitmap->meta, i)) { + return true; + } + } + return false; +} + +void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors) +{ + hbitmap_reset(bitmap->meta, sector, nb_sectors); +} + bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) { return bitmap->successor; @@ -233,6 +284,7 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { assert(!bm->active_iterators); assert(!bdrv_dirty_bitmap_frozen(bm)); + assert(!bm->meta); QLIST_REMOVE(bm, list); hbitmap_free(bm->bitmap); g_free(bm->name); diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 0ef927d6ce..69c500ba2f 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -8,6 +8,9 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, uint32_t granularity, const char *name, Error **errp); +void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int chunk_size); +void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap); int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, Error **errp); @@ -36,6 +39,12 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); +int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors); +void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors); BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector); void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); -- cgit v1.2.1 From 15891fac7dd0300f94cca081cfef51b074cb988b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:25 -0400 Subject: block: Add two dirty bitmap getters For dirty bitmap users to get the size and the name of a BdrvDirtyBitmap. Signed-off-by: Fam Zheng Reviewed-by: John Snow Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-6-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 10 ++++++++++ include/block/dirty-bitmap.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 9c6febbcb7..860acc9b5b 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -154,6 +154,16 @@ void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, hbitmap_reset(bitmap->meta, sector, nb_sectors); } +int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->size; +} + +const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->name; +} + bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) { return bitmap->successor; diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 69c500ba2f..c4e7858a62 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -32,6 +32,8 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); +const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); +int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap); DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); -- cgit v1.2.1 From 7105007a5c8ae6228d78487e1e382d53863ab2c9 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:26 -0400 Subject: block: Assert that bdrv_release_dirty_bitmap succeeded We use a loop over bs->dirty_bitmaps to make sure the caller is only releasing a bitmap owned by bs. Let's also assert that in this case the caller is releasing a bitmap that does exist. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-7-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 860acc9b5b..31d5296ef7 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -305,6 +305,9 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, } } } + if (bitmap) { + abort(); + } } void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) -- cgit v1.2.1 From 8258888e222ff03bf791d81a0001cdc40b878de4 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 13 Oct 2016 17:58:27 -0400 Subject: hbitmap: serialization Functions to serialize / deserialize(restore) HBitmap. HBitmap should be saved to linear sequence of bits independently of endianness and bitmap array element (unsigned long) size. Therefore Little Endian is chosen. These functions are appropriate for dirty bitmap migration, restoring the bitmap in several steps is available. To save performance, every step writes only the last level of the bitmap. All other levels are restored by hbitmap_deserialize_finish() as a last step of restoring. So, HBitmap is inconsistent while restoring. Signed-off-by: Vladimir Sementsov-Ogievskiy [Fix left shift operand to 1UL; add "finish" parameter. - Fam] Signed-off-by: Fam Zheng Signed-off-by: John Snow Message-id: 1476395910-8697-8-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- include/qemu/hbitmap.h | 79 ++++++++++++++++++++++++++++ util/hbitmap.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 216 insertions(+) diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index 17259191b6..eb464759d5 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -145,6 +145,85 @@ void hbitmap_reset_all(HBitmap *hb); */ bool hbitmap_get(const HBitmap *hb, uint64_t item); +/** + * hbitmap_serialization_granularity: + * @hb: HBitmap to operate on. + * + * Granularity of serialization chunks, used by other serialization functions. + * For every chunk: + * 1. Chunk start should be aligned to this granularity. + * 2. Chunk size should be aligned too, except for last chunk (for which + * start + count == hb->size) + */ +uint64_t hbitmap_serialization_granularity(const HBitmap *hb); + +/** + * hbitmap_serialization_size: + * @hb: HBitmap to operate on. + * @start: Starting bit + * @count: Number of bits + * + * Return number of bytes hbitmap_(de)serialize_part needs + */ +uint64_t hbitmap_serialization_size(const HBitmap *hb, + uint64_t start, uint64_t count); + +/** + * hbitmap_serialize_part + * @hb: HBitmap to operate on. + * @buf: Buffer to store serialized bitmap. + * @start: First bit to store. + * @count: Number of bits to store. + * + * Stores HBitmap data corresponding to given region. The format of saved data + * is linear sequence of bits, so it can be used by hbitmap_deserialize_part + * independently of endianness and size of HBitmap level array elements + */ +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count); + +/** + * hbitmap_deserialize_part + * @hb: HBitmap to operate on. + * @buf: Buffer to restore bitmap data from. + * @start: First bit to restore. + * @count: Number of bits to restore. + * @finish: Whether to call hbitmap_deserialize_finish automatically. + * + * Restores HBitmap data corresponding to given region. The format is the same + * as for hbitmap_serialize_part. + * + * If @finish is false, caller must call hbitmap_serialize_finish before using + * the bitmap. + */ +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count, + bool finish); + +/** + * hbitmap_deserialize_zeroes + * @hb: HBitmap to operate on. + * @start: First bit to restore. + * @count: Number of bits to restore. + * @finish: Whether to call hbitmap_deserialize_finish automatically. + * + * Fills the bitmap with zeroes. + * + * If @finish is false, caller must call hbitmap_serialize_finish before using + * the bitmap. + */ +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count, + bool finish); + +/** + * hbitmap_deserialize_finish + * @hb: HBitmap to operate on. + * + * Repair HBitmap after calling hbitmap_deserialize_data. Actually, all HBitmap + * layers are restored here. + */ +void hbitmap_deserialize_finish(HBitmap *hb); + /** * hbitmap_free: * @hb: HBitmap to operate on. diff --git a/util/hbitmap.c b/util/hbitmap.c index f303975371..5d1a21ce91 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -397,6 +397,143 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item) return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0; } +uint64_t hbitmap_serialization_granularity(const HBitmap *hb) +{ + /* Require at least 64 bit granularity to be safe on both 64 bit and 32 bit + * hosts. */ + return 64 << hb->granularity; +} + +/* Start should be aligned to serialization granularity, chunk size should be + * aligned to serialization granularity too, except for last chunk. + */ +static void serialization_chunk(const HBitmap *hb, + uint64_t start, uint64_t count, + unsigned long **first_el, uint64_t *el_count) +{ + uint64_t last = start + count - 1; + uint64_t gran = hbitmap_serialization_granularity(hb); + + assert((start & (gran - 1)) == 0); + assert((last >> hb->granularity) < hb->size); + if ((last >> hb->granularity) != hb->size - 1) { + assert((count & (gran - 1)) == 0); + } + + start = (start >> hb->granularity) >> BITS_PER_LEVEL; + last = (last >> hb->granularity) >> BITS_PER_LEVEL; + + *first_el = &hb->levels[HBITMAP_LEVELS - 1][start]; + *el_count = last - start + 1; +} + +uint64_t hbitmap_serialization_size(const HBitmap *hb, + uint64_t start, uint64_t count) +{ + uint64_t el_count; + unsigned long *cur; + + if (!count) { + return 0; + } + serialization_chunk(hb, start, count, &cur, &el_count); + + return el_count * sizeof(unsigned long); +} + +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count) +{ + uint64_t el_count; + unsigned long *cur, *end; + + if (!count) { + return; + } + serialization_chunk(hb, start, count, &cur, &el_count); + end = cur + el_count; + + while (cur != end) { + unsigned long el = + (BITS_PER_LONG == 32 ? cpu_to_le32(*cur) : cpu_to_le64(*cur)); + + memcpy(buf, &el, sizeof(el)); + buf += sizeof(el); + cur++; + } +} + +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count, + bool finish) +{ + uint64_t el_count; + unsigned long *cur, *end; + + if (!count) { + return; + } + serialization_chunk(hb, start, count, &cur, &el_count); + end = cur + el_count; + + while (cur != end) { + memcpy(cur, buf, sizeof(*cur)); + + if (BITS_PER_LONG == 32) { + le32_to_cpus((uint32_t *)cur); + } else { + le64_to_cpus((uint64_t *)cur); + } + + buf += sizeof(unsigned long); + cur++; + } + if (finish) { + hbitmap_deserialize_finish(hb); + } +} + +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count, + bool finish) +{ + uint64_t el_count; + unsigned long *first; + + if (!count) { + return; + } + serialization_chunk(hb, start, count, &first, &el_count); + + memset(first, 0, el_count * sizeof(unsigned long)); + if (finish) { + hbitmap_deserialize_finish(hb); + } +} + +void hbitmap_deserialize_finish(HBitmap *bitmap) +{ + int64_t i, size, prev_size; + int lev; + + /* restore levels starting from penultimate to zero level, assuming + * that the last level is ok */ + size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); + for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) { + prev_size = size; + size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); + memset(bitmap->levels[lev], 0, size * sizeof(unsigned long)); + + for (i = 0; i < prev_size; ++i) { + if (bitmap->levels[lev + 1][i]) { + bitmap->levels[lev][i >> BITS_PER_LEVEL] |= + 1UL << (i & (BITS_PER_LONG - 1)); + } + } + } + + bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1); +} + void hbitmap_free(HBitmap *hb) { unsigned i; -- cgit v1.2.1 From 882c36f5902a28125e1f98ded74e1a0b3e05e342 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 13 Oct 2016 17:58:28 -0400 Subject: block: BdrvDirtyBitmap serialization interface Several functions to provide necessary access to BdrvDirtyBitmap for block-migration.c Signed-off-by: Vladimir Sementsov-Ogievskiy [Add the "finish" parameters. - Fam] Signed-off-by: Fam Zheng Reviewed-by: John Snow Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-9-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 37 +++++++++++++++++++++++++++++++++++++ include/block/dirty-bitmap.h | 14 ++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 31d5296ef7..384146bbf3 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -453,6 +453,43 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) hbitmap_free(tmp); } +uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count) +{ + return hbitmap_serialization_size(bitmap->bitmap, start, count); +} + +uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) +{ + return hbitmap_serialization_granularity(bitmap->bitmap); +} + +void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count) +{ + hbitmap_serialize_part(bitmap->bitmap, buf, start, count); +} + +void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count, bool finish) +{ + hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish); +} + +void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count, + bool finish) +{ + hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish); +} + +void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) +{ + hbitmap_deserialize_finish(bitmap->bitmap); +} + void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sectors) { diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index c4e7858a62..efc2965d35 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -55,4 +55,18 @@ void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); +uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count); +uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count); +void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count, bool finish); +void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count, + bool finish); +void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap); + #endif -- cgit v1.2.1 From 2071f26e2d67fa385c2baf061137b7482ace270c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:29 -0400 Subject: tests: Add test code for hbitmap serialization Signed-off-by: Fam Zheng [Fixed minor constant issue. --js] Signed-off-by: John Snow Signed-off-by: John Snow Message-id: 1476395910-8697-10-git-send-email-jsnow@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- tests/test-hbitmap.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c index e3abde1aae..9b7495cc32 100644 --- a/tests/test-hbitmap.c +++ b/tests/test-hbitmap.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qemu/hbitmap.h" +#include "qemu/bitmap.h" #include "block/block.h" #define LOG_BITS_PER_LONG (BITS_PER_LONG == 32 ? 5 : 6) @@ -737,6 +738,16 @@ static void test_hbitmap_meta_one(TestHBitmapData *data, const void *unused) } } +static void test_hbitmap_serialize_granularity(TestHBitmapData *data, + const void *unused) +{ + int r; + + hbitmap_test_init(data, L3 * 2, 3); + r = hbitmap_serialization_granularity(data->hb); + g_assert_cmpint(r, ==, 64 << 3); +} + static void test_hbitmap_meta_zero(TestHBitmapData *data, const void *unused) { hbitmap_test_init_meta(data, 0, 0, 1); @@ -744,6 +755,142 @@ static void test_hbitmap_meta_zero(TestHBitmapData *data, const void *unused) hbitmap_check_meta(data, 0, 0); } +static void hbitmap_test_serialize_range(TestHBitmapData *data, + uint8_t *buf, size_t buf_size, + uint64_t pos, uint64_t count) +{ + size_t i; + unsigned long *el = (unsigned long *)buf; + + assert(hbitmap_granularity(data->hb) == 0); + hbitmap_reset_all(data->hb); + memset(buf, 0, buf_size); + if (count) { + hbitmap_set(data->hb, pos, count); + } + hbitmap_serialize_part(data->hb, buf, 0, data->size); + + /* Serialized buffer is inherently LE, convert it back manually to test */ + for (i = 0; i < buf_size / sizeof(unsigned long); i++) { + el[i] = (BITS_PER_LONG == 32 ? le32_to_cpu(el[i]) : le64_to_cpu(el[i])); + } + + for (i = 0; i < data->size; i++) { + int is_set = test_bit(i, (unsigned long *)buf); + if (i >= pos && i < pos + count) { + g_assert(is_set); + } else { + g_assert(!is_set); + } + } + + /* Re-serialize for deserialization testing */ + memset(buf, 0, buf_size); + hbitmap_serialize_part(data->hb, buf, 0, data->size); + hbitmap_reset_all(data->hb); + hbitmap_deserialize_part(data->hb, buf, 0, data->size, true); + + for (i = 0; i < data->size; i++) { + int is_set = hbitmap_get(data->hb, i); + if (i >= pos && i < pos + count) { + g_assert(is_set); + } else { + g_assert(!is_set); + } + } +} + +static void test_hbitmap_serialize_basic(TestHBitmapData *data, + const void *unused) +{ + int i, j; + size_t buf_size; + uint8_t *buf; + uint64_t positions[] = { 0, 1, L1 - 1, L1, L2 - 1, L2, L2 + 1, L3 - 1 }; + int num_positions = sizeof(positions) / sizeof(positions[0]); + + hbitmap_test_init(data, L3, 0); + buf_size = hbitmap_serialization_size(data->hb, 0, data->size); + buf = g_malloc0(buf_size); + + for (i = 0; i < num_positions; i++) { + for (j = 0; j < num_positions; j++) { + hbitmap_test_serialize_range(data, buf, buf_size, + positions[i], + MIN(positions[j], L3 - positions[i])); + } + } + + g_free(buf); +} + +static void test_hbitmap_serialize_part(TestHBitmapData *data, + const void *unused) +{ + int i, j, k; + size_t buf_size; + uint8_t *buf; + uint64_t positions[] = { 0, 1, L1 - 1, L1, L2 - 1, L2, L2 + 1, L3 - 1 }; + int num_positions = sizeof(positions) / sizeof(positions[0]); + + hbitmap_test_init(data, L3, 0); + buf_size = L2; + buf = g_malloc0(buf_size); + + for (i = 0; i < num_positions; i++) { + hbitmap_set(data->hb, positions[i], 1); + } + + for (i = 0; i < data->size; i += buf_size) { + unsigned long *el = (unsigned long *)buf; + hbitmap_serialize_part(data->hb, buf, i, buf_size); + for (j = 0; j < buf_size / sizeof(unsigned long); j++) { + el[j] = (BITS_PER_LONG == 32 ? le32_to_cpu(el[j]) : le64_to_cpu(el[j])); + } + + for (j = 0; j < buf_size; j++) { + bool should_set = false; + for (k = 0; k < num_positions; k++) { + if (positions[k] == j + i) { + should_set = true; + break; + } + } + g_assert_cmpint(should_set, ==, test_bit(j, (unsigned long *)buf)); + } + } + + g_free(buf); +} + +static void test_hbitmap_serialize_zeroes(TestHBitmapData *data, + const void *unused) +{ + int i; + HBitmapIter iter; + int64_t next; + uint64_t min_l1 = MAX(L1, 64); + uint64_t positions[] = { 0, min_l1, L2, L3 - min_l1}; + int num_positions = sizeof(positions) / sizeof(positions[0]); + + hbitmap_test_init(data, L3, 0); + + for (i = 0; i < num_positions; i++) { + hbitmap_set(data->hb, positions[i], L1); + } + + for (i = 0; i < num_positions; i++) { + hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true); + hbitmap_iter_init(&iter, data->hb, 0); + next = hbitmap_iter_next(&iter); + if (i == num_positions - 1) { + g_assert_cmpint(next, ==, -1); + } else { + g_assert_cmpint(next, ==, positions[i + 1]); + } + } +} + static void hbitmap_test_add(const char *testpath, void (*test_func)(TestHBitmapData *data, const void *user_data)) { @@ -799,6 +946,15 @@ int main(int argc, char **argv) hbitmap_test_add("/hbitmap/meta/byte", test_hbitmap_meta_byte); hbitmap_test_add("/hbitmap/meta/word", test_hbitmap_meta_word); hbitmap_test_add("/hbitmap/meta/sector", test_hbitmap_meta_sector); + + hbitmap_test_add("/hbitmap/serialize/granularity", + test_hbitmap_serialize_granularity); + hbitmap_test_add("/hbitmap/serialize/basic", + test_hbitmap_serialize_basic); + hbitmap_test_add("/hbitmap/serialize/part", + test_hbitmap_serialize_part); + hbitmap_test_add("/hbitmap/serialize/zeroes", + test_hbitmap_serialize_zeroes); g_test_run(); return 0; -- cgit v1.2.1 From 6d3f4049ba532b01ff738f9e5a66df1477c5ffdd Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 13 Oct 2016 17:58:30 -0400 Subject: block: More operations for meta dirty bitmap Callers can create an iterator of meta bitmap with bdrv_dirty_meta_iter_new(), then use the bdrv_dirty_iter_* operations on it. Meta iterators are also counted by bitmap->active_iterators. Also add a couple of functions to retrieve granularity and count. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: John Snow Message-id: 1476395910-8697-11-git-send-email-jsnow@redhat.com Signed-off-by: Max Reitz --- block/dirty-bitmap.c | 19 +++++++++++++++++++ include/block/dirty-bitmap.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 384146bbf3..519737c8d3 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -393,6 +393,11 @@ uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } +uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap) +{ + return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta); +} + BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector) { @@ -403,6 +408,15 @@ BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, return iter; } +BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap) +{ + BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); + hbitmap_iter_init(&iter->hbi, bitmap->meta, 0); + iter->bitmap = bitmap; + bitmap->active_iterators++; + return iter; +} + void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) { if (!iter) { @@ -514,3 +528,8 @@ int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) { return hbitmap_count(bitmap->bitmap); } + +int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap) +{ + return hbitmap_count(bitmap->meta); +} diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index efc2965d35..9dea14ba03 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -30,6 +30,7 @@ void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); +uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); @@ -47,12 +48,14 @@ int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector, int nb_sectors); +BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap); BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector); void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); +int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, -- cgit v1.2.1 From f4f2539bcf2d41f96d7e60c4c7e81ecdf5d81c63 Mon Sep 17 00:00:00 2001 From: Changlong Xie Date: Wed, 12 Oct 2016 12:50:08 +0800 Subject: block/replication: Clarify 'top-id' parameter usage The replication driver only supports the 'top-id' parameter for the secondary side; it must not be supplied for the primary side. Reviewed-by: Eric Blake Signed-off-by: Changlong Xie Message-id: 1476247808-15646-1-git-send-email-xiecl.fnst@cn.fujitsu.com Signed-off-by: Max Reitz --- block/replication.c | 5 +++++ qapi/block-core.json | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/block/replication.c b/block/replication.c index 3bd1cf1809..8bbfc8f870 100644 --- a/block/replication.c +++ b/block/replication.c @@ -101,6 +101,11 @@ static int replication_open(BlockDriverState *bs, QDict *options, if (!strcmp(mode, "primary")) { s->mode = REPLICATION_MODE_PRIMARY; + top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); + if (top_id) { + error_setg(&local_err, "The primary side does not support option top-id"); + goto fail; + } } else if (!strcmp(mode, "secondary")) { s->mode = REPLICATION_MODE_SECONDARY; top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); diff --git a/qapi/block-core.json b/qapi/block-core.json index c59047bc94..97b120532a 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2197,7 +2197,8 @@ # @mode: the replication mode # # @top-id: #optional In secondary mode, node name or device ID of the root -# node who owns the replication node chain. Ignored in primary mode. +# node who owns the replication node chain. Must not be given in +# primary mode. # # Since: 2.8 ## -- cgit v1.2.1