From 26d49c46750aff62c8c6e7a8b14c79d4114a8e53 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 7 Mar 2014 23:10:12 +0100 Subject: qcow2-refcount: Sanitize refcount table entry When reading the refcount table entry in get_refcount(), only bits which are actually significant for the refcount block offset should be taken into account. Signed-off-by: Max Reitz Reviewed-by: Laszlo Ersek Signed-off-by: Stefan Hajnoczi --- block/qcow2-refcount.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 8712d8bd54..6151148507 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -96,7 +96,8 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index) refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); if (refcount_table_index >= s->refcount_table_size) return 0; - refcount_block_offset = s->refcount_table[refcount_table_index]; + refcount_block_offset = + s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK; if (!refcount_block_offset) return 0; -- cgit v1.2.1 From 3456a8d1852e970688b73d03fdc44dde851759e1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 11 Mar 2014 10:58:39 +0100 Subject: block: Update image size in bdrv_invalidate_cache() After migration has completed, we call bdrv_invalidate_cache() so that drivers which cache some data drop their stale copy of the data and reread it from the image file to get a new version of data that the source modified while the migration was running. Reloading metadata from the image file is useless, though, if the size of the image file stays stale (this is a value that is cached for all image formats in block.c). Reads from (meta)data after the old EOF return only zeroes, causing image corruption. We need to update bs->total_sectors in all layers that could potentially have changed their size (i.e. backing files are not a concern - if they are changed, we're in bigger trouble) Signed-off-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block.c | 10 +++++++++- block/qcow2.c | 2 ++ block/qed.c | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index f1ef4b0109..7b306fb62f 100644 --- a/block.c +++ b/block.c @@ -4776,9 +4776,17 @@ flush_parent: void bdrv_invalidate_cache(BlockDriverState *bs) { - if (bs->drv && bs->drv->bdrv_invalidate_cache) { + if (!bs->drv) { + return; + } + + if (bs->drv->bdrv_invalidate_cache) { bs->drv->bdrv_invalidate_cache(bs); + } else if (bs->file) { + bdrv_invalidate_cache(bs->file); } + + refresh_total_sectors(bs, bs->total_sectors); } void bdrv_invalidate_cache_all(void) diff --git a/block/qcow2.c b/block/qcow2.c index cfe80befa0..b5b1e8c1b9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1176,6 +1176,8 @@ static void qcow2_invalidate_cache(BlockDriverState *bs) qcow2_close(bs); + bdrv_invalidate_cache(bs->file); + options = qdict_new(); qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS, qbool_from_int(s->use_lazy_refcounts)); diff --git a/block/qed.c b/block/qed.c index 8802ad3845..837accd39b 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1563,6 +1563,9 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs) BDRVQEDState *s = bs->opaque; bdrv_qed_close(bs); + + bdrv_invalidate_cache(bs->file); + memset(s, 0, sizeof(BDRVQEDState)); bdrv_qed_open(bs, NULL, bs->open_flags, NULL); } -- cgit v1.2.1 From dba2855572c746836ad90ce9154403b5929d996b Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 10 Mar 2014 23:44:07 +0100 Subject: qcow2: Check bs->drv in copy_sectors() Before dereferencing bs->drv for a call to its member bdrv_co_readv(), copy_sectors() should check whether that pointer is indeed valid, since it may have been set to NULL by e.g. a concurrent write triggering the corruption prevention mechanism. Signed-off-by: Max Reitz Signed-off-by: Stefan Hajnoczi --- block/qcow2-cluster.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 36c1bed350..9499df9ef2 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -380,6 +380,10 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); + if (!bs->drv) { + return -ENOMEDIUM; + } + /* Call .bdrv_co_readv() directly instead of using the public block-layer * interface. This avoids double I/O throttling and request tracking, * which can lead to deadlock when block layer copy-on-read is enabled. -- cgit v1.2.1 From 938789ea92b3073ad1113b3e1bdf90d469ea4bf4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 10 Mar 2014 23:44:08 +0100 Subject: block: bs->drv may be NULL in bdrv_debug_resume() Currently, bdrv_debug_resume() requires every bs->drv in the BDS stack to be NULL until a bs->drv with an implementation of bdrv_debug_resume() is found. For a normal function, this would be fine, but this is a function for debugging purposes and should therefore allow intermediate BDS not to have a driver (i.e., be "ejected"). Otherwise, it is hard to debug such situations. Signed-off-by: Max Reitz Signed-off-by: Stefan Hajnoczi --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index 7b306fb62f..09c92588e3 100644 --- a/block.c +++ b/block.c @@ -4055,7 +4055,7 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) int bdrv_debug_resume(BlockDriverState *bs, const char *tag) { - while (bs && bs->drv && !bs->drv->bdrv_debug_resume) { + while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { bs = bs->file; } -- cgit v1.2.1 From 98d39e34fe95f8609be3ccbd1b67926631d2c7c0 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 10 Mar 2014 23:44:09 +0100 Subject: iotests: Test corruption during COW request Extend test file 060 by a test case for corruption occuring concurrently to a COW request. QEMU should not crash but rather return an appropriate error message. Signed-off-by: Max Reitz Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/060 | 26 ++++++++++++++++++++++++++ tests/qemu-iotests/060.out | 15 +++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index af8ed9f39a..f0116aab1d 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -138,6 +138,32 @@ $QEMU_IMG snapshot -a foo "$TEST_IMG" _check_test_img $QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io +echo +echo "=== Testing overlap while COW is in flight ===" +echo +# compat=0.10 is required in order to make the following discard actually +# unallocate the sector rather than make it a zero sector - we want COW, after +# all. +IMGOPTS='compat=0.10' _make_test_img 1G +# Write two clusters, the second one enforces creation of an L2 table after +# the first data cluster. +$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io +# Discard the first cluster. This cluster will soon enough be reallocated and +# used for COW. +$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io +# Now, corrupt the image by marking the second L2 table cluster as free. +poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c +# Start a write operation requiring COW on the image stopping it right before +# doing the read; then, trigger the corruption prevention by writing anything to +# any unallocated cluster, leading to an attempt to overwrite the second L2 +# table. Finally, resume the COW write and see it fail (but not crash). +echo "open -o file.driver=blkdebug $TEST_IMG +break cow_read 0 +aio_write 0k 1k +wait_break 0 +write 64k 64k +resume 0" | $QEMU_IO | _filter_qemu_io + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 6c7bdbb2f2..a517948036 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -78,4 +78,19 @@ read 512/512 bytes at offset 0 No errors were found on the image. read 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Testing overlap while COW is in flight === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 536870912 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +discard 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qcow2: Preventing invalid write on metadata (overlaps with active L2 table); image marked as corrupt. +blkdebug: Suspended request '0' +write failed: Input/output error +blkdebug: Resuming request '0' +aio_write failed: No medium found *** done -- cgit v1.2.1 From b5042a36229b4fa5eeb66bbcde78f704975aec00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Canet?= Date: Mon, 3 Mar 2014 19:11:34 +0100 Subject: block: Rewrite the snapshot authorization mechanism for block filters. This patch keep the recursive way of doing things but simplify it by giving two responsabilities to all block filters implementors. They will need to do two things: -Set the is_filter field of their block driver to true. -Implement the bdrv_recurse_is_first_non_filter method of their block driver like it is done on the Quorum block driver. (block/quorum.c) [Paolo Bonzini pointed out that this patch changes the semantics of blkverify, which now recurses down both bs->file and s->test_file. -- Stefan] Reported-by: Paolo Bonzini Signed-off-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- block.c | 47 +++++++++++++++++++++-------------------------- block/blkverify.c | 17 ++++++++++++++++- block/quorum.c | 3 +-- include/block/block.h | 9 --------- include/block/block_int.h | 8 ++++---- 5 files changed, 42 insertions(+), 42 deletions(-) diff --git a/block.c b/block.c index 09c92588e3..cf5645a14a 100644 --- a/block.c +++ b/block.c @@ -5398,43 +5398,37 @@ int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options) return bs->drv->bdrv_amend_options(bs, options); } -/* Used to recurse on single child block filters. - * Single child block filter will store their child in bs->file. +/* This function will be called by the bdrv_recurse_is_first_non_filter method + * of block filter and by bdrv_is_first_non_filter. + * It is used to test if the given bs is the candidate or recurse more in the + * node graph. */ -bool bdrv_generic_is_first_non_filter(BlockDriverState *bs, +bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, BlockDriverState *candidate) { - if (!bs->drv) { - return false; - } - - if (!bs->drv->authorizations[BS_IS_A_FILTER]) { - if (bs == candidate) { - return true; - } else { - return false; - } - } - - if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) { + /* return false if basic checks fails */ + if (!bs || !bs->drv) { return false; } - if (!bs->file) { - return false; + /* the code reached a non block filter driver -> check if the bs is + * the same as the candidate. It's the recursion termination condition. + */ + if (!bs->drv->is_filter) { + return bs == candidate; } + /* Down this path the driver is a block filter driver */ - return bdrv_recurse_is_first_non_filter(bs->file, candidate); -} - -bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) -{ - if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) { + /* If the block filter recursion method is defined use it to recurse down + * the node graph. + */ + if (bs->drv->bdrv_recurse_is_first_non_filter) { return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); } - return bdrv_generic_is_first_non_filter(bs, candidate); + /* the driver is a block filter but don't allow to recurse -> return false + */ + return false; } /* This function checks if the candidate is the first non filter bs down it's @@ -5449,6 +5443,7 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate) QTAILQ_FOREACH(bs, &bdrv_states, device_list) { bool perm; + /* try to recurse in this top level bs */ perm = bdrv_recurse_is_first_non_filter(bs, candidate); /* candidate is the first non filter */ diff --git a/block/blkverify.c b/block/blkverify.c index b98b08bedf..e1c31171c3 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -288,6 +288,20 @@ static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs, return bdrv_aio_flush(s->test_file, cb, opaque); } +static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate) +{ + BDRVBlkverifyState *s = bs->opaque; + + bool perm = bdrv_recurse_is_first_non_filter(bs->file, candidate); + + if (perm) { + return true; + } + + return bdrv_recurse_is_first_non_filter(s->test_file, candidate); +} + static BlockDriver bdrv_blkverify = { .format_name = "blkverify", .protocol_name = "blkverify", @@ -302,7 +316,8 @@ static BlockDriver bdrv_blkverify = { .bdrv_aio_writev = blkverify_aio_writev, .bdrv_aio_flush = blkverify_aio_flush, - .authorizations = { true, false }, + .is_filter = true, + .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter, }; static void bdrv_blkverify_init(void) diff --git a/block/quorum.c b/block/quorum.c index bd997b7322..33bf2ae6a7 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -852,8 +852,6 @@ static BlockDriver bdrv_quorum = { .bdrv_file_open = quorum_open, .bdrv_close = quorum_close, - .authorizations = { true, true }, - .bdrv_co_flush_to_disk = quorum_co_flush, .bdrv_getlength = quorum_getlength, @@ -862,6 +860,7 @@ static BlockDriver bdrv_quorum = { .bdrv_aio_writev = quorum_aio_writev, .bdrv_invalidate_cache = quorum_invalidate_cache, + .is_filter = true, .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, }; diff --git a/include/block/block.h b/include/block/block.h index 780f48b7b3..bd34d14109 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -286,15 +286,6 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); int bdrv_amend_options(BlockDriverState *bs_new, QEMUOptionParameter *options); /* external snapshots */ - -typedef enum { - BS_IS_A_FILTER, - BS_FILTER_PASS_DOWN, - BS_AUTHORIZATION_COUNT, -} BsAuthorization; - -bool bdrv_generic_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate); bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, BlockDriverState *candidate); bool bdrv_is_first_non_filter(BlockDriverState *candidate); diff --git a/include/block/block_int.h b/include/block/block_int.h index 0bcf1c9b8c..4fc5ea8a65 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -76,10 +76,10 @@ struct BlockDriver { const char *format_name; int instance_size; - /* this table of boolean contains authorizations for the block operations */ - bool authorizations[BS_AUTHORIZATION_COUNT]; - /* for snapshots complex block filter like Quorum can implement the - * following recursive callback instead of BS_IS_A_FILTER. + /* set to true if the BlockDriver is a block filter */ + bool is_filter; + /* for snapshots block filter like Quorum can implement the + * following recursive callback. * It's purpose is to recurse on the filter children while calling * bdrv_recurse_is_first_non_filter on them. * For a sample implementation look in the future Quorum block filter. -- cgit v1.2.1 From 11f590b1a242492a0108da42f40f0e2b20f0a778 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:02 +0100 Subject: object: add object_get_canonical_path_component() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is often useful to find an object's child property name. Also use this new function to simplify the implementation of object_get_canonical_path(). Reviewed-by: Andreas Färber Signed-off-by: Stefan Hajnoczi --- include/qom/object.h | 8 ++++++++ qom/object.c | 54 +++++++++++++++++++++++++++++++--------------------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/include/qom/object.h b/include/qom/object.h index 9c7c361d30..4cd77049e4 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -973,6 +973,14 @@ const char *object_property_get_type(Object *obj, const char *name, */ Object *object_get_root(void); +/** + * object_get_canonical_path_component: + * + * Returns: The final component in the object's canonical path. The canonical + * path is the path within the composition tree starting from the root. + */ +gchar *object_get_canonical_path_component(Object *obj); + /** * object_get_canonical_path: * diff --git a/qom/object.c b/qom/object.c index 660859c0e7..3290375016 100644 --- a/qom/object.c +++ b/qom/object.c @@ -1102,39 +1102,49 @@ void object_property_add_link(Object *obj, const char *name, g_free(full_type); } +gchar *object_get_canonical_path_component(Object *obj) +{ + ObjectProperty *prop = NULL; + + g_assert(obj); + g_assert(obj->parent != NULL); + + QTAILQ_FOREACH(prop, &obj->parent->properties, node) { + if (!object_property_is_child(prop)) { + continue; + } + + if (prop->opaque == obj) { + return g_strdup(prop->name); + } + } + + /* obj had a parent but was not a child, should never happen */ + g_assert_not_reached(); + return NULL; +} + gchar *object_get_canonical_path(Object *obj) { Object *root = object_get_root(); - char *newpath = NULL, *path = NULL; + char *newpath, *path = NULL; while (obj != root) { - ObjectProperty *prop = NULL; - - g_assert(obj->parent != NULL); - - QTAILQ_FOREACH(prop, &obj->parent->properties, node) { - if (!object_property_is_child(prop)) { - continue; - } + char *component = object_get_canonical_path_component(obj); - if (prop->opaque == obj) { - if (path) { - newpath = g_strdup_printf("%s/%s", prop->name, path); - g_free(path); - path = newpath; - } else { - path = g_strdup(prop->name); - } - break; - } + if (path) { + newpath = g_strdup_printf("%s/%s", component, path); + g_free(component); + g_free(path); + path = newpath; + } else { + path = component; } - g_assert(prop != NULL); - obj = obj->parent; } - newpath = g_strdup_printf("/%s", path); + newpath = g_strdup_printf("/%s", path ? path : ""); g_free(path); return newpath; -- cgit v1.2.1 From 2da61b671eb89fcaa306738f44eed472977d6587 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:03 +0100 Subject: rfifolock: add recursive FIFO lock QemuMutex does not guarantee fairness and cannot be acquired recursively: Fairness means each locker gets a turn and the scheduler cannot cause starvation. Recursive locking is useful for composition, it allows a sequence of locking operations to be invoked atomically by acquiring the lock around them. This patch adds RFifoLock, a recursive lock that guarantees FIFO order. Its first user is added in the next patch. RFifoLock has one additional feature: it can be initialized with an optional contention callback. The callback is invoked whenever a thread must wait for the lock. For example, it can be used to poke the current owner so that they release the lock soon. Signed-off-by: Stefan Hajnoczi --- include/qemu/rfifolock.h | 54 ++++++++++++++++++++++++++++ tests/Makefile | 2 ++ tests/test-rfifolock.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ util/Makefile.objs | 1 + util/rfifolock.c | 78 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 226 insertions(+) create mode 100644 include/qemu/rfifolock.h create mode 100644 tests/test-rfifolock.c create mode 100644 util/rfifolock.c diff --git a/include/qemu/rfifolock.h b/include/qemu/rfifolock.h new file mode 100644 index 0000000000..b23ab538a6 --- /dev/null +++ b/include/qemu/rfifolock.h @@ -0,0 +1,54 @@ +/* + * Recursive FIFO lock + * + * Copyright Red Hat, Inc. 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_RFIFOLOCK_H +#define QEMU_RFIFOLOCK_H + +#include "qemu/thread.h" + +/* Recursive FIFO lock + * + * This lock provides more features than a plain mutex: + * + * 1. Fairness - enforces FIFO order. + * 2. Nesting - can be taken recursively. + * 3. Contention callback - optional, called when thread must wait. + * + * The recursive FIFO lock is heavyweight so prefer other synchronization + * primitives if you do not need its features. + */ +typedef struct { + QemuMutex lock; /* protects all fields */ + + /* FIFO order */ + unsigned int head; /* active ticket number */ + unsigned int tail; /* waiting ticket number */ + QemuCond cond; /* used to wait for our ticket number */ + + /* Nesting */ + QemuThread owner_thread; /* thread that currently has ownership */ + unsigned int nesting; /* amount of nesting levels */ + + /* Contention callback */ + void (*cb)(void *); /* called when thread must wait, with ->lock + * held so it may not recursively lock/unlock + */ + void *cb_opaque; +} RFifoLock; + +void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque); +void rfifolock_destroy(RFifoLock *r); +void rfifolock_lock(RFifoLock *r); +void rfifolock_unlock(RFifoLock *r); + +#endif /* QEMU_RFIFOLOCK_H */ diff --git a/tests/Makefile b/tests/Makefile index e146f81d44..190e596689 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -35,6 +35,7 @@ check-unit-y += tests/test-visitor-serialization$(EXESUF) check-unit-y += tests/test-iov$(EXESUF) gcov-files-test-iov-y = util/iov.c check-unit-y += tests/test-aio$(EXESUF) +check-unit-y += tests/test-rfifolock$(EXESUF) check-unit-y += tests/test-throttle$(EXESUF) gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c @@ -176,6 +177,7 @@ tests/check-qjson$(EXESUF): tests/check-qjson.o libqemuutil.a libqemustub.a tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(qom-core-obj) libqemuutil.a libqemustub.a tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a +tests/test-rfifolock$(EXESUF): tests/test-rfifolock.o libqemuutil.a libqemustub.a tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a diff --git a/tests/test-rfifolock.c b/tests/test-rfifolock.c new file mode 100644 index 0000000000..0572ebb42a --- /dev/null +++ b/tests/test-rfifolock.c @@ -0,0 +1,91 @@ +/* + * RFifoLock tests + * + * Copyright Red Hat, Inc. 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include +#include "qemu-common.h" +#include "qemu/rfifolock.h" + +static void test_nesting(void) +{ + RFifoLock lock; + + /* Trivial test, ensure the lock is recursive */ + rfifolock_init(&lock, NULL, NULL); + rfifolock_lock(&lock); + rfifolock_lock(&lock); + rfifolock_lock(&lock); + rfifolock_unlock(&lock); + rfifolock_unlock(&lock); + rfifolock_unlock(&lock); + rfifolock_destroy(&lock); +} + +typedef struct { + RFifoLock lock; + int fd[2]; +} CallbackTestData; + +static void rfifolock_cb(void *opaque) +{ + CallbackTestData *data = opaque; + int ret; + char c = 0; + + ret = write(data->fd[1], &c, sizeof(c)); + g_assert(ret == 1); +} + +static void *callback_thread(void *opaque) +{ + CallbackTestData *data = opaque; + + /* The other thread holds the lock so the contention callback will be + * invoked... + */ + rfifolock_lock(&data->lock); + rfifolock_unlock(&data->lock); + return NULL; +} + +static void test_callback(void) +{ + CallbackTestData data; + QemuThread thread; + int ret; + char c; + + rfifolock_init(&data.lock, rfifolock_cb, &data); + ret = qemu_pipe(data.fd); + g_assert(ret == 0); + + /* Hold lock but allow the callback to kick us by writing to the pipe */ + rfifolock_lock(&data.lock); + qemu_thread_create(&thread, "callback_thread", + callback_thread, &data, QEMU_THREAD_JOINABLE); + ret = read(data.fd[0], &c, sizeof(c)); + g_assert(ret == 1); + rfifolock_unlock(&data.lock); + /* If we got here then the callback was invoked, as expected */ + + qemu_thread_join(&thread); + close(data.fd[0]); + close(data.fd[1]); + rfifolock_destroy(&data.lock); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/nesting", test_nesting); + g_test_add_func("/callback", test_callback); + return g_test_run(); +} diff --git a/util/Makefile.objs b/util/Makefile.objs index 937376b082..df83b629a0 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -14,3 +14,4 @@ util-obj-y += crc32c.o util-obj-y += throttle.o util-obj-y += getauxval.o util-obj-y += readline.o +util-obj-y += rfifolock.o diff --git a/util/rfifolock.c b/util/rfifolock.c new file mode 100644 index 0000000000..afbf7488df --- /dev/null +++ b/util/rfifolock.c @@ -0,0 +1,78 @@ +/* + * Recursive FIFO lock + * + * Copyright Red Hat, Inc. 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include +#include "qemu/rfifolock.h" + +void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque) +{ + qemu_mutex_init(&r->lock); + r->head = 0; + r->tail = 0; + qemu_cond_init(&r->cond); + r->nesting = 0; + r->cb = cb; + r->cb_opaque = opaque; +} + +void rfifolock_destroy(RFifoLock *r) +{ + qemu_cond_destroy(&r->cond); + qemu_mutex_destroy(&r->lock); +} + +/* + * Theory of operation: + * + * In order to ensure FIFO ordering, implement a ticketlock. Threads acquiring + * the lock enqueue themselves by incrementing the tail index. When the lock + * is unlocked, the head is incremented and waiting threads are notified. + * + * Recursive locking does not take a ticket since the head is only incremented + * when the outermost recursive caller unlocks. + */ +void rfifolock_lock(RFifoLock *r) +{ + qemu_mutex_lock(&r->lock); + + /* Take a ticket */ + unsigned int ticket = r->tail++; + + if (r->nesting > 0 && qemu_thread_is_self(&r->owner_thread)) { + r->tail--; /* put ticket back, we're nesting */ + } else { + while (ticket != r->head) { + /* Invoke optional contention callback */ + if (r->cb) { + r->cb(r->cb_opaque); + } + qemu_cond_wait(&r->cond, &r->lock); + } + } + + qemu_thread_get_self(&r->owner_thread); + r->nesting++; + qemu_mutex_unlock(&r->lock); +} + +void rfifolock_unlock(RFifoLock *r) +{ + qemu_mutex_lock(&r->lock); + assert(r->nesting > 0); + assert(qemu_thread_is_self(&r->owner_thread)); + if (--r->nesting == 0) { + r->head++; + qemu_cond_broadcast(&r->cond); + } + qemu_mutex_unlock(&r->lock); +} -- cgit v1.2.1 From 98563fc3ec44c1becce6f1720ad6b0a82ed101b4 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:04 +0100 Subject: aio: add aio_context_acquire() and aio_context_release() It can be useful to run an AioContext from a thread which normally does not "own" the AioContext. For example, request draining can be implemented by acquiring the AioContext and looping aio_poll() until all requests have been completed. The following pattern should work: /* Event loop thread */ while (running) { aio_context_acquire(ctx); aio_poll(ctx, true); aio_context_release(ctx); } /* Another thread */ aio_context_acquire(ctx); bdrv_read(bs, 0x1000, buf, 1); aio_context_release(ctx); This patch implements aio_context_acquire() and aio_context_release(). Note that existing aio_poll() callers do not need to worry about acquiring and releasing - it is only needed when multiple threads will call aio_poll() on the same AioContext. Signed-off-by: Stefan Hajnoczi --- async.c | 18 ++++++++++++++++ include/block/aio.h | 18 ++++++++++++++++ tests/test-aio.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+) diff --git a/async.c b/async.c index 5fb3fa61df..6930185e64 100644 --- a/async.c +++ b/async.c @@ -214,6 +214,7 @@ aio_ctx_finalize(GSource *source) thread_pool_free(ctx->thread_pool); aio_set_event_notifier(ctx, &ctx->notifier, NULL); event_notifier_cleanup(&ctx->notifier); + rfifolock_destroy(&ctx->lock); qemu_mutex_destroy(&ctx->bh_lock); g_array_free(ctx->pollfds, TRUE); timerlistgroup_deinit(&ctx->tlg); @@ -250,6 +251,12 @@ static void aio_timerlist_notify(void *opaque) aio_notify(opaque); } +static void aio_rfifolock_cb(void *opaque) +{ + /* Kick owner thread in case they are blocked in aio_poll() */ + aio_notify(opaque); +} + AioContext *aio_context_new(void) { AioContext *ctx; @@ -257,6 +264,7 @@ AioContext *aio_context_new(void) ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); ctx->thread_pool = NULL; qemu_mutex_init(&ctx->bh_lock); + rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); event_notifier_init(&ctx->notifier, false); aio_set_event_notifier(ctx, &ctx->notifier, (EventNotifierHandler *) @@ -275,3 +283,13 @@ void aio_context_unref(AioContext *ctx) { g_source_unref(&ctx->source); } + +void aio_context_acquire(AioContext *ctx) +{ + rfifolock_lock(&ctx->lock); +} + +void aio_context_release(AioContext *ctx) +{ + rfifolock_unlock(&ctx->lock); +} diff --git a/include/block/aio.h b/include/block/aio.h index 2efdf416cf..a92511bd3b 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -19,6 +19,7 @@ #include "qemu/queue.h" #include "qemu/event_notifier.h" #include "qemu/thread.h" +#include "qemu/rfifolock.h" #include "qemu/timer.h" typedef struct BlockDriverAIOCB BlockDriverAIOCB; @@ -47,6 +48,9 @@ typedef void IOHandler(void *opaque); struct AioContext { GSource source; + /* Protects all fields from multi-threaded access */ + RFifoLock lock; + /* The list of registered AIO handlers */ QLIST_HEAD(, AioHandler) aio_handlers; @@ -104,6 +108,20 @@ void aio_context_ref(AioContext *ctx); */ void aio_context_unref(AioContext *ctx); +/* Take ownership of the AioContext. If the AioContext will be shared between + * threads, a thread must have ownership when calling aio_poll(). + * + * Note that multiple threads calling aio_poll() means timers, BHs, and + * callbacks may be invoked from a different thread than they were registered + * from. Therefore, code must use AioContext acquire/release or use + * fine-grained synchronization to protect shared state if other threads will + * be accessing it simultaneously. + */ +void aio_context_acquire(AioContext *ctx); + +/* Relinquish ownership of the AioContext. */ +void aio_context_release(AioContext *ctx); + /** * aio_bh_new: Allocate a new bottom half structure. * diff --git a/tests/test-aio.c b/tests/test-aio.c index 592721ed3f..56f4288ca8 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -112,6 +112,64 @@ static void test_notify(void) g_assert(!aio_poll(ctx, false)); } +typedef struct { + QemuMutex start_lock; + bool thread_acquired; +} AcquireTestData; + +static void *test_acquire_thread(void *opaque) +{ + AcquireTestData *data = opaque; + + /* Wait for other thread to let us start */ + qemu_mutex_lock(&data->start_lock); + qemu_mutex_unlock(&data->start_lock); + + aio_context_acquire(ctx); + aio_context_release(ctx); + + data->thread_acquired = true; /* success, we got here */ + + return NULL; +} + +static void dummy_notifier_read(EventNotifier *unused) +{ + g_assert(false); /* should never be invoked */ +} + +static void test_acquire(void) +{ + QemuThread thread; + EventNotifier notifier; + AcquireTestData data; + + /* Dummy event notifier ensures aio_poll() will block */ + event_notifier_init(¬ifier, false); + aio_set_event_notifier(ctx, ¬ifier, dummy_notifier_read); + g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ + + qemu_mutex_init(&data.start_lock); + qemu_mutex_lock(&data.start_lock); + data.thread_acquired = false; + + qemu_thread_create(&thread, "test_acquire_thread", + test_acquire_thread, + &data, QEMU_THREAD_JOINABLE); + + /* Block in aio_poll(), let other thread kick us and acquire context */ + aio_context_acquire(ctx); + qemu_mutex_unlock(&data.start_lock); /* let the thread run */ + g_assert(!aio_poll(ctx, true)); + aio_context_release(ctx); + + qemu_thread_join(&thread); + aio_set_event_notifier(ctx, ¬ifier, NULL); + event_notifier_cleanup(¬ifier); + + g_assert(data.thread_acquired); +} + static void test_bh_schedule(void) { BHTestData data = { .n = 0 }; @@ -775,6 +833,7 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); g_test_add_func("/aio/notify", test_notify); + g_test_add_func("/aio/acquire", test_acquire); g_test_add_func("/aio/bh/schedule", test_bh_schedule); g_test_add_func("/aio/bh/schedule10", test_bh_schedule10); g_test_add_func("/aio/bh/cancel", test_bh_cancel); -- cgit v1.2.1 From be8d8537668c9be7a8dee6aed94b2b3f9fcd4a9f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:05 +0100 Subject: iothread: add I/O thread object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a stand-in for Michael Roth's QContext. I expect this to be replaced once QContext is completed. The IOThread object is an AioContext event loop thread. This patch adds the concept of multiple event loop threads, allowing users to define them. When SMP guests run on SMP hosts it makes sense to instantiate multiple IOThreads. This spreads event loop processing across multiple cores. Note that additional patches are required to actually bind a device to an IOThread. [Andreas Färber pointed out that the embedded parent object instance should be called "parent_obj" and have a newline afterwards. This patch has been changed to reflect this. -- Stefan] Signed-off-by: Stefan Hajnoczi --- Makefile.objs | 1 + include/sysemu/iothread.h | 30 ++++++++++++ iothread.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 include/sysemu/iothread.h create mode 100644 iothread.c diff --git a/Makefile.objs b/Makefile.objs index 5cd3d816ff..a6e0e2aacc 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -39,6 +39,7 @@ libcacard-y += libcacard/vcardt.o ifeq ($(CONFIG_SOFTMMU),y) common-obj-y = blockdev.o blockdev-nbd.o block/ +common-obj-y += iothread.o common-obj-y += net/ common-obj-y += qdev-monitor.o device-hotplug.o common-obj-$(CONFIG_WIN32) += os-win32.o diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h new file mode 100644 index 0000000000..a32214a647 --- /dev/null +++ b/include/sysemu/iothread.h @@ -0,0 +1,30 @@ +/* + * Event loop thread + * + * Copyright Red Hat Inc., 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef IOTHREAD_H +#define IOTHREAD_H + +#include "block/aio.h" + +#define TYPE_IOTHREAD "iothread" + +typedef struct IOThread IOThread; + +#define IOTHREAD(obj) \ + OBJECT_CHECK(IOThread, obj, TYPE_IOTHREAD) + +IOThread *iothread_find(const char *id); +char *iothread_get_id(IOThread *iothread); +AioContext *iothread_get_aio_context(IOThread *iothread); + +#endif /* IOTHREAD_H */ diff --git a/iothread.c b/iothread.c new file mode 100644 index 0000000000..f263ee2744 --- /dev/null +++ b/iothread.c @@ -0,0 +1,120 @@ +/* + * Event loop thread + * + * Copyright Red Hat Inc., 2013 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qom/object.h" +#include "qom/object_interfaces.h" +#include "qemu/module.h" +#include "qemu/thread.h" +#include "block/aio.h" +#include "sysemu/iothread.h" + +#define IOTHREADS_PATH "/objects" + +typedef ObjectClass IOThreadClass; +struct IOThread { + Object parent_obj; + + QemuThread thread; + AioContext *ctx; + bool stopping; +}; + +#define IOTHREAD_GET_CLASS(obj) \ + OBJECT_GET_CLASS(IOThreadClass, obj, TYPE_IOTHREAD) +#define IOTHREAD_CLASS(klass) \ + OBJECT_CLASS_CHECK(IOThreadClass, klass, TYPE_IOTHREAD) + +static void *iothread_run(void *opaque) +{ + IOThread *iothread = opaque; + + while (!iothread->stopping) { + aio_context_acquire(iothread->ctx); + while (!iothread->stopping && aio_poll(iothread->ctx, true)) { + /* Progress was made, keep going */ + } + aio_context_release(iothread->ctx); + } + return NULL; +} + +static void iothread_instance_finalize(Object *obj) +{ + IOThread *iothread = IOTHREAD(obj); + + iothread->stopping = true; + aio_notify(iothread->ctx); + qemu_thread_join(&iothread->thread); + aio_context_unref(iothread->ctx); +} + +static void iothread_complete(UserCreatable *obj, Error **errp) +{ + IOThread *iothread = IOTHREAD(obj); + + iothread->stopping = false; + iothread->ctx = aio_context_new(); + + /* This assumes we are called from a thread with useful CPU affinity for us + * to inherit. + */ + qemu_thread_create(&iothread->thread, "iothread", iothread_run, + iothread, QEMU_THREAD_JOINABLE); +} + +static void iothread_class_init(ObjectClass *klass, void *class_data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); + ucc->complete = iothread_complete; +} + +static const TypeInfo iothread_info = { + .name = TYPE_IOTHREAD, + .parent = TYPE_OBJECT, + .class_init = iothread_class_init, + .instance_size = sizeof(IOThread), + .instance_finalize = iothread_instance_finalize, + .interfaces = (InterfaceInfo[]) { + {TYPE_USER_CREATABLE}, + {} + }, +}; + +static void iothread_register_types(void) +{ + type_register_static(&iothread_info); +} + +type_init(iothread_register_types) + +IOThread *iothread_find(const char *id) +{ + Object *container = container_get(object_get_root(), IOTHREADS_PATH); + Object *child; + + child = object_property_get_link(container, id, NULL); + if (!child) { + return NULL; + } + return (IOThread *)object_dynamic_cast(child, TYPE_IOTHREAD); +} + +char *iothread_get_id(IOThread *iothread) +{ + return object_get_canonical_path_component(OBJECT(iothread)); +} + +AioContext *iothread_get_aio_context(IOThread *iothread) +{ + return iothread->ctx; +} -- cgit v1.2.1 From 7d1de46448d0d52183c397d76fbc86cb614ed21b Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 3 Mar 2014 11:30:06 +0100 Subject: qdev: make get_pointer() handle temporary strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_pointer()'s print() callback might return a heap allocated string, to avoid adding dedicated get_pointer_foo for this case convert current print() callbacks to return temporary heap allocated string and make get_pointer() free it. Reviewed-by: Andreas Färber Signed-off-by: Igor Mammedov Signed-off-by: Stefan Hajnoczi --- hw/core/qdev-properties-system.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 5f5957ed8e..878c3bc049 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -20,15 +20,16 @@ #include "sysemu/char.h" static void get_pointer(Object *obj, Visitor *v, Property *prop, - const char *(*print)(void *ptr), + char *(*print)(void *ptr), const char *name, Error **errp) { DeviceState *dev = DEVICE(obj); void **ptr = qdev_get_prop_ptr(dev, prop); char *p; - p = (char *) (*ptr ? print(*ptr) : ""); + p = *ptr ? print(*ptr) : g_strdup(""); visit_type_str(v, &p, name, errp); + g_free(p); } static void set_pointer(Object *obj, Visitor *v, Property *prop, @@ -91,9 +92,9 @@ static void release_drive(Object *obj, const char *name, void *opaque) } } -static const char *print_drive(void *ptr) +static char *print_drive(void *ptr) { - return bdrv_get_device_name(ptr); + return g_strdup(bdrv_get_device_name(ptr)); } static void get_drive(Object *obj, Visitor *v, void *opaque, @@ -145,11 +146,12 @@ static void release_chr(Object *obj, const char *name, void *opaque) } -static const char *print_chr(void *ptr) +static char *print_chr(void *ptr) { CharDriverState *chr = ptr; + const char *val = chr->label ? chr->label : ""; - return chr->label ? chr->label : ""; + return g_strdup(val); } static void get_chr(Object *obj, Visitor *v, void *opaque, @@ -224,11 +226,12 @@ err: return ret; } -static const char *print_netdev(void *ptr) +static char *print_netdev(void *ptr) { NetClientState *netdev = ptr; + const char *val = netdev->name ? netdev->name : ""; - return netdev->name ? netdev->name : ""; + return g_strdup(val); } static void get_netdev(Object *obj, Visitor *v, void *opaque, -- cgit v1.2.1 From 6e4a876b433f78f72724f45ae3f9e26596da1b4d Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:07 +0100 Subject: iothread: add "iothread" qdev property type Add a "iothread" qdev property type so devices can be hooked up to an IOThread from the comand-line: qemu -object iothread,id=iothread0 \ -device some-device,x-iothread=iothread0 Note that Paolo Bonzini has suggested using QOM links instead. This way the relationship between the objects is reflected in QOM. There are currently shortcomings of object_property_add_link() which prevent this use case. I will attempt to fix them and move to QOM links in a separate series. Signed-off-by: Stefan Hajnoczi --- hw/core/qdev-properties-system.c | 51 ++++++++++++++++++++++++++++++++++++++++ include/hw/qdev-properties.h | 3 +++ 2 files changed, 54 insertions(+) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 878c3bc049..de835612f0 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -18,6 +18,7 @@ #include "net/hub.h" #include "qapi/visitor.h" #include "sysemu/char.h" +#include "sysemu/iothread.h" static void get_pointer(Object *obj, Visitor *v, Property *prop, char *(*print)(void *ptr), @@ -385,6 +386,56 @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd) nd->instantiated = 1; } +/* --- iothread --- */ + +static char *print_iothread(void *ptr) +{ + return iothread_get_id(ptr); +} + +static int parse_iothread(DeviceState *dev, const char *str, void **ptr) +{ + IOThread *iothread; + + iothread = iothread_find(str); + if (!iothread) { + return -ENOENT; + } + object_ref(OBJECT(iothread)); + *ptr = iothread; + return 0; +} + +static void get_iothread(Object *obj, struct Visitor *v, void *opaque, + const char *name, Error **errp) +{ + get_pointer(obj, v, opaque, print_iothread, name, errp); +} + +static void set_iothread(Object *obj, struct Visitor *v, void *opaque, + const char *name, Error **errp) +{ + set_pointer(obj, v, opaque, parse_iothread, name, errp); +} + +static void release_iothread(Object *obj, const char *name, void *opaque) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + IOThread **ptr = qdev_get_prop_ptr(dev, prop); + + if (*ptr) { + object_unref(OBJECT(*ptr)); + } +} + +PropertyInfo qdev_prop_iothread = { + .name = "iothread", + .get = get_iothread, + .set = set_iothread, + .release = release_iothread, +}; + static int qdev_add_one_global(QemuOpts *opts, void *opaque) { GlobalProperty *g; diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 0c0babfa6a..3c000eea75 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -22,6 +22,7 @@ extern PropertyInfo qdev_prop_bios_chs_trans; extern PropertyInfo qdev_prop_drive; extern PropertyInfo qdev_prop_netdev; extern PropertyInfo qdev_prop_vlan; +extern PropertyInfo qdev_prop_iothread; extern PropertyInfo qdev_prop_pci_devfn; extern PropertyInfo qdev_prop_blocksize; extern PropertyInfo qdev_prop_pci_host_devaddr; @@ -142,6 +143,8 @@ extern PropertyInfo qdev_prop_arraylen; DEFINE_PROP(_n, _s, _f, qdev_prop_vlan, NICPeers) #define DEFINE_PROP_DRIVE(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockDriverState *) +#define DEFINE_PROP_IOTHREAD(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_iothread, IOThread *) #define DEFINE_PROP_MACADDR(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr) #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ -- cgit v1.2.1 From 48ff269272f18d2b8fa53cb08365df417588f585 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 3 Mar 2014 11:30:08 +0100 Subject: dataplane: replace internal thread with IOThread Today virtio-blk dataplane uses a 1:1 device-per-thread model. Now that IOThreads have been introduced we can generalize this to N:M devices per threads. This patch drops thread code from dataplane in favor of running inside an IOThread AioContext. As a bonus we solve the case where a guest keeps submitting I/O requests while dataplane is trying to stop. Previously the dataplane thread would continue to process requests until the request gave it a break. Now we can shut down in bounded time thanks to aio_context_acquire/release. Signed-off-by: Stefan Hajnoczi --- hw/block/dataplane/virtio-blk.c | 96 +++++++++++++++++++++++------------------ include/hw/virtio/virtio-blk.h | 8 +++- 2 files changed, 60 insertions(+), 44 deletions(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index d1c7ad4574..a5afc217c0 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -23,6 +23,7 @@ #include "virtio-blk.h" #include "block/aio.h" #include "hw/virtio/virtio-bus.h" +#include "monitor/monitor.h" /* for object_add() */ enum { SEG_MAX = 126, /* maximum number of I/O segments */ @@ -44,8 +45,6 @@ struct VirtIOBlockDataPlane { bool started; bool starting; bool stopping; - QEMUBH *start_bh; - QemuThread thread; VirtIOBlkConf *blk; int fd; /* image file descriptor */ @@ -59,12 +58,14 @@ struct VirtIOBlockDataPlane { * (because you don't own the file descriptor or handle; you just * use it). */ + IOThread *iothread; + bool internal_iothread; AioContext *ctx; EventNotifier io_notifier; /* Linux AIO completion */ EventNotifier host_notifier; /* doorbell */ IOQueue ioqueue; /* Linux AIO queue (should really be per - dataplane thread) */ + IOThread) */ VirtIOBlockRequest requests[REQ_MAX]; /* pool of requests, managed by the queue */ @@ -342,26 +343,7 @@ static void handle_io(EventNotifier *e) } } -static void *data_plane_thread(void *opaque) -{ - VirtIOBlockDataPlane *s = opaque; - - while (!s->stopping || s->num_reqs > 0) { - aio_poll(s->ctx, true); - } - return NULL; -} - -static void start_data_plane_bh(void *opaque) -{ - VirtIOBlockDataPlane *s = opaque; - - qemu_bh_delete(s->start_bh); - s->start_bh = NULL; - qemu_thread_create(&s->thread, "data_plane", data_plane_thread, - s, QEMU_THREAD_JOINABLE); -} - +/* Context: QEMU global mutex held */ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, VirtIOBlockDataPlane **dataplane, Error **errp) @@ -408,12 +390,33 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, s->fd = fd; s->blk = blk; + if (blk->iothread) { + s->internal_iothread = false; + s->iothread = blk->iothread; + object_ref(OBJECT(s->iothread)); + } else { + /* Create per-device IOThread if none specified */ + Error *local_err = NULL; + + s->internal_iothread = true; + object_add(TYPE_IOTHREAD, vdev->name, NULL, NULL, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + g_free(s); + return; + } + s->iothread = iothread_find(vdev->name); + assert(s->iothread); + } + s->ctx = iothread_get_aio_context(s->iothread); + /* Prevent block operations that conflict with data plane thread */ bdrv_set_in_use(blk->conf.bs, 1); *dataplane = s; } +/* Context: QEMU global mutex held */ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) { if (!s) { @@ -422,9 +425,14 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) virtio_blk_data_plane_stop(s); bdrv_set_in_use(s->blk->conf.bs, 0); + object_unref(OBJECT(s->iothread)); + if (s->internal_iothread) { + object_unparent(OBJECT(s->iothread)); + } g_free(s); } +/* Context: QEMU global mutex held */ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); @@ -448,8 +456,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) return; } - s->ctx = aio_context_new(); - /* Set up guest notifier (irq) */ if (k->set_guest_notifiers(qbus->parent, 1, true) != 0) { fprintf(stderr, "virtio-blk failed to set guest notifier, " @@ -464,7 +470,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) exit(1); } s->host_notifier = *virtio_queue_get_host_notifier(vq); - aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify); /* Set up ioqueue */ ioq_init(&s->ioqueue, s->fd, REQ_MAX); @@ -472,7 +477,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb); } s->io_notifier = *ioq_get_notifier(&s->ioqueue); - aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io); s->starting = false; s->started = true; @@ -481,11 +485,14 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) /* Kick right away to begin processing requests already in vring */ event_notifier_set(virtio_queue_get_host_notifier(vq)); - /* Spawn thread in BH so it inherits iothread cpusets */ - s->start_bh = qemu_bh_new(start_data_plane_bh, s); - qemu_bh_schedule(s->start_bh); + /* Get this show started by hooking up our callbacks */ + aio_context_acquire(s->ctx); + aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify); + aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io); + aio_context_release(s->ctx); } +/* Context: QEMU global mutex held */ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); @@ -496,27 +503,32 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) s->stopping = true; trace_virtio_blk_data_plane_stop(s); - /* Stop thread or cancel pending thread creation BH */ - if (s->start_bh) { - qemu_bh_delete(s->start_bh); - s->start_bh = NULL; - } else { - aio_notify(s->ctx); - qemu_thread_join(&s->thread); + aio_context_acquire(s->ctx); + + /* Stop notifications for new requests from guest */ + aio_set_event_notifier(s->ctx, &s->host_notifier, NULL); + + /* Complete pending requests */ + while (s->num_reqs > 0) { + aio_poll(s->ctx, true); } + /* Stop ioq callbacks (there are no pending requests left) */ aio_set_event_notifier(s->ctx, &s->io_notifier, NULL); - ioq_cleanup(&s->ioqueue); - aio_set_event_notifier(s->ctx, &s->host_notifier, NULL); - k->set_host_notifier(qbus->parent, 0, false); + aio_context_release(s->ctx); - aio_context_unref(s->ctx); + /* Sync vring state back to virtqueue so that non-dataplane request + * processing can continue when we disable the host notifier below. + */ + vring_teardown(&s->vring, s->vdev, 0); + + ioq_cleanup(&s->ioqueue); + k->set_host_notifier(qbus->parent, 0, false); /* Clean up guest notifier (irq) */ k->set_guest_notifiers(qbus->parent, 1, false); - vring_teardown(&s->vring, s->vdev, 0); s->started = false; s->stopping = false; } diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 41885da1a0..e4c41ff2ef 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -16,6 +16,7 @@ #include "hw/virtio/virtio.h" #include "hw/block/block.h" +#include "sysemu/iothread.h" #define TYPE_VIRTIO_BLK "virtio-blk-device" #define VIRTIO_BLK(obj) \ @@ -106,6 +107,7 @@ struct virtio_scsi_inhdr struct VirtIOBlkConf { BlockConf conf; + IOThread *iothread; char *serial; uint32_t scsi; uint32_t config_wce; @@ -140,13 +142,15 @@ typedef struct VirtIOBlock { DEFINE_BLOCK_CHS_PROPERTIES(_state, _field.conf), \ DEFINE_PROP_STRING("serial", _state, _field.serial), \ DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true), \ - DEFINE_PROP_BIT("scsi", _state, _field.scsi, 0, true) + DEFINE_PROP_BIT("scsi", _state, _field.scsi, 0, true), \ + DEFINE_PROP_IOTHREAD("x-iothread", _state, _field.iothread) #else #define DEFINE_VIRTIO_BLK_PROPERTIES(_state, _field) \ DEFINE_BLOCK_PROPERTIES(_state, _field.conf), \ DEFINE_BLOCK_CHS_PROPERTIES(_state, _field.conf), \ DEFINE_PROP_STRING("serial", _state, _field.serial), \ - DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true) + DEFINE_PROP_BIT("config-wce", _state, _field.config_wce, 0, true), \ + DEFINE_PROP_IOTHREAD("x-iothread", _state, _field.iothread) #endif /* __linux__ */ void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk); -- cgit v1.2.1 From 88eb7c29e4320597d2f246adf731f0aac97cfbcc Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 27 Feb 2014 11:48:41 +0100 Subject: iothread: stash thread ID away Keep the thread ID around so we can report it via QMP. There's only one problem: qemu_get_thread_id() (gettid() wrapper on Linux) must be called from the thread itself. There is no way to get the thread ID outside the thread. This patch uses a condvar to wait for iothread_run() to populate the thread_id inside the thread. Signed-off-by: Stefan Hajnoczi --- iothread.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/iothread.c b/iothread.c index f263ee2744..f90bbc339e 100644 --- a/iothread.c +++ b/iothread.c @@ -26,7 +26,10 @@ struct IOThread { QemuThread thread; AioContext *ctx; + QemuMutex init_done_lock; + QemuCond init_done_cond; /* is thread initialization done? */ bool stopping; + int thread_id; }; #define IOTHREAD_GET_CLASS(obj) \ @@ -38,6 +41,11 @@ static void *iothread_run(void *opaque) { IOThread *iothread = opaque; + qemu_mutex_lock(&iothread->init_done_lock); + iothread->thread_id = qemu_get_thread_id(); + qemu_cond_signal(&iothread->init_done_cond); + qemu_mutex_unlock(&iothread->init_done_lock); + while (!iothread->stopping) { aio_context_acquire(iothread->ctx); while (!iothread->stopping && aio_poll(iothread->ctx, true)) { @@ -55,6 +63,8 @@ static void iothread_instance_finalize(Object *obj) iothread->stopping = true; aio_notify(iothread->ctx); qemu_thread_join(&iothread->thread); + qemu_cond_destroy(&iothread->init_done_cond); + qemu_mutex_destroy(&iothread->init_done_lock); aio_context_unref(iothread->ctx); } @@ -64,12 +74,24 @@ static void iothread_complete(UserCreatable *obj, Error **errp) iothread->stopping = false; iothread->ctx = aio_context_new(); + iothread->thread_id = -1; + + qemu_mutex_init(&iothread->init_done_lock); + qemu_cond_init(&iothread->init_done_cond); /* This assumes we are called from a thread with useful CPU affinity for us * to inherit. */ qemu_thread_create(&iothread->thread, "iothread", iothread_run, iothread, QEMU_THREAD_JOINABLE); + + /* Wait for initialization to complete */ + qemu_mutex_lock(&iothread->init_done_lock); + while (iothread->thread_id == -1) { + qemu_cond_wait(&iothread->init_done_cond, + &iothread->init_done_lock); + } + qemu_mutex_unlock(&iothread->init_done_lock); } static void iothread_class_init(ObjectClass *klass, void *class_data) -- cgit v1.2.1 From dc3dd0d2bed6edf3b60041f31200c674348168e9 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 27 Feb 2014 11:48:42 +0100 Subject: qmp: add query-iothreads command The "query-iothreads" command returns a list of information about iothreads. See the patch for API documentation. Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- iothread.c | 36 ++++++++++++++++++++++++++++++++++++ qapi-schema.json | 29 +++++++++++++++++++++++++++++ qmp-commands.hx | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) diff --git a/iothread.c b/iothread.c index f90bbc339e..cb5986b6c9 100644 --- a/iothread.c +++ b/iothread.c @@ -17,6 +17,7 @@ #include "qemu/thread.h" #include "block/aio.h" #include "sysemu/iothread.h" +#include "qmp-commands.h" #define IOTHREADS_PATH "/objects" @@ -140,3 +141,38 @@ AioContext *iothread_get_aio_context(IOThread *iothread) { return iothread->ctx; } + +static int query_one_iothread(Object *object, void *opaque) +{ + IOThreadInfoList ***prev = opaque; + IOThreadInfoList *elem; + IOThreadInfo *info; + IOThread *iothread; + + iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); + if (!iothread) { + return 0; + } + + info = g_new0(IOThreadInfo, 1); + info->id = iothread_get_id(iothread); + info->thread_id = iothread->thread_id; + + elem = g_new0(IOThreadInfoList, 1); + elem->value = info; + elem->next = NULL; + + **prev = elem; + *prev = &elem->next; + return 0; +} + +IOThreadInfoList *qmp_query_iothreads(Error **errp) +{ + IOThreadInfoList *head = NULL; + IOThreadInfoList **prev = &head; + Object *container = container_get(object_get_root(), IOTHREADS_PATH); + + object_child_foreach(container, query_one_iothread, &prev); + return head; +} diff --git a/qapi-schema.json b/qapi-schema.json index f4f9439fe6..b68cd44ebd 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -882,6 +882,35 @@ ## { 'command': 'query-cpus', 'returns': ['CpuInfo'] } +## +# @IOThreadInfo: +# +# Information about an iothread +# +# @id: the identifier of the iothread +# +# @thread-id: ID of the underlying host thread +# +# Since: 2.0 +## +{ 'type': 'IOThreadInfo', + 'data': {'id': 'str', 'thread-id': 'int'} } + +## +# @query-iothreads: +# +# Returns a list of information about each iothread. +# +# Note this list excludes the QEMU main loop thread, which is not declared +# using the -object iothread command-line option. It is always the main thread +# of the process. +# +# Returns: a list of @IOThreadInfo for each iothread +# +# Since: 2.0 +## +{ 'command': 'query-iothreads', 'returns': ['IOThreadInfo'] } + ## # @BlockDeviceInfo: # diff --git a/qmp-commands.hx b/qmp-commands.hx index d982cd62b9..a22621fd44 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2326,6 +2326,45 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_query_cpus, }, +SQMP +query-iothreads +--------------- + +Returns a list of information about each iothread. + +Note this list excludes the QEMU main loop thread, which is not declared +using the -object iothread command-line option. It is always the main thread +of the process. + +Return a json-array. Each iothread is represented by a json-object, which contains: + +- "id": name of iothread (json-str) +- "thread-id": ID of the underlying host thread (json-int) + +Example: + +-> { "execute": "query-iothreads" } +<- { + "return":[ + { + "id":"iothread0", + "thread-id":3134 + }, + { + "id":"iothread1", + "thread-id":3135 + } + ] + } + +EQMP + + { + .name = "query-iothreads", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_iothreads, + }, + SQMP query-pci --------- -- cgit v1.2.1 From d475e5acd2f4679d6ce458369ee658dbd60227e9 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 11 Mar 2014 17:42:41 +0100 Subject: qcow2: Keep option in qcow2_invalidate_cache() Instead of manually building a list of all options from BDRVQcowState values just reuse the options that were used to open the image. qcow2_open() won't fully use all of the options in the QDict, but that's okay. This fixes all of the driver-specific options in qcow2, except for lazy-refcounts, which was special cased before. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index b5b1e8c1b9..ec23cc47e4 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1178,11 +1178,8 @@ static void qcow2_invalidate_cache(BlockDriverState *bs) bdrv_invalidate_cache(bs->file); - options = qdict_new(); - qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS, - qbool_from_int(s->use_lazy_refcounts)); - memset(s, 0, sizeof(BDRVQcowState)); + options = qdict_clone_shallow(bs->options); qcow2_open(bs, options, flags, NULL); QDECREF(options); -- cgit v1.2.1 From 27eb6c097c132bf9fc49d73554b0160293b630cd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 11 Mar 2014 15:15:03 +0100 Subject: qcow2: Don't write with BDRV_O_INCOMING qcow2_open() causes writes when repairing an image with the dirty flag set and when clearing autoclear flags. It shouldn't do this when another qemu instance is still actively working on this image file. One effect of the bug is that images may have a cleared dirty flag while the migration source host still has it in use with lazy refcounts enabled, so refcounts are not accurate and the dirty flag must remain set. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index ec23cc47e4..945c9d6334 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -644,7 +644,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } /* Clear unknown autoclear feature bits */ - if (!bs->read_only && s->autoclear_features != 0) { + if (!bs->read_only && !(flags & BDRV_O_INCOMING) && s->autoclear_features) { s->autoclear_features = 0; ret = qcow2_update_header(bs); if (ret < 0) { @@ -657,7 +657,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, qemu_co_mutex_init(&s->lock); /* Repair image if dirty */ - if (!(flags & BDRV_O_CHECK) && !bs->read_only && + if (!(flags & (BDRV_O_CHECK | BDRV_O_INCOMING)) && !bs->read_only && (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { BdrvCheckResult result = {0}; @@ -1137,10 +1137,12 @@ static void qcow2_close(BlockDriverState *bs) /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; - qcow2_cache_flush(bs, s->l2_table_cache); - qcow2_cache_flush(bs, s->refcount_block_cache); + if (!(bs->open_flags & BDRV_O_INCOMING)) { + qcow2_cache_flush(bs, s->l2_table_cache); + qcow2_cache_flush(bs, s->refcount_block_cache); - qcow2_mark_clean(bs); + qcow2_mark_clean(bs); + } qcow2_cache_destroy(bs, s->l2_table_cache); qcow2_cache_destroy(bs, s->refcount_block_cache); -- cgit v1.2.1 From 9562f69cfdc55c7c1625bb88df1637fed182e68b Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 15 Feb 2014 18:03:21 +0100 Subject: block: Unlink temporary file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the image file cannot be opened and was created as a temporary file, it should be deleted; thus, in this case, we should jump to the "unlink_and_fail" label and not just to "fail". Reported-by: Benoît Canet Signed-off-by: Max Reitz Acked-by: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index cf5645a14a..fae50c95b5 100644 --- a/block.c +++ b/block.c @@ -1321,7 +1321,7 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, bdrv_open_flags(bs, flags | BDRV_O_UNMAP) | BDRV_O_PROTOCOL, true, &local_err); if (ret < 0) { - goto fail; + goto unlink_and_fail; } /* Find the right image format driver */ -- cgit v1.2.1 From f988388025c230ef3293cc0c3820cb40e03adfbf Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 5 Mar 2014 22:23:00 +0100 Subject: qemu-io: Fix warnings from static code analysis Smatch complains about several global symbols which should be local. Add the missing 'static' attributes and move the 'extern' declaration of variable qemuio_misalign to qemu-io.h. This variable also changes the type from 'int' to 'bool' which better fits documents its use. Signed-off-by: Stefan Weil Acked-by: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- include/qemu-io.h | 2 ++ qemu-io-cmds.c | 2 +- qemu-io.c | 7 +++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/qemu-io.h b/include/qemu-io.h index 7e7c07c09b..5d6006f73b 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -38,6 +38,8 @@ typedef struct cmdinfo { helpfunc_t help; } cmdinfo_t; +extern bool qemuio_misalign; + bool qemuio_command(BlockDriverState *bs, const char *cmd); void qemuio_add_command(const cmdinfo_t *ci); diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index f1de24c91c..fb1db53c6b 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -16,7 +16,7 @@ #define CMD_NOFILE_OK 0x01 -int qemuio_misalign; +bool qemuio_misalign; static cmdinfo_t *cmdtab; static int ncmds; diff --git a/qemu-io.c b/qemu-io.c index fc3860884c..2d119c28a6 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -24,10 +24,9 @@ #define CMD_NOFILE_OK 0x01 -char *progname; +static char *progname; -BlockDriverState *qemuio_bs; -extern int qemuio_misalign; +static BlockDriverState *qemuio_bs; /* qemu-io commands passed using -c */ static int ncmdline; @@ -408,7 +407,7 @@ int main(int argc, char **argv) readonly = 1; break; case 'm': - qemuio_misalign = 1; + qemuio_misalign = true; break; case 'g': growable = 1; -- cgit v1.2.1 From 7af803d4f80efb56c250f16409501994e60adf60 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 8 Mar 2014 00:39:41 +0100 Subject: block/raw-posix: bdrv_parse_filename() for hdev The "host_device" protocol driver should strip the "host_device:" prefix from filenames if present. Signed-off-by: Max Reitz Reviewed-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- block/raw-posix.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/raw-posix.c b/block/raw-posix.c index e6b4c1fe02..ab32ff99da 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1561,6 +1561,15 @@ static int check_hdev_writable(BDRVRawState *s) return 0; } +static void hdev_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + /* The prefix is optional, just as for "file". */ + strstart(filename, "host_device:", &filename); + + qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename))); +} + static int hdev_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -1805,6 +1814,7 @@ static BlockDriver bdrv_host_device = { .instance_size = sizeof(BDRVRawState), .bdrv_needs_filename = true, .bdrv_probe_device = hdev_probe_device, + .bdrv_parse_filename = hdev_parse_filename, .bdrv_file_open = hdev_open, .bdrv_close = raw_close, .bdrv_reopen_prepare = raw_reopen_prepare, -- cgit v1.2.1 From d3f49845831d75fa2e2a5dab04cb78fb12f8a2bc Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 8 Mar 2014 00:39:42 +0100 Subject: block/raw-posix: bdrv_parse_filename() for floppy The "host_floppy" protocol driver should strip the "host_floppy:" prefix from filenames if present. Signed-off-by: Max Reitz Reviewed-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- block/raw-posix.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/raw-posix.c b/block/raw-posix.c index ab32ff99da..4b8c1839ce 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1844,6 +1844,15 @@ static BlockDriver bdrv_host_device = { }; #ifdef __linux__ +static void floppy_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + /* The prefix is optional, just as for "file". */ + strstart(filename, "host_floppy:", &filename); + + qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename))); +} + static int floppy_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -1949,6 +1958,7 @@ static BlockDriver bdrv_host_floppy = { .instance_size = sizeof(BDRVRawState), .bdrv_needs_filename = true, .bdrv_probe_device = floppy_probe_device, + .bdrv_parse_filename = floppy_parse_filename, .bdrv_file_open = floppy_open, .bdrv_close = raw_close, .bdrv_reopen_prepare = raw_reopen_prepare, -- cgit v1.2.1 From 18fa1c42a314a3e18f097b28ec5189a144d36f99 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 8 Mar 2014 00:39:43 +0100 Subject: block/raw-posix: bdrv_parse_filename() for cdrom The "host_cdrom" protocol drivers should strip the "host_cdrom:" prefix from filenames if present. Signed-off-by: Max Reitz Reviewed-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- block/raw-posix.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/block/raw-posix.c b/block/raw-posix.c index 4b8c1839ce..697cd2ee68 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1983,7 +1983,20 @@ static BlockDriver bdrv_host_floppy = { .bdrv_media_changed = floppy_media_changed, .bdrv_eject = floppy_eject, }; +#endif +#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +static void cdrom_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + /* The prefix is optional, just as for "file". */ + strstart(filename, "host_cdrom:", &filename); + + qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename))); +} +#endif + +#ifdef __linux__ static int cdrom_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -2070,6 +2083,7 @@ static BlockDriver bdrv_host_cdrom = { .instance_size = sizeof(BDRVRawState), .bdrv_needs_filename = true, .bdrv_probe_device = cdrom_probe_device, + .bdrv_parse_filename = cdrom_parse_filename, .bdrv_file_open = cdrom_open, .bdrv_close = raw_close, .bdrv_reopen_prepare = raw_reopen_prepare, @@ -2200,6 +2214,7 @@ static BlockDriver bdrv_host_cdrom = { .instance_size = sizeof(BDRVRawState), .bdrv_needs_filename = true, .bdrv_probe_device = cdrom_probe_device, + .bdrv_parse_filename = cdrom_parse_filename, .bdrv_file_open = cdrom_open, .bdrv_close = raw_close, .bdrv_reopen_prepare = raw_reopen_prepare, -- cgit v1.2.1 From cc28c6aa46334be01f3e65a74601ae38f94bd602 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 8 Mar 2014 00:39:44 +0100 Subject: block/raw-posix: Strip protocol prefix on creation The hdev_create() implementation in block/raw-posix.c is used by the "host_device", "host_cdrom" and "host_floppy" protocol block drivers together. Thus, any of the associated prefixes may occur and exactly one should should be stripped, if it does (thus, "host_device:host_cdrom:/dev/cdrom" is not shortened to "/dev/cdrom"). Signed-off-by: Max Reitz Reviewed-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- block/raw-posix.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/block/raw-posix.c b/block/raw-posix.c index 697cd2ee68..1688e16c64 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1776,6 +1776,18 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options, int ret = 0; struct stat stat_buf; int64_t total_size = 0; + bool has_prefix; + + /* This function is used by all three protocol block drivers and therefore + * any of these three prefixes may be given. + * The return value has to be stored somewhere, otherwise this is an error + * due to -Werror=unused-value. */ + has_prefix = + strstart(filename, "host_device:", &filename) || + strstart(filename, "host_cdrom:" , &filename) || + strstart(filename, "host_floppy:", &filename); + + (void)has_prefix; /* Read out options */ while (options && options->name) { -- cgit v1.2.1 From 57ed25b1b08a43f29326df064d43b6420a23b5ba Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 8 Mar 2014 00:39:45 +0100 Subject: block/raw-win32: bdrv_parse_filename() for hdev The "host_device" protocol driver should strip the "host_device:" prefix from filenames if present. Signed-off-by: Max Reitz Reviewed-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- block/raw-win32.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/raw-win32.c b/block/raw-win32.c index 99547488e4..48cb2c2258 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -593,6 +593,15 @@ static int hdev_probe_device(const char *filename) return 0; } +static void hdev_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + /* The prefix is optional, just as for "file". */ + strstart(filename, "host_device:", &filename); + + qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename))); +} + static int hdev_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -663,6 +672,7 @@ static BlockDriver bdrv_host_device = { .protocol_name = "host_device", .instance_size = sizeof(BDRVRawState), .bdrv_needs_filename = true, + .bdrv_parse_filename = hdev_parse_filename, .bdrv_probe_device = hdev_probe_device, .bdrv_file_open = hdev_open, .bdrv_close = raw_close, -- cgit v1.2.1