From dad3946ec64164b1855e09991be0dfc4358298b4 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 10 Aug 2017 20:57:48 -0500 Subject: nbd: Fix trace message for disconnect NBD_CMD_DISC is a disconnect request, not a data discard request. Signed-off-by: Eric Blake Message-Id: <20170811015749.20365-1-eblake@redhat.com> Reviewed-by: Stefan Hajnoczi --- nbd/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbd/common.c b/nbd/common.c index a2f28f2eec..e288d1b972 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -182,7 +182,7 @@ const char *nbd_cmd_lookup(uint16_t cmd) case NBD_CMD_WRITE: return "write"; case NBD_CMD_DISC: - return "discard"; + return "disconnect"; case NBD_CMD_FLUSH: return "flush"; case NBD_CMD_TRIM: -- cgit v1.2.1 From cbaddb25b20060fa0b0a2a46d5ccca65cffd1a6f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 15 Aug 2017 14:05:02 +0100 Subject: qemu-iotests: step clock after each test iteration The 093 throttling test submits twice as many requests as the throttle limit in order to ensure that we reach the limit. The remaining requests are left in-flight at the end of each test iteration. Commit 452589b6b47e8dc6353df257fc803dfc1383bed8 ("vl.c/exit: pause cpus before closing block devices") exposed a hang in 093. This happens because requests are still in flight when QEMU terminates but QEMU_CLOCK_VIRTUAL time is frozen. bdrv_drain_all() hangs forever since throttled requests cannot complete. Step the clock at the end of each test iteration so in-flight requests actually finish. This solves the hang and is cleaner than leaving tests in-flight. Note that this could also be "fixed" by disabling throttling when drives are closed in QEMU. That approach has two issues: 1. We must drain requests before disabling throttling, so the hang cannot be easily avoided! 2. Any time QEMU disables throttling internally there is a chance that malicious users can abuse the code path to bypass throttling limits. 
Therefore it makes more sense to fix the test case than to modify QEMU. Signed-off-by: Stefan Hajnoczi Message-Id: <20170815130502.8736-1-stefanha@redhat.com> Signed-off-by: Eric Blake --- tests/qemu-iotests/093 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/qemu-iotests/093 b/tests/qemu-iotests/093 index 2ed393a548..ef3997206b 100755 --- a/tests/qemu-iotests/093 +++ b/tests/qemu-iotests/093 @@ -133,6 +133,10 @@ class ThrottleTestCase(iotests.QMPTestCase): self.assertTrue(check_limit(params['iops_rd'], rd_iops)) self.assertTrue(check_limit(params['iops_wr'], wr_iops)) + # Allow remaining requests to finish. We submitted twice as many to + # ensure the throttle limit is reached. + self.vm.qtest("clock_step %d" % ns) + # Connect N drives to a VM and test I/O in all of them def test_all(self): params = {"bps": 4096, -- cgit v1.2.1 From 80adf54ecc3b456828f3d6fe71eeda5572369bb2 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 15 Aug 2017 21:07:37 +0800 Subject: stubs: Add vm state change handler stubs They will be used by BlockBackend code in block-obj-y, which doesn't always get linked with common-obj-y. Add stubs to keep ld happy. 
Signed-off-by: Fam Zheng Message-Id: <20170815130740.31229-2-famz@redhat.com> Signed-off-by: Eric Blake --- stubs/Makefile.objs | 1 + stubs/change-state-handler.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 stubs/change-state-handler.c diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index f5b47bfd74..e69c217aff 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -19,6 +19,7 @@ stub-obj-y += is-daemonized.o stub-obj-$(CONFIG_LINUX_AIO) += linux-aio.o stub-obj-y += machine-init-done.o stub-obj-y += migr-blocker.o +stub-obj-y += change-state-handler.o stub-obj-y += monitor.o stub-obj-y += notify-event.o stub-obj-y += qtest.o diff --git a/stubs/change-state-handler.c b/stubs/change-state-handler.c new file mode 100644 index 0000000000..01b1c6986d --- /dev/null +++ b/stubs/change-state-handler.c @@ -0,0 +1,14 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "sysemu/sysemu.h" + +VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, + void *opaque) +{ + return NULL; +} + +void qemu_del_vm_change_state_handler(VMChangeStateEntry *e) +{ + /* Nothing to do. */ +} -- cgit v1.2.1 From 3dff24f2dffc5f3aa46dc014122012848bd7959d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 15 Aug 2017 21:07:38 +0800 Subject: nbd: Fix order of bdrv_set_perm and bdrv_invalidate_cache The "inactive" state of BDS affects whether the permissions can be granted, we must call bdrv_invalidate_cache before bdrv_set_perm to support "-incoming defer" case. 
Reported-by: Christian Ehrhardt Signed-off-by: Fam Zheng Message-Id: <20170815130740.31229-3-famz@redhat.com> Signed-off-by: Eric Blake --- nbd/server.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 82a78bf439..993ade30bb 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1045,11 +1045,22 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, bool writethrough, BlockBackend *on_eject_blk, Error **errp) { + AioContext *ctx; BlockBackend *blk; NBDExport *exp = g_malloc0(sizeof(NBDExport)); uint64_t perm; int ret; + /* + * NBD exports are used for non-shared storage migration. Make sure + * that BDRV_O_INACTIVE is cleared and the image is ready for write + * access since the export could be available before migration handover. + */ + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + bdrv_invalidate_cache(bs, NULL); + aio_context_release(ctx); + /* Don't allow resize while the NBD server is running, otherwise we don't * care what happens with the node. */ perm = BLK_PERM_CONSISTENT_READ; @@ -1087,15 +1098,6 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, exp->eject_notifier.notify = nbd_eject_notifier; blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier); } - - /* - * NBD exports are used for non-shared storage migration. Make sure - * that BDRV_O_INACTIVE is cleared and the image is ready for write - * access since the export could be available before migration handover. - */ - aio_context_acquire(exp->ctx); - blk_invalidate_cache(blk, NULL); - aio_context_release(exp->ctx); return exp; fail: -- cgit v1.2.1 From 5f7772c4d0cf32f4e779fcd5a69ae4dae24aeebf Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 15 Aug 2017 21:07:39 +0800 Subject: block-backend: Defer shared_perm tightening migration completion As in the case of nbd_export_new(), bdrv_invalidate_cache() can be called when migration is still in progress. 
In this case we are not ready to tighten the shared permissions fenced by blk->disable_perm. Defer to a VM state change handler. Signed-off-by: Fam Zheng Message-Id: <20170815130740.31229-4-famz@redhat.com> Signed-off-by: Eric Blake --- block/block-backend.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 968438c149..e9798e897d 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -20,6 +20,7 @@ #include "qapi-event.h" #include "qemu/id.h" #include "trace.h" +#include "migration/misc.h" /* Number of coroutines to reserve per attached device model */ #define COROUTINE_POOL_RESERVATION 64 @@ -68,6 +69,7 @@ struct BlockBackend { NotifierList remove_bs_notifiers, insert_bs_notifiers; int quiesce_counter; + VMChangeStateEntry *vmsh; }; typedef struct BlockBackendAIOCB { @@ -129,6 +131,23 @@ static const char *blk_root_get_name(BdrvChild *child) return blk_name(child->opaque); } +static void blk_vm_state_changed(void *opaque, int running, RunState state) +{ + Error *local_err = NULL; + BlockBackend *blk = opaque; + + if (state == RUN_STATE_INMIGRATE) { + return; + } + + qemu_del_vm_change_state_handler(blk->vmsh); + blk->vmsh = NULL; + blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); + if (local_err) { + error_report_err(local_err); + } +} + /* * Notifies the user of the BlockBackend that migration has completed. qdev * devices can tighten their permissions in response (specifically revoke @@ -147,6 +166,24 @@ static void blk_root_activate(BdrvChild *child, Error **errp) blk->disable_perm = false; + blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk->disable_perm = true; + return; + } + + if (runstate_check(RUN_STATE_INMIGRATE)) { + /* Activation can happen when migration process is still active, for + * example when nbd_server_add is called during non-shared storage + * migration. 
Defer the shared_perm update to migration completion. */ + if (!blk->vmsh) { + blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed, + blk); + } + return; + } + blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -291,6 +328,10 @@ static void blk_delete(BlockBackend *blk) if (blk->root) { blk_remove_bs(blk); } + if (blk->vmsh) { + qemu_del_vm_change_state_handler(blk->vmsh); + blk->vmsh = NULL; + } assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers)); assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers)); QTAILQ_REMOVE(&block_backends, blk, link); -- cgit v1.2.1 From dd7fdaad654d7484b66410b4b002b14644396587 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 15 Aug 2017 21:07:40 +0800 Subject: iotests: Add non-shared storage migration case 192 Signed-off-by: Fam Zheng Message-Id: <20170815130740.31229-5-famz@redhat.com> Tested-by: Eric Blake Signed-off-by: Eric Blake --- tests/qemu-iotests/192 | 63 ++++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/192.out | 7 ++++++ tests/qemu-iotests/group | 1 + 3 files changed, 71 insertions(+) create mode 100755 tests/qemu-iotests/192 create mode 100644 tests/qemu-iotests/192.out diff --git a/tests/qemu-iotests/192 b/tests/qemu-iotests/192 new file mode 100755 index 0000000000..b50a2c0c8e --- /dev/null +++ b/tests/qemu-iotests/192 @@ -0,0 +1,63 @@ +#!/bin/bash +# +# Test NBD export with -incoming (non-shared storage migration use case from +# libvirt) +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=famz@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt generic +_supported_proto file +_supported_os Linux + +if [ "$QEMU_DEFAULT_MACHINE" != "pc" ]; then + _notrun "Requires a PC machine" +fi + +size=64M +_make_test_img $size + +{ +echo "nbd_server_start unix:$TEST_DIR/nbd" +echo "nbd_server_add -w drive0" +echo "q" +} | $QEMU -nodefaults -display none -monitor stdio \ + -drive format=$IMGFMT,file=$TEST_IMG,if=ide,id=drive0 \ + -incoming defer 2>&1 | _filter_testdir | _filter_qemu | _filter_hmp + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/192.out b/tests/qemu-iotests/192.out new file mode 100644 index 0000000000..1e0be4c4d7 --- /dev/null +++ b/tests/qemu-iotests/192.out @@ -0,0 +1,7 @@ +QA output created by 192 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) nbd_server_start unix:TEST_DIR/nbd +(qemu) nbd_server_add -w drive0 +(qemu) q +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 1848077932..afbdc427ea 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -186,3 +186,4 @@ 188 rw auto quick 189 rw auto 190 rw auto quick +192 rw auto quick -- cgit v1.2.1 From 72b6ffc76653214b69a94a7b1643ff80df134486 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 14 Aug 2017 16:34:26 -0500 Subject: nbd-client: Fix regression when server sends garbage When we switched NBD to use coroutines for qemu 2.9 (in particular, commit a12a712a), we introduced a regression: if 
a server sends us garbage (such as a corrupted magic number), we quit the read loop but do not stop sending further queued commands, resulting in the client hanging when it never reads the response to those additional commands. In qemu 2.8, we properly detected that the server is no longer reliable, and cancelled all existing pending commands with EIO, then tore down the socket so that all further command attempts get EPIPE. Restore the proper behavior of quitting (almost) all communication with a broken server: Once we know we are out of sync or otherwise can't trust the server, we must assume that any further incoming data is unreliable and therefore end all pending commands with EIO, and quit trying to send any further commands. As an exception, we still (try to) send NBD_CMD_DISC to let the server know we are going away (in part, because it is easier to do that than to further refactor nbd_teardown_connection, and in part because it is the only command where we do not have to wait for a reply). Based on a patch by Vladimir Sementsov-Ogievskiy. 
A malicious server can be created with the following hack, followed by setting NBD_SERVER_DEBUG to a non-zero value in the environment when running qemu-nbd: | --- a/nbd/server.c | +++ b/nbd/server.c | @@ -919,6 +919,17 @@ static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp) | stl_be_p(buf + 4, reply->error); | stq_be_p(buf + 8, reply->handle); | | + static int debug; | + static int count; | + if (!count++) { | + const char *str = getenv("NBD_SERVER_DEBUG"); | + if (str) { | + debug = atoi(str); | + } | + } | + if (debug && !(count % debug)) { | + buf[0] = 0; | + } | return nbd_write(ioc, buf, sizeof(buf), errp); | } Reported-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Eric Blake Message-Id: <20170814213426.24681-1-eblake@redhat.com> Reviewed-by: Stefan Hajnoczi --- block/nbd-client.c | 17 +++++++++++++---- block/nbd-client.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/block/nbd-client.c b/block/nbd-client.c index 25dd28406b..422ecb4307 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -73,7 +73,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque) int ret; Error *local_err = NULL; - for (;;) { + while (!s->quit) { assert(s->reply.handle == 0); ret = nbd_receive_reply(s->ioc, &s->reply, &local_err); if (ret < 0) { @@ -107,6 +107,9 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque) qemu_coroutine_yield(); } + if (ret < 0) { + s->quit = true; + } nbd_recv_coroutines_enter_all(s); s->read_reply_co = NULL; } @@ -135,6 +138,10 @@ static int nbd_co_send_request(BlockDriverState *bs, assert(i < MAX_NBD_REQUESTS); request->handle = INDEX_TO_HANDLE(s, i); + if (s->quit) { + qemu_co_mutex_unlock(&s->send_mutex); + return -EIO; + } if (!s->ioc) { qemu_co_mutex_unlock(&s->send_mutex); return -EPIPE; @@ -143,7 +150,7 @@ static int nbd_co_send_request(BlockDriverState *bs, if (qiov) { qio_channel_set_cork(s->ioc, true); rc = nbd_send_request(s->ioc, request); - if (rc >= 0) { + if (rc >= 0 && 
!s->quit) { ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false, NULL); if (ret != request->len) { @@ -154,6 +161,9 @@ static int nbd_co_send_request(BlockDriverState *bs, } else { rc = nbd_send_request(s->ioc, request); } + if (rc < 0) { + s->quit = true; + } qemu_co_mutex_unlock(&s->send_mutex); return rc; } @@ -168,8 +178,7 @@ static void nbd_co_receive_reply(NBDClientSession *s, /* Wait until we're woken up by nbd_read_reply_entry. */ qemu_coroutine_yield(); *reply = s->reply; - if (reply->handle != request->handle || - !s->ioc) { + if (reply->handle != request->handle || !s->ioc || s->quit) { reply->error = EIO; } else { if (qiov && reply->error == 0) { diff --git a/block/nbd-client.h b/block/nbd-client.h index df80771357..1935ffbcaa 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -29,6 +29,7 @@ typedef struct NBDClientSession { Coroutine *recv_coroutine[MAX_NBD_REQUESTS]; NBDReply reply; + bool quit; } NBDClientSession; NBDClientSession *nbd_get_client_session(BlockDriverState *bs); -- cgit v1.2.1