summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2017-11-21 17:05:49 +0000
committerPeter Maydell <peter.maydell@linaro.org>2017-11-21 17:05:49 +0000
commit64807cd77938885f681a9a18b5736e923ad50b7c (patch)
tree7785670f2bc440477c5727b73766685017f6feee
parentfc7dbc119e0852a70dc9fa68bb41a318e49e4cd6 (diff)
parentd975301dc8ae56fb3154348878e47a6211843c0b (diff)
downloadqemu-64807cd77938885f681a9a18b5736e923ad50b7c.tar.gz
Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
# gpg: Signature made Tue 21 Nov 2017 17:01:33 GMT # gpg: using RSA key 0xBDBE7B27C0DE3057 # gpg: Good signature from "Jeffrey Cody <jcody@redhat.com>" # gpg: aka "Jeffrey Cody <jeff@codyprime.org>" # gpg: aka "Jeffrey Cody <codyprime@gmail.com>" # Primary key fingerprint: 9957 4B4D 3474 90E7 9D98 D624 BDBE 7B27 C0DE 3057 * remotes/cody/tags/block-pull-request: qemu-iotest: add test for blockjob coroutine race condition qemu-iotests: add option in common.qemu for mismatch only coroutine: abort if we try to schedule or enter a pending coroutine blockjob: do not allow coroutine double entry or entry-after-completion Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--blockjob.c7
-rw-r--r--include/block/blockjob_int.h3
-rw-r--r--include/qemu/coroutine_int.h13
-rwxr-xr-xtests/qemu-iotests/20099
-rw-r--r--tests/qemu-iotests/200.out14
-rw-r--r--tests/qemu-iotests/common.qemu8
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--util/async.c13
-rw-r--r--util/qemu-coroutine-sleep.c12
-rw-r--r--util/qemu-coroutine.c14
10 files changed, 177 insertions, 7 deletions
diff --git a/blockjob.c b/blockjob.c
index 3a0c49137e..ff9a614531 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -797,11 +797,14 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
return;
}
- job->busy = false;
+ /* We need to leave job->busy set here, because when we have
+ * put a coroutine to 'sleep', we have scheduled it to run in
+ * the future. We cannot enter that same coroutine again before
+ * it wakes and runs, otherwise we risk double-entry or entry after
+ * completion. */
if (!block_job_should_pause(job)) {
co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
}
- job->busy = true;
block_job_pause_point(job);
}
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index f13ad05c0d..43f3be2965 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -143,7 +143,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
* @ns: How many nanoseconds to stop for.
*
* Put the job to sleep (assuming that it wasn't canceled) for @ns
- * nanoseconds. Canceling the job will interrupt the wait immediately.
+ * nanoseconds. Canceling the job will not interrupt the wait, so the
+ * cancellation is not processed until the coroutine wakes up.
*/
void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns);
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
index cb98892bba..59e8406398 100644
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -46,14 +46,21 @@ struct Coroutine {
size_t locks_held;
+ /* Only used when the coroutine has yielded. */
+ AioContext *ctx;
+
+ /* Used to catch and abort on illegal co-routine entry.
+ * Will contain the name of the function that had first
+ * scheduled the coroutine. */
+ const char *scheduled;
+
+ QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
+
/* Coroutines that should be woken up when we yield or terminate.
* Only used when the coroutine is running.
*/
QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup;
- /* Only used when the coroutine has yielded. */
- AioContext *ctx;
- QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
QSLIST_ENTRY(Coroutine) co_scheduled_next;
};
diff --git a/tests/qemu-iotests/200 b/tests/qemu-iotests/200
new file mode 100755
index 0000000000..d8787ddb46
--- /dev/null
+++ b/tests/qemu-iotests/200
@@ -0,0 +1,99 @@
+#!/bin/bash
+#
+# Block job co-routine race condition test.
+#
+# See: https://bugzilla.redhat.com/show_bug.cgi?id=1508708
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=jcody@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_qemu
+ rm -f "${TEST_IMG}" "${BACKING_IMG}"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2 qed
+_supported_proto file
+_supported_os Linux
+
+BACKING_IMG="${TEST_DIR}/backing.img"
+TEST_IMG="${TEST_DIR}/test.img"
+
+${QEMU_IMG} create -f $IMGFMT "${BACKING_IMG}" 512M | _filter_img_create
+${QEMU_IMG} create -f $IMGFMT -F $IMGFMT "${TEST_IMG}" -b "${BACKING_IMG}" 512M | _filter_img_create
+
+${QEMU_IO} -c "write -P 0xa5 512 300M" "${BACKING_IMG}" | _filter_qemu_io
+
+echo
+echo === Starting QEMU VM ===
+echo
+qemu_comm_method="qmp"
+_launch_qemu -device pci-bridge,id=bridge1,chassis_nr=1,bus=pci.0 \
+ -object iothread,id=iothread0 \
+ -device virtio-scsi-pci,bus=bridge1,addr=0x1f,id=scsi0,iothread=iothread0 \
+ -drive file="${TEST_IMG}",media=disk,if=none,cache=none,id=drive_sysdisk,aio=native,format=$IMGFMT \
+ -device scsi-hd,drive=drive_sysdisk,bus=scsi0.0,id=sysdisk,bootindex=0
+h1=$QEMU_HANDLE
+
+_send_qemu_cmd $h1 "{ 'execute': 'qmp_capabilities' }" 'return'
+
+echo
+echo === Sending stream/cancel, checking for SIGSEGV only ===
+echo
+for (( i=0;i<500;i++ ))
+do
+ mismatch_only='y' qemu_error_no_exit='n' _send_qemu_cmd $h1 \
+ "{
+ 'execute': 'block-stream',
+ 'arguments': {
+ 'device': 'drive_sysdisk',
+ 'speed': 10000000,
+ 'on-error': 'report',
+ 'job-id': 'job-$i'
+ }
+ }
+ {
+ 'execute': 'block-job-cancel',
+ 'arguments': {
+ 'device': 'job-$i'
+ }
+ }" \
+ "{.*{.*}.*}" # should match all well-formed QMP responses
+done # ran for i = 0..499, so $i (500) now equals the iteration count
+
+silent='y' _send_qemu_cmd $h1 "{ 'execute': 'quit' }" 'return'
+
+echo "$i iterations performed"
+
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/200.out b/tests/qemu-iotests/200.out
new file mode 100644
index 0000000000..af6a809e30
--- /dev/null
+++ b/tests/qemu-iotests/200.out
@@ -0,0 +1,14 @@
+QA output created by 200
+Formatting 'TEST_DIR/backing.img', fmt=IMGFMT size=536870912
+Formatting 'TEST_DIR/test.img', fmt=IMGFMT size=536870912 backing_file=TEST_DIR/backing.img backing_fmt=IMGFMT
+wrote 314572800/314572800 bytes at offset 512
+300 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Starting QEMU VM ===
+
+{"return": {}}
+
+=== Sending stream/cancel, checking for SIGSEGV only ===
+
+500 iterations performed
+*** done
diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu
index 7b3052dc79..85f66b852c 100644
--- a/tests/qemu-iotests/common.qemu
+++ b/tests/qemu-iotests/common.qemu
@@ -50,6 +50,8 @@ _in_fd=4
#
# If $silent is set to anything but an empty string, then
# response is not echoed out.
+# If $mismatch_only is set, only non-matching responses will
+# be echoed.
function _timed_wait_for()
{
local h=${1}
@@ -58,14 +60,18 @@ function _timed_wait_for()
QEMU_STATUS[$h]=0
while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]}
do
- if [ -z "${silent}" ]; then
+ if [ -z "${silent}" ] && [ -z "${mismatch_only}" ]; then
echo "${resp}" | _filter_testdir | _filter_qemu \
| _filter_qemu_io | _filter_qmp | _filter_hmp
fi
grep -q "${*}" < <(echo "${resp}")
if [ $? -eq 0 ]; then
return
+ elif [ -z "${silent}" ] && [ -n "${mismatch_only}" ]; then
+ echo "${resp}" | _filter_testdir | _filter_qemu \
+ | _filter_qemu_io | _filter_qmp | _filter_hmp
fi
+
done
QEMU_STATUS[$h]=-1
if [ -z "${qemu_error_no_exit}" ]; then
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 1fad602152..3e688678dd 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -196,3 +196,4 @@
196 rw auto quick
197 rw auto quick
198 rw auto
+200 rw auto
diff --git a/util/async.c b/util/async.c
index 0e1bd8780a..4dd9d95a9e 100644
--- a/util/async.c
+++ b/util/async.c
@@ -388,6 +388,9 @@ static void co_schedule_bh_cb(void *opaque)
QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
trace_aio_co_schedule_bh_cb(ctx, co);
aio_context_acquire(ctx);
+
+ /* Protected by write barrier in qemu_aio_coroutine_enter */
+ atomic_set(&co->scheduled, NULL);
qemu_coroutine_enter(co);
aio_context_release(ctx);
}
@@ -438,6 +441,16 @@ fail:
void aio_co_schedule(AioContext *ctx, Coroutine *co)
{
trace_aio_co_schedule(ctx, co);
+ const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
+ __func__);
+
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
+
QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
co, co_scheduled_next);
qemu_bh_schedule(ctx->co_schedule_bh);
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
index 9c5655041b..254349cdbb 100644
--- a/util/qemu-coroutine-sleep.c
+++ b/util/qemu-coroutine-sleep.c
@@ -13,6 +13,7 @@
#include "qemu/osdep.h"
#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
#include "qemu/timer.h"
#include "block/aio.h"
@@ -25,6 +26,8 @@ static void co_sleep_cb(void *opaque)
{
CoSleepCB *sleep_cb = opaque;
+ /* Write of 'scheduled' protected by write barrier in aio_co_schedule */
+ atomic_set(&sleep_cb->co->scheduled, NULL);
aio_co_wake(sleep_cb->co);
}
@@ -34,6 +37,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
CoSleepCB sleep_cb = {
.co = qemu_coroutine_self(),
};
+
+ const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL,
+ __func__);
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
qemu_coroutine_yield();
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index d6095c1d5a..9eff7fd450 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -107,8 +107,22 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
Coroutine *self = qemu_coroutine_self();
CoroutineAction ret;
+ /* Cannot rely on the read barrier for co in aio_co_wake(), as there are
+ * callers outside of aio_co_wake() */
+ const char *scheduled = atomic_mb_read(&co->scheduled);
+
trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
+ /* if the Coroutine has already been scheduled, entering it again will
+ * cause us to enter it twice, potentially even after the coroutine has
+ * been deleted */
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
+
if (co->caller) {
fprintf(stderr, "Co-routine re-entered recursively\n");
abort();