summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c50
-rw-r--r--block/block-backend.c31
-rw-r--r--block/io.c25
-rw-r--r--block/raw-posix.c44
-rw-r--r--block/raw-win32.c56
-rw-r--r--blockdev.c112
-rwxr-xr-xconfigure19
-rw-r--r--cpus.c4
-rw-r--r--hw/i386/xen/xen_platform.c2
-rw-r--r--hw/ide/piix.c4
-rw-r--r--include/block/block.h2
-rw-r--r--include/qemu/coroutine_int.h2
-rw-r--r--include/sysemu/block-backend.h3
-rw-r--r--include/sysemu/os-posix.h27
-rw-r--r--qapi/block-core.json31
-rwxr-xr-xtests/qemu-iotests/0874
-rw-r--r--tests/qemu-iotests/087.out2
-rw-r--r--util/coroutine-sigaltstack.c25
-rw-r--r--util/coroutine-ucontext.c11
-rw-r--r--util/coroutine-win32.c2
-rw-r--r--util/oslib-posix.c77
21 files changed, 342 insertions, 191 deletions
diff --git a/block.c b/block.c
index 493ecf3137..bb1f1ec957 100644
--- a/block.c
+++ b/block.c
@@ -42,6 +42,7 @@
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/id.h"
+#include "qapi/util.h"
#ifdef CONFIG_BSD
#include <sys/ioctl.h>
@@ -764,7 +765,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
/* Our block drivers take care to send flushes and respect unmap policy,
* so we can default to enable both on lower layers regardless of the
* corresponding parent options. */
- flags |= BDRV_O_UNMAP;
+ qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
/* Clear flags that only apply to the top layer */
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
@@ -954,6 +955,16 @@ static QemuOptsList bdrv_runtime_opts = {
.type = QEMU_OPT_BOOL,
.help = "Node is opened in read-only mode",
},
+ {
+ .name = "detect-zeroes",
+ .type = QEMU_OPT_STRING,
+ .help = "try to optimize zero writes (off, on, unmap)",
+ },
+ {
+ .name = "discard",
+ .type = QEMU_OPT_STRING,
+ .help = "discard operation (ignore/off, unmap/on)",
+ },
{ /* end of list */ }
},
};
@@ -970,6 +981,8 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
const char *filename;
const char *driver_name = NULL;
const char *node_name = NULL;
+ const char *discard;
+ const char *detect_zeroes;
QemuOpts *opts;
BlockDriver *drv;
Error *local_err = NULL;
@@ -1038,6 +1051,41 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
}
}
+ discard = qemu_opt_get(opts, "discard");
+ if (discard != NULL) {
+ if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
+ error_setg(errp, "Invalid discard option");
+ ret = -EINVAL;
+ goto fail_opts;
+ }
+ }
+
+ detect_zeroes = qemu_opt_get(opts, "detect-zeroes");
+ if (detect_zeroes) {
+ BlockdevDetectZeroesOptions value =
+ qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+ detect_zeroes,
+ BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
+ BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail_opts;
+ }
+
+ if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+ !(bs->open_flags & BDRV_O_UNMAP))
+ {
+ error_setg(errp, "setting detect-zeroes to unmap is not allowed "
+ "without setting discard operation to unmap");
+ ret = -EINVAL;
+ goto fail_opts;
+ }
+
+ bs->detect_zeroes = value;
+ }
+
if (filename != NULL) {
pstrcpy(bs->filename, sizeof(bs->filename), filename);
} else {
diff --git a/block/block-backend.c b/block/block-backend.c
index 0bd19abdfb..639294b8e6 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1592,13 +1592,12 @@ void blk_update_root_state(BlockBackend *blk)
}
/*
- * Applies the information in the root state to the given BlockDriverState. This
- * does not include the flags which have to be specified for bdrv_open(), use
- * blk_get_open_flags_from_root_state() to inquire them.
+ * Returns the detect-zeroes setting to be used for bdrv_open() of a
+ * BlockDriverState which is supposed to inherit the root state.
*/
-void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
+bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
- bs->detect_zeroes = blk->root_state.detect_zeroes;
+ return blk->root_state.detect_zeroes;
}
/*
@@ -1640,28 +1639,6 @@ int blk_commit_all(void)
return 0;
}
-int blk_flush_all(void)
-{
- BlockBackend *blk = NULL;
- int result = 0;
-
- while ((blk = blk_all_next(blk)) != NULL) {
- AioContext *aio_context = blk_get_aio_context(blk);
- int ret;
-
- aio_context_acquire(aio_context);
- if (blk_is_inserted(blk)) {
- ret = blk_flush(blk);
- if (ret < 0 && !result) {
- result = ret;
- }
- }
- aio_context_release(aio_context);
- }
-
- return result;
-}
-
/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
diff --git a/block/io.c b/block/io.c
index fdf70807b0..57a2eeb512 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1619,6 +1619,31 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
BDRV_REQ_ZERO_WRITE | flags);
}
+/*
+ * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not.
+ */
+int bdrv_flush_all(void)
+{
+ BdrvNextIterator it;
+ BlockDriverState *bs = NULL;
+ int result = 0;
+
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ int ret;
+
+ aio_context_acquire(aio_context);
+ ret = bdrv_flush(bs);
+ if (ret < 0 && !result) {
+ result = ret;
+ }
+ aio_context_release(aio_context);
+ }
+
+ return result;
+}
+
+
typedef struct BdrvCoGetBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 6ed7547392..166e9d1ad5 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -143,6 +143,7 @@ typedef struct BDRVRawState {
bool has_discard:1;
bool has_write_zeroes:1;
bool discard_zeroes:1;
+ bool use_linux_aio:1;
bool has_fallocate;
bool needs_alignment;
} BDRVRawState;
@@ -367,18 +368,6 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
}
}
-#ifdef CONFIG_LINUX_AIO
-static bool raw_use_aio(int bdrv_flags)
-{
- /*
- * Currently Linux do AIO only for files opened with O_DIRECT
- * specified so check NOCACHE flag too
- */
- return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
- (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO);
-}
-#endif
-
static void raw_parse_filename(const char *filename, QDict *options,
Error **errp)
{
@@ -399,6 +388,11 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "File name of the image",
},
+ {
+ .name = "aio",
+ .type = QEMU_OPT_STRING,
+ .help = "host AIO implementation (threads, native)",
+ },
{ /* end of list */ }
},
};
@@ -410,6 +404,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
QemuOpts *opts;
Error *local_err = NULL;
const char *filename = NULL;
+ BlockdevAioOptions aio, aio_default;
int fd, ret;
struct stat st;
@@ -429,6 +424,18 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
goto fail;
}
+ aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO)
+ ? BLOCKDEV_AIO_OPTIONS_NATIVE
+ : BLOCKDEV_AIO_OPTIONS_THREADS;
+ aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"),
+ BLOCKDEV_AIO_OPTIONS__MAX, aio_default, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
+
s->open_flags = open_flags;
raw_parse_flags(bdrv_flags, &s->open_flags);
@@ -444,14 +451,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->fd = fd;
#ifdef CONFIG_LINUX_AIO
- if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
+ /* Currently Linux does AIO only for files opened with O_DIRECT */
+ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
error_setg(errp, "aio=native was specified, but it requires "
"cache.direct=on, which was not specified.");
ret = -EINVAL;
goto fail;
}
#else
- if (bdrv_flags & BDRV_O_NATIVE_AIO) {
+ if (s->use_linux_aio) {
error_setg(errp, "aio=native was specified, but is not supported "
"in this build.");
ret = -EINVAL;
@@ -1256,7 +1264,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
- } else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ } else if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
@@ -1285,7 +1293,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
static void raw_aio_plug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
- if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ BDRVRawState *s = bs->opaque;
+ if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_plug(bs, aio);
}
@@ -1295,7 +1304,8 @@ static void raw_aio_plug(BlockDriverState *bs)
static void raw_aio_unplug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
- if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ BDRVRawState *s = bs->opaque;
+ if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_unplug(bs, aio);
}
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 56f45fea9e..734bb105bd 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -32,6 +32,7 @@
#include "block/thread-pool.h"
#include "qemu/iov.h"
#include "qapi/qmp/qstring.h"
+#include "qapi/util.h"
#include <windows.h>
#include <winioctl.h>
@@ -252,7 +253,8 @@ static void raw_probe_alignment(BlockDriverState *bs, Error **errp)
}
}
-static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
+static void raw_parse_flags(int flags, bool use_aio, int *access_flags,
+ DWORD *overlapped)
{
assert(access_flags != NULL);
assert(overlapped != NULL);
@@ -264,7 +266,7 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
}
*overlapped = FILE_ATTRIBUTE_NORMAL;
- if (flags & BDRV_O_NATIVE_AIO) {
+ if (use_aio) {
*overlapped |= FILE_FLAG_OVERLAPPED;
}
if (flags & BDRV_O_NOCACHE) {
@@ -292,10 +294,35 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "File name of the image",
},
+ {
+ .name = "aio",
+ .type = QEMU_OPT_STRING,
+ .help = "host AIO implementation (threads, native)",
+ },
{ /* end of list */ }
},
};
+static bool get_aio_option(QemuOpts *opts, int flags, Error **errp)
+{
+ BlockdevAioOptions aio, aio_default;
+
+ aio_default = (flags & BDRV_O_NATIVE_AIO) ? BLOCKDEV_AIO_OPTIONS_NATIVE
+ : BLOCKDEV_AIO_OPTIONS_THREADS;
+ aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"),
+ BLOCKDEV_AIO_OPTIONS__MAX, aio_default, errp);
+
+ switch (aio) {
+ case BLOCKDEV_AIO_OPTIONS_NATIVE:
+ return true;
+ case BLOCKDEV_AIO_OPTIONS_THREADS:
+ return false;
+ default:
+ error_setg(errp, "Invalid AIO option");
+ }
+ return false;
+}
+
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -305,6 +332,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
+ bool use_aio;
int ret;
s->type = FTYPE_FILE;
@@ -319,7 +347,14 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
filename = qemu_opt_get(opts, "filename");
- raw_parse_flags(flags, &access_flags, &overlapped);
+ use_aio = get_aio_option(opts, flags, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ raw_parse_flags(flags, use_aio, &access_flags, &overlapped);
if (filename[0] && filename[1] == ':') {
snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
@@ -346,7 +381,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- if (flags & BDRV_O_NATIVE_AIO) {
+ if (use_aio) {
s->aio = win32_aio_init();
if (s->aio == NULL) {
CloseHandle(s->hfile);
@@ -647,6 +682,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
const char *filename;
+ bool use_aio;
QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0,
&error_abort);
@@ -659,6 +695,16 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
filename = qemu_opt_get(opts, "filename");
+ use_aio = get_aio_option(opts, flags, &local_err);
+ if (!local_err && use_aio) {
+ error_setg(&local_err, "AIO is not supported on Windows host devices");
+ }
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto done;
+ }
+
if (strstart(filename, "/dev/cdrom", NULL)) {
if (find_cdrom(device_name, sizeof(device_name)) < 0) {
error_setg(errp, "Could not open CD-ROM drive");
@@ -677,7 +723,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
}
s->type = find_device_type(bs, filename);
- raw_parse_flags(flags, &access_flags, &overlapped);
+ raw_parse_flags(flags, use_aio, &access_flags, &overlapped);
create_flags = OPEN_EXISTING;
diff --git a/blockdev.c b/blockdev.c
index 29c6561fd8..07ec733905 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -356,7 +356,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
const char **throttling_group, ThrottleConfig *throttle_cfg,
BlockdevDetectZeroesOptions *detect_zeroes, Error **errp)
{
- const char *discard;
Error *local_error = NULL;
const char *aio;
@@ -365,13 +364,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
*bdrv_flags |= BDRV_O_COPY_ON_READ;
}
- if ((discard = qemu_opt_get(opts, "discard")) != NULL) {
- if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) {
- error_setg(errp, "Invalid discard option");
- return;
- }
- }
-
if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
if (!strcmp(aio, "native")) {
*bdrv_flags |= BDRV_O_NATIVE_AIO;
@@ -449,15 +441,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
error_propagate(errp, local_error);
return;
}
-
- if (bdrv_flags &&
- *detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
- !(*bdrv_flags & BDRV_O_UNMAP))
- {
- error_setg(errp, "setting detect-zeroes to unmap is not allowed "
- "without setting discard operation to unmap");
- return;
- }
}
}
@@ -650,35 +633,11 @@ err_no_opts:
return NULL;
}
-static QemuOptsList qemu_root_bds_opts;
-
/* Takes the ownership of bs_opts */
static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
{
- BlockDriverState *bs;
- QemuOpts *opts;
- Error *local_error = NULL;
- BlockdevDetectZeroesOptions detect_zeroes;
int bdrv_flags = 0;
- opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp);
- if (!opts) {
- goto fail;
- }
-
- qemu_opts_absorb_qdict(opts, bs_opts, &local_error);
- if (local_error) {
- error_propagate(errp, local_error);
- goto fail;
- }
-
- extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL,
- &detect_zeroes, &local_error);
- if (local_error) {
- error_propagate(errp, local_error);
- goto fail;
- }
-
/* bdrv_open() defaults to the values in bdrv_flags (for compatibility
* with other callers) rather than what we want as the real defaults.
* Apply the defaults here instead. */
@@ -690,21 +649,7 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
bdrv_flags |= BDRV_O_INACTIVE;
}
- bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
- if (!bs) {
- goto fail_no_bs_opts;
- }
-
- bs->detect_zeroes = detect_zeroes;
-
-fail_no_bs_opts:
- qemu_opts_del(opts);
- return bs;
-
-fail:
- qemu_opts_del(opts);
- QDECREF(bs_opts);
- return NULL;
+ return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
}
void blockdev_close_all_bdrv_states(void)
@@ -2549,6 +2494,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
BlockBackend *blk;
BlockDriverState *medium_bs = NULL;
int bdrv_flags;
+ bool detect_zeroes;
int rc;
QDict *options = NULL;
Error *err = NULL;
@@ -2588,8 +2534,12 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
abort();
}
+ options = qdict_new();
+ detect_zeroes = blk_get_detect_zeroes_from_root_state(blk);
+ qdict_put(options, "detect-zeroes",
+ qstring_from_str(detect_zeroes ? "on" : "off"));
+
if (has_format) {
- options = qdict_new();
qdict_put(options, "driver", qstring_from_str(format));
}
@@ -2614,7 +2564,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
error_free(err);
err = NULL;
- qmp_x_blockdev_remove_medium(has_device, device, has_id, id, errp);
+ qmp_x_blockdev_remove_medium(has_device, device, has_id, id, &err);
if (err) {
error_propagate(errp, err);
goto fail;
@@ -2626,8 +2576,6 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
goto fail;
}
- blk_apply_root_state(blk, medium_bs);
-
qmp_blockdev_close_tray(has_device, device, has_id, id, errp);
fail:
@@ -3832,21 +3780,6 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
QDict *qdict;
Error *local_err = NULL;
- /* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with
- * cache.direct=false instead of silently switching to aio=threads, except
- * when called from drive_new().
- *
- * For now, simply forbidding the combination for all drivers will do. */
- if (options->has_aio && options->aio == BLOCKDEV_AIO_OPTIONS_NATIVE) {
- bool direct = options->has_cache &&
- options->cache->has_direct &&
- options->cache->direct;
- if (!direct) {
- error_setg(errp, "aio=native requires cache.direct=true");
- goto fail;
- }
- }
-
visit_type_BlockdevOptions(v, NULL, &options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
@@ -4005,10 +3938,6 @@ QemuOptsList qemu_common_drive_opts = {
.type = QEMU_OPT_BOOL,
.help = "enable/disable snapshot mode",
},{
- .name = "discard",
- .type = QEMU_OPT_STRING,
- .help = "discard operation (ignore/off, unmap/on)",
- },{
.name = "aio",
.type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native)",
@@ -4135,31 +4064,6 @@ QemuOptsList qemu_common_drive_opts = {
},
};
-static QemuOptsList qemu_root_bds_opts = {
- .name = "root-bds",
- .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head),
- .desc = {
- {
- .name = "discard",
- .type = QEMU_OPT_STRING,
- .help = "discard operation (ignore/off, unmap/on)",
- },{
- .name = "aio",
- .type = QEMU_OPT_STRING,
- .help = "host AIO implementation (threads, native)",
- },{
- .name = "copy-on-read",
- .type = QEMU_OPT_BOOL,
- .help = "copy read data from backing file into image file",
- },{
- .name = "detect-zeroes",
- .type = QEMU_OPT_STRING,
- .help = "try to optimize zero writes (off, on, unmap)",
- },
- { /* end of list */ }
- },
-};
-
QemuOptsList qemu_drive_opts = {
.name = "drive",
.head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
diff --git a/configure b/configure
index df4a247fc2..5751d8ecaa 100755
--- a/configure
+++ b/configure
@@ -296,6 +296,7 @@ libiscsi=""
libnfs=""
coroutine=""
coroutine_pool=""
+debug_stack_usage="no"
seccomp=""
glusterfs=""
glusterfs_xlator_opt="no"
@@ -1004,6 +1005,8 @@ for opt do
;;
--enable-coroutine-pool) coroutine_pool="yes"
;;
+ --enable-debug-stack-usage) debug_stack_usage="yes"
+ ;;
--disable-docs) docs="no"
;;
--enable-docs) docs="yes"
@@ -4331,6 +4334,17 @@ if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then
error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)"
fi
+if test "$debug_stack_usage" = "yes"; then
+ if test "$cpu" = "ia64" -o "$cpu" = "hppa"; then
+ error_exit "stack usage debugging is not supported for $cpu"
+ fi
+ if test "$coroutine_pool" = "yes"; then
+ echo "WARN: disabling coroutine pool for stack usage debugging"
+ coroutine_pool=no
+ fi
+fi
+
+
##########################################
# check if we have open_by_handle_at
@@ -4916,6 +4930,7 @@ echo "QGA MSI support $guest_agent_msi"
echo "seccomp support $seccomp"
echo "coroutine backend $coroutine"
echo "coroutine pool $coroutine_pool"
+echo "debug stack usage $debug_stack_usage"
echo "GlusterFS support $glusterfs"
echo "Archipelago support $archipelago"
echo "gcov $gcov_tool"
@@ -5384,6 +5399,10 @@ else
echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak
fi
+if test "$debug_stack_usage" = "yes" ; then
+ echo "CONFIG_DEBUG_STACK_USAGE=y" >> $config_host_mak
+fi
+
if test "$open_by_handle_at" = "yes" ; then
echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak
fi
diff --git a/cpus.c b/cpus.c
index b2fbe33304..31204bb4b3 100644
--- a/cpus.c
+++ b/cpus.c
@@ -751,7 +751,7 @@ static int do_vm_stop(RunState state)
bdrv_drain_all();
replay_disable_events();
- ret = blk_flush_all();
+ ret = bdrv_flush_all();
return ret;
}
@@ -1408,7 +1408,7 @@ int vm_stop_force_state(RunState state)
bdrv_drain_all();
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
- return blk_flush_all();
+ return bdrv_flush_all();
}
}
diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index aa7839324c..f85635cc9a 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -134,8 +134,6 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v
devices, and bit 2 the non-primary-master IDE devices. */
if (val & UNPLUG_ALL_IDE_DISKS) {
DPRINTF("unplug disks\n");
- blk_drain_all();
- blk_flush_all();
pci_unplug_disks(pci_dev->bus);
}
if (val & UNPLUG_ALL_NICS) {
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index c190fcaa3c..d5777fd0b3 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -179,6 +179,10 @@ int pci_piix3_xen_ide_unplug(DeviceState *dev)
if (di != NULL && !di->media_cd) {
BlockBackend *blk = blk_by_legacy_dinfo(di);
DeviceState *ds = blk_get_attached_dev(blk);
+
+ blk_drain(blk);
+ blk_flush(blk);
+
if (ds) {
blk_detach_dev(blk, ds);
}
diff --git a/include/block/block.h b/include/block/block.h
index e18233afe0..107c603605 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -108,6 +108,7 @@ typedef struct HDGeometry {
#define BDRV_OPT_CACHE_DIRECT "cache.direct"
#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
#define BDRV_OPT_READ_ONLY "read-only"
+#define BDRV_OPT_DISCARD "discard"
#define BDRV_SECTOR_BITS 9
@@ -333,6 +334,7 @@ int bdrv_inactivate_all(void);
/* Ensure contents are flushed to disk. */
int bdrv_flush(BlockDriverState *bs);
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+int bdrv_flush_all(void);
void bdrv_close_all(void);
void bdrv_drain(BlockDriverState *bs);
void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
index 6df9d33352..14d4f1d1f2 100644
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -28,6 +28,8 @@
#include "qemu/queue.h"
#include "qemu/coroutine.h"
+#define COROUTINE_STACK_SIZE (1 << 20)
+
typedef enum {
COROUTINE_YIELD = 1,
COROUTINE_TERMINATE = 2,
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 3b29317349..a7993afcda 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -152,7 +152,6 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count);
int blk_co_flush(BlockBackend *blk);
int blk_flush(BlockBackend *blk);
-int blk_flush_all(void);
int blk_commit_all(void);
void blk_drain(BlockBackend *blk);
void blk_drain_all(void);
@@ -199,7 +198,7 @@ void blk_io_unplug(BlockBackend *blk);
BlockAcctStats *blk_get_stats(BlockBackend *blk);
BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
void blk_update_root_state(BlockBackend *blk);
-void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs);
+bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
int blk_get_open_flags_from_root_state(BlockBackend *blk);
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 9c7dfdfbec..3cfedbc28b 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -60,4 +60,31 @@ int qemu_utimens(const char *path, const qemu_timespec *times);
bool is_daemonized(void);
+/**
+ * qemu_alloc_stack:
+ * @sz: pointer to a size_t holding the requested usable stack size
+ *
+ * Allocate memory that can be used as a stack, for instance for
+ * coroutines. If the memory cannot be allocated, this function
+ * will abort (like g_malloc()). This function also inserts an
+ * additional guard page to catch a potential stack overflow.
+ * Note that the memory required for the guard page and alignment
+ * and minimal stack size restrictions will increase the value of sz.
+ *
+ * The allocated stack must be freed with qemu_free_stack().
+ *
+ * Returns: pointer to (the lowest address of) the stack memory.
+ */
+void *qemu_alloc_stack(size_t *sz);
+
+/**
+ * qemu_free_stack:
+ * @stack: stack to free
+ * @sz: size of stack in bytes
+ *
+ * Free a stack allocated via qemu_alloc_stack(). Note that sz must
+ * be exactly the adjusted stack size returned by qemu_alloc_stack.
+ */
+void qemu_free_stack(void *stack, size_t sz);
+
#endif
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 92193ab0a1..9d797b8fe0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1721,15 +1721,16 @@
##
# @BlockdevOptionsFile
#
-# Driver specific block device options for the file backend and similar
-# protocols.
+# Driver specific block device options for the file backend.
#
# @filename: path to the image file
+# @aio: #optional AIO backend (default: threads) (since: 2.8)
#
# Since: 1.7
##
{ 'struct': 'BlockdevOptionsFile',
- 'data': { 'filename': 'str' } }
+ 'data': { 'filename': 'str',
+ '*aio': 'BlockdevAioOptions' } }
##
# @BlockdevOptionsNull
@@ -2211,6 +2212,18 @@
'*top-id': 'str' } }
##
+# @BlockdevOptionsCurl
+#
+# Driver specific block device options for the curl backend.
+#
+# @filename: path to the image file
+#
+# Since: 1.7
+##
+{ 'struct': 'BlockdevOptionsCurl',
+ 'data': { 'filename': 'str' } }
+
+##
# @BlockdevOptions
#
# Options for creating a block device. Many options are available for all
@@ -2221,7 +2234,6 @@
# This option is required on the top level of blockdev-add.
# @discard: #optional discard-related options (default: ignore)
# @cache: #optional cache-related options
-# @aio: #optional AIO backend (default: threads)
# @read-only: #optional whether the block device should be read-only
# (default: false)
# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
@@ -2236,7 +2248,6 @@
'*node-name': 'str',
'*discard': 'BlockdevDiscardOptions',
'*cache': 'BlockdevCacheOptions',
- '*aio': 'BlockdevAioOptions',
'*read-only': 'bool',
'*detect-zeroes': 'BlockdevDetectZeroesOptions' },
'discriminator': 'driver',
@@ -2248,13 +2259,13 @@
'cloop': 'BlockdevOptionsGenericFormat',
'dmg': 'BlockdevOptionsGenericFormat',
'file': 'BlockdevOptionsFile',
- 'ftp': 'BlockdevOptionsFile',
- 'ftps': 'BlockdevOptionsFile',
+ 'ftp': 'BlockdevOptionsCurl',
+ 'ftps': 'BlockdevOptionsCurl',
'gluster': 'BlockdevOptionsGluster',
'host_cdrom': 'BlockdevOptionsFile',
'host_device':'BlockdevOptionsFile',
- 'http': 'BlockdevOptionsFile',
- 'https': 'BlockdevOptionsFile',
+ 'http': 'BlockdevOptionsCurl',
+ 'https': 'BlockdevOptionsCurl',
# TODO iscsi: Wait for structured options
'luks': 'BlockdevOptionsLUKS',
# TODO nbd: Should take InetSocketAddress for 'host'?
@@ -2271,7 +2282,7 @@
'replication':'BlockdevOptionsReplication',
# TODO sheepdog: Wait for structured options
# TODO ssh: Should take InetSocketAddress for 'host'?
- 'tftp': 'BlockdevOptionsFile',
+ 'tftp': 'BlockdevOptionsCurl',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index 5c04577b36..b1ac71f2b8 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087
@@ -117,10 +117,10 @@ run_qemu <<EOF
"options": {
"driver": "$IMGFMT",
"node-name": "disk",
- "aio": "native",
"file": {
"driver": "file",
- "filename": "$TEST_IMG"
+ "filename": "$TEST_IMG",
+ "aio": "native"
}
}
}
diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index bef68626c8..cd02eaed4c 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out
@@ -27,7 +27,7 @@ QMP_VERSION
Testing:
QMP_VERSION
{"return": {}}
-{"error": {"class": "GenericError", "desc": "aio=native requires cache.direct=true"}}
+{"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"}
diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c
index a7c3366553..f6fc49a0e5 100644
--- a/util/coroutine-sigaltstack.c
+++ b/util/coroutine-sigaltstack.c
@@ -33,8 +33,9 @@
typedef struct {
Coroutine base;
void *stack;
+ size_t stack_size;
sigjmp_buf env;
-} CoroutineUContext;
+} CoroutineSigAltStack;
/**
* Per-thread coroutine bookkeeping
@@ -44,7 +45,7 @@ typedef struct {
Coroutine *current;
/** The default coroutine */
- CoroutineUContext leader;
+ CoroutineSigAltStack leader;
/** Information for the signal handler (trampoline) */
sigjmp_buf tr_reenter;
@@ -89,7 +90,7 @@ static void __attribute__((constructor)) coroutine_init(void)
* (from the signal handler when it is not signal handling, read ahead
* for more information).
*/
-static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
+static void coroutine_bootstrap(CoroutineSigAltStack *self, Coroutine *co)
{
/* Initialize longjmp environment and switch back the caller */
if (!sigsetjmp(self->env, 0)) {
@@ -109,7 +110,7 @@ static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
*/
static void coroutine_trampoline(int signal)
{
- CoroutineUContext *self;
+ CoroutineSigAltStack *self;
Coroutine *co;
CoroutineThreadState *coTS;
@@ -143,8 +144,7 @@ static void coroutine_trampoline(int signal)
Coroutine *qemu_coroutine_new(void)
{
- const size_t stack_size = 1 << 20;
- CoroutineUContext *co;
+ CoroutineSigAltStack *co;
CoroutineThreadState *coTS;
struct sigaction sa;
struct sigaction osa;
@@ -164,7 +164,8 @@ Coroutine *qemu_coroutine_new(void)
*/
co = g_malloc0(sizeof(*co));
- co->stack = g_malloc(stack_size);
+ co->stack_size = COROUTINE_STACK_SIZE;
+ co->stack = qemu_alloc_stack(&co->stack_size);
co->base.entry_arg = &old_env; /* stash away our jmp_buf */
coTS = coroutine_get_thread_state();
@@ -189,7 +190,7 @@ Coroutine *qemu_coroutine_new(void)
* Set the new stack.
*/
ss.ss_sp = co->stack;
- ss.ss_size = stack_size;
+ ss.ss_size = co->stack_size;
ss.ss_flags = 0;
if (sigaltstack(&ss, &oss) < 0) {
abort();
@@ -251,17 +252,17 @@ Coroutine *qemu_coroutine_new(void)
void qemu_coroutine_delete(Coroutine *co_)
{
- CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+ CoroutineSigAltStack *co = DO_UPCAST(CoroutineSigAltStack, base, co_);
- g_free(co->stack);
+ qemu_free_stack(co->stack, co->stack_size);
g_free(co);
}
CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
CoroutineAction action)
{
- CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
- CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+ CoroutineSigAltStack *from = DO_UPCAST(CoroutineSigAltStack, base, from_);
+ CoroutineSigAltStack *to = DO_UPCAST(CoroutineSigAltStack, base, to_);
CoroutineThreadState *s = coroutine_get_thread_state();
int ret;
diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
index 2bb7e10d4b..6621f3f692 100644
--- a/util/coroutine-ucontext.c
+++ b/util/coroutine-ucontext.c
@@ -34,6 +34,7 @@
typedef struct {
Coroutine base;
void *stack;
+ size_t stack_size;
sigjmp_buf env;
#ifdef CONFIG_VALGRIND_H
@@ -82,7 +83,6 @@ static void coroutine_trampoline(int i0, int i1)
Coroutine *qemu_coroutine_new(void)
{
- const size_t stack_size = 1 << 20;
CoroutineUContext *co;
ucontext_t old_uc, uc;
sigjmp_buf old_env;
@@ -101,17 +101,18 @@ Coroutine *qemu_coroutine_new(void)
}
co = g_malloc0(sizeof(*co));
- co->stack = g_malloc(stack_size);
+ co->stack_size = COROUTINE_STACK_SIZE;
+ co->stack = qemu_alloc_stack(&co->stack_size);
co->base.entry_arg = &old_env; /* stash away our jmp_buf */
uc.uc_link = &old_uc;
uc.uc_stack.ss_sp = co->stack;
- uc.uc_stack.ss_size = stack_size;
+ uc.uc_stack.ss_size = co->stack_size;
uc.uc_stack.ss_flags = 0;
#ifdef CONFIG_VALGRIND_H
co->valgrind_stack_id =
- VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
+ VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size);
#endif
arg.p = co;
@@ -149,7 +150,7 @@ void qemu_coroutine_delete(Coroutine *co_)
valgrind_stack_deregister(co);
#endif
- g_free(co->stack);
+ qemu_free_stack(co->stack, co->stack_size);
g_free(co);
}
diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c
index 02e28e825f..de6bd4fd3e 100644
--- a/util/coroutine-win32.c
+++ b/util/coroutine-win32.c
@@ -71,7 +71,7 @@ static void CALLBACK coroutine_trampoline(void *co_)
Coroutine *qemu_coroutine_new(void)
{
- const size_t stack_size = 1 << 20;
+ const size_t stack_size = COROUTINE_STACK_SIZE;
CoroutineWin32 *co;
co = g_malloc0(sizeof(*co));
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index f2d4e9e592..aaec1891f5 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -50,6 +50,10 @@
#include "qemu/mmap-alloc.h"
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#include "qemu/error-report.h"
+#endif
+
int qemu_get_thread_id(void)
{
#if defined(__linux__)
@@ -499,3 +503,76 @@ pid_t qemu_fork(Error **errp)
}
return pid;
}
+
+void *qemu_alloc_stack(size_t *sz)
+{
+ void *ptr, *guardpage;
+#ifdef CONFIG_DEBUG_STACK_USAGE
+ void *ptr2;
+#endif
+ size_t pagesz = getpagesize();
+#ifdef _SC_THREAD_STACK_MIN
+ /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
+ long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
+ *sz = MAX(MAX(min_stack_sz, 0), *sz);
+#endif
+ /* adjust stack size to a multiple of the page size */
+ *sz = ROUND_UP(*sz, pagesz);
+ /* allocate one extra page for the guard page */
+ *sz += pagesz;
+
+ ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (ptr == MAP_FAILED) {
+ abort();
+ }
+
+#if defined(HOST_IA64)
+ /* separate register stack */
+ guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
+#elif defined(HOST_HPPA)
+ /* stack grows up */
+ guardpage = ptr + *sz - pagesz;
+#else
+ /* stack grows down */
+ guardpage = ptr;
+#endif
+ if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
+ abort();
+ }
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+ for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
+ *(uint32_t *)ptr2 = 0xdeadbeaf;
+ }
+#endif
+
+ return ptr;
+}
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static __thread unsigned int max_stack_usage;
+#endif
+
+void qemu_free_stack(void *stack, size_t sz)
+{
+#ifdef CONFIG_DEBUG_STACK_USAGE
+ unsigned int usage;
+ void *ptr;
+
+ for (ptr = stack + getpagesize(); ptr < stack + sz;
+ ptr += sizeof(uint32_t)) {
+ if (*(uint32_t *)ptr != 0xdeadbeaf) {
+ break;
+ }
+ }
+ usage = sz - (uintptr_t) (ptr - stack);
+ if (usage > max_stack_usage) {
+ error_report("thread %d max stack usage increased from %u to %u",
+ qemu_get_thread_id(), max_stack_usage, usage);
+ max_stack_usage = usage;
+ }
+#endif
+
+ munmap(stack, sz);
+}