summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2016-05-19 16:54:12 +0100
committerPeter Maydell <peter.maydell@linaro.org>2016-05-19 16:54:12 +0100
commit6bd8ab6889f45a42d69a3a65a4d6e7fc2453c84c (patch)
tree8333fbb0dc3140307c828ed2ebc5b4e81facbbd7 /block
parent776efef32439a31cb13a6acfe8aab833687745ad (diff)
parent7753da2351e0b0ff6825d080aff58d73c994ff47 (diff)
downloadqemu-6bd8ab6889f45a42d69a3a65a4d6e7fc2453c84c.tar.gz
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches # gpg: Signature made Thu 19 May 2016 16:09:27 BST using RSA key ID C88F2FD6 # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" * remotes/kevin/tags/for-upstream: (31 commits) qemu-iotests: Fix regression in 136 on aio_read invalid qemu-iotests: Simplify 109 with unaligned qemu-img compare qemu-io: Fix recent UI updates block: clarify error message for qmp-eject qemu-iotests: Some more write_zeroes tests qcow2: Fix write_zeroes with partially allocated backing file cluster qcow2: fix condition in is_zero_cluster block: Propagate AioContext change to all children block: Remove BlockDriverState.blk block: Don't return throttling info in query-named-block-nodes block: Avoid bs->blk in bdrv_next() block: Add bdrv_has_blk() block: Remove bdrv_aio_multiwrite() blockjob: Don't touch BDS iostatus blockjob: Don't set iostatus of target block: User BdrvChild callback for device name block: Use BdrvChild callbacks for change_media/resize block: Don't check throttled reqs in bdrv_requests_pending() Revert "block: Forbid I/O throttling on nodes with multiple parents for 2.6" block: Remove bdrv_move_feature_fields() ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r--block/backup.c34
-rw-r--r--block/blkverify.c19
-rw-r--r--block/block-backend.c251
-rw-r--r--block/commit.c7
-rw-r--r--block/io.c293
-rw-r--r--block/mirror.c38
-rw-r--r--block/qapi.c6
-rw-r--r--block/qcow2.c3
-rw-r--r--block/quorum.c24
-rw-r--r--block/snapshot.c30
-rw-r--r--block/stream.c10
-rw-r--r--block/throttle-groups.c244
-rw-r--r--block/vmdk.c23
13 files changed, 378 insertions, 604 deletions
diff --git a/block/backup.c b/block/backup.c
index 491fd14068..fec45e8212 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -218,15 +218,6 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
-static void backup_iostatus_reset(BlockJob *job)
-{
- BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-
- if (s->target->blk) {
- blk_iostatus_reset(s->target->blk);
- }
-}
-
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
BdrvDirtyBitmap *bm;
@@ -263,7 +254,6 @@ static const BlockJobDriver backup_job_driver = {
.instance_size = sizeof(BackupBlockJob),
.job_type = BLOCK_JOB_TYPE_BACKUP,
.set_speed = backup_set_speed,
- .iostatus_reset = backup_iostatus_reset,
.commit = backup_commit,
.abort = backup_abort,
};
@@ -272,11 +262,11 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job,
bool read, int error)
{
if (read) {
- return block_job_error_action(&job->common, job->common.bs,
- job->on_source_error, true, error);
+ return block_job_error_action(&job->common, job->on_source_error,
+ true, error);
} else {
- return block_job_error_action(&job->common, job->target,
- job->on_target_error, false, error);
+ return block_job_error_action(&job->common, job->on_target_error,
+ false, error);
}
}
@@ -388,7 +378,6 @@ static void coroutine_fn backup_run(void *opaque)
BackupCompleteData *data;
BlockDriverState *bs = job->common.bs;
BlockDriverState *target = job->target;
- BlockdevOnError on_target_error = job->on_target_error;
NotifierWithReturn before_write = {
.notify = backup_before_write_notify,
};
@@ -404,11 +393,6 @@ static void coroutine_fn backup_run(void *opaque)
job->done_bitmap = bitmap_new(end);
- if (target->blk) {
- blk_set_on_error(target->blk, on_target_error, on_target_error);
- blk_iostatus_enable(target->blk);
- }
-
bdrv_add_before_write_notifier(bs, &before_write);
if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
@@ -484,9 +468,6 @@ static void coroutine_fn backup_run(void *opaque)
qemu_co_rwlock_unlock(&job->flush_rwlock);
g_free(job->done_bitmap);
- if (target->blk) {
- blk_iostatus_disable(target->blk);
- }
bdrv_op_unblock_all(target, job->common.blocker);
data = g_malloc(sizeof(*data));
@@ -515,13 +496,6 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
return;
}
- if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
- on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
- error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
- return;
- }
-
if (!bdrv_is_inserted(bs)) {
error_setg(errp, "Device is not inserted: %s",
bdrv_get_device_name(bs));
diff --git a/block/blkverify.c b/block/blkverify.c
index 9414b7a84e..4045396a3d 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -293,22 +293,6 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
}
-/* Propagate AioContext changes to ->test_file */
-static void blkverify_detach_aio_context(BlockDriverState *bs)
-{
- BDRVBlkverifyState *s = bs->opaque;
-
- bdrv_detach_aio_context(s->test_file->bs);
-}
-
-static void blkverify_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
- BDRVBlkverifyState *s = bs->opaque;
-
- bdrv_attach_aio_context(s->test_file->bs, new_context);
-}
-
static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVBlkverifyState *s = bs->opaque;
@@ -356,9 +340,6 @@ static BlockDriver bdrv_blkverify = {
.bdrv_aio_writev = blkverify_aio_writev,
.bdrv_aio_flush = blkverify_aio_flush,
- .bdrv_attach_aio_context = blkverify_attach_aio_context,
- .bdrv_detach_aio_context = blkverify_detach_aio_context,
-
.is_filter = true,
.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
};
diff --git a/block/block-backend.c b/block/block-backend.c
index a1e2c7fa20..6928d61de4 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -34,6 +34,7 @@ struct BlockBackend {
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
+ BlockBackendPublic public;
void *dev; /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
@@ -74,6 +75,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
};
static void drive_info_del(DriveInfo *dinfo);
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -90,9 +92,26 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options,
/* We're not supposed to call this function for root nodes */
abort();
}
+static void blk_root_drained_begin(BdrvChild *child);
+static void blk_root_drained_end(BdrvChild *child);
+
+static void blk_root_change_media(BdrvChild *child, bool load);
+static void blk_root_resize(BdrvChild *child);
+
+static const char *blk_root_get_name(BdrvChild *child)
+{
+ return blk_name(child->opaque);
+}
static const BdrvChildRole child_root = {
- .inherit_options = blk_root_inherit_options,
+ .inherit_options = blk_root_inherit_options,
+
+ .change_media = blk_root_change_media,
+ .resize = blk_root_resize,
+ .get_name = blk_root_get_name,
+
+ .drained_begin = blk_root_drained_begin,
+ .drained_end = blk_root_drained_end,
};
/*
@@ -106,8 +125,12 @@ BlockBackend *blk_new(Error **errp)
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
+ qemu_co_queue_init(&blk->public.throttled_reqs[0]);
+ qemu_co_queue_init(&blk->public.throttled_reqs[1]);
+
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
+
QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
}
@@ -128,7 +151,7 @@ BlockBackend *blk_new_with_bs(Error **errp)
bs = bdrv_new_root();
blk->root = bdrv_root_attach_child(bs, "root", &child_root);
- bs->blk = blk;
+ blk->root->opaque = blk;
return blk;
}
@@ -177,10 +200,6 @@ static void blk_delete(BlockBackend *blk)
}
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
- if (blk->root_state.throttle_state) {
- g_free(blk->root_state.throttle_group);
- throttle_group_unref(blk->root_state.throttle_state);
- }
QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
@@ -267,28 +286,50 @@ BlockBackend *blk_next(BlockBackend *blk)
: QTAILQ_FIRST(&monitor_block_backends);
}
-/*
- * Iterates over all BlockDriverStates which are attached to a BlockBackend.
- * This function is for use by bdrv_next().
- *
- * @bs must be NULL or a BDS that is attached to a BB.
- */
-BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
-{
+struct BdrvNextIterator {
+ enum {
+ BDRV_NEXT_BACKEND_ROOTS,
+ BDRV_NEXT_MONITOR_OWNED,
+ } phase;
BlockBackend *blk;
+ BlockDriverState *bs;
+};
- if (bs) {
- assert(bs->blk);
- blk = bs->blk;
- } else {
- blk = NULL;
+/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
+ * the monitor or attached to a BlockBackend */
+BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
+{
+ if (!it) {
+ it = g_new(BdrvNextIterator, 1);
+ *it = (BdrvNextIterator) {
+ .phase = BDRV_NEXT_BACKEND_ROOTS,
+ };
+ }
+
+ /* First, return all root nodes of BlockBackends. In order to avoid
+ * returning a BDS twice when multiple BBs refer to it, we only return it
+ * if the BB is the first one in the parent list of the BDS. */
+ if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
+ do {
+ it->blk = blk_all_next(it->blk);
+ *bs = it->blk ? blk_bs(it->blk) : NULL;
+ } while (it->blk && (*bs == NULL || bdrv_first_blk(*bs) != it->blk));
+
+ if (*bs) {
+ return it;
+ }
+ it->phase = BDRV_NEXT_MONITOR_OWNED;
}
+ /* Then return the monitor-owned BDSes without a BB attached. Ignore all
+ * BDSes that are attached to a BlockBackend here; they have been handled
+ * by the above block already */
do {
- blk = blk_all_next(blk);
- } while (blk && !blk->root);
+ it->bs = bdrv_next_monitor_owned(it->bs);
+ *bs = it->bs;
+ } while (*bs && bdrv_has_blk(*bs));
- return blk ? blk->root->bs : NULL;
+ return *bs ? it : NULL;
}
/*
@@ -375,6 +416,26 @@ BlockDriverState *blk_bs(BlockBackend *blk)
return blk->root ? blk->root->bs : NULL;
}
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
+{
+ BdrvChild *child;
+ QLIST_FOREACH(child, &bs->parents, next_parent) {
+ if (child->role == &child_root) {
+ return child->opaque;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Returns true if @bs has an associated BlockBackend.
+ */
+bool bdrv_has_blk(BlockDriverState *bs)
+{
+ return bdrv_first_blk(bs) != NULL;
+}
+
/*
* Return @blk's DriveInfo if any, else null.
*/
@@ -411,17 +472,33 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
}
/*
+ * Returns a pointer to the publicly accessible fields of @blk.
+ */
+BlockBackendPublic *blk_get_public(BlockBackend *blk)
+{
+ return &blk->public;
+}
+
+/*
+ * Returns a BlockBackend given the associated @public fields.
+ */
+BlockBackend *blk_by_public(BlockBackendPublic *public)
+{
+ return container_of(public, BlockBackend, public);
+}
+
+/*
* Disassociates the currently associated BlockDriverState from @blk.
*/
void blk_remove_bs(BlockBackend *blk)
{
- assert(blk->root->bs->blk == blk);
-
notifier_list_notify(&blk->remove_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
blk_update_root_state(blk);
- blk->root->bs->blk = NULL;
bdrv_root_unref_child(blk->root);
blk->root = NULL;
}
@@ -431,12 +508,15 @@ void blk_remove_bs(BlockBackend *blk)
*/
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
{
- assert(!blk->root && !bs->blk);
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root);
- bs->blk = blk;
+ blk->root->opaque = blk;
notifier_list_notify(&blk->insert_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(
+ &blk->public.throttle_timers, bdrv_get_aio_context(bs));
+ }
}
/*
@@ -525,6 +605,11 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
}
}
+static void blk_root_change_media(BdrvChild *child, bool load)
+{
+ blk_dev_change_media_cb(child->opaque, load);
+}
+
/*
* Does @blk's attached device model have removable media?
* %true if no device model is attached.
@@ -579,8 +664,10 @@ bool blk_dev_is_medium_locked(BlockBackend *blk)
/*
* Notify @blk's attached device model of a backend size change.
*/
-void blk_dev_resize_cb(BlockBackend *blk)
+static void blk_root_resize(BdrvChild *child)
{
+ BlockBackend *blk = child->opaque;
+
if (blk->dev_ops && blk->dev_ops->resize_cb) {
blk->dev_ops->resize_cb(blk->dev_opaque);
}
@@ -692,6 +779,11 @@ static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
return ret;
}
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, false);
+ }
+
return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
}
@@ -706,6 +798,11 @@ static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
return ret;
}
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, true);
+ }
+
if (!blk->enable_write_cache) {
flags |= BDRV_REQ_FUA;
}
@@ -775,7 +872,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
int count)
{
- BlockDriverState *bs = blk_bs(blk);
int ret;
ret = blk_check_byte_request(blk, offset, count);
@@ -783,9 +879,9 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
return ret;
}
- bdrv_no_throttling_begin(bs);
+ blk_root_drained_begin(blk->root);
ret = blk_pread(blk, offset, buf, count);
- bdrv_no_throttling_end(bs);
+ blk_root_drained_end(blk->root);
return ret;
}
@@ -1008,20 +1104,6 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
bdrv_aio_cancel_async(acb);
}
-int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
-{
- int i, ret;
-
- for (i = 0; i < num_reqs; i++) {
- ret = blk_check_request(blk, reqs[i].sector, reqs[i].nb_sectors);
- if (ret < 0) {
- return ret;
- }
- }
-
- return bdrv_aio_multiwrite(blk_bs(blk), reqs, num_reqs);
-}
-
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
if (!blk_is_available(blk)) {
@@ -1334,7 +1416,14 @@ void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
BlockDriverState *bs = blk_bs(blk);
if (bs) {
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
bdrv_set_aio_context(bs, new_context);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(&blk->public.throttle_timers,
+ new_context);
+ }
}
}
@@ -1499,19 +1588,6 @@ void blk_update_root_state(BlockBackend *blk)
blk->root_state.open_flags = blk->root->bs->open_flags;
blk->root_state.read_only = blk->root->bs->read_only;
blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
-
- if (blk->root_state.throttle_group) {
- g_free(blk->root_state.throttle_group);
- throttle_group_unref(blk->root_state.throttle_state);
- }
- if (blk->root->bs->throttle_state) {
- const char *name = throttle_group_get_name(blk->root->bs);
- blk->root_state.throttle_group = g_strdup(name);
- blk->root_state.throttle_state = throttle_group_incref(name);
- } else {
- blk->root_state.throttle_group = NULL;
- blk->root_state.throttle_state = NULL;
- }
}
/*
@@ -1522,9 +1598,6 @@ void blk_update_root_state(BlockBackend *blk)
void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
{
bs->detect_zeroes = blk->root_state.detect_zeroes;
- if (blk->root_state.throttle_group) {
- bdrv_io_limits_enable(bs, blk->root_state.throttle_group);
- }
}
/*
@@ -1587,3 +1660,59 @@ int blk_flush_all(void)
return result;
}
+
+
+/* throttling disk I/O limits */
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
+{
+ throttle_group_config(blk, cfg);
+}
+
+void blk_io_limits_disable(BlockBackend *blk)
+{
+ assert(blk->public.throttle_state);
+ bdrv_drained_begin(blk_bs(blk));
+ throttle_group_unregister_blk(blk);
+ bdrv_drained_end(blk_bs(blk));
+}
+
+/* should be called before blk_set_io_limits if a limit is set */
+void blk_io_limits_enable(BlockBackend *blk, const char *group)
+{
+ assert(!blk->public.throttle_state);
+ throttle_group_register_blk(blk, group);
+}
+
+void blk_io_limits_update_group(BlockBackend *blk, const char *group)
+{
+ /* this BB is not part of any group */
+ if (!blk->public.throttle_state) {
+ return;
+ }
+
+ /* this BB is a part of the same group than the one we want */
+ if (!g_strcmp0(throttle_group_get_name(blk), group)) {
+ return;
+ }
+
+ /* need to change the group this bs belong to */
+ blk_io_limits_disable(blk);
+ blk_io_limits_enable(blk, group);
+}
+
+static void blk_root_drained_begin(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ if (blk->public.io_limits_disabled++ == 0) {
+ throttle_group_restart_blk(blk);
+ }
+}
+
+static void blk_root_drained_end(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ assert(blk->public.io_limits_disabled);
+ --blk->public.io_limits_disabled;
+}
diff --git a/block/commit.c b/block/commit.c
index cba0e8c1e8..f308c8c6f0 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -214,13 +214,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
BlockDriverState *overlay_bs;
Error *local_err = NULL;
- if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
- on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
- error_setg(errp, "Invalid parameter combination");
- return;
- }
-
assert(top != bs);
if (top == base) {
error_setg(errp, "Invalid files for merge: top and base are the same");
diff --git a/block/io.c b/block/io.c
index cd6d71a503..60a6bd8bdb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -27,7 +27,6 @@
#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/block_int.h"
-#include "block/throttle-groups.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@@ -46,56 +45,26 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
-/* throttling disk I/O limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
- ThrottleConfig *cfg)
+static void bdrv_parent_drained_begin(BlockDriverState *bs)
{
- throttle_group_config(bs, cfg);
-}
+ BdrvChild *c;
-void bdrv_no_throttling_begin(BlockDriverState *bs)
-{
- if (bs->io_limits_disabled++ == 0) {
- throttle_group_restart_bs(bs);
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_begin) {
+ c->role->drained_begin(c);
+ }
}
}
-void bdrv_no_throttling_end(BlockDriverState *bs)
+static void bdrv_parent_drained_end(BlockDriverState *bs)
{
- assert(bs->io_limits_disabled);
- --bs->io_limits_disabled;
-}
-
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
- assert(bs->throttle_state);
- bdrv_no_throttling_begin(bs);
- throttle_group_unregister_bs(bs);
- bdrv_no_throttling_end(bs);
-}
-
-/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs, const char *group)
-{
- assert(!bs->throttle_state);
- throttle_group_register_bs(bs, group);
-}
-
-void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
-{
- /* this bs is not part of any group */
- if (!bs->throttle_state) {
- return;
- }
+ BdrvChild *c;
- /* this bs is a part of the same group than the one we want */
- if (!g_strcmp0(throttle_group_get_name(bs), group)) {
- return;
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_end) {
+ c->role->drained_end(c);
+ }
}
-
- /* need to change the group this bs belong to */
- bdrv_io_limits_disable(bs);
- bdrv_io_limits_enable(bs, group);
}
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
@@ -182,12 +151,6 @@ bool bdrv_requests_pending(BlockDriverState *bs)
if (!QLIST_EMPTY(&bs->tracked_requests)) {
return true;
}
- if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
- return true;
- }
- if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
- return true;
- }
QLIST_FOREACH(child, &bs->children, next) {
if (bdrv_requests_pending(child->bs)) {
@@ -275,17 +238,17 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
*/
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
- bdrv_no_throttling_begin(bs);
+ bdrv_parent_drained_begin(bs);
bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
bdrv_co_yield_to_drain(bs);
bdrv_io_unplugged_end(bs);
- bdrv_no_throttling_end(bs);
+ bdrv_parent_drained_end(bs);
}
void bdrv_drain(BlockDriverState *bs)
{
- bdrv_no_throttling_begin(bs);
+ bdrv_parent_drained_begin(bs);
bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
if (qemu_in_coroutine()) {
@@ -294,7 +257,7 @@ void bdrv_drain(BlockDriverState *bs)
bdrv_drain_poll(bs);
}
bdrv_io_unplugged_end(bs);
- bdrv_no_throttling_end(bs);
+ bdrv_parent_drained_end(bs);
}
/*
@@ -307,17 +270,18 @@ void bdrv_drain_all(void)
{
/* Always run first iteration so any pending completion BHs run */
bool busy = true;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator *it = NULL;
GSList *aio_ctxs = NULL, *ctx;
- while ((bs = bdrv_next(bs))) {
+ while ((it = bdrv_next(it, &bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (bs->job) {
block_job_pause(bs->job);
}
- bdrv_no_throttling_begin(bs);
+ bdrv_parent_drained_begin(bs);
bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
aio_context_release(aio_context);
@@ -338,10 +302,10 @@ void bdrv_drain_all(void)
for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
AioContext *aio_context = ctx->data;
- bs = NULL;
+ it = NULL;
aio_context_acquire(aio_context);
- while ((bs = bdrv_next(bs))) {
+ while ((it = bdrv_next(it, &bs))) {
if (aio_context == bdrv_get_aio_context(bs)) {
if (bdrv_requests_pending(bs)) {
busy = true;
@@ -354,13 +318,13 @@ void bdrv_drain_all(void)
}
}
- bs = NULL;
- while ((bs = bdrv_next(bs))) {
+ it = NULL;
+ while ((it = bdrv_next(it, &bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
bdrv_io_unplugged_end(bs);
- bdrv_no_throttling_end(bs);
+ bdrv_parent_drained_end(bs);
if (bs->job) {
block_job_resume(bs->job);
}
@@ -1069,11 +1033,6 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
flags |= BDRV_REQ_COPY_ON_READ;
}
- /* throttling disk I/O */
- if (bs->throttle_state) {
- throttle_group_co_io_limits_intercept(bs, bytes, false);
- }
-
/* Align read if necessary by padding qiov */
if (offset & (align - 1)) {
head_buf = qemu_blockalign(bs, align);
@@ -1430,11 +1389,6 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
return ret;
}
- /* throttling disk I/O */
- if (bs->throttle_state) {
- throttle_group_co_io_limits_intercept(bs, bytes, true);
- }
-
/*
* Align write if necessary by performing a read-modify-write cycle.
* Pad qiov with the read parts and be sure to have a tracked request not
@@ -1925,200 +1879,6 @@ BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
cb, opaque, true);
}
-
-typedef struct MultiwriteCB {
- int error;
- int num_requests;
- int num_callbacks;
- struct {
- BlockCompletionFunc *cb;
- void *opaque;
- QEMUIOVector *free_qiov;
- } callbacks[];
-} MultiwriteCB;
-
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
- int i;
-
- for (i = 0; i < mcb->num_callbacks; i++) {
- mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
- if (mcb->callbacks[i].free_qiov) {
- qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
- }
- g_free(mcb->callbacks[i].free_qiov);
- }
-}
-
-static void multiwrite_cb(void *opaque, int ret)
-{
- MultiwriteCB *mcb = opaque;
-
- trace_multiwrite_cb(mcb, ret);
-
- if (ret < 0 && !mcb->error) {
- mcb->error = ret;
- }
-
- mcb->num_requests--;
- if (mcb->num_requests == 0) {
- multiwrite_user_cb(mcb);
- g_free(mcb);
- }
-}
-
-static int multiwrite_req_compare(const void *a, const void *b)
-{
- const BlockRequest *req1 = a, *req2 = b;
-
- /*
- * Note that we can't simply subtract req2->sector from req1->sector
- * here as that could overflow the return value.
- */
- if (req1->sector > req2->sector) {
- return 1;
- } else if (req1->sector < req2->sector) {
- return -1;
- } else {
- return 0;
- }
-}
-
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
- int num_reqs, MultiwriteCB *mcb)
-{
- int i, outidx;
-
- // Sort requests by start sector
- qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
- // Check if adjacent requests touch the same clusters. If so, combine them,
- // filling up gaps with zero sectors.
- outidx = 0;
- for (i = 1; i < num_reqs; i++) {
- int merge = 0;
- int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
- // Handle exactly sequential writes and overlapping writes.
- if (reqs[i].sector <= oldreq_last) {
- merge = 1;
- }
-
- if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 >
- bs->bl.max_iov) {
- merge = 0;
- }
-
- if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
- reqs[i].nb_sectors > bs->bl.max_transfer_length) {
- merge = 0;
- }
-
- if (merge) {
- size_t size;
- QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
- qemu_iovec_init(qiov,
- reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
- // Add the first request to the merged one. If the requests are
- // overlapping, drop the last sectors of the first request.
- size = (reqs[i].sector - reqs[outidx].sector) << 9;
- qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
- // We should need to add any zeros between the two requests
- assert (reqs[i].sector <= oldreq_last);
-
- // Add the second request
- qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
- // Add tail of first request, if necessary
- if (qiov->size < reqs[outidx].qiov->size) {
- qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
- reqs[outidx].qiov->size - qiov->size);
- }
-
- reqs[outidx].nb_sectors = qiov->size >> 9;
- reqs[outidx].qiov = qiov;
-
- mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
- } else {
- outidx++;
- reqs[outidx].sector = reqs[i].sector;
- reqs[outidx].nb_sectors = reqs[i].nb_sectors;
- reqs[outidx].qiov = reqs[i].qiov;
- }
- }
-
- if (bs->blk) {
- block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
- num_reqs - outidx - 1);
- }
-
- return outidx + 1;
-}
-
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
- MultiwriteCB *mcb;
- int i;
-
- /* don't submit writes if we don't have a medium */
- if (bs->drv == NULL) {
- for (i = 0; i < num_reqs; i++) {
- reqs[i].error = -ENOMEDIUM;
- }
- return -1;
- }
-
- if (num_reqs == 0) {
- return 0;
- }
-
- // Create MultiwriteCB structure
- mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
- mcb->num_requests = 0;
- mcb->num_callbacks = num_reqs;
-
- for (i = 0; i < num_reqs; i++) {
- mcb->callbacks[i].cb = reqs[i].cb;
- mcb->callbacks[i].opaque = reqs[i].opaque;
- }
-
- // Check for mergable requests
- num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
-
- trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
-
- /* Run the aio requests. */
- mcb->num_requests = num_reqs;
- for (i = 0; i < num_reqs; i++) {
- bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
- reqs[i].nb_sectors, reqs[i].flags,
- multiwrite_cb, mcb,
- true);
- }
-
- return 0;
-}
-
void bdrv_aio_cancel(BlockAIOCB *acb)
{
qemu_aio_ref(acb);
@@ -2789,11 +2549,14 @@ void bdrv_drained_begin(BlockDriverState *bs)
if (!bs->quiesce_counter++) {
aio_disable_external(bdrv_get_aio_context(bs));
}
+ bdrv_parent_drained_begin(bs);
bdrv_drain(bs);
}
void bdrv_drained_end(BlockDriverState *bs)
{
+ bdrv_parent_drained_end(bs);
+
assert(bs->quiesce_counter > 0);
if (--bs->quiesce_counter > 0) {
return;
diff --git a/block/mirror.c b/block/mirror.c
index 039f48125e..b9986d8218 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -80,11 +80,11 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
{
s->synced = false;
if (read) {
- return block_job_error_action(&s->common, s->common.bs,
- s->on_source_error, true, error);
+ return block_job_error_action(&s->common, s->on_source_error,
+ true, error);
} else {
- return block_job_error_action(&s->common, s->target,
- s->on_target_error, false, error);
+ return block_job_error_action(&s->common, s->on_target_error,
+ false, error);
}
}
@@ -468,7 +468,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
/* This was checked in mirror_start_job(), but meanwhile one of the
* nodes could have been newly attached to a BlockBackend. */
- if (to_replace->blk && s->target->blk) {
+ if (bdrv_has_blk(to_replace) && bdrv_has_blk(s->target)) {
error_report("block job: Can't create node with two BlockBackends");
data->ret = -EINVAL;
goto out;
@@ -710,9 +710,6 @@ immediate_exit:
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
- if (s->target->blk) {
- blk_iostatus_disable(s->target->blk);
- }
data = g_malloc(sizeof(*data));
data->ret = ret;
@@ -739,15 +736,6 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
-static void mirror_iostatus_reset(BlockJob *job)
-{
- MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-
- if (s->target->blk) {
- blk_iostatus_reset(s->target->blk);
- }
-}
-
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -793,7 +781,6 @@ static const BlockJobDriver mirror_job_driver = {
.instance_size = sizeof(MirrorBlockJob),
.job_type = BLOCK_JOB_TYPE_MIRROR,
.set_speed = mirror_set_speed,
- .iostatus_reset= mirror_iostatus_reset,
.complete = mirror_complete,
};
@@ -801,8 +788,6 @@ static const BlockJobDriver commit_active_job_driver = {
.instance_size = sizeof(MirrorBlockJob),
.job_type = BLOCK_JOB_TYPE_COMMIT,
.set_speed = mirror_set_speed,
- .iostatus_reset
- = mirror_iostatus_reset,
.complete = mirror_complete,
};
@@ -827,13 +812,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
assert ((granularity & (granularity - 1)) == 0);
- if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
- on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
- error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
- return;
- }
-
if (buf_size < 0) {
error_setg(errp, "Invalid parameter 'buf-size'");
return;
@@ -853,7 +831,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
} else {
replaced_bs = bs;
}
- if (replaced_bs->blk && target->blk) {
+ if (bdrv_has_blk(replaced_bs) && bdrv_has_blk(target)) {
error_setg(errp, "Can't create node with two BlockBackends");
return;
}
@@ -882,10 +860,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
bdrv_op_block_all(s->target, s->common.blocker);
- if (s->target->blk) {
- blk_set_on_error(s->target->blk, on_target_error, on_target_error);
- blk_iostatus_enable(s->target->blk);
- }
s->common.co = qemu_coroutine_create(mirror_run);
trace_mirror_start(bs, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);
diff --git a/block/qapi.c b/block/qapi.c
index c5f6ba643c..5594f74d17 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -67,10 +67,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->backing_file_depth = bdrv_get_backing_file_depth(bs);
info->detect_zeroes = bs->detect_zeroes;
- if (bs->throttle_state) {
+ if (blk && blk_get_public(blk)->throttle_state) {
ThrottleConfig cfg;
- throttle_group_get_config(bs, &cfg);
+ throttle_group_get_config(blk, &cfg);
info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg;
@@ -118,7 +118,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->iops_size = cfg.op_size;
info->has_group = true;
- info->group = g_strdup(throttle_group_get_name(bs));
+ info->group = g_strdup(throttle_group_get_name(blk));
}
info->write_threshold = bdrv_write_threshold_get(bs);
diff --git a/block/qcow2.c b/block/qcow2.c
index 49d7cff17a..c9306a78c1 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2413,7 +2413,7 @@ static bool is_zero_cluster(BlockDriverState *bs, int64_t start)
BlockDriverState *file;
int64_t res = bdrv_get_block_status_above(bs, NULL, start,
s->cluster_sectors, &nr, &file);
- return res >= 0 && ((res & BDRV_BLOCK_ZERO) || !(res & BDRV_BLOCK_DATA));
+ return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == s->cluster_sectors;
}
static bool is_zero_cluster_top_locked(BlockDriverState *bs, int64_t start)
@@ -2424,6 +2424,7 @@ static bool is_zero_cluster_top_locked(BlockDriverState *bs, int64_t start)
int ret;
ret = qcow2_get_cluster_offset(bs, start << BDRV_SECTOR_BITS, &nr, &off);
+ assert(nr == s->cluster_sectors);
return ret == QCOW2_CLUSTER_UNALLOCATED || ret == QCOW2_CLUSTER_ZERO;
}
diff --git a/block/quorum.c b/block/quorum.c
index 1ec3511528..ec6f3b9059 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -989,27 +989,6 @@ static void quorum_close(BlockDriverState *bs)
g_free(s->children);
}
-static void quorum_detach_aio_context(BlockDriverState *bs)
-{
- BDRVQuorumState *s = bs->opaque;
- int i;
-
- for (i = 0; i < s->num_children; i++) {
- bdrv_detach_aio_context(s->children[i]->bs);
- }
-}
-
-static void quorum_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
- BDRVQuorumState *s = bs->opaque;
- int i;
-
- for (i = 0; i < s->num_children; i++) {
- bdrv_attach_aio_context(s->children[i]->bs, new_context);
- }
-}
-
static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
Error **errp)
{
@@ -1127,9 +1106,6 @@ static BlockDriver bdrv_quorum = {
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
- .bdrv_detach_aio_context = quorum_detach_aio_context,
- .bdrv_attach_aio_context = quorum_attach_aio_context,
-
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
diff --git a/block/snapshot.c b/block/snapshot.c
index e9d721df68..3917ec5c91 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -373,9 +373,10 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
{
bool ok = true;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator *it = NULL;
- while (ok && (bs = bdrv_next(bs))) {
+ while (ok && (it = bdrv_next(it, &bs))) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -393,10 +394,11 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
Error **err)
{
int ret = 0;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator *it = NULL;
QEMUSnapshotInfo sn1, *snapshot = &sn1;
- while (ret == 0 && (bs = bdrv_next(bs))) {
+ while (ret == 0 && (it = bdrv_next(it, &bs))) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -415,9 +417,10 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
{
int err = 0;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator *it = NULL;
- while (err == 0 && (bs = bdrv_next(bs))) {
+ while (err == 0 && (it = bdrv_next(it, &bs))) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -435,9 +438,10 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
{
QEMUSnapshotInfo sn;
int err = 0;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator *it = NULL;
- while (err == 0 && (bs = bdrv_next(bs))) {
+ while (err == 0 && (it = bdrv_next(it, &bs))) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -457,9 +461,10 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
BlockDriverState **first_bad_bs)
{
int err = 0;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator *it = NULL;
- while (err == 0 && (bs = bdrv_next(bs))) {
+ while (err == 0 && (it = bdrv_next(it, &bs))) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -480,9 +485,10 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
BlockDriverState *bdrv_all_find_vmstate_bs(void)
{
bool not_found = true;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator *it = NULL;
- while (not_found && (bs = bdrv_next(bs))) {
+ while (not_found && (it = bdrv_next(it, &bs))) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
diff --git a/block/stream.c b/block/stream.c
index 332b9a183e..40aa32212e 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -163,8 +163,7 @@ wait:
}
if (ret < 0) {
BlockErrorAction action =
- block_job_error_action(&s->common, s->common.bs, s->on_error,
- true, -ret);
+ block_job_error_action(&s->common, s->on_error, true, -ret);
if (action == BLOCK_ERROR_ACTION_STOP) {
n = 0;
continue;
@@ -224,13 +223,6 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
{
StreamBlockJob *s;
- if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
- on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
- error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
- return;
- }
-
s = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
if (!s) {
return;
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 9ac063a0cd..59545e287e 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -23,13 +23,14 @@
*/
#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
#include "block/throttle-groups.h"
#include "qemu/queue.h"
#include "qemu/thread.h"
#include "sysemu/qtest.h"
/* The ThrottleGroup structure (with its ThrottleState) is shared
- * among different BlockDriverState and it's independent from
+ * among different BlockBackends and it's independent from
* AioContext, so in order to use it from different threads it needs
* its own locking.
*
@@ -39,26 +40,26 @@
* The whole ThrottleGroup structure is private and invisible to
* outside users, that only use it through its ThrottleState.
*
- * In addition to the ThrottleGroup structure, BlockDriverState has
+ * In addition to the ThrottleGroup structure, BlockBackendPublic has
* fields that need to be accessed by other members of the group and
- * therefore also need to be protected by this lock. Once a BDS is
- * registered in a group those fields can be accessed by other threads
- * any time.
+ * therefore also need to be protected by this lock. Once a
+ * BlockBackend is registered in a group those fields can be accessed
+ * by other threads any time.
*
* Again, all this is handled internally and is mostly transparent to
* the outside. The 'throttle_timers' field however has an additional
* constraint because it may be temporarily invalid (see for example
* bdrv_set_aio_context()). Therefore in this file a thread will
- * access some other BDS's timers only after verifying that that BDS
- * has throttled requests in the queue.
+ * access some other BlockBackend's timers only after verifying that
+ * that BlockBackend has throttled requests in the queue.
*/
typedef struct ThrottleGroup {
char *name; /* This is constant during the lifetime of the group */
QemuMutex lock; /* This lock protects the following four fields */
ThrottleState ts;
- QLIST_HEAD(, BlockDriverState) head;
- BlockDriverState *tokens[2];
+ QLIST_HEAD(, BlockBackendPublic) head;
+ BlockBackend *tokens[2];
bool any_timer_armed[2];
/* These two are protected by the global throttle_groups_lock */
@@ -132,94 +133,95 @@ void throttle_group_unref(ThrottleState *ts)
qemu_mutex_unlock(&throttle_groups_lock);
}
-/* Get the name from a BlockDriverState's ThrottleGroup. The name (and
- * the pointer) is guaranteed to remain constant during the lifetime
- * of the group.
+/* Get the name from a BlockBackend's ThrottleGroup. The name (and the pointer)
+ * is guaranteed to remain constant during the lifetime of the group.
*
- * @bs: a BlockDriverState that is member of a throttling group
+ * @blk: a BlockBackend that is member of a throttling group
* @ret: the name of the group.
*/
-const char *throttle_group_get_name(BlockDriverState *bs)
+const char *throttle_group_get_name(BlockBackend *blk)
{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
return tg->name;
}
-/* Return the next BlockDriverState in the round-robin sequence,
- * simulating a circular list.
+/* Return the next BlockBackend in the round-robin sequence, simulating a
+ * circular list.
*
* This assumes that tg->lock is held.
*
- * @bs: the current BlockDriverState
- * @ret: the next BlockDriverState in the sequence
+ * @blk: the current BlockBackend
+ * @ret: the next BlockBackend in the sequence
*/
-static BlockDriverState *throttle_group_next_bs(BlockDriverState *bs)
+static BlockBackend *throttle_group_next_blk(BlockBackend *blk)
{
- ThrottleState *ts = bs->throttle_state;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
- BlockDriverState *next = QLIST_NEXT(bs, round_robin);
+ BlockBackendPublic *next = QLIST_NEXT(blkp, round_robin);
if (!next) {
- return QLIST_FIRST(&tg->head);
+ next = QLIST_FIRST(&tg->head);
}
- return next;
+ return blk_by_public(next);
}
-/* Return the next BlockDriverState in the round-robin sequence with
- * pending I/O requests.
+/* Return the next BlockBackend in the round-robin sequence with pending I/O
+ * requests.
*
* This assumes that tg->lock is held.
*
- * @bs: the current BlockDriverState
+ * @blk: the current BlockBackend
* @is_write: the type of operation (read/write)
- * @ret: the next BlockDriverState with pending requests, or bs
- * if there is none.
+ * @ret: the next BlockBackend with pending requests, or blk if there is
+ * none.
*/
-static BlockDriverState *next_throttle_token(BlockDriverState *bs,
- bool is_write)
+static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write)
{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
- BlockDriverState *token, *start;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
+ BlockBackend *token, *start;
start = token = tg->tokens[is_write];
/* get next bs round in round robin style */
- token = throttle_group_next_bs(token);
- while (token != start && !token->pending_reqs[is_write]) {
- token = throttle_group_next_bs(token);
+ token = throttle_group_next_blk(token);
+ while (token != start && !blkp->pending_reqs[is_write]) {
+ token = throttle_group_next_blk(token);
}
/* If no IO are queued for scheduling on the next round robin token
* then decide the token is the current bs because chances are
* the current bs get the current request queued.
*/
- if (token == start && !token->pending_reqs[is_write]) {
- token = bs;
+ if (token == start && !blkp->pending_reqs[is_write]) {
+ token = blk;
}
return token;
}
-/* Check if the next I/O request for a BlockDriverState needs to be
- * throttled or not. If there's no timer set in this group, set one
- * and update the token accordingly.
+/* Check if the next I/O request for a BlockBackend needs to be throttled or
+ * not. If there's no timer set in this group, set one and update the token
+ * accordingly.
*
* This assumes that tg->lock is held.
*
- * @bs: the current BlockDriverState
+ * @blk: the current BlockBackend
* @is_write: the type of operation (read/write)
* @ret: whether the I/O request needs to be throttled or not
*/
-static bool throttle_group_schedule_timer(BlockDriverState *bs,
- bool is_write)
+static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
{
- ThrottleState *ts = bs->throttle_state;
- ThrottleTimers *tt = &bs->throttle_timers;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleState *ts = blkp->throttle_state;
+ ThrottleTimers *tt = &blkp->throttle_timers;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool must_wait;
- if (bs->io_limits_disabled) {
+ if (blkp->io_limits_disabled) {
return false;
}
@@ -230,9 +232,9 @@ static bool throttle_group_schedule_timer(BlockDriverState *bs,
must_wait = throttle_schedule_timer(ts, tt, is_write);
- /* If a timer just got armed, set bs as the current token */
+ /* If a timer just got armed, set blk as the current token */
if (must_wait) {
- tg->tokens[is_write] = bs;
+ tg->tokens[is_write] = blk;
tg->any_timer_armed[is_write] = true;
}
@@ -243,18 +245,19 @@ static bool throttle_group_schedule_timer(BlockDriverState *bs,
*
* This assumes that tg->lock is held.
*
- * @bs: the current BlockDriverState
+ * @blk: the current BlockBackend
* @is_write: the type of operation (read/write)
*/
-static void schedule_next_request(BlockDriverState *bs, bool is_write)
+static void schedule_next_request(BlockBackend *blk, bool is_write)
{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
bool must_wait;
- BlockDriverState *token;
+ BlockBackend *token;
/* Check if there's any pending request to schedule next */
- token = next_throttle_token(bs, is_write);
- if (!token->pending_reqs[is_write]) {
+ token = next_throttle_token(blk, is_write);
+ if (!blkp->pending_reqs[is_write]) {
return;
}
@@ -263,12 +266,12 @@ static void schedule_next_request(BlockDriverState *bs, bool is_write)
/* If it doesn't have to wait, queue it for immediate execution */
if (!must_wait) {
- /* Give preference to requests from the current bs */
+ /* Give preference to requests from the current blk */
if (qemu_in_coroutine() &&
- qemu_co_queue_next(&bs->throttled_reqs[is_write])) {
- token = bs;
+ qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
+ token = blk;
} else {
- ThrottleTimers *tt = &token->throttle_timers;
+ ThrottleTimers *tt = &blkp->throttle_timers;
int64_t now = qemu_clock_get_ns(tt->clock_type);
timer_mod(tt->timers[is_write], now + 1);
tg->any_timer_armed[is_write] = true;
@@ -281,48 +284,50 @@ static void schedule_next_request(BlockDriverState *bs, bool is_write)
* if necessary, and schedule the next request using a round robin
* algorithm.
*
- * @bs: the current BlockDriverState
+ * @blk: the current BlockBackend
* @bytes: the number of bytes for this I/O
* @is_write: the type of operation (read/write)
*/
-void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs,
+void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
unsigned int bytes,
bool is_write)
{
bool must_wait;
- BlockDriverState *token;
+ BlockBackend *token;
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
qemu_mutex_lock(&tg->lock);
/* First we check if this I/O has to be throttled. */
- token = next_throttle_token(bs, is_write);
+ token = next_throttle_token(blk, is_write);
must_wait = throttle_group_schedule_timer(token, is_write);
/* Wait if there's a timer set or queued requests of this type */
- if (must_wait || bs->pending_reqs[is_write]) {
- bs->pending_reqs[is_write]++;
+ if (must_wait || blkp->pending_reqs[is_write]) {
+ blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
- qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
+ qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
qemu_mutex_lock(&tg->lock);
- bs->pending_reqs[is_write]--;
+ blkp->pending_reqs[is_write]--;
}
/* The I/O will be executed, so do the accounting */
- throttle_account(bs->throttle_state, is_write, bytes);
+ throttle_account(blkp->throttle_state, is_write, bytes);
/* Schedule the next request */
- schedule_next_request(bs, is_write);
+ schedule_next_request(blk, is_write);
qemu_mutex_unlock(&tg->lock);
}
-void throttle_group_restart_bs(BlockDriverState *bs)
+void throttle_group_restart_blk(BlockBackend *blk)
{
+ BlockBackendPublic *blkp = blk_get_public(blk);
int i;
for (i = 0; i < 2; i++) {
- while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
+ while (qemu_co_enter_next(&blkp->throttled_reqs[i])) {
;
}
}
@@ -332,13 +337,14 @@ void throttle_group_restart_bs(BlockDriverState *bs)
* to throttle_config(), but guarantees atomicity within the
* throttling group.
*
- * @bs: a BlockDriverState that is member of the group
+ * @blk: a BlockBackend that is a member of the group
* @cfg: the configuration to set
*/
-void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg)
+void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
{
- ThrottleTimers *tt = &bs->throttle_timers;
- ThrottleState *ts = bs->throttle_state;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleTimers *tt = &blkp->throttle_timers;
+ ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
qemu_mutex_lock(&tg->lock);
/* throttle_config() cancels the timers */
@@ -351,20 +357,21 @@ void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg)
throttle_config(ts, tt, cfg);
qemu_mutex_unlock(&tg->lock);
- qemu_co_enter_next(&bs->throttled_reqs[0]);
- qemu_co_enter_next(&bs->throttled_reqs[1]);
+ qemu_co_enter_next(&blkp->throttled_reqs[0]);
+ qemu_co_enter_next(&blkp->throttled_reqs[1]);
}
/* Get the throttle configuration from a particular group. Similar to
* throttle_get_config(), but guarantees atomicity within the
* throttling group.
*
- * @bs: a BlockDriverState that is member of the group
+ * @blk: a BlockBackend that is a member of the group
* @cfg: the configuration will be written here
*/
-void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg)
+void throttle_group_get_config(BlockBackend *blk, ThrottleConfig *cfg)
{
- ThrottleState *ts = bs->throttle_state;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
qemu_mutex_lock(&tg->lock);
throttle_get_config(ts, cfg);
@@ -374,12 +381,13 @@ void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg)
/* ThrottleTimers callback. This wakes up a request that was waiting
* because it had been throttled.
*
- * @bs: the BlockDriverState whose request had been throttled
+ * @blk: the BlockBackend whose request had been throttled
* @is_write: the type of operation (read/write)
*/
-static void timer_cb(BlockDriverState *bs, bool is_write)
+static void timer_cb(BlockBackend *blk, bool is_write)
{
- ThrottleState *ts = bs->throttle_state;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool empty_queue;
@@ -389,13 +397,13 @@ static void timer_cb(BlockDriverState *bs, bool is_write)
qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */
- empty_queue = !qemu_co_enter_next(&bs->throttled_reqs[is_write]);
+ empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
/* If the request queue was empty then we have to take care of
* scheduling the next one */
if (empty_queue) {
qemu_mutex_lock(&tg->lock);
- schedule_next_request(bs, is_write);
+ schedule_next_request(blk, is_write);
qemu_mutex_unlock(&tg->lock);
}
}
@@ -410,17 +418,17 @@ static void write_timer_cb(void *opaque)
timer_cb(opaque, true);
}
-/* Register a BlockDriverState in the throttling group, also
- * initializing its timers and updating its throttle_state pointer to
- * point to it. If a throttling group with that name does not exist
- * yet, it will be created.
+/* Register a BlockBackend in the throttling group, also initializing its
+ * timers and updating its throttle_state pointer to point to it. If a
+ * throttling group with that name does not exist yet, it will be created.
*
- * @bs: the BlockDriverState to insert
+ * @blk: the BlockBackend to insert
* @groupname: the name of the group
*/
-void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
+void throttle_group_register_blk(BlockBackend *blk, const char *groupname)
{
int i;
+ BlockBackendPublic *blkp = blk_get_public(blk);
ThrottleState *ts = throttle_group_incref(groupname);
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
int clock_type = QEMU_CLOCK_REALTIME;
@@ -430,67 +438,67 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
clock_type = QEMU_CLOCK_VIRTUAL;
}
- bs->throttle_state = ts;
+ blkp->throttle_state = ts;
qemu_mutex_lock(&tg->lock);
- /* If the ThrottleGroup is new set this BlockDriverState as the token */
+ /* If the ThrottleGroup is new set this BlockBackend as the token */
for (i = 0; i < 2; i++) {
if (!tg->tokens[i]) {
- tg->tokens[i] = bs;
+ tg->tokens[i] = blk;
}
}
- QLIST_INSERT_HEAD(&tg->head, bs, round_robin);
+ QLIST_INSERT_HEAD(&tg->head, blkp, round_robin);
- throttle_timers_init(&bs->throttle_timers,
- bdrv_get_aio_context(bs),
+ throttle_timers_init(&blkp->throttle_timers,
+ blk_get_aio_context(blk),
clock_type,
read_timer_cb,
write_timer_cb,
- bs);
+ blk);
qemu_mutex_unlock(&tg->lock);
}
-/* Unregister a BlockDriverState from its group, removing it from the
- * list, destroying the timers and setting the throttle_state pointer
- * to NULL.
+/* Unregister a BlockBackend from its group, removing it from the list,
+ * destroying the timers and setting the throttle_state pointer to NULL.
*
- * The BlockDriverState must not have pending throttled requests, so
- * the caller has to drain them first.
+ * The BlockBackend must not have pending throttled requests, so the caller has
+ * to drain them first.
*
* The group will be destroyed if it's empty after this operation.
*
- * @bs: the BlockDriverState to remove
+ * @blk: the BlockBackend to remove
*/
-void throttle_group_unregister_bs(BlockDriverState *bs)
+void throttle_group_unregister_blk(BlockBackend *blk)
{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
int i;
- assert(bs->pending_reqs[0] == 0 && bs->pending_reqs[1] == 0);
- assert(qemu_co_queue_empty(&bs->throttled_reqs[0]));
- assert(qemu_co_queue_empty(&bs->throttled_reqs[1]));
+ assert(blkp->pending_reqs[0] == 0 && blkp->pending_reqs[1] == 0);
+ assert(qemu_co_queue_empty(&blkp->throttled_reqs[0]));
+ assert(qemu_co_queue_empty(&blkp->throttled_reqs[1]));
qemu_mutex_lock(&tg->lock);
for (i = 0; i < 2; i++) {
- if (tg->tokens[i] == bs) {
- BlockDriverState *token = throttle_group_next_bs(bs);
- /* Take care of the case where this is the last bs in the group */
- if (token == bs) {
+ if (tg->tokens[i] == blk) {
+ BlockBackend *token = throttle_group_next_blk(blk);
+ /* Take care of the case where this is the last blk in the group */
+ if (token == blk) {
token = NULL;
}
tg->tokens[i] = token;
}
}
- /* remove the current bs from the list */
- QLIST_REMOVE(bs, round_robin);
- throttle_timers_destroy(&bs->throttle_timers);
+ /* remove the current blk from the list */
+ QLIST_REMOVE(blkp, round_robin);
+ throttle_timers_destroy(&blkp->throttle_timers);
qemu_mutex_unlock(&tg->lock);
throttle_group_unref(&tg->ts);
- bs->throttle_state = NULL;
+ blkp->throttle_state = NULL;
}
static void throttle_groups_init(void)
diff --git a/block/vmdk.c b/block/vmdk.c
index 1cb4b8529c..372e5edc15 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2344,27 +2344,6 @@ static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
-static void vmdk_detach_aio_context(BlockDriverState *bs)
-{
- BDRVVmdkState *s = bs->opaque;
- int i;
-
- for (i = 0; i < s->num_extents; i++) {
- bdrv_detach_aio_context(s->extents[i].file->bs);
- }
-}
-
-static void vmdk_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
- BDRVVmdkState *s = bs->opaque;
- int i;
-
- for (i = 0; i < s->num_extents; i++) {
- bdrv_attach_aio_context(s->extents[i].file->bs, new_context);
- }
-}
-
static QemuOptsList vmdk_create_opts = {
.name = "vmdk-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
@@ -2434,8 +2413,6 @@ static BlockDriver bdrv_vmdk = {
.bdrv_get_specific_info = vmdk_get_specific_info,
.bdrv_refresh_limits = vmdk_refresh_limits,
.bdrv_get_info = vmdk_get_info,
- .bdrv_detach_aio_context = vmdk_detach_aio_context,
- .bdrv_attach_aio_context = vmdk_attach_aio_context,
.supports_backing = true,
.create_opts = &vmdk_create_opts,