summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2013-07-22 10:13:34 -0500
committerAnthony Liguori <aliguori@us.ibm.com>2013-07-22 10:13:34 -0500
commit5447a9afc4150693d3909a8632891061147e170d (patch)
tree497739f22cd788660db5a927eb50d7c6de38b9af
parent293706dd682f578b457d052988cf3c20b4eab82d (diff)
parenta23fdf355969d331f60593fa5b857d43aec25aac (diff)
downloadqemu-5447a9afc4150693d3909a8632891061147e170d.tar.gz
Merge remote-tracking branch 'stefanha/block' into staging
# By Peter Lieven (5) and others # Via Stefan Hajnoczi * stefanha/block: block/raw: add .bdrv_get_info block: fix bdrv_read_unthrottled() cpus: Let vm_stop[_force_state]() always flush block devices block-migration: efficiently encode zero blocks block/raw: add bdrv_co_write_zeroes block: add bdrv_write_zeroes() block: fix vvfat error path for enable_write_target QEMUBH: make AioContext's bh re-entrant dataplane: sync virtio.c and vring.c virtqueue state gluster: Add discard support for GlusterFS block driver. gluster: Use pkg-config to configure GlusterFS block driver Message-id: 1374223132-29107-1-git-send-email-stefanha@redhat.com Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
-rw-r--r--async.c33
-rw-r--r--block-migration.c32
-rw-r--r--block.c29
-rw-r--r--block/gluster.c45
-rw-r--r--block/raw.c14
-rw-r--r--block/vvfat.c25
-rwxr-xr-xconfigure25
-rw-r--r--cpus.c9
-rw-r--r--hw/block/dataplane/virtio-blk.c2
-rw-r--r--hw/virtio/dataplane/vring.c8
-rw-r--r--include/block/aio.h7
-rw-r--r--include/block/block.h2
-rw-r--r--include/hw/virtio/dataplane/vring.h2
-rw-r--r--include/migration/migration.h1
-rw-r--r--migration.c9
-rw-r--r--qapi-schema.json8
16 files changed, 203 insertions, 48 deletions
diff --git a/async.c b/async.c
index 90fe906539..5ce3633010 100644
--- a/async.c
+++ b/async.c
@@ -47,11 +47,16 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
bh->ctx = ctx;
bh->cb = cb;
bh->opaque = opaque;
+ qemu_mutex_lock(&ctx->bh_lock);
bh->next = ctx->first_bh;
+ /* Make sure that the members are ready before putting bh into list */
+ smp_wmb();
ctx->first_bh = bh;
+ qemu_mutex_unlock(&ctx->bh_lock);
return bh;
}
+/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
int aio_bh_poll(AioContext *ctx)
{
QEMUBH *bh, **bhp, *next;
@@ -61,9 +66,15 @@ int aio_bh_poll(AioContext *ctx)
ret = 0;
for (bh = ctx->first_bh; bh; bh = next) {
+ /* Make sure that fetching bh happens before accessing its members */
+ smp_read_barrier_depends();
next = bh->next;
if (!bh->deleted && bh->scheduled) {
bh->scheduled = 0;
+ /* Paired with write barrier in bh schedule to ensure reading for
+ * idle & callbacks coming after bh's scheduling.
+ */
+ smp_rmb();
if (!bh->idle)
ret = 1;
bh->idle = 0;
@@ -75,6 +86,7 @@ int aio_bh_poll(AioContext *ctx)
/* remove deleted bhs */
if (!ctx->walking_bh) {
+ qemu_mutex_lock(&ctx->bh_lock);
bhp = &ctx->first_bh;
while (*bhp) {
bh = *bhp;
@@ -85,6 +97,7 @@ int aio_bh_poll(AioContext *ctx)
bhp = &bh->next;
}
}
+ qemu_mutex_unlock(&ctx->bh_lock);
}
return ret;
@@ -94,24 +107,38 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
{
if (bh->scheduled)
return;
- bh->scheduled = 1;
bh->idle = 1;
+ /* Make sure that idle & any writes needed by the callback are done
+ * before the locations are read in the aio_bh_poll.
+ */
+ smp_wmb();
+ bh->scheduled = 1;
}
void qemu_bh_schedule(QEMUBH *bh)
{
if (bh->scheduled)
return;
- bh->scheduled = 1;
bh->idle = 0;
+ /* Make sure that idle & any writes needed by the callback are done
+ * before the locations are read in the aio_bh_poll.
+ */
+ smp_wmb();
+ bh->scheduled = 1;
aio_notify(bh->ctx);
}
+
+/* This func is async.
+ */
void qemu_bh_cancel(QEMUBH *bh)
{
bh->scheduled = 0;
}
+/* This func is async.The bottom half will do the delete action at the finial
+ * end.
+ */
void qemu_bh_delete(QEMUBH *bh)
{
bh->scheduled = 0;
@@ -176,6 +203,7 @@ aio_ctx_finalize(GSource *source)
thread_pool_free(ctx->thread_pool);
aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
event_notifier_cleanup(&ctx->notifier);
+ qemu_mutex_destroy(&ctx->bh_lock);
g_array_free(ctx->pollfds, TRUE);
}
@@ -211,6 +239,7 @@ AioContext *aio_context_new(void)
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
ctx->thread_pool = NULL;
+ qemu_mutex_init(&ctx->bh_lock);
event_notifier_init(&ctx->notifier, false);
aio_set_event_notifier(ctx, &ctx->notifier,
(EventNotifierHandler *)
diff --git a/block-migration.c b/block-migration.c
index 2fd7699794..f803f2006f 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -29,6 +29,7 @@
#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_EOS 0x02
#define BLK_MIG_FLAG_PROGRESS 0x04
+#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
#define MAX_IS_ALLOCATED_SEARCH 65536
@@ -80,6 +81,7 @@ typedef struct BlkMigState {
int shared_base;
QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
int64_t total_sector_sum;
+ bool zero_blocks;
/* Protected by lock. */
QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
@@ -114,16 +116,30 @@ static void blk_mig_unlock(void)
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
int len;
+ uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
+
+ if (block_mig_state.zero_blocks &&
+ buffer_is_zero(blk->buf, BLOCK_SIZE)) {
+ flags |= BLK_MIG_FLAG_ZERO_BLOCK;
+ }
/* sector number and flags */
qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
- | BLK_MIG_FLAG_DEVICE_BLOCK);
+ | flags);
/* device name */
len = strlen(blk->bmds->bs->device_name);
qemu_put_byte(f, len);
qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
+ /* if a block is zero we need to flush here since the network
+ * bandwidth is now a lot higher than the storage device bandwidth.
+ * thus if we queue zero blocks we slow down the migration */
+ if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+ qemu_fflush(f);
+ return;
+ }
+
qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}
@@ -344,6 +360,7 @@ static void init_blk_migration(QEMUFile *f)
block_mig_state.total_sector_sum = 0;
block_mig_state.prev_progress = -1;
block_mig_state.bulk_completed = 0;
+ block_mig_state.zero_blocks = migrate_zero_blocks();
bdrv_iterate(init_blk_migration_it, NULL);
}
@@ -762,12 +779,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
}
- buf = g_malloc(BLOCK_SIZE);
-
- qemu_get_buffer(f, buf, BLOCK_SIZE);
- ret = bdrv_write(bs, addr, buf, nr_sectors);
+ if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+ ret = bdrv_write_zeroes(bs, addr, nr_sectors);
+ } else {
+ buf = g_malloc(BLOCK_SIZE);
+ qemu_get_buffer(f, buf, BLOCK_SIZE);
+ ret = bdrv_write(bs, addr, buf, nr_sectors);
+ g_free(buf);
+ }
- g_free(buf);
if (ret < 0) {
return ret;
}
diff --git a/block.c b/block.c
index b56024113b..6cd39fa146 100644
--- a/block.c
+++ b/block.c
@@ -2162,6 +2162,7 @@ typedef struct RwCo {
QEMUIOVector *qiov;
bool is_write;
int ret;
+ BdrvRequestFlags flags;
} RwCo;
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
@@ -2170,10 +2171,12 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
if (!rwco->is_write) {
rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov, 0);
+ rwco->nb_sectors, rwco->qiov,
+ rwco->flags);
} else {
rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov, 0);
+ rwco->nb_sectors, rwco->qiov,
+ rwco->flags);
}
}
@@ -2181,7 +2184,8 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
* Process a vectored synchronous request using coroutines
*/
static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, bool is_write)
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
{
Coroutine *co;
RwCo rwco = {
@@ -2191,6 +2195,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
.qiov = qiov,
.is_write = is_write,
.ret = NOT_DONE,
+ .flags = flags,
};
assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
@@ -2222,7 +2227,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
* Process a synchronous request using coroutines
*/
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
- int nb_sectors, bool is_write)
+ int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
QEMUIOVector qiov;
struct iovec iov = {
@@ -2231,14 +2236,14 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
};
qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
+ return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags);
}
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
+ return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}
/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
@@ -2250,7 +2255,7 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
enabled = bs->io_limits_enabled;
bs->io_limits_enabled = false;
- ret = bdrv_read(bs, 0, buf, 1);
+ ret = bdrv_read(bs, sector_num, buf, nb_sectors);
bs->io_limits_enabled = enabled;
return ret;
}
@@ -2264,12 +2269,18 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
+ return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
{
- return bdrv_rwv_co(bs, sector_num, qiov, true);
+ return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
+}
+
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+ return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
+ BDRV_REQ_ZERO_WRITE);
}
int bdrv_pread(BlockDriverState *bs, int64_t offset,
diff --git a/block/gluster.c b/block/gluster.c
index 61424bcb01..6de418c0bd 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -532,6 +532,39 @@ out:
return NULL;
}
+#ifdef CONFIG_GLUSTERFS_DISCARD
+static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ int ret;
+ GlusterAIOCB *acb;
+ BDRVGlusterState *s = bs->opaque;
+ size_t size;
+ off_t offset;
+
+ offset = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+
+ acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+ acb->size = 0;
+ acb->ret = 0;
+ acb->finished = NULL;
+ s->qemu_aio_count++;
+
+ ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+ if (ret < 0) {
+ goto out;
+ }
+ return &acb->common;
+
+out:
+ s->qemu_aio_count--;
+ qemu_aio_release(acb);
+ return NULL;
+}
+#endif
+
static int64_t qemu_gluster_getlength(BlockDriverState *bs)
{
BDRVGlusterState *s = bs->opaque;
@@ -602,6 +635,9 @@ static BlockDriver bdrv_gluster = {
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
+#ifdef CONFIG_GLUSTERFS_DISCARD
+ .bdrv_aio_discard = qemu_gluster_aio_discard,
+#endif
.create_options = qemu_gluster_create_options,
};
@@ -618,6 +654,9 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
+#ifdef CONFIG_GLUSTERFS_DISCARD
+ .bdrv_aio_discard = qemu_gluster_aio_discard,
+#endif
.create_options = qemu_gluster_create_options,
};
@@ -634,6 +673,9 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
+#ifdef CONFIG_GLUSTERFS_DISCARD
+ .bdrv_aio_discard = qemu_gluster_aio_discard,
+#endif
.create_options = qemu_gluster_create_options,
};
@@ -650,6 +692,9 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
+#ifdef CONFIG_GLUSTERFS_DISCARD
+ .bdrv_aio_discard = qemu_gluster_aio_discard,
+#endif
.create_options = qemu_gluster_create_options,
};
diff --git a/block/raw.c b/block/raw.c
index ce10422006..f1682d4f32 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -42,6 +42,13 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
}
+static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors)
+{
+ return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
+}
+
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
@@ -114,6 +121,11 @@ static int raw_has_zero_init(BlockDriverState *bs)
return bdrv_has_zero_init(bs->file);
}
+static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ return bdrv_get_info(bs->file, bdi);
+}
+
static BlockDriver bdrv_raw = {
.format_name = "raw",
@@ -128,10 +140,12 @@ static BlockDriver bdrv_raw = {
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_is_allocated = raw_co_is_allocated,
+ .bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,
.bdrv_getlength = raw_getlength,
+ .bdrv_get_info = raw_get_info,
.bdrv_truncate = raw_truncate,
.bdrv_is_inserted = raw_is_inserted,
diff --git a/block/vvfat.c b/block/vvfat.c
index 87b02799d0..cd3b8edd9f 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1164,8 +1164,8 @@ DLOG(if (stderr == NULL) {
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
if (qemu_opt_get_bool(opts, "rw", false)) {
- if (enable_write_target(s)) {
- ret = -EIO;
+ ret = enable_write_target(s);
+ if (ret < 0) {
goto fail;
}
bs->read_only = 0;
@@ -2917,9 +2917,7 @@ static int enable_write_target(BDRVVVFATState *s)
s->qcow_filename = g_malloc(1024);
ret = get_tmp_filename(s->qcow_filename, 1024);
if (ret < 0) {
- g_free(s->qcow_filename);
- s->qcow_filename = NULL;
- return ret;
+ goto err;
}
bdrv_qcow = bdrv_find_format("qcow");
@@ -2927,18 +2925,18 @@ static int enable_write_target(BDRVVVFATState *s)
set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512);
set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:");
- if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0)
- return -1;
+ ret = bdrv_create(bdrv_qcow, s->qcow_filename, options);
+ if (ret < 0) {
+ goto err;
+ }
s->qcow = bdrv_new("");
- if (s->qcow == NULL) {
- return -1;
- }
ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
if (ret < 0) {
- return ret;
+ bdrv_delete(s->qcow);
+ goto err;
}
#ifndef _WIN32
@@ -2951,6 +2949,11 @@ static int enable_write_target(BDRVVVFATState *s)
*(void**)s->bs->backing_hd->opaque = s;
return 0;
+
+err:
+ g_free(s->qcow_filename);
+ s->qcow_filename = NULL;
+ return ret;
}
static void vvfat_close(BlockDriverState *bs)
diff --git a/configure b/configure
index 9e1cd1975a..7c45db2fdd 100755
--- a/configure
+++ b/configure
@@ -237,6 +237,7 @@ libiscsi=""
coroutine=""
seccomp=""
glusterfs=""
+glusterfs_discard="no"
virtio_blk_data_plane=""
gtk=""
gtkabi="2.0"
@@ -2570,23 +2571,21 @@ fi
##########################################
# glusterfs probe
if test "$glusterfs" != "no" ; then
- cat > $TMPC <<EOF
-#include <glusterfs/api/glfs.h>
-int main(void) {
- (void) glfs_new("volume");
- return 0;
-}
-EOF
- glusterfs_libs="-lgfapi -lgfrpc -lgfxdr"
- if compile_prog "" "$glusterfs_libs" ; then
- glusterfs=yes
+ if $pkg_config --atleast-version=3 glusterfs-api >/dev/null 2>&1; then
+ glusterfs="yes"
+ glusterfs_cflags=`$pkg_config --cflags glusterfs-api 2>/dev/null`
+ glusterfs_libs=`$pkg_config --libs glusterfs-api 2>/dev/null`
+ CFLAGS="$CFLAGS $glusterfs_cflags"
libs_tools="$glusterfs_libs $libs_tools"
libs_softmmu="$glusterfs_libs $libs_softmmu"
+ if $pkg_config --atleast-version=5 glusterfs-api >/dev/null 2>&1; then
+ glusterfs_discard="yes"
+ fi
else
if test "$glusterfs" = "yes" ; then
feature_not_found "GlusterFS backend support"
fi
- glusterfs=no
+ glusterfs="no"
fi
fi
@@ -3969,6 +3968,10 @@ if test "$glusterfs" = "yes" ; then
echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
fi
+if test "$glusterfs_discard" = "yes" ; then
+ echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak
+fi
+
if test "$libssh2" = "yes" ; then
echo "CONFIG_LIBSSH2=y" >> $config_host_mak
fi
diff --git a/cpus.c b/cpus.c
index 8062cdd57e..2509eb5af3 100644
--- a/cpus.c
+++ b/cpus.c
@@ -443,11 +443,12 @@ static int do_vm_stop(RunState state)
pause_all_vcpus();
runstate_set(state);
vm_state_notify(0, state);
- bdrv_drain_all();
- ret = bdrv_flush_all();
monitor_protocol_event(QEVENT_STOP, NULL);
}
+ bdrv_drain_all();
+ ret = bdrv_flush_all();
+
return ret;
}
@@ -1126,7 +1127,9 @@ int vm_stop_force_state(RunState state)
return vm_stop(state);
} else {
runstate_set(state);
- return 0;
+ /* Make sure to return an error if the flush in a previous vm_stop()
+ * failed. */
+ return bdrv_flush_all();
}
}
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 03566650c6..2faed43127 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -537,7 +537,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
/* Clean up guest notifier (irq) */
k->set_guest_notifiers(qbus->parent, 1, false);
- vring_teardown(&s->vring);
+ vring_teardown(&s->vring, s->vdev, 0);
s->started = false;
s->stopping = false;
}
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index e0d6e83625..82cc151b17 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -39,8 +39,8 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
- vring->last_avail_idx = 0;
- vring->last_used_idx = 0;
+ vring->last_avail_idx = virtio_queue_get_last_avail_idx(vdev, n);
+ vring->last_used_idx = vring->vr.used->idx;
vring->signalled_used = 0;
vring->signalled_used_valid = false;
@@ -49,8 +49,10 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
return true;
}
-void vring_teardown(Vring *vring)
+void vring_teardown(Vring *vring, VirtIODevice *vdev, int n)
{
+ virtio_queue_set_last_avail_idx(vdev, n, vring->last_avail_idx);
+
hostmem_finalize(&vring->hostmem);
}
diff --git a/include/block/aio.h b/include/block/aio.h
index 183679374f..cc77771c46 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -17,6 +17,7 @@
#include "qemu-common.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
+#include "qemu/thread.h"
typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret);
@@ -53,6 +54,8 @@ typedef struct AioContext {
*/
int walking_handlers;
+ /* lock to protect between bh's adders and deleter */
+ QemuMutex bh_lock;
/* Anchor of the list of Bottom Halves belonging to the context */
struct QEMUBH *first_bh;
@@ -127,6 +130,8 @@ void aio_notify(AioContext *ctx);
* aio_bh_poll: Poll bottom halves for an AioContext.
*
* These are internal functions used by the QEMU main loop.
+ * And notice that multiple occurrences of aio_bh_poll cannot
+ * be called concurrently
*/
int aio_bh_poll(AioContext *ctx);
@@ -163,6 +168,8 @@ void qemu_bh_cancel(QEMUBH *bh);
* Deleting a bottom half frees the memory that was allocated for it by
* qemu_bh_new. It also implies canceling the bottom half if it was
* scheduled.
+ * This func is async. The bottom half will do the delete action at the finial
+ * end.
*
* @bh: The bottom half to be deleted.
*/
diff --git a/include/block/block.h b/include/block/block.h
index b6b9014a9c..742fce5f7f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -157,6 +157,8 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors);
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h
index 9380cb5413..c0b69ff18f 100644
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@@ -50,7 +50,7 @@ static inline void vring_set_broken(Vring *vring)
}
bool vring_setup(Vring *vring, VirtIODevice *vdev, int n);
-void vring_teardown(Vring *vring);
+void vring_teardown(Vring *vring, VirtIODevice *vdev, int n);
void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index bc9fde0b2a..701709a1e9 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -124,6 +124,7 @@ void migrate_add_blocker(Error *reason);
void migrate_del_blocker(Error *reason);
bool migrate_rdma_pin_all(void);
+bool migrate_zero_blocks(void);
bool migrate_auto_converge(void);
diff --git a/migration.c b/migration.c
index 9f5a4230d1..a9c042186d 100644
--- a/migration.c
+++ b/migration.c
@@ -493,6 +493,15 @@ bool migrate_auto_converge(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}
+bool migrate_zero_blocks(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
+}
+
int migrate_use_xbzrle(void)
{
MigrationState *s;
diff --git a/qapi-schema.json b/qapi-schema.json
index 8d33d527d3..592bb9c7a1 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -613,10 +613,16 @@
# Disabled by default. Experimental: may (or may not) be renamed after
# further testing is complete. (since 1.6)
#
+# @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
+# essentially saves 1MB of zeroes per block on the wire. Enabling requires
+# source and target VM to support this feature. To enable it is sufficient
+# to enable the capability on the source VM. The feature is disabled by
+# default. (since 1.6)
+#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
- 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] }
+ 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge', 'zero-blocks'] }
##
# @MigrationCapabilityStatus