summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--qemu-img-cmds.hx4
-rw-r--r--qemu-img.c322
-rw-r--r--qemu-img.texi16
3 files changed, 243 insertions, 99 deletions
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index f054599a91..9c9702cc62 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -40,9 +40,9 @@ STEXI
ETEXI
DEF("convert", img_convert,
- "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename")
+ "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename")
STEXI
-@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename}
ETEXI
DEF("dd", img_dd,
diff --git a/qemu-img.c b/qemu-img.c
index df3aefd35a..caa76a7400 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -156,6 +156,11 @@ static void QEMU_NORETURN help(void)
" kinds of errors, with a higher risk of choosing the wrong fix or\n"
" hiding corruption that has already occurred.\n"
"\n"
+ "Parameters to convert subcommand:\n"
+ " '-m' specifies how many coroutines work in parallel during the convert\n"
+ " process (defaults to 8)\n"
+ " '-W' allow to write to the target out of order rather than sequential\n"
+ "\n"
"Parameters to snapshot subcommand:\n"
" 'snapshot' is the name of the snapshot to create, apply or delete\n"
" '-a' applies a snapshot (revert disk to saved state)\n"
@@ -1462,48 +1467,61 @@ enum ImgConvertBlockStatus {
BLK_BACKING_FILE,
};
+#define MAX_COROUTINES 16
+
typedef struct ImgConvertState {
BlockBackend **src;
int64_t *src_sectors;
- int src_cur, src_num;
- int64_t src_cur_offset;
+ int src_num;
int64_t total_sectors;
int64_t allocated_sectors;
+ int64_t allocated_done;
+ int64_t sector_num;
+ int64_t wr_offs;
enum ImgConvertBlockStatus status;
int64_t sector_next_status;
BlockBackend *target;
bool has_zero_init;
bool compressed;
bool target_has_backing;
+ bool wr_in_order;
int min_sparse;
size_t cluster_sectors;
size_t buf_sectors;
+ int num_coroutines;
+ int running_coroutines;
+ Coroutine *co[MAX_COROUTINES];
+ int64_t wait_sector_num[MAX_COROUTINES];
+ CoMutex lock;
+ int ret;
} ImgConvertState;
-static void convert_select_part(ImgConvertState *s, int64_t sector_num)
+static void convert_select_part(ImgConvertState *s, int64_t sector_num,
+ int *src_cur, int64_t *src_cur_offset)
{
- assert(sector_num >= s->src_cur_offset);
- while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
- s->src_cur_offset += s->src_sectors[s->src_cur];
- s->src_cur++;
- assert(s->src_cur < s->src_num);
+ *src_cur = 0;
+ *src_cur_offset = 0;
+ while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
+ *src_cur_offset += s->src_sectors[*src_cur];
+ (*src_cur)++;
+ assert(*src_cur < s->src_num);
}
}
static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
- int64_t ret;
- int n;
+ int64_t ret, src_cur_offset;
+ int n, src_cur;
- convert_select_part(s, sector_num);
+ convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
assert(s->total_sectors > sector_num);
n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
if (s->sector_next_status <= sector_num) {
BlockDriverState *file;
- ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
- sector_num - s->src_cur_offset,
+ ret = bdrv_get_block_status(blk_bs(s->src[src_cur]),
+ sector_num - src_cur_offset,
n, &n, &file);
if (ret < 0) {
return ret;
@@ -1519,8 +1537,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
/* Check block status of the backing file chain to avoid
* needlessly reading zeroes and limiting the iteration to the
* buffer size */
- ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL,
- sector_num - s->src_cur_offset,
+ ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL,
+ sector_num - src_cur_offset,
n, &n, &file);
if (ret < 0) {
return ret;
@@ -1558,28 +1576,34 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
return n;
}
-static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
- uint8_t *buf)
+static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
+ int nb_sectors, uint8_t *buf)
{
- int n;
- int ret;
+ int n, ret;
+ QEMUIOVector qiov;
+ struct iovec iov;
assert(nb_sectors <= s->buf_sectors);
while (nb_sectors > 0) {
BlockBackend *blk;
- int64_t bs_sectors;
+ int src_cur;
+ int64_t bs_sectors, src_cur_offset;
/* In the case of compression with multiple source files, we can get a
* nb_sectors that spreads into the next part. So we must be able to
* read across multiple BDSes for one convert_read() call. */
- convert_select_part(s, sector_num);
- blk = s->src[s->src_cur];
- bs_sectors = s->src_sectors[s->src_cur];
-
- n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
- ret = blk_pread(blk,
- (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS,
- buf, n << BDRV_SECTOR_BITS);
+ convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
+ blk = s->src[src_cur];
+ bs_sectors = s->src_sectors[src_cur];
+
+ n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
+ iov.iov_base = buf;
+ iov.iov_len = n << BDRV_SECTOR_BITS;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_preadv(
+ blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, &qiov, 0);
if (ret < 0) {
return ret;
}
@@ -1592,15 +1616,18 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
return 0;
}
-static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
- const uint8_t *buf)
+
+static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
+ int nb_sectors, uint8_t *buf,
+ enum ImgConvertBlockStatus status)
{
int ret;
+ QEMUIOVector qiov;
+ struct iovec iov;
while (nb_sectors > 0) {
int n = nb_sectors;
-
- switch (s->status) {
+ switch (status) {
case BLK_BACKING_FILE:
/* If we have a backing file, leave clusters unallocated that are
* unallocated in the source image, so that the backing file is
@@ -1621,9 +1648,13 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
break;
}
- ret = blk_pwrite_compressed(s->target,
- sector_num << BDRV_SECTOR_BITS,
- buf, n << BDRV_SECTOR_BITS);
+ iov.iov_base = buf;
+ iov.iov_len = n << BDRV_SECTOR_BITS;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, &qiov,
+ BDRV_REQ_WRITE_COMPRESSED);
if (ret < 0) {
return ret;
}
@@ -1636,8 +1667,12 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
if (!s->min_sparse ||
is_allocated_sectors_min(buf, n, &n, s->min_sparse))
{
- ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
- buf, n << BDRV_SECTOR_BITS, 0);
+ iov.iov_base = buf;
+ iov.iov_len = n << BDRV_SECTOR_BITS;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, &qiov, 0);
if (ret < 0) {
return ret;
}
@@ -1649,8 +1684,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
if (s->has_zero_init) {
break;
}
- ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
- n << BDRV_SECTOR_BITS, 0);
+ ret = blk_co_pwrite_zeroes(s->target,
+ sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, 0);
if (ret < 0) {
return ret;
}
@@ -1665,12 +1701,122 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
return 0;
}
-static int convert_do_copy(ImgConvertState *s)
+static void coroutine_fn convert_co_do_copy(void *opaque)
{
+ ImgConvertState *s = opaque;
uint8_t *buf = NULL;
- int64_t sector_num, allocated_done;
- int ret;
- int n;
+ int ret, i;
+ int index = -1;
+
+ for (i = 0; i < s->num_coroutines; i++) {
+ if (s->co[i] == qemu_coroutine_self()) {
+ index = i;
+ break;
+ }
+ }
+ assert(index >= 0);
+
+ s->running_coroutines++;
+ buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
+
+ while (1) {
+ int n;
+ int64_t sector_num;
+ enum ImgConvertBlockStatus status;
+
+ qemu_co_mutex_lock(&s->lock);
+ if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
+ qemu_co_mutex_unlock(&s->lock);
+ goto out;
+ }
+ n = convert_iteration_sectors(s, s->sector_num);
+ if (n < 0) {
+ qemu_co_mutex_unlock(&s->lock);
+ s->ret = n;
+ goto out;
+ }
+ /* save current sector and allocation status to local variables */
+ sector_num = s->sector_num;
+ status = s->status;
+ if (!s->min_sparse && s->status == BLK_ZERO) {
+ n = MIN(n, s->buf_sectors);
+ }
+ /* increment global sector counter so that other coroutines can
+ * already continue reading beyond this request */
+ s->sector_num += n;
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
+ s->allocated_done += n;
+ qemu_progress_print(100.0 * s->allocated_done /
+ s->allocated_sectors, 0);
+ }
+
+ if (status == BLK_DATA) {
+ ret = convert_co_read(s, sector_num, n, buf);
+ if (ret < 0) {
+ error_report("error while reading sector %" PRId64
+ ": %s", sector_num, strerror(-ret));
+ s->ret = ret;
+ goto out;
+ }
+ } else if (!s->min_sparse && status == BLK_ZERO) {
+ status = BLK_DATA;
+ memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
+ }
+
+ if (s->wr_in_order) {
+ /* keep writes in order */
+ while (s->wr_offs != sector_num) {
+ if (s->ret != -EINPROGRESS) {
+ goto out;
+ }
+ s->wait_sector_num[index] = sector_num;
+ qemu_coroutine_yield();
+ }
+ s->wait_sector_num[index] = -1;
+ }
+
+ ret = convert_co_write(s, sector_num, n, buf, status);
+ if (ret < 0) {
+ error_report("error while writing sector %" PRId64
+ ": %s", sector_num, strerror(-ret));
+ s->ret = ret;
+ goto out;
+ }
+
+ if (s->wr_in_order) {
+ /* reenter the coroutine that might have waited
+ * for this write to complete */
+ s->wr_offs = sector_num + n;
+ for (i = 0; i < s->num_coroutines; i++) {
+ if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
+ /*
+ * A -> B -> A cannot occur because A has
+ * s->wait_sector_num[i] == -1 during A -> B. Therefore
+ * B will never enter A during this time window.
+ */
+ qemu_coroutine_enter(s->co[i]);
+ break;
+ }
+ }
+ }
+ }
+
+out:
+ qemu_vfree(buf);
+ s->co[index] = NULL;
+ s->running_coroutines--;
+ if (!s->running_coroutines && s->ret == -EINPROGRESS) {
+ /* the convert job finished successfully */
+ s->ret = 0;
+ }
+}
+
+static int convert_do_copy(ImgConvertState *s)
+{
+ int ret, i, n;
+ int64_t sector_num = 0;
/* Check whether we have zero initialisation or can get it efficiently */
s->has_zero_init = s->min_sparse && !s->target_has_backing
@@ -1691,21 +1837,15 @@ static int convert_do_copy(ImgConvertState *s)
if (s->compressed) {
if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
error_report("invalid cluster size");
- ret = -EINVAL;
- goto fail;
+ return -EINVAL;
}
s->buf_sectors = s->cluster_sectors;
}
- buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
- /* Calculate allocated sectors for progress */
- s->allocated_sectors = 0;
- sector_num = 0;
while (sector_num < s->total_sectors) {
n = convert_iteration_sectors(s, sector_num);
if (n < 0) {
- ret = n;
- goto fail;
+ return n;
}
if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
{
@@ -1715,61 +1855,29 @@ static int convert_do_copy(ImgConvertState *s)
}
/* Do the copy */
- s->src_cur = 0;
- s->src_cur_offset = 0;
s->sector_next_status = 0;
+ s->ret = -EINPROGRESS;
- sector_num = 0;
- allocated_done = 0;
-
- while (sector_num < s->total_sectors) {
- n = convert_iteration_sectors(s, sector_num);
- if (n < 0) {
- ret = n;
- goto fail;
- }
- if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
- {
- allocated_done += n;
- qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
- 0);
- }
-
- if (s->status == BLK_DATA) {
- ret = convert_read(s, sector_num, n, buf);
- if (ret < 0) {
- error_report("error while reading sector %" PRId64
- ": %s", sector_num, strerror(-ret));
- goto fail;
- }
- } else if (!s->min_sparse && s->status == BLK_ZERO) {
- n = MIN(n, s->buf_sectors);
- memset(buf, 0, n * BDRV_SECTOR_SIZE);
- s->status = BLK_DATA;
- }
-
- ret = convert_write(s, sector_num, n, buf);
- if (ret < 0) {
- error_report("error while writing sector %" PRId64
- ": %s", sector_num, strerror(-ret));
- goto fail;
- }
+ qemu_co_mutex_init(&s->lock);
+ for (i = 0; i < s->num_coroutines; i++) {
+ s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
+ s->wait_sector_num[i] = -1;
+ qemu_coroutine_enter(s->co[i]);
+ }
- sector_num += n;
+ while (s->ret == -EINPROGRESS) {
+ main_loop_wait(false);
}
- if (s->compressed) {
+ if (s->compressed && !s->ret) {
/* signal EOF to align */
ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
if (ret < 0) {
- goto fail;
+ return ret;
}
}
- ret = 0;
-fail:
- qemu_vfree(buf);
- return ret;
+ return s->ret;
}
static int img_convert(int argc, char **argv)
@@ -1797,6 +1905,8 @@ static int img_convert(int argc, char **argv)
QemuOpts *sn_opts = NULL;
ImgConvertState state;
bool image_opts = false;
+ bool wr_in_order = true;
+ long num_coroutines = 8;
fmt = NULL;
out_fmt = "raw";
@@ -1812,7 +1922,7 @@ static int img_convert(int argc, char **argv)
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
- c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn",
+ c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W",
long_options, NULL);
if (c == -1) {
break;
@@ -1904,6 +2014,18 @@ static int img_convert(int argc, char **argv)
case 'n':
skip_create = 1;
break;
+ case 'm':
+ if (qemu_strtol(optarg, NULL, 0, &num_coroutines) ||
+ num_coroutines < 1 || num_coroutines > MAX_COROUTINES) {
+ error_report("Invalid number of coroutines. Allowed number of"
+ " coroutines is between 1 and %d", MAX_COROUTINES);
+ ret = -1;
+ goto fail_getopt;
+ }
+ break;
+ case 'W':
+ wr_in_order = false;
+ break;
case OPTION_OBJECT:
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
@@ -1923,6 +2045,12 @@ static int img_convert(int argc, char **argv)
goto fail_getopt;
}
+ if (!wr_in_order && compress) {
+ error_report("Out of order write and compress are mutually exclusive");
+ ret = -1;
+ goto fail_getopt;
+ }
+
/* Initialize before goto out */
if (quiet) {
progress = 0;
@@ -2163,6 +2291,8 @@ static int img_convert(int argc, char **argv)
.min_sparse = min_sparse,
.cluster_sectors = cluster_sectors,
.buf_sectors = bufsectors,
+ .wr_in_order = wr_in_order,
+ .num_coroutines = num_coroutines,
};
ret = convert_do_copy(&state);
diff --git a/qemu-img.texi b/qemu-img.texi
index 174aae38b7..c81db3e81c 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -137,6 +137,12 @@ Parameters to convert subcommand:
@item -n
Skip the creation of the target volume
+@item -m
+Number of parallel coroutines for the convert process
+@item -W
+Allow out-of-order writes to the destination. This option improves performance,
+but is only recommended for preallocated devices like host devices or other
+raw block devices.
@end table
Parameters to dd subcommand:
@@ -296,7 +302,7 @@ Error on reading data
@end table
-@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-m @var{num_coroutines}] [-W] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
Convert the disk image @var{filename} or a snapshot @var{snapshot_param}(@var{snapshot_id_or_name} is deprecated)
to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c}
@@ -326,6 +332,14 @@ skipped. This is useful for formats such as @code{rbd} if the target
volume has already been created with site specific options that cannot
be supplied through qemu-img.
+Out of order writes can be enabled with @code{-W} to improve performance.
+This is only recommended for preallocated devices like host devices or other
+raw block devices. Out of order write does not work in combination with
+creating compressed images.
+
+@var{num_coroutines} specifies how many coroutines work in parallel during
+the convert process (defaults to 8).
+
@item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output}
Dd copies from @var{input} file to @var{output} file converting it from