summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/io.c120
1 files changed, 82 insertions, 38 deletions
diff --git a/block/io.c b/block/io.c
index a5598ed869..8e419070b5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -34,6 +34,9 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
+#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
+
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags);
@@ -945,11 +948,14 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
BlockDriver *drv = bs->drv;
struct iovec iov;
- QEMUIOVector bounce_qiov;
+ QEMUIOVector local_qiov;
int64_t cluster_offset;
unsigned int cluster_bytes;
size_t skip_bytes;
int ret;
+ int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
+ BDRV_REQUEST_MAX_BYTES);
+ unsigned int progress = 0;
/* FIXME We cannot require callers to have write permissions when all they
* are doing is a read request. If we did things right, write permissions
@@ -961,53 +967,95 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
// assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
/* Cover entire cluster so no additional backing file I/O is required when
- * allocating cluster in the image file.
+ * allocating cluster in the image file. Note that this value may exceed
+ * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
+ * is one reason we loop rather than doing it all at once.
*/
bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
+ skip_bytes = offset - cluster_offset;
trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
cluster_offset, cluster_bytes);
- iov.iov_len = cluster_bytes;
- iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+ bounce_buffer = qemu_try_blockalign(bs,
+ MIN(MIN(max_transfer, cluster_bytes),
+ MAX_BOUNCE_BUFFER));
if (bounce_buffer == NULL) {
ret = -ENOMEM;
goto err;
}
- qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+ while (cluster_bytes) {
+ int64_t pnum;
- ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
- &bounce_qiov, 0);
- if (ret < 0) {
- goto err;
- }
+ ret = bdrv_is_allocated(bs, cluster_offset,
+ MIN(cluster_bytes, max_transfer), &pnum);
+ if (ret < 0) {
+ /* Safe to treat errors in querying allocation as if
+ * unallocated; we'll probably fail again soon on the
+ * read, but at least that will set a decent errno.
+ */
+ pnum = MIN(cluster_bytes, max_transfer);
+ }
- bdrv_debug_event(bs, BLKDBG_COR_WRITE);
- if (drv->bdrv_co_pwrite_zeroes &&
- buffer_is_zero(bounce_buffer, iov.iov_len)) {
- /* FIXME: Should we (perhaps conditionally) be setting
- * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
- * that still correctly reads as zero? */
- ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
- } else {
- /* This does not change the data on the disk, it is not necessary
- * to flush even in cache=writethrough mode.
- */
- ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
- &bounce_qiov, 0);
- }
+ assert(skip_bytes < pnum);
- if (ret < 0) {
- /* It might be okay to ignore write errors for guest requests. If this
- * is a deliberate copy-on-read then we don't want to ignore the error.
- * Simply report it in all cases.
- */
- goto err;
- }
+ if (ret <= 0) {
+ /* Must copy-on-read; use the bounce buffer */
+ iov.iov_base = bounce_buffer;
+ iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
+ qemu_iovec_init_external(&local_qiov, &iov, 1);
- skip_bytes = offset - cluster_offset;
- qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
+ ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
+ &local_qiov, 0);
+ if (ret < 0) {
+ goto err;
+ }
+
+ bdrv_debug_event(bs, BLKDBG_COR_WRITE);
+ if (drv->bdrv_co_pwrite_zeroes &&
+ buffer_is_zero(bounce_buffer, pnum)) {
+ /* FIXME: Should we (perhaps conditionally) be setting
+ * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
+ * that still correctly reads as zero? */
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
+ } else {
+ /* This does not change the data on the disk, it is not
+ * necessary to flush even in cache=writethrough mode.
+ */
+ ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
+ &local_qiov, 0);
+ }
+
+ if (ret < 0) {
+ /* It might be okay to ignore write errors for guest
+ * requests. If this is a deliberate copy-on-read
+ * then we don't want to ignore the error. Simply
+ * report it in all cases.
+ */
+ goto err;
+ }
+
+ qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
+ pnum - skip_bytes);
+ } else {
+ /* Read directly into the destination */
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
+ ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
+ &local_qiov, 0);
+ qemu_iovec_destroy(&local_qiov);
+ if (ret < 0) {
+ goto err;
+ }
+ }
+
+ cluster_offset += pnum;
+ cluster_bytes -= pnum;
+ progress += pnum - skip_bytes;
+ skip_bytes = 0;
+ }
+ ret = 0;
err:
qemu_vfree(bounce_buffer);
@@ -1213,9 +1261,6 @@ int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0);
}
-/* Maximum buffer for write zeroes fallback, in bytes */
-#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
-
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags)
{
@@ -1230,8 +1275,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
bs->bl.request_alignment);
- int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
- MAX_WRITE_ZEROES_BOUNCE_BUFFER);
+ int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
assert(alignment % bs->bl.request_alignment == 0);
head = offset % alignment;