summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- block/Makefile.objs 1
-rw-r--r-- block/backup.c 341
-rw-r--r-- include/block/block_int.h 19
-rw-r--r-- trace-events 8
4 files changed, 369 insertions, 0 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 2981654846..4cf9aa499f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -21,5 +21,6 @@ endif
common-obj-y += stream.o
common-obj-y += commit.o
common-obj-y += mirror.o
+common-obj-y += backup.o
$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)
diff --git a/block/backup.c b/block/backup.c
new file mode 100644
index 0000000000..16105d40b1
--- /dev/null
+++ b/block/backup.c
@@ -0,0 +1,341 @@
+/*
+ * QEMU backup
+ *
+ * Copyright (C) 2013 Proxmox Server Solutions
+ *
+ * Authors:
+ * Dietmar Maurer (dietmar@proxmox.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "trace.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "qemu/ratelimit.h"
+
+#define BACKUP_CLUSTER_BITS 16 /* log2 of the backup cluster size in bytes */
+#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS) /* 64 KiB */
+#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
+
+#define SLICE_TIME 100000000ULL /* ns (100 ms); slice passed to the rate limiter */
+
+typedef struct CowRequest { /* one in-flight backup_do_cow() cluster range */
+ int64_t start; /* first cluster covered by the request */
+ int64_t end; /* one past the last cluster covered (exclusive) */
+ QLIST_ENTRY(CowRequest) list; /* link in BackupBlockJob.inflight_reqs */
+ CoQueue wait_queue; /* coroutines blocked on this request */
+} CowRequest;
+
+typedef struct BackupBlockJob { /* state of one running backup block job */
+ BlockJob common; /* generic job state; keep first, backup_do_cow() casts
+                   * bs->job to BackupBlockJob * */
+ BlockDriverState *target; /* device the copied clusters are written to */
+ RateLimit limit; /* throttle configured by backup_set_speed() */
+ BlockdevOnError on_source_error; /* policy for errors reading the source */
+ BlockdevOnError on_target_error; /* policy for errors writing the target */
+ CoRwlock flush_rwlock; /* held for reading by each backup_do_cow();
+                         * backup_run() takes it for writing to wait for
+                         * the pending calls before tearing down */
+ uint64_t sectors_read; /* sectors copied since backup_run() last fed the
+                         * count to the rate limiter */
+ HBitmap *bitmap; /* one bit per cluster, set once the cluster is copied */
+ QLIST_HEAD(, CowRequest) inflight_reqs; /* backup_do_cow() calls in progress */
+} BackupBlockJob;
+
+/*
+ * Block the calling coroutine until no in-flight request of @job intersects
+ * the cluster range [@start, @end).
+ */
+static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
+                                                       int64_t start,
+                                                       int64_t end)
+{
+    CowRequest *cur;
+    bool rescan = true;
+
+    while (rescan) {
+        rescan = false;
+        QLIST_FOREACH(cur, &job->inflight_reqs, list) {
+            if (end <= cur->start || start >= cur->end) {
+                continue; /* disjoint ranges, nothing to wait for */
+            }
+
+            /*
+             * cow_request_end() unlinks the request before waking us, so
+             * the iterator is stale afterwards: restart from the list head.
+             */
+            qemu_co_queue_wait(&cur->wait_queue);
+            rescan = true;
+            break;
+        }
+    }
+}
+
+/*
+ * Register @req as an in-flight request of @job covering the cluster range
+ * [@start, @end). The caller provides the storage for @req.
+ */
+static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
+                              int64_t start, int64_t end)
+{
+    qemu_co_queue_init(&req->wait_queue);
+    req->end = end;
+    req->start = start;
+
+    /* Publish the request only once it is fully set up */
+    QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
+}
+
+/*
+ * Unlink the finished request @req from its job's in-flight list and wake
+ * every coroutine that was waiting on it.
+ */
+static void cow_request_end(CowRequest *req)
+{
+    CoQueue *waiters = &req->wait_queue;
+
+    QLIST_REMOVE(req, list);
+    qemu_co_queue_restart_all(waiters);
+}
+
+/*
+ * Copy every not-yet-backed-up cluster touched by the sector range
+ * [@sector_num, @sector_num + @nb_sectors) from @bs to the job's target.
+ *
+ * Clusters whose bit is already set in the job bitmap are skipped. Each
+ * remaining cluster is read into a bounce buffer, written to the target
+ * (as zeroes when the buffer is entirely zero) and then marked in the bitmap.
+ *
+ * On failure, *@error_is_read (when non-NULL) tells whether the read or the
+ * write failed. Returns 0 on success, otherwise the negative value returned
+ * by the failing I/O call.
+ */
+static int coroutine_fn backup_do_cow(BlockDriverState *bs,
+                                      int64_t sector_num, int nb_sectors,
+                                      bool *error_is_read)
+{
+    BackupBlockJob *job = (BackupBlockJob *)bs->job;
+    CowRequest cow_request;
+    void *bounce_buffer = NULL;
+    int64_t cluster, end_cluster;
+    int ret = 0;
+
+    qemu_co_rwlock_rdlock(&job->flush_rwlock);
+
+    cluster = sector_num / BACKUP_SECTORS_PER_CLUSTER;
+    end_cluster = DIV_ROUND_UP(sector_num + nb_sectors,
+                               BACKUP_SECTORS_PER_CLUSTER);
+
+    trace_backup_do_cow_enter(job, cluster, sector_num, nb_sectors);
+
+    wait_for_overlapping_requests(job, cluster, end_cluster);
+    cow_request_begin(&cow_request, job, cluster, end_cluster);
+
+    for (; cluster < end_cluster; cluster++) {
+        struct iovec iov;
+        QEMUIOVector bounce_qiov;
+        int64_t first_sector;
+        int n;
+
+        if (hbitmap_get(job->bitmap, cluster)) {
+            trace_backup_do_cow_skip(job, cluster);
+            continue; /* already copied */
+        }
+
+        trace_backup_do_cow_process(job, cluster);
+
+        /* The last cluster of the device may be shorter than a full one */
+        first_sector = cluster * BACKUP_SECTORS_PER_CLUSTER;
+        n = MIN(BACKUP_SECTORS_PER_CLUSTER,
+                job->common.len / BDRV_SECTOR_SIZE - first_sector);
+
+        /* Allocate the bounce buffer lazily and reuse it for later clusters */
+        if (!bounce_buffer) {
+            bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
+        }
+        iov.iov_base = bounce_buffer;
+        iov.iov_len = n * BDRV_SECTOR_SIZE;
+        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+
+        ret = bdrv_co_readv(bs, first_sector, n, &bounce_qiov);
+        if (ret < 0) {
+            trace_backup_do_cow_read_fail(job, cluster, ret);
+            if (error_is_read) {
+                *error_is_read = true;
+            }
+            break;
+        }
+
+        if (!buffer_is_zero(iov.iov_base, iov.iov_len)) {
+            ret = bdrv_co_writev(job->target, first_sector, n, &bounce_qiov);
+        } else {
+            ret = bdrv_co_write_zeroes(job->target, first_sector, n);
+        }
+        if (ret < 0) {
+            trace_backup_do_cow_write_fail(job, cluster, ret);
+            if (error_is_read) {
+                *error_is_read = false;
+            }
+            break;
+        }
+
+        hbitmap_set(job->bitmap, cluster, 1);
+
+        /* Publish progress, guest I/O counts as progress too. Note that the
+         * offset field is an opaque progress value, it is not a disk offset.
+         */
+        job->sectors_read += n;
+        job->common.offset += n * BDRV_SECTOR_SIZE;
+    }
+
+    /* Common exit path for success and for both failure cases above */
+    if (bounce_buffer) {
+        qemu_vfree(bounce_buffer);
+    }
+
+    cow_request_end(&cow_request);
+
+    trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);
+
+    qemu_co_rwlock_unlock(&job->flush_rwlock);
+
+    return ret;
+}
+
+/*
+ * Before-write notifier installed by backup_run(): back up the clusters
+ * covered by the tracked request passed in @opaque. The result of
+ * backup_do_cow() is returned unchanged; @notifier is not used.
+ */
+static int coroutine_fn backup_before_write_notify(
+        NotifierWithReturn *notifier,
+        void *opaque)
+{
+    BdrvTrackedRequest *tracked = opaque;
+    int ret;
+
+    /* No caller needs to know which side failed, hence the NULL */
+    ret = backup_do_cow(tracked->bs, tracked->sector_num,
+                        tracked->nb_sectors, NULL);
+    return ret;
+}
+
+/*
+ * BlockJobType.set_speed callback: reject a negative @speed through @errp,
+ * otherwise reprogram the job's rate limiter with @speed divided by the
+ * sector size.
+ */
+static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    BackupBlockJob *backup_job;
+
+    if (speed < 0) {
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
+        return;
+    }
+
+    backup_job = container_of(job, BackupBlockJob, common);
+    ratelimit_set_speed(&backup_job->limit, speed / BDRV_SECTOR_SIZE,
+                        SLICE_TIME);
+}
+
+/* BlockJobType.iostatus_reset callback: reset the I/O status of the target. */
+static void backup_iostatus_reset(BlockJob *job)
+{
+    BlockDriverState *target =
+        container_of(job, BackupBlockJob, common)->target;
+
+    bdrv_iostatus_reset(target);
+}
+
+/* Job type descriptor that backup_start() hands to block_job_create(). */
+static const BlockJobType backup_job_type = {
+    .job_type       = "backup",
+    .instance_size  = sizeof(BackupBlockJob),
+    .iostatus_reset = backup_iostatus_reset,
+    .set_speed      = backup_set_speed,
+};
+
+/*
+ * Ask the block job layer what to do about @error. A failed read is
+ * attributed to the source device with the on_source_error policy, a failed
+ * write to the target device with the on_target_error policy.
+ */
+static BlockErrorAction backup_error_action(BackupBlockJob *job,
+                                            bool read, int error)
+{
+    BlockDriverState *failed_bs = read ? job->common.bs : job->target;
+    BlockdevOnError policy = read ? job->on_source_error
+                                  : job->on_target_error;
+
+    return block_job_error_action(&job->common, failed_bs, policy,
+                                  read, error);
+}
+
+/*
+ * Coroutine entry point of the backup job; @opaque is the BackupBlockJob.
+ *
+ * Installs a before-write notifier on the source so that guest writes copy
+ * the affected clusters first, then walks all clusters from first to last
+ * and copies each one with backup_do_cow(). The walk stops when the job is
+ * cancelled or when the error policy asks to report a failure; any other
+ * error action retries the same cluster. Afterwards the notifier is removed,
+ * pending backup_do_cow() calls are awaited, the target is deleted and the
+ * job is completed with the last return value.
+ */
+static void coroutine_fn backup_run(void *opaque)
+{
+    BackupBlockJob *job = opaque;
+    BlockDriverState *source = job->common.bs;
+    BlockDriverState *target = job->target;
+    BlockdevOnError target_policy = job->on_target_error;
+    NotifierWithReturn before_write = {
+        .notify = backup_before_write_notify,
+    };
+    int64_t cluster = 0;
+    int64_t nb_clusters;
+    int ret = 0;
+
+    QLIST_INIT(&job->inflight_reqs);
+    qemu_co_rwlock_init(&job->flush_rwlock);
+
+    nb_clusters = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
+                               BACKUP_SECTORS_PER_CLUSTER);
+
+    job->bitmap = hbitmap_alloc(nb_clusters, 0);
+
+    bdrv_set_enable_write_cache(target, true);
+    bdrv_set_on_error(target, target_policy, target_policy);
+    bdrv_iostatus_enable(target);
+
+    bdrv_add_before_write_notifier(source, &before_write);
+
+    while (cluster < nb_clusters) {
+        bool error_is_read;
+        uint64_t delay_ns = 0;
+
+        if (block_job_is_cancelled(&job->common)) {
+            break;
+        }
+
+        /* we need to yield so that qemu_aio_flush() returns.
+         * (without, VM does not reboot)
+         */
+        if (job->common.speed) {
+            delay_ns = ratelimit_calculate_delay(&job->limit,
+                                                 job->sectors_read);
+            job->sectors_read = 0;
+        }
+        block_job_sleep_ns(&job->common, rt_clock, delay_ns);
+
+        if (block_job_is_cancelled(&job->common)) {
+            break;
+        }
+
+        ret = backup_do_cow(source, cluster * BACKUP_SECTORS_PER_CLUSTER,
+                            BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+        if (ret >= 0) {
+            cluster++;
+            continue;
+        }
+
+        /* Depending on error action, fail now or retry cluster */
+        if (backup_error_action(job, error_is_read, -ret) ==
+            BDRV_ACTION_REPORT) {
+            break;
+        }
+        /* cluster is left untouched so the next iteration retries it */
+    }
+
+    notifier_with_return_remove(&before_write);
+
+    /* wait until pending backup_do_cow() calls have completed */
+    qemu_co_rwlock_wrlock(&job->flush_rwlock);
+    qemu_co_rwlock_unlock(&job->flush_rwlock);
+
+    hbitmap_free(job->bitmap);
+
+    bdrv_iostatus_disable(target);
+    bdrv_delete(target);
+
+    block_job_completed(&job->common, ret);
+}
+
+/*
+ * Create a backup job that copies @bs to @target and start its coroutine.
+ *
+ * Fails through @errp, without creating a job, when @on_source_error is
+ * "stop" or "enospc" but I/O status is not enabled on @bs, when the length
+ * of @bs cannot be determined, or when block_job_create() fails.
+ */
+void backup_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed,
+                  BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
+                  BlockDriverCompletionFunc *cb, void *opaque,
+                  Error **errp)
+{
+    BackupBlockJob *job;
+    int64_t length;
+    bool policy_needs_iostatus;
+
+    assert(bs);
+    assert(target);
+    assert(cb);
+
+    policy_needs_iostatus = on_source_error == BLOCKDEV_ON_ERROR_STOP ||
+                            on_source_error == BLOCKDEV_ON_ERROR_ENOSPC;
+    if (policy_needs_iostatus && !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
+        return;
+    }
+
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        error_setg_errno(errp, -length, "unable to get length for '%s'",
+                         bdrv_get_device_name(bs));
+        return;
+    }
+
+    job = block_job_create(&backup_job_type, bs, speed, cb, opaque, errp);
+    if (!job) {
+        return;
+    }
+
+    job->target = target;
+    job->on_source_error = on_source_error;
+    job->on_target_error = on_target_error;
+    job->common.len = length;
+
+    /* The job must be fully set up before backup_run() gets to run */
+    job->common.co = qemu_coroutine_create(backup_run);
+    qemu_coroutine_enter(job->common.co, job);
+}
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 2d009556b0..c6ac871e21 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -399,4 +399,23 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp);
+/*
+ * backup_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object, set when the job could not be started.
+ *
+ * Start a backup operation on @bs. Clusters in @bs are written to @target
+ * until all clusters have been copied, the job is cancelled, or an error
+ * is reported according to the error policies.
+ */
+void backup_start(BlockDriverState *bs, BlockDriverState *target,
+ int64_t speed, BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb, void *opaque,
+ Error **errp);
+
#endif /* BLOCK_INT_H */
diff --git a/trace-events b/trace-events
index c5f1ccb96d..0acce7b350 100644
--- a/trace-events
+++ b/trace-events
@@ -92,6 +92,14 @@ mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_
mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
+# block/backup.c
+backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
+backup_do_cow_return(void *job, int64_t sector_num, int nb_sectors, int ret) "job %p sector_num %"PRId64" nb_sectors %d ret %d"
+backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
+backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
+backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+
# blockdev.c
qmp_block_job_cancel(void *job) "job %p"
qmp_block_job_pause(void *job) "job %p"