summaryrefslogtreecommitdiff
path: root/migration/savevm.c
diff options
context:
space:
mode:
Diffstat (limited to 'migration/savevm.c')
-rw-r--r--migration/savevm.c826
1 files changed, 744 insertions, 82 deletions
diff --git a/migration/savevm.c b/migration/savevm.c
index e05158d7ba..be52314a12 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -37,6 +37,7 @@
#include "qemu/timer.h"
#include "audio/audio.h"
#include "migration/migration.h"
+#include "migration/postcopy-ram.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
@@ -45,6 +46,7 @@
#include "exec/memory.h"
#include "qmp-commands.h"
#include "trace.h"
+#include "qemu/bitops.h"
#include "qemu/iov.h"
#include "block/snapshot.h"
#include "block/qapi.h"
@@ -57,8 +59,26 @@
#define ARP_PTYPE_IP 0x0800
#define ARP_OP_REQUEST_REV 0x3
+const unsigned int postcopy_ram_discard_version = 0;
+
static bool skip_section_footers;
+static struct mig_cmd_args {
+ ssize_t len; /* -1 = variable */
+ const char *name;
+} mig_cmd_args[] = {
+ [MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" },
+ [MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" },
+ [MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" },
+ [MIG_CMD_POSTCOPY_ADVISE] = { .len = 16, .name = "POSTCOPY_ADVISE" },
+ [MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" },
+ [MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" },
+ [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
+ .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
+ [MIG_CMD_PACKAGED] = { .len = 4, .name = "PACKAGED" },
+ [MIG_CMD_MAX] = { .len = -1, .name = "MAX" },
+};
+
static int announce_self_create(uint8_t *buf,
uint8_t *mac_addr)
{
@@ -694,6 +714,156 @@ static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
}
}
+/**
+ * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
+ * command and associated data.
+ *
+ * @f: File to send command on
+ * @command: Command type to send
+ * @len: Length of associated data
+ * @data: Data associated with command.
+ */
+void qemu_savevm_command_send(QEMUFile *f,
+ enum qemu_vm_cmd command,
+ uint16_t len,
+ uint8_t *data)
+{
+ trace_savevm_command_send(command, len);
+ qemu_put_byte(f, QEMU_VM_COMMAND);
+ qemu_put_be16(f, (uint16_t)command);
+ qemu_put_be16(f, len);
+ qemu_put_buffer(f, data, len);
+ qemu_fflush(f);
+}
+
+void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
+{
+ uint32_t buf;
+
+ trace_savevm_send_ping(value);
+ buf = cpu_to_be32(value);
+ qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
+}
+
+void qemu_savevm_send_open_return_path(QEMUFile *f)
+{
+ trace_savevm_send_open_return_path();
+ qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
+}
+
+/* We have a buffer of data to send; we don't want that all to be loaded
+ * by the command itself, so the command contains just the length of the
+ * extra buffer that we then send straight after it.
+ * TODO: Must be a better way to organise that
+ *
+ * Returns:
+ * 0 on success
+ * -ve on error
+ */
+int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb)
+{
+ size_t cur_iov;
+ size_t len = qsb_get_length(qsb);
+ uint32_t tmp;
+
+ if (len > MAX_VM_CMD_PACKAGED_SIZE) {
+ error_report("%s: Unreasonably large packaged state: %zu",
+ __func__, len);
+ return -1;
+ }
+
+ tmp = cpu_to_be32(len);
+
+ trace_qemu_savevm_send_packaged();
+ qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
+
+ /* all the data follows (concatinating the iov's) */
+ for (cur_iov = 0; cur_iov < qsb->n_iov; cur_iov++) {
+ /* The iov entries are partially filled */
+ size_t towrite = MIN(qsb->iov[cur_iov].iov_len, len);
+ len -= towrite;
+
+ if (!towrite) {
+ break;
+ }
+
+ qemu_put_buffer(f, qsb->iov[cur_iov].iov_base, towrite);
+ }
+
+ return 0;
+}
+
+/* Send prior to any postcopy transfer */
+void qemu_savevm_send_postcopy_advise(QEMUFile *f)
+{
+ uint64_t tmp[2];
+ tmp[0] = cpu_to_be64(getpagesize());
+ tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
+
+ trace_qemu_savevm_send_postcopy_advise();
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
+}
+
+/* Sent prior to starting the destination running in postcopy, discard pages
+ * that have already been sent but redirtied on the source.
+ * CMD_POSTCOPY_RAM_DISCARD consist of:
+ * byte version (0)
+ * byte Length of name field (not including 0)
+ * n x byte RAM block name
+ * byte 0 terminator (just for safety)
+ * n x Byte ranges within the named RAMBlock
+ * be64 Start of the range
+ * be64 Length
+ *
+ * name: RAMBlock name that these entries are part of
+ * len: Number of page entries
+ * start_list: 'len' addresses
+ * length_list: 'len' addresses
+ *
+ */
+void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
+ uint16_t len,
+ uint64_t *start_list,
+ uint64_t *length_list)
+{
+ uint8_t *buf;
+ uint16_t tmplen;
+ uint16_t t;
+ size_t name_len = strlen(name);
+
+ trace_qemu_savevm_send_postcopy_ram_discard(name, len);
+ assert(name_len < 256);
+ buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
+ buf[0] = postcopy_ram_discard_version;
+ buf[1] = name_len;
+ memcpy(buf + 2, name, name_len);
+ tmplen = 2 + name_len;
+ buf[tmplen++] = '\0';
+
+ for (t = 0; t < len; t++) {
+ cpu_to_be64w((uint64_t *)(buf + tmplen), start_list[t]);
+ tmplen += 8;
+ cpu_to_be64w((uint64_t *)(buf + tmplen), length_list[t]);
+ tmplen += 8;
+ }
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
+ g_free(buf);
+}
+
+/* Get the destination into a state where it can receive postcopy data. */
+void qemu_savevm_send_postcopy_listen(QEMUFile *f)
+{
+ trace_savevm_send_postcopy_listen();
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
+}
+
+/* Kick the destination into running */
+void qemu_savevm_send_postcopy_run(QEMUFile *f)
+{
+ trace_savevm_send_postcopy_run();
+ qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
+}
+
bool qemu_savevm_state_blocked(Error **errp)
{
SaveStateEntry *se;
@@ -713,6 +883,12 @@ void qemu_savevm_state_header(QEMUFile *f)
trace_savevm_state_header();
qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+
+ if (!savevm_state.skip_configuration) {
+ qemu_put_byte(f, QEMU_VM_CONFIGURATION);
+ vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
+ }
+
}
void qemu_savevm_state_begin(QEMUFile *f,
@@ -729,11 +905,6 @@ void qemu_savevm_state_begin(QEMUFile *f,
se->ops->set_params(params, se->opaque);
}
- if (!savevm_state.skip_configuration) {
- qemu_put_byte(f, QEMU_VM_CONFIGURATION);
- vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
- }
-
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_setup) {
continue;
@@ -760,7 +931,7 @@ void qemu_savevm_state_begin(QEMUFile *f,
* 0 : We haven't finished, caller have to go again
* 1 : We have finished, we can go to complete phase
*/
-int qemu_savevm_state_iterate(QEMUFile *f)
+int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
{
SaveStateEntry *se;
int ret = 1;
@@ -775,6 +946,15 @@ int qemu_savevm_state_iterate(QEMUFile *f)
continue;
}
}
+ /*
+ * In the postcopy phase, any device that doesn't know how to
+ * do postcopy should have saved it's state in the _complete
+ * call that's already run, it might get confused if we call
+ * iterate afterwards.
+ */
+ if (postcopy && !se->ops->save_live_complete_postcopy) {
+ continue;
+ }
if (qemu_file_rate_limit(f)) {
return 0;
}
@@ -803,22 +983,65 @@ int qemu_savevm_state_iterate(QEMUFile *f)
static bool should_send_vmdesc(void)
{
MachineState *machine = MACHINE(qdev_get_machine());
- return !machine->suppress_vmdesc;
+ bool in_postcopy = migration_in_postcopy(migrate_get_current());
+ return !machine->suppress_vmdesc && !in_postcopy;
}
-void qemu_savevm_state_complete(QEMUFile *f)
+/*
+ * Calls the save_live_complete_postcopy methods
+ * causing the last few pages to be sent immediately and doing any associated
+ * cleanup.
+ * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
+ * all the other devices, but that happens at the point we switch to postcopy.
+ */
+void qemu_savevm_state_complete_postcopy(QEMUFile *f)
+{
+ SaveStateEntry *se;
+ int ret;
+
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+ if (!se->ops || !se->ops->save_live_complete_postcopy) {
+ continue;
+ }
+ if (se->ops && se->ops->is_active) {
+ if (!se->ops->is_active(se->opaque)) {
+ continue;
+ }
+ }
+ trace_savevm_section_start(se->idstr, se->section_id);
+ /* Section type */
+ qemu_put_byte(f, QEMU_VM_SECTION_END);
+ qemu_put_be32(f, se->section_id);
+
+ ret = se->ops->save_live_complete_postcopy(f, se->opaque);
+ trace_savevm_section_end(se->idstr, se->section_id, ret);
+ save_section_footer(f, se);
+ if (ret < 0) {
+ qemu_file_set_error(f, ret);
+ return;
+ }
+ }
+
+ qemu_put_byte(f, QEMU_VM_EOF);
+ qemu_fflush(f);
+}
+
+void qemu_savevm_state_complete_precopy(QEMUFile *f)
{
QJSON *vmdesc;
int vmdesc_len;
SaveStateEntry *se;
int ret;
+ bool in_postcopy = migration_in_postcopy(migrate_get_current());
- trace_savevm_state_complete();
+ trace_savevm_state_complete_precopy();
cpu_synchronize_all_states();
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
- if (!se->ops || !se->ops->save_live_complete) {
+ if (!se->ops ||
+ (in_postcopy && se->ops->save_live_complete_postcopy) ||
+ !se->ops->save_live_complete_precopy) {
continue;
}
if (se->ops && se->ops->is_active) {
@@ -830,7 +1053,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
save_section_header(f, se, QEMU_VM_SECTION_END);
- ret = se->ops->save_live_complete(f, se->opaque);
+ ret = se->ops->save_live_complete_precopy(f, se->opaque);
trace_savevm_section_end(se->idstr, se->section_id, ret);
save_section_footer(f, se);
if (ret < 0) {
@@ -867,7 +1090,10 @@ void qemu_savevm_state_complete(QEMUFile *f)
save_section_footer(f, se);
}
- qemu_put_byte(f, QEMU_VM_EOF);
+ if (!in_postcopy) {
+ /* Postcopy stream will still be going */
+ qemu_put_byte(f, QEMU_VM_EOF);
+ }
json_end_array(vmdesc);
qjson_finish(vmdesc);
@@ -883,10 +1109,19 @@ void qemu_savevm_state_complete(QEMUFile *f)
qemu_fflush(f);
}
-uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size)
+/* Give an estimate of the amount left to be transferred,
+ * the result is split into the amount for units that can and
+ * for units that can't do postcopy.
+ */
+void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
+ uint64_t *res_non_postcopiable,
+ uint64_t *res_postcopiable)
{
SaveStateEntry *se;
- uint64_t ret = 0;
+
+ *res_non_postcopiable = 0;
+ *res_postcopiable = 0;
+
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_pending) {
@@ -897,9 +1132,9 @@ uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size)
continue;
}
}
- ret += se->ops->save_live_pending(f, se->opaque, max_size);
+ se->ops->save_live_pending(f, se->opaque, max_size,
+ res_non_postcopiable, res_postcopiable);
}
- return ret;
}
void qemu_savevm_state_cleanup(void)
@@ -921,6 +1156,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
.blk = 0,
.shared = 0
};
+ MigrationState *ms = migrate_init(&params);
+ ms->file = f;
if (qemu_savevm_state_blocked(errp)) {
return -EINVAL;
@@ -932,18 +1169,18 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
qemu_mutex_lock_iothread();
while (qemu_file_get_error(f) == 0) {
- if (qemu_savevm_state_iterate(f) > 0) {
+ if (qemu_savevm_state_iterate(f, false) > 0) {
break;
}
}
ret = qemu_file_get_error(f);
if (ret == 0) {
- qemu_savevm_state_complete(f);
+ qemu_savevm_state_complete_precopy(f);
ret = qemu_file_get_error(f);
}
+ qemu_savevm_state_cleanup();
if (ret != 0) {
- qemu_savevm_state_cleanup();
error_setg_errno(errp, -ret, "Error while writing VM state");
}
return ret;
@@ -1001,6 +1238,420 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
return NULL;
}
+enum LoadVMExitCodes {
+ /* Allow a command to quit all layers of nested loadvm loops */
+ LOADVM_QUIT = 1,
+};
+
+static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
+
+/* ------ incoming postcopy messages ------ */
+/* 'advise' arrives before any transfers just to tell us that a postcopy
+ * *might* happen - it might be skipped if precopy transferred everything
+ * quickly.
+ */
+static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
+{
+ PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
+ uint64_t remote_hps, remote_tps;
+
+ trace_loadvm_postcopy_handle_advise();
+ if (ps != POSTCOPY_INCOMING_NONE) {
+ error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
+ return -1;
+ }
+
+ if (!postcopy_ram_supported_by_host()) {
+ return -1;
+ }
+
+ remote_hps = qemu_get_be64(mis->from_src_file);
+ if (remote_hps != getpagesize()) {
+ /*
+ * Some combinations of mismatch are probably possible but it gets
+ * a bit more complicated. In particular we need to place whole
+ * host pages on the dest at once, and we need to ensure that we
+ * handle dirtying to make sure we never end up sending part of
+ * a hostpage on it's own.
+ */
+ error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
+ (int)remote_hps, getpagesize());
+ return -1;
+ }
+
+ remote_tps = qemu_get_be64(mis->from_src_file);
+ if (remote_tps != (1ul << qemu_target_page_bits())) {
+ /*
+ * Again, some differences could be dealt with, but for now keep it
+ * simple.
+ */
+ error_report("Postcopy needs matching target page sizes (s=%d d=%d)",
+ (int)remote_tps, 1 << qemu_target_page_bits());
+ return -1;
+ }
+
+ if (ram_postcopy_incoming_init(mis)) {
+ return -1;
+ }
+
+ postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
+
+ return 0;
+}
+
+/* After postcopy we will be told to throw some pages away since they're
+ * dirty and will have to be demand fetched. Must happen before CPU is
+ * started.
+ * There can be 0..many of these messages, each encoding multiple pages.
+ */
+static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
+ uint16_t len)
+{
+ int tmp;
+ char ramid[256];
+ PostcopyState ps = postcopy_state_get();
+
+ trace_loadvm_postcopy_ram_handle_discard();
+
+ switch (ps) {
+ case POSTCOPY_INCOMING_ADVISE:
+ /* 1st discard */
+ tmp = postcopy_ram_prepare_discard(mis);
+ if (tmp) {
+ return tmp;
+ }
+ break;
+
+ case POSTCOPY_INCOMING_DISCARD:
+ /* Expected state */
+ break;
+
+ default:
+ error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
+ ps);
+ return -1;
+ }
+ /* We're expecting a
+ * Version (0)
+ * a RAM ID string (length byte, name, 0 term)
+ * then at least 1 16 byte chunk
+ */
+ if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
+ return -1;
+ }
+
+ tmp = qemu_get_byte(mis->from_src_file);
+ if (tmp != postcopy_ram_discard_version) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
+ return -1;
+ }
+
+ if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
+ return -1;
+ }
+ tmp = qemu_get_byte(mis->from_src_file);
+ if (tmp != 0) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
+ return -1;
+ }
+
+ len -= 3 + strlen(ramid);
+ if (len % 16) {
+ error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
+ return -1;
+ }
+ trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
+ while (len) {
+ uint64_t start_addr, block_length;
+ start_addr = qemu_get_be64(mis->from_src_file);
+ block_length = qemu_get_be64(mis->from_src_file);
+
+ len -= 16;
+ int ret = ram_discard_range(mis, ramid, start_addr,
+ block_length);
+ if (ret) {
+ return ret;
+ }
+ }
+ trace_loadvm_postcopy_ram_handle_discard_end();
+
+ return 0;
+}
+
+/*
+ * Triggered by a postcopy_listen command; this thread takes over reading
+ * the input stream, leaving the main thread free to carry on loading the rest
+ * of the device state (from RAM).
+ * (TODO:This could do with being in a postcopy file - but there again it's
+ * just another input loop, not that postcopy specific)
+ */
+static void *postcopy_ram_listen_thread(void *opaque)
+{
+ QEMUFile *f = opaque;
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ int load_res;
+
+ qemu_sem_post(&mis->listen_thread_sem);
+ trace_postcopy_ram_listen_thread_start();
+
+ /*
+ * Because we're a thread and not a coroutine we can't yield
+ * in qemu_file, and thus we must be blocking now.
+ */
+ qemu_file_set_blocking(f, true);
+ load_res = qemu_loadvm_state_main(f, mis);
+ /* And non-blocking again so we don't block in any cleanup */
+ qemu_file_set_blocking(f, false);
+
+ trace_postcopy_ram_listen_thread_exit();
+ if (load_res < 0) {
+ error_report("%s: loadvm failed: %d", __func__, load_res);
+ qemu_file_set_error(f, load_res);
+ } else {
+ /*
+ * This looks good, but it's possible that the device loading in the
+ * main thread hasn't finished yet, and so we might not be in 'RUN'
+ * state yet; wait for the end of the main thread.
+ */
+ qemu_event_wait(&mis->main_thread_load_event);
+ }
+ postcopy_ram_incoming_cleanup(mis);
+ /*
+ * If everything has worked fine, then the main thread has waited
+ * for us to start, and we're the last use of the mis.
+ * (If something broke then qemu will have to exit anyway since it's
+ * got a bad migration state).
+ */
+ migration_incoming_state_destroy();
+
+ if (load_res < 0) {
+ /*
+ * If something went wrong then we have a bad state so exit;
+ * depending how far we got it might be possible at this point
+ * to leave the guest running and fire MCEs for pages that never
+ * arrived as a desperate recovery step.
+ */
+ exit(EXIT_FAILURE);
+ }
+
+ return NULL;
+}
+
+/* After this message we must be able to immediately receive postcopy data */
+static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
+{
+ PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
+ trace_loadvm_postcopy_handle_listen();
+ if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
+ error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
+ return -1;
+ }
+ if (ps == POSTCOPY_INCOMING_ADVISE) {
+ /*
+ * A rare case, we entered listen without having to do any discards,
+ * so do the setup that's normally done at the time of the 1st discard.
+ */
+ postcopy_ram_prepare_discard(mis);
+ }
+
+ /*
+ * Sensitise RAM - can now generate requests for blocks that don't exist
+ * However, at this point the CPU shouldn't be running, and the IO
+ * shouldn't be doing anything yet so don't actually expect requests
+ */
+ if (postcopy_ram_enable_notify(mis)) {
+ return -1;
+ }
+
+ if (mis->have_listen_thread) {
+ error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
+ return -1;
+ }
+
+ mis->have_listen_thread = true;
+ /* Start up the listening thread and wait for it to signal ready */
+ qemu_sem_init(&mis->listen_thread_sem, 0);
+ qemu_thread_create(&mis->listen_thread, "postcopy/listen",
+ postcopy_ram_listen_thread, mis->from_src_file,
+ QEMU_THREAD_JOINABLE);
+ qemu_sem_wait(&mis->listen_thread_sem);
+ qemu_sem_destroy(&mis->listen_thread_sem);
+
+ return 0;
+}
+
+/* After all discards we can start running and asking for pages */
+static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
+{
+ PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
+ Error *local_err = NULL;
+
+ trace_loadvm_postcopy_handle_run();
+ if (ps != POSTCOPY_INCOMING_LISTENING) {
+ error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
+ return -1;
+ }
+
+ /* TODO we should move all of this lot into postcopy_ram.c or a shared code
+ * in migration.c
+ */
+ cpu_synchronize_all_post_init();
+
+ qemu_announce_self();
+
+ /* Make sure all file formats flush their mutable metadata */
+ bdrv_invalidate_cache_all(&local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
+
+ trace_loadvm_postcopy_handle_run_cpu_sync();
+ cpu_synchronize_all_post_init();
+
+ trace_loadvm_postcopy_handle_run_vmstart();
+
+ if (autostart) {
+ /* Hold onto your hats, starting the CPU */
+ vm_start();
+ } else {
+ /* leave it paused and let management decide when to start the CPU */
+ runstate_set(RUN_STATE_PAUSED);
+ }
+
+ /* We need to finish reading the stream from the package
+ * and also stop reading anything more from the stream that loaded the
+ * package (since it's now being read by the listener thread).
+ * LOADVM_QUIT will quit all the layers of nested loadvm loops.
+ */
+ return LOADVM_QUIT;
+}
+
+/**
+ * Immediately following this command is a blob of data containing an embedded
+ * chunk of migration stream; read it and load it.
+ *
+ * @mis: Incoming state
+ * @length: Length of packaged data to read
+ *
+ * Returns: Negative values on error
+ *
+ */
+static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
+{
+ int ret;
+ uint8_t *buffer;
+ uint32_t length;
+ QEMUSizedBuffer *qsb;
+
+ length = qemu_get_be32(mis->from_src_file);
+ trace_loadvm_handle_cmd_packaged(length);
+
+ if (length > MAX_VM_CMD_PACKAGED_SIZE) {
+ error_report("Unreasonably large packaged state: %u", length);
+ return -1;
+ }
+ buffer = g_malloc0(length);
+ ret = qemu_get_buffer(mis->from_src_file, buffer, (int)length);
+ if (ret != length) {
+ g_free(buffer);
+ error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%d\n",
+ ret, length);
+ return (ret < 0) ? ret : -EAGAIN;
+ }
+ trace_loadvm_handle_cmd_packaged_received(ret);
+
+ /* Setup a dummy QEMUFile that actually reads from the buffer */
+ qsb = qsb_create(buffer, length);
+ g_free(buffer); /* Because qsb_create copies */
+ if (!qsb) {
+ error_report("Unable to create qsb");
+ }
+ QEMUFile *packf = qemu_bufopen("r", qsb);
+
+ ret = qemu_loadvm_state_main(packf, mis);
+ trace_loadvm_handle_cmd_packaged_main(ret);
+ qemu_fclose(packf);
+ qsb_free(qsb);
+
+ return ret;
+}
+
+/*
+ * Process an incoming 'QEMU_VM_COMMAND'
+ * 0 just a normal return
+ * LOADVM_QUIT All good, but exit the loop
+ * <0 Error
+ */
+static int loadvm_process_command(QEMUFile *f)
+{
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ uint16_t cmd;
+ uint16_t len;
+ uint32_t tmp32;
+
+ cmd = qemu_get_be16(f);
+ len = qemu_get_be16(f);
+
+ trace_loadvm_process_command(cmd, len);
+ if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
+ error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
+ return -EINVAL;
+ }
+
+ if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
+ error_report("%s received with bad length - expecting %zu, got %d",
+ mig_cmd_args[cmd].name,
+ (size_t)mig_cmd_args[cmd].len, len);
+ return -ERANGE;
+ }
+
+ switch (cmd) {
+ case MIG_CMD_OPEN_RETURN_PATH:
+ if (mis->to_src_file) {
+ error_report("CMD_OPEN_RETURN_PATH called when RP already open");
+ /* Not really a problem, so don't give up */
+ return 0;
+ }
+ mis->to_src_file = qemu_file_get_return_path(f);
+ if (!mis->to_src_file) {
+ error_report("CMD_OPEN_RETURN_PATH failed");
+ return -1;
+ }
+ break;
+
+ case MIG_CMD_PING:
+ tmp32 = qemu_get_be32(f);
+ trace_loadvm_process_command_ping(tmp32);
+ if (!mis->to_src_file) {
+ error_report("CMD_PING (0x%x) received with no return path",
+ tmp32);
+ return -1;
+ }
+ migrate_send_rp_pong(mis, tmp32);
+ break;
+
+ case MIG_CMD_PACKAGED:
+ return loadvm_handle_cmd_packaged(mis);
+
+ case MIG_CMD_POSTCOPY_ADVISE:
+ return loadvm_postcopy_handle_advise(mis);
+
+ case MIG_CMD_POSTCOPY_LISTEN:
+ return loadvm_postcopy_handle_listen(mis);
+
+ case MIG_CMD_POSTCOPY_RUN:
+ return loadvm_postcopy_handle_run(mis);
+
+ case MIG_CMD_POSTCOPY_RAM_DISCARD:
+ return loadvm_postcopy_ram_handle_discard(mis, len);
+ }
+
+ return 0;
+}
+
struct LoadStateEntry {
QLIST_ENTRY(LoadStateEntry) entry;
SaveStateEntry *se;
@@ -1053,47 +1704,10 @@ void loadvm_free_handlers(MigrationIncomingState *mis)
}
}
-int qemu_loadvm_state(QEMUFile *f)
+static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
{
- MigrationIncomingState *mis = migration_incoming_get_current();
- Error *local_err = NULL;
uint8_t section_type;
- unsigned int v;
int ret;
- int file_error_after_eof = -1;
-
- if (qemu_savevm_state_blocked(&local_err)) {
- error_report_err(local_err);
- return -EINVAL;
- }
-
- v = qemu_get_be32(f);
- if (v != QEMU_VM_FILE_MAGIC) {
- error_report("Not a migration stream");
- return -EINVAL;
- }
-
- v = qemu_get_be32(f);
- if (v == QEMU_VM_FILE_VERSION_COMPAT) {
- error_report("SaveVM v2 format is obsolete and don't work anymore");
- return -ENOTSUP;
- }
- if (v != QEMU_VM_FILE_VERSION) {
- error_report("Unsupported migration stream version");
- return -ENOTSUP;
- }
-
- if (!savevm_state.skip_configuration) {
- if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
- error_report("Configuration section missing");
- return -EINVAL;
- }
- ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
-
- if (ret) {
- return ret;
- }
- }
while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
uint32_t instance_id, version_id, section_id;
@@ -1122,16 +1736,14 @@ int qemu_loadvm_state(QEMUFile *f)
if (se == NULL) {
error_report("Unknown savevm section or instance '%s' %d",
idstr, instance_id);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
/* Validate version */
if (version_id > se->version_id) {
error_report("savevm: unsupported version %d for '%s' v%d",
version_id, idstr, se->version_id);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
/* Add entry */
@@ -1146,11 +1758,10 @@ int qemu_loadvm_state(QEMUFile *f)
if (ret < 0) {
error_report("error while loading state for instance 0x%x of"
" device '%s'", instance_id, idstr);
- goto out;
+ return ret;
}
if (!check_section_footer(f, le)) {
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
break;
case QEMU_VM_SECTION_PART:
@@ -1165,29 +1776,88 @@ int qemu_loadvm_state(QEMUFile *f)
}
if (le == NULL) {
error_report("Unknown savevm section %d", section_id);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
ret = vmstate_load(f, le->se, le->version_id);
if (ret < 0) {
error_report("error while loading state section id %d(%s)",
section_id, le->se->idstr);
- goto out;
+ return ret;
}
if (!check_section_footer(f, le)) {
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
+ }
+ break;
+ case QEMU_VM_COMMAND:
+ ret = loadvm_process_command(f);
+ trace_qemu_loadvm_state_section_command(ret);
+ if ((ret < 0) || (ret & LOADVM_QUIT)) {
+ return ret;
}
break;
default:
error_report("Unknown savevm section type %d", section_type);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
}
- file_error_after_eof = qemu_file_get_error(f);
+ return 0;
+}
+
+int qemu_loadvm_state(QEMUFile *f)
+{
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ Error *local_err = NULL;
+ unsigned int v;
+ int ret;
+
+ if (qemu_savevm_state_blocked(&local_err)) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+
+ v = qemu_get_be32(f);
+ if (v != QEMU_VM_FILE_MAGIC) {
+ error_report("Not a migration stream");
+ return -EINVAL;
+ }
+
+ v = qemu_get_be32(f);
+ if (v == QEMU_VM_FILE_VERSION_COMPAT) {
+ error_report("SaveVM v2 format is obsolete and don't work anymore");
+ return -ENOTSUP;
+ }
+ if (v != QEMU_VM_FILE_VERSION) {
+ error_report("Unsupported migration stream version");
+ return -ENOTSUP;
+ }
+
+ if (!savevm_state.skip_configuration) {
+ if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
+ error_report("Configuration section missing");
+ return -EINVAL;
+ }
+ ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
+
+ if (ret) {
+ return ret;
+ }
+ }
+
+ ret = qemu_loadvm_state_main(f, mis);
+ qemu_event_set(&mis->main_thread_load_event);
+
+ trace_qemu_loadvm_state_post_main(ret);
+
+ if (mis->have_listen_thread) {
+ /* Listen thread still going, can't clean up yet */
+ return ret;
+ }
+
+ if (ret == 0) {
+ ret = qemu_file_get_error(f);
+ }
/*
* Try to read in the VMDESC section as well, so that dumping tools that
@@ -1199,10 +1869,10 @@ int qemu_loadvm_state(QEMUFile *f)
* We also mustn't read data that isn't there; some transports (RDMA)
* will stall waiting for that data when the source has already closed.
*/
- if (should_send_vmdesc()) {
+ if (ret == 0 && should_send_vmdesc()) {
uint8_t *buf;
uint32_t size;
- section_type = qemu_get_byte(f);
+ uint8_t section_type = qemu_get_byte(f);
if (section_type != QEMU_VM_VMDESCRIPTION) {
error_report("Expected vmdescription section, but got %d",
@@ -1226,14 +1896,6 @@ int qemu_loadvm_state(QEMUFile *f)
cpu_synchronize_all_post_init();
- ret = 0;
-
-out:
- if (ret == 0) {
- /* We may not have a VMDESC section, so ignore relative errors */
- ret = file_error_after_eof;
- }
-
return ret;
}