summaryrefslogtreecommitdiff
path: root/migration/colo.c
diff options
context:
space:
mode:
authorzhanghailiang <zhang.zhanghailiang@huawei.com>2017-01-17 20:57:43 +0800
committerDr. David Alan Gilbert <dgilbert@redhat.com>2017-02-13 17:27:13 +0000
commitc937b9a6db2d564b96aae35a6757bb4144ea5184 (patch)
tree1cf95ee7425b082b1c20778b45c34f36ed06649f /migration/colo.c
parent479125d53eb8509d69a0548f131028a65fcbd65a (diff)
downloadqemu-c937b9a6db2d564b96aae35a6757bb4144ea5184.tar.gz
COLO: Shutdown related socket fd while do failover
If the net connection between primary host and secondary host breaks while COLO/COLO incoming threads are doing read() or write(). It will block until connection is timeout, and the failover process will be blocked because of it. So it is necessary to shutdown all the socket fds used by COLO to avoid this situation. Besides, we should close the corresponding file descriptors after failvoer BH shutdown them, Or there will be an error. Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com> Message-Id: <1484657864-21708-3-git-send-email-zhang.zhanghailiang@huawei.com> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Diffstat (limited to 'migration/colo.c')
-rw-r--r--migration/colo.c43
1 files changed, 43 insertions, 0 deletions
diff --git a/migration/colo.c b/migration/colo.c
index 08b2e46dac..3222812d96 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -59,6 +59,18 @@ static void secondary_vm_do_failover(void)
/* recover runstate to normal migration finish state */
autostart = true;
}
+ /*
+ * Make sure COLO incoming thread not block in recv or send,
+ * If mis->from_src_file and mis->to_src_file use the same fd,
+ * The second shutdown() will return -1, we ignore this value,
+ * It is harmless.
+ */
+ if (mis->from_src_file) {
+ qemu_file_shutdown(mis->from_src_file);
+ }
+ if (mis->to_src_file) {
+ qemu_file_shutdown(mis->to_src_file);
+ }
old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
FAILOVER_STATUS_COMPLETED);
@@ -67,6 +79,8 @@ static void secondary_vm_do_failover(void)
"secondary VM", FailoverStatus_lookup[old_state]);
return;
}
+ /* Notify COLO incoming thread that failover work is finished */
+ qemu_sem_post(&mis->colo_incoming_sem);
/* For Secondary VM, jump to incoming co */
if (mis->migration_incoming_co) {
qemu_coroutine_enter(mis->migration_incoming_co);
@@ -81,6 +95,18 @@ static void primary_vm_do_failover(void)
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
+ /*
+ * Wake up COLO thread which may blocked in recv() or send(),
+ * The s->rp_state.from_dst_file and s->to_dst_file may use the
+ * same fd, but we still shutdown the fd for twice, it is harmless.
+ */
+ if (s->to_dst_file) {
+ qemu_file_shutdown(s->to_dst_file);
+ }
+ if (s->rp_state.from_dst_file) {
+ qemu_file_shutdown(s->rp_state.from_dst_file);
+ }
+
old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
FAILOVER_STATUS_COMPLETED);
if (old_state != FAILOVER_STATUS_ACTIVE) {
@@ -88,6 +114,8 @@ static void primary_vm_do_failover(void)
FailoverStatus_lookup[old_state]);
return;
}
+ /* Notify COLO thread that failover work is finished */
+ qemu_sem_post(&s->colo_exit_sem);
}
void colo_do_failover(MigrationState *s)
@@ -361,6 +389,14 @@ out:
timer_del(s->colo_delay_timer);
+ /* Hope this not to be too long to wait here */
+ qemu_sem_wait(&s->colo_exit_sem);
+ qemu_sem_destroy(&s->colo_exit_sem);
+ /*
+ * Must be called after failover BH is completed,
+ * Or the failover BH may shutdown the wrong fd that
+ * re-used by other threads after we release here.
+ */
if (s->rp_state.from_dst_file) {
qemu_fclose(s->rp_state.from_dst_file);
}
@@ -385,6 +421,7 @@ void migrate_start_colo_process(MigrationState *s)
s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
colo_checkpoint_notify, s);
+ qemu_sem_init(&s->colo_exit_sem, 0);
migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
colo_process_checkpoint(s);
@@ -423,6 +460,8 @@ void *colo_process_incoming_thread(void *opaque)
uint64_t value;
Error *local_err = NULL;
+ qemu_sem_init(&mis->colo_incoming_sem, 0);
+
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
@@ -533,6 +572,10 @@ out:
qemu_fclose(fb);
}
+ /* Hope this not to be too long to loop here */
+ qemu_sem_wait(&mis->colo_incoming_sem);
+ qemu_sem_destroy(&mis->colo_incoming_sem);
+ /* Must be called after failover BH is completed */
if (mis->to_src_file) {
qemu_fclose(mis->to_src_file);
}