summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cpus.c81
-rw-r--r--kvm-stub.c5
-rw-r--r--kvm.h3
-rw-r--r--target-i386/cpu.h20
-rw-r--r--target-i386/helper.c2
-rw-r--r--target-i386/kvm.c178
-rw-r--r--target-i386/kvm_x86.h3
7 files changed, 278 insertions, 14 deletions
diff --git a/cpus.c b/cpus.c
index 38756572d9..36a6d1f350 100644
--- a/cpus.c
+++ b/cpus.c
@@ -34,6 +34,9 @@
#include "cpus.h"
#include "compatfd.h"
+#ifdef CONFIG_LINUX
+#include <sys/prctl.h>
+#endif
#ifdef SIGRTMIN
#define SIG_IPI (SIGRTMIN+4)
@@ -41,6 +44,10 @@
#define SIG_IPI SIGUSR1
#endif
+#ifndef PR_MCE_KILL
+#define PR_MCE_KILL 33
+#endif
+
static CPUState *next_cpu;
/***********************************************************/
@@ -498,28 +505,77 @@ static void qemu_tcg_wait_io_event(void)
}
}
+static void sigbus_reraise(void)
+{
+ sigset_t set;
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = SIG_DFL;
+ if (!sigaction(SIGBUS, &action, NULL)) {
+ raise(SIGBUS);
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ sigprocmask(SIG_UNBLOCK, &set, NULL);
+ }
+ perror("Failed to re-raise SIGBUS!\n");
+ abort();
+}
+
+static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
+ void *ctx)
+{
+#if defined(TARGET_I386)
+ if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr))
+#endif
+ sigbus_reraise();
+}
+
static void qemu_kvm_eat_signal(CPUState *env, int timeout)
{
struct timespec ts;
int r, e;
siginfo_t siginfo;
sigset_t waitset;
+ sigset_t chkset;
ts.tv_sec = timeout / 1000;
ts.tv_nsec = (timeout % 1000) * 1000000;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
+ sigaddset(&waitset, SIGBUS);
- qemu_mutex_unlock(&qemu_global_mutex);
- r = sigtimedwait(&waitset, &siginfo, &ts);
- e = errno;
- qemu_mutex_lock(&qemu_global_mutex);
+ do {
+ qemu_mutex_unlock(&qemu_global_mutex);
- if (r == -1 && !(e == EAGAIN || e == EINTR)) {
- fprintf(stderr, "sigtimedwait: %s\n", strerror(e));
- exit(1);
- }
+ r = sigtimedwait(&waitset, &siginfo, &ts);
+ e = errno;
+
+ qemu_mutex_lock(&qemu_global_mutex);
+
+ if (r == -1 && !(e == EAGAIN || e == EINTR)) {
+ fprintf(stderr, "sigtimedwait: %s\n", strerror(e));
+ exit(1);
+ }
+
+ switch (r) {
+ case SIGBUS:
+#ifdef TARGET_I386
+ if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr))
+#endif
+ sigbus_reraise();
+ break;
+ default:
+ break;
+ }
+
+ r = sigpending(&chkset);
+ if (r == -1) {
+ fprintf(stderr, "sigpending: %s\n", strerror(e));
+ exit(1);
+ }
+ } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
static void qemu_kvm_wait_io_event(CPUState *env)
@@ -640,6 +696,7 @@ static void kvm_init_ipi(CPUState *env)
pthread_sigmask(SIG_BLOCK, NULL, &set);
sigdelset(&set, SIG_IPI);
+ sigdelset(&set, SIGBUS);
r = kvm_set_signal_mask(env, &set);
if (r) {
fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r));
@@ -650,6 +707,7 @@ static void kvm_init_ipi(CPUState *env)
static sigset_t block_io_signals(void)
{
sigset_t set;
+ struct sigaction action;
/* SIGUSR2 used by posix-aio-compat.c */
sigemptyset(&set);
@@ -660,8 +718,15 @@ static sigset_t block_io_signals(void)
sigaddset(&set, SIGIO);
sigaddset(&set, SIGALRM);
sigaddset(&set, SIG_IPI);
+ sigaddset(&set, SIGBUS);
pthread_sigmask(SIG_BLOCK, &set, NULL);
+ memset(&action, 0, sizeof(action));
+ action.sa_flags = SA_SIGINFO;
+ action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
+ sigaction(SIGBUS, &action, NULL);
+ prctl(PR_MCE_KILL, 1, 1, 0, 0);
+
return set;
}
diff --git a/kvm-stub.c b/kvm-stub.c
index d45f9fa1df..5384a4b9a4 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -141,3 +141,8 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign)
{
return -ENOSYS;
}
+
+int kvm_on_sigbus(int code, void *addr)
+{
+ return 1;
+}
diff --git a/kvm.h b/kvm.h
index b2fb3af8aa..60a9b425c8 100644
--- a/kvm.h
+++ b/kvm.h
@@ -110,6 +110,9 @@ int kvm_arch_init_vcpu(CPUState *env);
void kvm_arch_reset_vcpu(CPUState *env);
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr);
+int kvm_on_sigbus(int code, void *addr);
+
struct kvm_guest_debug;
struct kvm_debug_exit_arch;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 77eeab185e..85ed30f7d3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -250,16 +250,32 @@
#define PG_ERROR_RSVD_MASK 0x08
#define PG_ERROR_I_D_MASK 0x10
-#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
+#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */
+#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
-#define MCE_CAP_DEF MCG_CTL_P
+#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P)
#define MCE_BANKS_DEF 10
+#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
+#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
+#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
+#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
+#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+
+/* MISC register defines */
+#define MCM_ADDR_SEGOFF 0 /* segment offset */
+#define MCM_ADDR_LINEAR 1 /* linear address */
+#define MCM_ADDR_PHYS 2 /* physical address */
+#define MCM_ADDR_MEM 3 /* memory address */
+#define MCM_ADDR_GENERIC 7 /* generic */
#define MSR_IA32_TSC 0x10
#define MSR_IA32_APICBASE 0x1b
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 4b430dd374..4fff4a871f 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1032,7 +1032,7 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
return;
if (kvm_enabled()) {
- kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc);
+ kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0);
return;
}
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 343fb022c8..0161f6d2fb 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -46,6 +46,13 @@
#define MSR_KVM_WALL_CLOCK 0x11
#define MSR_KVM_SYSTEM_TIME 0x12
+#ifndef BUS_MCEERR_AR
+#define BUS_MCEERR_AR 4
+#endif
+#ifndef BUS_MCEERR_AO
+#define BUS_MCEERR_AO 5
+#endif
+
#ifdef KVM_CAP_EXT_CPUID
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
@@ -192,10 +199,39 @@ static int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m)
return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m);
}
+static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n)
+{
+ struct kvm_msrs *kmsrs = qemu_malloc(sizeof *kmsrs + n * sizeof *msrs);
+ int r;
+
+ kmsrs->nmsrs = n;
+ memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
+ r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs);
+ memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
+ free(kmsrs);
+ return r;
+}
+
+/* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */
+static int kvm_mce_in_exception(CPUState *env)
+{
+ struct kvm_msr_entry msr_mcg_status = {
+ .index = MSR_MCG_STATUS,
+ };
+ int r;
+
+ r = kvm_get_msr(env, &msr_mcg_status, 1);
+ if (r == -1 || r == 0) {
+ return -1;
+ }
+ return !!(msr_mcg_status.data & MCG_STATUS_MCIP);
+}
+
struct kvm_x86_mce_data
{
CPUState *env;
struct kvm_x86_mce *mce;
+ int abort_on_error;
};
static void kvm_do_inject_x86_mce(void *_data)
@@ -203,14 +239,26 @@ static void kvm_do_inject_x86_mce(void *_data)
struct kvm_x86_mce_data *data = _data;
int r;
+ /* If there is an MCE excpetion being processed, ignore this SRAO MCE */
+ r = kvm_mce_in_exception(data->env);
+ if (r == -1)
+ fprintf(stderr, "Failed to get MCE status\n");
+ else if (r && !(data->mce->status & MCI_STATUS_AR))
+ return;
+
r = kvm_set_mce(data->env, data->mce);
- if (r < 0)
+ if (r < 0) {
perror("kvm_set_mce FAILED");
+ if (data->abort_on_error) {
+ abort();
+ }
+ }
}
#endif
void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
- uint64_t mcg_status, uint64_t addr, uint64_t misc)
+ uint64_t mcg_status, uint64_t addr, uint64_t misc,
+ int abort_on_error)
{
#ifdef KVM_CAP_MCE
struct kvm_x86_mce mce = {
@@ -225,7 +273,15 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
.mce = &mce,
};
+ if (!cenv->mcg_cap) {
+ fprintf(stderr, "MCE support is not enabled!\n");
+ return;
+ }
+
run_on_cpu(cenv, kvm_do_inject_x86_mce, &data);
+#else
+ if (abort_on_error)
+ abort();
#endif
}
@@ -1528,3 +1584,121 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env)
((env->segs[R_CS].selector & 3) != 3);
}
+static void hardware_memory_error(void)
+{
+ fprintf(stderr, "Hardware memory error!\n");
+ exit(1);
+}
+
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+#if defined(KVM_CAP_MCE)
+ struct kvm_x86_mce mce = {
+ .bank = 9,
+ };
+ void *vaddr;
+ ram_addr_t ram_addr;
+ target_phys_addr_t paddr;
+ int r;
+
+ if ((env->mcg_cap & MCG_SER_P) && addr
+ && (code == BUS_MCEERR_AR
+ || code == BUS_MCEERR_AO)) {
+ if (code == BUS_MCEERR_AR) {
+ /* Fake an Intel architectural Data Load SRAR UCR */
+ mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+ | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+ | MCI_STATUS_AR | 0x134;
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
+ } else {
+ /*
+ * If there is an MCE excpetion being processed, ignore
+ * this SRAO MCE
+ */
+ r = kvm_mce_in_exception(env);
+ if (r == -1) {
+ fprintf(stderr, "Failed to get MCE status\n");
+ } else if (r) {
+ return 0;
+ }
+ /* Fake an Intel architectural Memory scrubbing UCR */
+ mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+ | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+ | 0xc0;
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
+ }
+ vaddr = (void *)addr;
+ if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
+ !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) {
+ fprintf(stderr, "Hardware memory error for memory used by "
+ "QEMU itself instead of guest system!\n");
+ /* Hope we are lucky for AO MCE */
+ if (code == BUS_MCEERR_AO) {
+ return 0;
+ } else {
+ hardware_memory_error();
+ }
+ }
+ mce.addr = paddr;
+ r = kvm_set_mce(env, &mce);
+ if (r < 0) {
+ fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
+ abort();
+ }
+ } else
+#endif
+ {
+ if (code == BUS_MCEERR_AO) {
+ return 0;
+ } else if (code == BUS_MCEERR_AR) {
+ hardware_memory_error();
+ } else {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int kvm_on_sigbus(int code, void *addr)
+{
+#if defined(KVM_CAP_MCE)
+ if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
+ uint64_t status;
+ void *vaddr;
+ ram_addr_t ram_addr;
+ target_phys_addr_t paddr;
+ CPUState *cenv;
+
+ /* Hope we are lucky for AO MCE */
+ vaddr = addr;
+ if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
+ !kvm_physical_memory_addr_from_ram(first_cpu->kvm_state, ram_addr, &paddr)) {
+ fprintf(stderr, "Hardware memory error for memory used by "
+ "QEMU itself instead of guest system!: %p\n", addr);
+ return 0;
+ }
+ status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+ | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+ | 0xc0;
+ kvm_inject_x86_mce(first_cpu, 9, status,
+ MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
+ (MCM_ADDR_PHYS << 6) | 0xc, 1);
+ for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu) {
+ kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
+ MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
+ }
+ } else
+#endif
+ {
+ if (code == BUS_MCEERR_AO) {
+ return 0;
+ } else if (code == BUS_MCEERR_AR) {
+ hardware_memory_error();
+ } else {
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h
index c1ebd24fcf..04932cf4c8 100644
--- a/target-i386/kvm_x86.h
+++ b/target-i386/kvm_x86.h
@@ -16,6 +16,7 @@
#define __KVM_X86_H__
void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
- uint64_t mcg_status, uint64_t addr, uint64_t misc);
+ uint64_t mcg_status, uint64_t addr, uint64_t misc,
+ int abort_on_error);
#endif