From 452589b6b47e8dc6353df257fc803dfc1383bed8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 13 Jul 2017 20:01:16 +0100 Subject: vl.c/exit: pause cpus before closing block devices There's a rare exit seg if the guest is accessing IO during exit. It's always hitting the atomic_inc(&bs->in_flight) with a NULL bs. This was added recently in 99723548 but I don't see it as the cause. Flip vl.c around so we pause the cpus before closing the block devices, that way we shouldn't have anything trying to access them when they're gone. This was originally Red Hat bz https://bugzilla.redhat.com/show_bug.cgi?id=1451015 Signed-off-by: Dr. David Alan Gilbert Reported-by: Cong Li -- This is a very rare race, I'll leave it running in a loop to see if we hit anything else and to check this really fixes it. I do worry if there are other cases that can trigger this - e.g. hot-unplug or ejecting a CD. Message-Id: <20170713190116.21608-1-dgilbert@redhat.com> Signed-off-by: Paolo Bonzini --- vl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vl.c b/vl.c index fb6b2efafa..dd803fc244 100644 --- a/vl.c +++ b/vl.c @@ -4787,8 +4787,8 @@ int main(int argc, char **argv, char **envp) replay_disable_events(); iothread_stop_all(); - bdrv_close_all(); pause_all_vcpus(); + bdrv_close_all(); res_free(); /* vhost-user must be cleaned up before chardevs. */ -- cgit v1.2.1 From f70d3451fe468eacddb15ccf5fd170754510b0a0 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 24 Jul 2017 17:51:25 +0100 Subject: cpu_physical_memory_sync_dirty_bitmap: Fix alignment check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This code has an optimised, word aligned version, and a boring unaligned version. Recently 084140bd498909 fixed a missing offset addition from the core of both versions. However, the offset isn't necessarily aligned and thus the choice between the two versions needs fixing up to also include the offset. Symptom: A few stuck unsent pages during migration; not normally noticed unless under very low bandwidth in which case the migration may get stuck never ending and never performing a 2nd sync; noticed by a hanging postcopy-test on a very heavily loaded system. Fixes: 084140bd498909 Signed-off-by: Dr. David Alan Gilbert Reported-by: Alex Benneé Tested-by: Alex Benneé -- v2 Move 'page' inside the if (Comment from Paolo) Message-Id: <20170724165125.29887-1-dgilbert@redhat.com> Signed-off-by: Paolo Bonzini --- include/exec/ram_addr.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index c04f4f67f6..d017639f7e 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -377,19 +377,20 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, uint64_t *real_dirty_pages) { ram_addr_t addr; - unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); + unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); uint64_t num_dirty = 0; unsigned long *dest = rb->bmap; /* start address is aligned at the start of a word? */ - if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) { + if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) == + (start + rb->offset)) { int k; int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); unsigned long * const *src; - unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE; unsigned long offset = BIT_WORD((word * BITS_PER_LONG) % DIRTY_MEMORY_BLOCK_SIZE); + unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); rcu_read_lock(); -- cgit v1.2.1 From d73f0247228830a7730bb60af467e8d47aee78cf Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Mon, 17 Jul 2017 16:45:27 +0200 Subject: accel: cleanup error output Only emit "XXX accelerator not found", if there are not further accelerators listed. eg accel=kvm:tcg doesn't print a "KVM accelerator not found" warning when it falls back to tcg, but a accel=kvm prints a warning, since no fallback is given. Suggested-by: Daniel P. Berrange Suggested-by: Paolo Bonzini Signed-off-by: Laurent Vivier Message-Id: <20170717144527.24534-1-lvivier@redhat.com> Signed-off-by: Paolo Bonzini --- accel/accel.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/accel/accel.c b/accel/accel.c index fa8584488e..8ae40e1e13 100644 --- a/accel/accel.c +++ b/accel/accel.c @@ -33,6 +33,7 @@ #include "sysemu/qtest.h" #include "hw/xen/xen.h" #include "qom/object.h" +#include "qemu/error-report.h" static const TypeInfo accel_type = { .name = TYPE_ACCEL, @@ -69,19 +70,20 @@ static int accel_init_machine(AccelClass *acc, MachineState *ms) void configure_accelerator(MachineState *ms) { - const char *p; + const char *accel, *p; char buf[10]; int ret; bool accel_initialised = false; bool init_failed = false; AccelClass *acc = NULL; - p = qemu_opt_get(qemu_get_machine_opts(), "accel"); - if (p == NULL) { + accel = qemu_opt_get(qemu_get_machine_opts(), "accel"); + if (accel == NULL) { /* Use the default "accelerator", tcg */ - p = "tcg"; + accel = "tcg"; } + p = accel; while (!accel_initialised && *p != '\0') { if (*p == ':') { p++; @@ -89,7 +91,6 @@ void configure_accelerator(MachineState *ms) p = get_opt_name(buf, sizeof(buf), p, ':'); acc = accel_find(buf); if (!acc) { - fprintf(stderr, "\"%s\" accelerator not found.\n", buf); continue; } if (acc->available && !acc->available()) { @@ -100,9 +101,8 @@ void configure_accelerator(MachineState *ms) ret = accel_init_machine(acc, ms); if (ret < 0) { init_failed = true; - fprintf(stderr, "failed to initialize %s: %s\n", - acc->name, - strerror(-ret)); + error_report("failed to initialize %s: %s", + acc->name, strerror(-ret)); } else { accel_initialised = true; } @@ -110,13 +110,13 @@ void configure_accelerator(MachineState *ms) if (!accel_initialised) { if (!init_failed) { - fprintf(stderr, "No accelerator found!\n"); + error_report("-machine accel=%s: No accelerator found", accel); } exit(1); } if (init_failed) { - fprintf(stderr, "Back to %s accelerator.\n", acc->name); + error_report("Back to %s accelerator", acc->name); } } -- cgit v1.2.1 From 4db0db1fa6b653970148f6ff7a24bede1d52ef9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 20 Jul 2017 12:00:46 +0200 Subject: char-fd: remove useless chr pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently unused since it was introduced in commit a29753f8aa79a34a324afebe340182a51a5aef11. Now, it can be trivially accessed by CHARDEV() of self. Signed-off-by: Marc-André Lureau Message-Id: <20170720100046.4424-1-marcandre.lureau@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini --- chardev/char-fd.c | 1 - include/chardev/char-fd.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/chardev/char-fd.c b/chardev/char-fd.c index 1584a3de20..6a62a545f2 100644 --- a/chardev/char-fd.c +++ b/chardev/char-fd.c @@ -141,7 +141,6 @@ void qemu_chr_open_fd(Chardev *chr, qio_channel_set_name(QIO_CHANNEL(s->ioc_out), name); g_free(name); qemu_set_nonblock(fd_out); - s->chr = chr; } static void char_fd_class_init(ObjectClass *oc, void *data) diff --git a/include/chardev/char-fd.h b/include/chardev/char-fd.h index 55ae5b47b0..e7c2b176f9 100644 --- a/include/chardev/char-fd.h +++ b/include/chardev/char-fd.h @@ -29,7 +29,7 @@ typedef struct FDChardev { Chardev parent; - Chardev *chr; + QIOChannel *ioc_in, *ioc_out; int max_size; } FDChardev; -- cgit v1.2.1 From 0ec846bface0f9733ca61ba18e3d4b72bfd9f8ca Mon Sep 17 00:00:00 2001 From: Anton Nefedov Date: Tue, 25 Jul 2017 13:04:41 +0300 Subject: char: don't exit on hmp 'chardev-add help' qemu_chr_new_from_opts() is used from both vl.c and hmp, and it is quite confusing to see qemu suddenly exit after receiving a help option in hmp. Do exit(0) from vl.c instead. Signed-off-by: Anton Nefedov Message-Id: <1500977081-120929-1-git-send-email-anton.nefedov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- chardev/char.c | 2 +- include/chardev/char.h | 4 +++- vl.c | 10 ++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/chardev/char.c b/chardev/char.c index c34b44abc9..5d283b90d3 100644 --- a/chardev/char.c +++ b/chardev/char.c @@ -620,7 +620,7 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts, Error **errp) error_report("Available chardev backend types: %s", str->str); g_string_free(str, true); - exit(0); + return NULL; } if (id == NULL) { diff --git a/include/chardev/char.h b/include/chardev/char.h index 1604ea9143..66dde4637e 100644 --- a/include/chardev/char.h +++ b/include/chardev/char.h @@ -65,7 +65,9 @@ struct Chardev { * * @opts see qemu-config.c for a list of valid options * - * Returns: a new character backend + * Returns: on success: a new character backend + * otherwise: NULL; @errp specifies the error + * or left untouched in case of help option */ Chardev *qemu_chr_new_from_opts(QemuOpts *opts, Error **errp); diff --git a/vl.c b/vl.c index dd803fc244..99fcfa0442 100644 --- a/vl.c +++ b/vl.c @@ -2344,10 +2344,12 @@ static int chardev_init_func(void *opaque, QemuOpts *opts, Error **errp) { Error *local_err = NULL; - qemu_chr_new_from_opts(opts, &local_err); - if (local_err) { - error_report_err(local_err); - return -1; + if (!qemu_chr_new_from_opts(opts, &local_err)) { + if (local_err) { + error_report_err(local_err); + return -1; + } + exit(0); } return 0; } -- cgit v1.2.1 From eb22aeca65f3769af33ba559757b42f24f743c18 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 25 Jul 2017 12:36:38 +0100 Subject: docs: document deprecation policy & deprecated features in appendix The deprecation of features in QEMU is totally adhoc currently, with no way for the user to get a list of what is deprecated in each release. This adds an appendix to the doc that records when each deprecation was made and provides text explaining what to use instead, if anything. Since there has been no formal policy around removal of deprecated features in the past, any deprecations prior to 2.10.0 are to be treated as if they had been made at the 2.10.0 release. Thus the earliest that existing deprecations will be deleted is the start of the 2.12.0 cycle. Signed-off-by: Daniel P. Berrange Message-Id: <20170725113638.7019-1-berrange@redhat.com> Reviewed-by: Thomas Huth Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini --- qemu-doc.texi | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/qemu-doc.texi b/qemu-doc.texi index 48af5155c7..aeb7bc52f5 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -38,6 +38,7 @@ * QEMU Guest Agent:: * QEMU User space emulator:: * Implementation notes:: +* Deprecated features:: * License:: * Index:: @end menu @@ -3128,6 +3129,180 @@ Run the emulation in single step mode. @include qemu-tech.texi +@node Deprecated features +@appendix Deprecated features + +In general features are intended to be supported indefinitely once +introduced into QEMU. In the event that a feature needs to be removed, +it will be listed in this appendix. The feature will remain functional +for 2 releases prior to actual removal. Deprecated features may also +generate warnings on the console when QEMU starts up, or if activated +via a monitor command, however, this is not a mandatory requirement. + +Prior to the 2.10.0 release there was no official policy on how +long features would be deprecated prior to their removal, nor +any documented list of which features were deprecated. Thus +any features deprecated prior to 2.10.0 will be treated as if +they were first deprecated in the 2.10.0 release. + +What follows is a list of all features currently marked as +deprecated. + +@section System emulator command line arguments + +@subsection -drive boot=on|off (since 1.3.0) + +The ``boot=on|off'' option to the ``-drive'' argument is +ignored. Applications should use the ``bootindex=N'' parameter +to set an absolute ordering between devices instead. + +@subsection -tdf (since 1.3.0) + +The ``-tdf'' argument is ignored. The behaviour implemented +by this argument is now the default when using the KVM PIT, +but can be requested explicitly using +``-global kvm-pit.lost_tick_policy=slew''. + +@subsection -no-kvm-pit-reinjection (since 1.3.0) + +The ``-no-kvm-pit-reinjection'' argument is now a +synonym for setting ``-global kvm-pit.lost_tick_policy=discard''. + +@subsection -no-kvm-irqchip (since 1.3.0) + +The ``-no-kvm-irqchip'' argument is now a synonym for +setting ``-machine kernel_irqchip=off''. + +@subsection -no-kvm-pit (since 1.3.0) + +The ``-no-kvm-pit'' argument is ignored. It is no longer +possible to disable the KVM PIT directly. + +@subsection -no-kvm (since 1.3.0) + +The ``-no-kvm'' argument is now a synonym for setting +``-machine accel=tcg''. + +@subsection -mon default=on (since 2.4.0) + +The ``default'' option to the ``-mon'' argument is +now ignored. When multiple monitors were enabled, it +indicated which monitor would receive log messages +from the various subsystems. This feature is no longer +required as messages are now only sent to the monitor +in response to explicitly monitor commands. + +@subsection -vnc tls (since 2.5.0) + +The ``-vnc tls'' argument is now a synonym for setting +``-object tls-creds-anon,id=tls0'' combined with +``-vnc tls-creds=tls0' + +@subsection -vnc x509 (since 2.5.0) + +The ``-vnc x509=/path/to/certs'' argument is now a +synonym for setting +``-object tls-creds-x509,dir=/path/to/certs,id=tls0,verify-peer=no'' +combined with ``-vnc tls-creds=tls0' + +@subsection -vnc x509verify (since 2.5.0) + +The ``-vnc x509verify=/path/to/certs'' argument is now a +synonym for setting +``-object tls-creds-x509,dir=/path/to/certs,id=tls0,verify-peer=yes'' +combined with ``-vnc tls-creds=tls0' + +@subsection -tftp (since 2.6.0) + +The ``-tftp /some/dir'' argument is now a synonym for setting +the ``-netdev user,tftp=/some/dir' argument. The new syntax +allows different settings to be provided per NIC. + +@subsection -bootp (since 2.6.0) + +The ``-bootp /some/file'' argument is now a synonym for setting +the ``-netdev user,bootp=/some/file' argument. The new syntax +allows different settings to be provided per NIC. + +@subsection -redir (since 2.6.0) + +The ``-redir ARGS'' argument is now a synonym for setting +the ``-netdev user,hostfwd=ARGS'' argument instead. The new +syntax allows different settings to be provided per NIC. + +@subsection -smb (since 2.6.0) + +The ``-smb /some/dir'' argument is now a synonym for setting +the ``-netdev user,smb=/some/dir'' argument instead. The new +syntax allows different settings to be provided per NIC. + +@subsection -net channel (since 2.6.0) + +The ``--net channel,ARGS'' argument is now a synonym for setting +the ``-netdev user,guestfwd=ARGS'' argument instead. + +@subsection -net vlan (since 2.9.0) + +The ``-net van=NN'' argument is partially replaced with the +new ``-netdev'' argument. The remaining use cases will no +longer be directly supported in QEMU. + +@subsection -drive if=scsi (since 2.9.0) + +The ``-drive if=scsi'' argument is replaced by the the +``-device BUS-TYPE'' argument combined with ``-drive if=none''. + +@subsection -net dump (since 2.10.0) + +The ``--net dump'' argument is now replaced with the +``-object filter-dump'' argument which works in combination +with the modern ``-netdev`` backends instead. + +@subsection -hdachs (since 2.10.0) + +The ``-hdachs'' argument is now a synonym for setting +the ``cyls'', ``heads'', ``secs'', and ``trans'' properties +on the ``ide-hd'' device using the ``-device'' argument. +The new syntax allows different settings to be provided +per disk. + +@subsection -usbdevice (since 2.10.0) + +The ``-usbdevice DEV'' argument is now a synonym for setting +the ``-device usb-DEV'' argument instead. The deprecated syntax +would automatically enable USB support on the machine type. +If using the new syntax, USB support must be explicitly +enabled via the ``-machine usb=on'' argument. + +@section qemu-img command line arguments + +@subsection convert -s (since 2.0.0) + +The ``convert -s snapshot_id_or_name'' argument is obsoleted +by the ``convert -l snapshot_param'' argument instead. + +@section System emulator human monitor commands + +@subsection usb_add (since 2.10.0) + +The ``usb_add'' command is replaced by the ``device_add'' command. + +@subsection usb_del (since 2.10.0) + +The ``usb_del'' command is replaced by the ``device_del'' command. + +@section System emulator devices + +@subsection ivshmem (since 2.6.0) + +The ``ivshmem'' device type is replaced by either the ``ivshmem-plain'' +or ``ivshmem-doorbell`` device types. + +@subsection spapr-pci-vfio-host-bridge (since 2.6.0) + +The ``spapr-pci-vfio-host-bridge'' device type is replaced by +the ``spapr-pci-host-bridge'' device type. + @node License @appendix License -- cgit v1.2.1 From 4fadfa00301695a4985e2a229cab857b2ce5c775 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Fri, 14 Jul 2017 23:47:36 +0800 Subject: target-i386: kvm_get/put_vcpu_events don't handle sipi_vector qemu call kvm_get_vcpu_events, and kernel return sipi_vector always 0, never valid when reporting to user space. But when qemu calls kvm_put_vcpu_events will make sipi_vector in kernel be 0. This will accidently modify sipi_vector when sipi_vector in kernel is not 0. Signed-off-by: Peng Hao Reviewed-by: Liu Yi Message-Id: <1500047256-8911-1-git-send-email-peng.hao2@zte.com.cn> Signed-off-by: Paolo Bonzini --- target/i386/kvm.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index a6613e19f2..6db7783edc 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -2444,8 +2444,10 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) } if (level >= KVM_PUT_RESET_STATE) { - events.flags |= - KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR; + events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; + if (env->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { + events.flags |= KVM_VCPUEVENT_VALID_SIPI_VECTOR; + } } return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); @@ -2633,6 +2635,10 @@ int kvm_arch_put_registers(CPUState *cpu, int level) if (ret < 0) { return ret; } + ret = kvm_put_vcpu_events(x86_cpu, level); + if (ret < 0) { + return ret; + } if (level >= KVM_PUT_RESET_STATE) { ret = kvm_put_mp_state(x86_cpu); if (ret < 0) { @@ -2644,11 +2650,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level) if (ret < 0) { return ret; } - - ret = kvm_put_vcpu_events(x86_cpu, level); - if (ret < 0) { - return ret; - } ret = kvm_put_debugregs(x86_cpu); if (ret < 0) { return ret; @@ -2668,35 +2669,39 @@ int kvm_arch_get_registers(CPUState *cs) assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs)); - ret = kvm_getput_regs(cpu, 0); + ret = kvm_get_vcpu_events(cpu); if (ret < 0) { goto out; } - ret = kvm_get_xsave(cpu); + /* + * KVM_GET_MPSTATE can modify CS and RIP, call it before + * KVM_GET_REGS and KVM_GET_SREGS. + */ + ret = kvm_get_mp_state(cpu); if (ret < 0) { goto out; } - ret = kvm_get_xcrs(cpu); + ret = kvm_getput_regs(cpu, 0); if (ret < 0) { goto out; } - ret = kvm_get_sregs(cpu); + ret = kvm_get_xsave(cpu); if (ret < 0) { goto out; } - ret = kvm_get_msrs(cpu); + ret = kvm_get_xcrs(cpu); if (ret < 0) { goto out; } - ret = kvm_get_mp_state(cpu); + ret = kvm_get_sregs(cpu); if (ret < 0) { goto out; } - ret = kvm_get_apic(cpu); + ret = kvm_get_msrs(cpu); if (ret < 0) { goto out; } - ret = kvm_get_vcpu_events(cpu); + ret = kvm_get_apic(cpu); if (ret < 0) { goto out; } -- cgit v1.2.1 From f5aa69bdc3418773f26747ca282c291519626ece Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Wed, 26 Jul 2017 17:53:26 +0100 Subject: exec: Add lock parameter to qemu_ram_ptr_length Commit 04bf2526ce87f21b32c9acba1c5518708c243ad0 (exec: use qemu_ram_ptr_length to access guest ram) start using qemu_ram_ptr_length instead of qemu_map_ram_ptr, but when used with Xen, the behavior of both function is different. They both call xen_map_cache, but one with "lock", meaning the mapping of guest memory is never released implicitly, and the second one without, which means, mapping can be release later, when needed. In the context of address_space_{read,write}_continue, the ptr to those mapping should not be locked because it is used immediatly and never used again. The lock parameter make it explicit in which context qemu_ram_ptr_length is called. Signed-off-by: Anthony PERARD Message-Id: <20170726165326.10327-1-anthony.perard@citrix.com> Reviewed-by: Stefano Stabellini Signed-off-by: Paolo Bonzini --- exec.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/exec.c b/exec.c index 01ac21e3cd..d20c34ca83 100644 --- a/exec.c +++ b/exec.c @@ -2203,7 +2203,7 @@ void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr) * Called within RCU critical section. */ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr, - hwaddr *size) + hwaddr *size, bool lock) { RAMBlock *block = ram_block; if (*size == 0) { @@ -2222,10 +2222,10 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr, * In that case just map the requested area. */ if (block->offset == 0) { - return xen_map_cache(addr, *size, 1, true); + return xen_map_cache(addr, *size, lock, lock); } - block->host = xen_map_cache(block->offset, block->max_length, 1, true); + block->host = xen_map_cache(block->offset, block->max_length, 1, lock); } return ramblock_ptr(block, addr); @@ -2947,7 +2947,7 @@ static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr, } } else { /* RAM case */ - ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l); + ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false); memcpy(ptr, buf, l); invalidate_and_set_dirty(mr, addr1, l); } @@ -3038,7 +3038,7 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr, } } else { /* RAM case */ - ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l); + ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false); memcpy(buf, ptr, l); } @@ -3349,7 +3349,7 @@ void *address_space_map(AddressSpace *as, memory_region_ref(mr); *plen = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write); - ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen); + ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen, true); rcu_read_unlock(); return ptr; -- cgit v1.2.1 From 393c13b940be8f2e5b126cd9f442c12e7ecb4cac Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Jul 2017 16:47:08 +0200 Subject: bt: stop the sdp memory allocation craziness Clang static analyzer reports a memory leak. Actually, the allocated memory escapes here: record->attribute_list[record->attributes].pair = data; but clang is correct that the memory might leak if len is zero. We know it isn't; assert that it is the case. The craziness doesn't end there. The memory is freed by bt_l2cap_sdp_close_ch: g_free(sdp->service_list[i].attribute_list->pair); which actually should have been written like this: g_free(sdp->service_list[i].attribute_list[0].pair); The attribute_list is sorted with qsort; but indeed the first entry of attribute_list should point to "data" even after the qsort, because the first record has id SDP_ATTR_RECORD_HANDLE, whose numeric value is zero. But hang on. The qsort function is static int sdp_attributeid_compare( const struct sdp_service_attribute_s *a, const struct sdp_service_attribute_s *b) { return (int) b->attribute_id - a->attribute_id; } but no one ever writes attribute_id. So it only works if qsort is stable, and who knows what else is broken, but we can fix it by setting attribute_id in the while loop. Signed-off-by: Paolo Bonzini --- hw/bt/sdp.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/hw/bt/sdp.c b/hw/bt/sdp.c index f67b3b89c0..3cb60b9495 100644 --- a/hw/bt/sdp.c +++ b/hw/bt/sdp.c @@ -580,7 +580,7 @@ static void bt_l2cap_sdp_close_ch(void *opaque) int i; for (i = 0; i < sdp->services; i ++) { - g_free(sdp->service_list[i].attribute_list->pair); + g_free(sdp->service_list[i].attribute_list[0].pair); g_free(sdp->service_list[i].attribute_list); g_free(sdp->service_list[i].uuid); } @@ -720,6 +720,8 @@ static void sdp_service_record_build(struct sdp_service_record_s *record, len += sdp_attr_max_size(&def->attributes[record->attributes ++].data, &record->uuids); } + + assert(len > 0); record->uuids = pow2ceil(record->uuids); record->attribute_list = g_malloc0(record->attributes * sizeof(*record->attribute_list)); @@ -730,12 +732,14 @@ static void sdp_service_record_build(struct sdp_service_record_s *record, record->attributes = 0; uuid = record->uuid; while (def->attributes[record->attributes].data.type) { + int attribute_id = def->attributes[record->attributes].id; record->attribute_list[record->attributes].pair = data; + record->attribute_list[record->attributes].attribute_id = attribute_id; len = 0; data[len ++] = SDP_DTYPE_UINT | SDP_DSIZE_2; - data[len ++] = def->attributes[record->attributes].id >> 8; - data[len ++] = def->attributes[record->attributes].id & 0xff; + data[len ++] = attribute_id >> 8; + data[len ++] = attribute_id & 0xff; len += sdp_attr_write(data + len, &def->attributes[record->attributes].data, &uuid); @@ -749,10 +753,15 @@ static void sdp_service_record_build(struct sdp_service_record_s *record, data += len; } - /* Sort the attribute list by the AttributeID */ + /* Sort the attribute list by the AttributeID. The first must be + * SDP_ATTR_RECORD_HANDLE so that bt_l2cap_sdp_close_ch can free + * the buffer. + */ qsort(record->attribute_list, record->attributes, sizeof(*record->attribute_list), (void *) sdp_attributeid_compare); + assert(record->attribute_list[0].pair == data); + /* Sort the searchable UUIDs list for bisection */ qsort(record->uuid, record->uuids, sizeof(*record->uuid), -- cgit v1.2.1 From 8bfce83a3b50d45b7107a50a6cd61d5304a925a2 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 25 Jul 2017 15:10:41 +0100 Subject: qemu-options: document existance of versioned machine types The -machine docs did not explain what the versioned machine types are for, nor that they'll be maintained across releases. Signed-off-by: Daniel P. Berrange Message-Id: <20170725141041.1195-1-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- qemu-options.hx | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/qemu-options.hx b/qemu-options.hx index 746b5fa75d..9f6e2adfff 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -49,7 +49,20 @@ STEXI @item -machine [type=]@var{name}[,prop=@var{value}[,...]] @findex -machine Select the emulated machine by @var{name}. Use @code{-machine help} to list -available machines. Supported machine properties are: +available machines. + +For architectures which aim to support live migration compatibility +across releases, each release will introduce a new versioned machine +type. For example, the 2.8.0 release introduced machine types +``pc-i440fx-2.8'' and ``pc-q35-2.8'' for the x86_64/i686 architectures. + +To allow live migration of guests from QEMU version 2.8.0, to QEMU +version 2.9.0, the 2.9.0 version must support the ``pc-i440fx-2.8'' +and ``pc-q35-2.8'' machines too. To allow users live migrating VMs +to skip multiple intermediate releases when upgrading, new releases +of QEMU will support machine types from many previous versions. + +Supported machine properties are: @table @option @item accel=@var{accels1}[:@var{accels2}[:...]] This is used to enable an accelerator. Depending on the target architecture, -- cgit v1.2.1 From 1931076077254a2886daa7c830c7838ebd1f81ef Mon Sep 17 00:00:00 2001 From: Jay Zhou Date: Fri, 28 Jul 2017 18:28:53 +0800 Subject: migration: optimize the downtime Qemu_savevm_state_cleanup takes about 300ms in my ram migration tests with a 8U24G vm(20G is really occupied), the main cost comes from KVM_SET_USER_MEMORY_REGION ioctl when mem.memory_size = 0 in kvm_set_user_memory_region. In kmod, the main cost is kvm_zap_obsolete_pages, which traverses the active_mmu_pages list to zap the unsync sptes. It can be optimized by delaying memory_global_dirty_log_stop to the next vm_start. Changes v2->v3: - NULL VMChangeStateHandler if it is deleted and protect the scenario of nested invocations of memory_global_dirty_log_start/stop [Paolo] Changes v1->v2: - create a VMChangeStateHandler in memory.c to reduce the coupling [Paolo] Signed-off-by: Jay Zhou Message-Id: <1501237733-2736-1-git-send-email-jianjay.zhou@huawei.com> Signed-off-by: Paolo Bonzini --- memory.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/memory.c b/memory.c index a7bc70aac1..c0adc35410 100644 --- a/memory.c +++ b/memory.c @@ -2357,8 +2357,15 @@ void memory_global_dirty_log_sync(void) } } +static VMChangeStateEntry *vmstate_change; + void memory_global_dirty_log_start(void) { + if (vmstate_change) { + qemu_del_vm_change_state_handler(vmstate_change); + vmstate_change = NULL; + } + global_dirty_log = true; MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward); @@ -2369,7 +2376,7 @@ void memory_global_dirty_log_start(void) memory_region_transaction_commit(); } -void memory_global_dirty_log_stop(void) +static void memory_global_dirty_log_do_stop(void) { global_dirty_log = false; @@ -2381,6 +2388,33 @@ void memory_global_dirty_log_stop(void) MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse); } +static void memory_vm_change_state_handler(void *opaque, int running, + RunState state) +{ + if (running) { + memory_global_dirty_log_do_stop(); + + if (vmstate_change) { + qemu_del_vm_change_state_handler(vmstate_change); + vmstate_change = NULL; + } + } +} + +void memory_global_dirty_log_stop(void) +{ + if (!runstate_is_running()) { + if (vmstate_change) { + return; + } + vmstate_change = qemu_add_vm_change_state_handler( + memory_vm_change_state_handler, NULL); + return; + } + + memory_global_dirty_log_do_stop(); +} + static void listener_add_address_space(MemoryListener *listener, AddressSpace *as) { -- cgit v1.2.1 From dfaea0c1a8482c5410917f91a9e2b88d4954b69e Mon Sep 17 00:00:00 2001 From: Mao Zhongyi Date: Wed, 26 Jul 2017 16:41:52 +0800 Subject: hw/scsi/vmw_pvscsi: Remove the dead error handling qemu_bh_new() is a wrapper around aio_bh_new(), which returns null only when g_new() does. It doesn't. So remove the dead error handling. Reviewed-by: Dmitry Fleytman Cc: Paolo Bonzini Cc: Markus Armbruster Signed-off-by: Mao Zhongyi Message-Id: <20170726084153.10121-1-maozy.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- hw/scsi/vmw_pvscsi.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index 4a106da856..d92973ede1 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -1138,10 +1138,6 @@ pvscsi_init(PCIDevice *pci_dev) } s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); - if (!s->completion_worker) { - pvscsi_cleanup_msi(s); - return -ENOMEM; - } scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info, NULL); -- cgit v1.2.1 From fafeb41cd0abac412d7c77da1b680d5df805b454 Mon Sep 17 00:00:00 2001 From: Mao Zhongyi Date: Wed, 26 Jul 2017 16:41:53 +0800 Subject: hw/scsi/vmw_pvscsi: Convert to realize Convert a device model where initialization obviously can't fail, make it implement realize() rather than init(). Reviewed-by: Dmitry Fleytman Cc: Paolo Bonzini Cc: Markus Armbruster Signed-off-by: Mao Zhongyi Message-Id: <20170726084153.10121-2-maozy.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- hw/scsi/vmw_pvscsi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index d92973ede1..77d8b6f9e2 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -1103,8 +1103,8 @@ static const struct SCSIBusInfo pvscsi_scsi_info = { .cancel = pvscsi_request_cancelled, }; -static int -pvscsi_init(PCIDevice *pci_dev) +static void +pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) { PVSCSIState *s = PVSCSI(pci_dev); @@ -1144,8 +1144,6 @@ pvscsi_init(PCIDevice *pci_dev) /* override default SCSI bus hotplug-handler, with pvscsi's one */ qbus_set_hotplug_handler(BUS(&s->bus), DEVICE(s), &error_abort); pvscsi_reset_state(s); - - return 0; } static void @@ -1278,7 +1276,7 @@ static void pvscsi_class_init(ObjectClass *klass, void *data) PVSCSIClass *pvs_k = PVSCSI_DEVICE_CLASS(klass); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); - k->init = pvscsi_init; + k->realize = pvscsi_realizefn; k->exit = pvscsi_uninit; k->vendor_id = PCI_VENDOR_ID_VMWARE; k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI; -- cgit v1.2.1 From bc706fa9039d875bb33ad7e9b27423a42455e17b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 25 Jul 2017 13:46:07 +0200 Subject: rtc-test: cleanup register_b_set_flag test Introduce set_datetime_bcd/assert_datetime_bcd, and handle UIP correctly. Signed-off-by: Paolo Bonzini --- tests/rtc-test.c | 76 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/tests/rtc-test.c b/tests/rtc-test.c index e78f701afb..798cf5e285 100644 --- a/tests/rtc-test.c +++ b/tests/rtc-test.c @@ -17,6 +17,8 @@ #include "qemu/timer.h" #include "hw/timer/mc146818rtc_regs.h" +#define UIP_HOLD_LENGTH (8 * NANOSECONDS_PER_SECOND / 32768) + static uint8_t base = 0x70; static int bcd2dec(int value) @@ -297,16 +299,30 @@ static void alarm_time(void) g_assert(cmos_read(RTC_REG_C) == 0); } +static void set_time_regs(int h, int m, int s) +{ + cmos_write(RTC_HOURS, h); + cmos_write(RTC_MINUTES, m); + cmos_write(RTC_SECONDS, s); +} + static void set_time(int mode, int h, int m, int s) { - /* set BCD 12 hour mode */ cmos_write(RTC_REG_B, mode); - cmos_write(RTC_REG_A, 0x76); + set_time_regs(h, m, s); + cmos_write(RTC_REG_A, 0x26); +} + +static void set_datetime_bcd(int h, int min, int s, int d, int m, int y) +{ cmos_write(RTC_HOURS, h); - cmos_write(RTC_MINUTES, m); + cmos_write(RTC_MINUTES, min); cmos_write(RTC_SECONDS, s); - cmos_write(RTC_REG_A, 0x26); + cmos_write(RTC_YEAR, y & 0xFF); + cmos_write(RTC_CENTURY, y >> 8); + cmos_write(RTC_MONTH, m); + cmos_write(RTC_DAY_OF_MONTH, d); } #define assert_time(h, m, s) \ @@ -316,6 +332,17 @@ static void set_time(int mode, int h, int m, int s) g_assert_cmpint(cmos_read(RTC_SECONDS), ==, s); \ } while(0) +#define assert_datetime_bcd(h, min, s, d, m, y) \ + do { \ + g_assert_cmpint(cmos_read(RTC_HOURS), ==, h); \ + g_assert_cmpint(cmos_read(RTC_MINUTES), ==, min); \ + g_assert_cmpint(cmos_read(RTC_SECONDS), ==, s); \ + g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, d); \ + g_assert_cmpint(cmos_read(RTC_MONTH), ==, m); \ + g_assert_cmpint(cmos_read(RTC_YEAR), ==, (y & 0xFF)); \ + g_assert_cmpint(cmos_read(RTC_CENTURY), ==, (y >> 8)); \ + } while(0) + static void basic_12h_bcd(void) { /* set BCD 12 hour mode */ @@ -506,41 +533,30 @@ static void fuzz_registers(void) static void register_b_set_flag(void) { + if (cmos_read(RTC_REG_A) & REG_A_UIP) { + clock_step(UIP_HOLD_LENGTH + NANOSECONDS_PER_SECOND / 5); + } + g_assert_cmpint(cmos_read(RTC_REG_A) & REG_A_UIP, ==, 0); + /* Enable binary-coded decimal (BCD) mode and SET flag in Register B*/ cmos_write(RTC_REG_B, REG_B_24H | REG_B_SET); - cmos_write(RTC_REG_A, 0x76); - cmos_write(RTC_YEAR, 0x11); - cmos_write(RTC_CENTURY, 0x20); - cmos_write(RTC_MONTH, 0x02); - cmos_write(RTC_DAY_OF_MONTH, 0x02); - cmos_write(RTC_HOURS, 0x02); - cmos_write(RTC_MINUTES, 0x04); - cmos_write(RTC_SECONDS, 0x58); - cmos_write(RTC_REG_A, 0x26); + set_datetime_bcd(0x02, 0x04, 0x58, 0x02, 0x02, 0x2011); - /* Since SET flag is still enabled, these are equality checks. */ - g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02); - g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04); - g_assert_cmpint(cmos_read(RTC_SECONDS), ==, 0x58); - g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02); - g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02); - g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x11); - g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x20); + assert_datetime_bcd(0x02, 0x04, 0x58, 0x02, 0x02, 0x2011); + + /* Since SET flag is still enabled, time does not advance. */ + clock_step(1000000000LL); + assert_datetime_bcd(0x02, 0x04, 0x58, 0x02, 0x02, 0x2011); /* Disable SET flag in Register B */ cmos_write(RTC_REG_B, cmos_read(RTC_REG_B) & ~REG_B_SET); - g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02); - g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04); + assert_datetime_bcd(0x02, 0x04, 0x58, 0x02, 0x02, 0x2011); - /* Since SET flag is disabled, this is an inequality check. - * We (reasonably) assume that no (sexagesimal) overflow occurs. */ - g_assert_cmpint(cmos_read(RTC_SECONDS), >=, 0x58); - g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02); - g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02); - g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x11); - g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x20); + /* Since SET flag is disabled, the clock now advances. */ + clock_step(1000000000LL); + assert_datetime_bcd(0x02, 0x04, 0x59, 0x02, 0x02, 0x2011); } #define RTC_PERIOD_CODE1 13 /* 8 Hz */ -- cgit v1.2.1 From da3a392f0562c1deffd2bc1258d2893bb397009c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 25 Jul 2017 13:46:29 +0200 Subject: rtc-test: introduce more update tests Test divider reset and UIP behavior. Signed-off-by: Paolo Bonzini --- tests/rtc-test.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/tests/rtc-test.c b/tests/rtc-test.c index 798cf5e285..d7a96cbd79 100644 --- a/tests/rtc-test.c +++ b/tests/rtc-test.c @@ -325,6 +325,30 @@ static void set_datetime_bcd(int h, int min, int s, int d, int m, int y) cmos_write(RTC_DAY_OF_MONTH, d); } +static void set_datetime_dec(int h, int min, int s, int d, int m, int y) +{ + cmos_write(RTC_HOURS, h); + cmos_write(RTC_MINUTES, min); + cmos_write(RTC_SECONDS, s); + cmos_write(RTC_YEAR, y % 100); + cmos_write(RTC_CENTURY, y / 100); + cmos_write(RTC_MONTH, m); + cmos_write(RTC_DAY_OF_MONTH, d); +} + +static void set_datetime(int mode, int h, int min, int s, int d, int m, int y) +{ + cmos_write(RTC_REG_B, mode); + + cmos_write(RTC_REG_A, 0x76); + if (mode & REG_B_DM) { + set_datetime_dec(h, min, s, d, m, y); + } else { + set_datetime_bcd(h, min, s, d, m, y); + } + cmos_write(RTC_REG_A, 0x26); +} + #define assert_time(h, m, s) \ do { \ g_assert_cmpint(cmos_read(RTC_HOURS), ==, h); \ @@ -559,6 +583,60 @@ static void register_b_set_flag(void) assert_datetime_bcd(0x02, 0x04, 0x59, 0x02, 0x02, 0x2011); } +static void divider_reset(void) +{ + /* Enable binary-coded decimal (BCD) mode in Register B*/ + cmos_write(RTC_REG_B, REG_B_24H); + + /* Enter divider reset */ + cmos_write(RTC_REG_A, 0x76); + set_datetime_bcd(0x02, 0x04, 0x58, 0x02, 0x02, 0x2011); + + assert_datetime_bcd(0x02, 0x04, 0x58, 0x02, 0x02, 0x2011); + + /* Since divider reset flag is still enabled, these are equality checks. */ + clock_step(1000000000LL); + assert_datetime_bcd(0x02, 0x04, 0x58, 0x02, 0x02, 0x2011); + + /* The first update ends 500 ms after divider reset */ + cmos_write(RTC_REG_A, 0x26); + clock_step(500000000LL - UIP_HOLD_LENGTH - 1); + g_assert_cmpint(cmos_read(RTC_REG_A) & REG_A_UIP, ==, 0); + assert_datetime_bcd(0x02, 0x04, 0x58, 0x02, 0x02, 0x2011); + + clock_step(1); + g_assert_cmpint(cmos_read(RTC_REG_A) & REG_A_UIP, !=, 0); + clock_step(UIP_HOLD_LENGTH); + g_assert_cmpint(cmos_read(RTC_REG_A) & REG_A_UIP, ==, 0); + + assert_datetime_bcd(0x02, 0x04, 0x59, 0x02, 0x02, 0x2011); +} + +static void uip_stuck(void) +{ + set_datetime(REG_B_24H, 0x02, 0x04, 0x58, 0x02, 0x02, 0x2011); + + /* The first update ends 500 ms after divider reset */ + (void)cmos_read(RTC_REG_C); + clock_step(500000000LL); + g_assert_cmpint(cmos_read(RTC_REG_A) & REG_A_UIP, ==, 0); + assert_datetime_bcd(0x02, 0x04, 0x59, 0x02, 0x02, 0x2011); + + /* UF is now set. */ + cmos_write(RTC_HOURS_ALARM, 0x02); + cmos_write(RTC_MINUTES_ALARM, 0xC0); + cmos_write(RTC_SECONDS_ALARM, 0xC0); + + /* Because the alarm will fire soon, reading register A will latch UIP. */ + clock_step(1000000000LL - UIP_HOLD_LENGTH / 2); + g_assert_cmpint(cmos_read(RTC_REG_A) & REG_A_UIP, !=, 0); + + /* Move the alarm far away. This must not cause UIP to remain stuck! */ + cmos_write(RTC_HOURS_ALARM, 0x03); + clock_step(UIP_HOLD_LENGTH); + g_assert_cmpint(cmos_read(RTC_REG_A) & REG_A_UIP, ==, 0); +} + #define RTC_PERIOD_CODE1 13 /* 8 Hz */ #define RTC_PERIOD_CODE2 15 /* 2 Hz */ @@ -625,7 +703,9 @@ int main(int argc, char **argv) qtest_add_func("/rtc/basic/bcd-12h", basic_12h_bcd); qtest_add_func("/rtc/set-year/20xx", set_year_20xx); qtest_add_func("/rtc/set-year/1980", set_year_1980); - qtest_add_func("/rtc/misc/register_b_set_flag", register_b_set_flag); + qtest_add_func("/rtc/update/register_b_set_flag", register_b_set_flag); + qtest_add_func("/rtc/update/divider-reset", divider_reset); + qtest_add_func("/rtc/update/uip-stuck", uip_stuck); qtest_add_func("/rtc/misc/fuzz-registers", fuzz_registers); qtest_add_func("/rtc/periodic/interrupt", periodic_timer); -- cgit v1.2.1 From 6a51d83a17e8213db353dd6756685fd9e3513e13 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 25 Jul 2017 14:35:35 +0200 Subject: mc146818rtc: simplify check_update_timer Move all the optimized cases together, since they all have UF=1 in common. Signed-off-by: Paolo Bonzini --- hw/timer/mc146818rtc.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index 1b8d3d7d4c..ffb2c6a33e 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -291,26 +291,14 @@ static void check_update_timer(RTCState *s) /* From the data sheet: "Holding the dividers in reset prevents * interrupts from operating, while setting the SET bit allows" - * them to occur. However, it will prevent an alarm interrupt - * from occurring, because the time of day is not updated. + * them to occur. */ if ((s->cmos_data[RTC_REG_A] & 0x60) == 0x60) { timer_del(s->update_timer); return; } - if ((s->cmos_data[RTC_REG_C] & REG_C_UF) && - (s->cmos_data[RTC_REG_B] & REG_B_SET)) { - timer_del(s->update_timer); - return; - } - if ((s->cmos_data[RTC_REG_C] & REG_C_UF) && - (s->cmos_data[RTC_REG_C] & REG_C_AF)) { - timer_del(s->update_timer); - return; - } guest_nsec = get_guest_rtc_ns(s) % NANOSECONDS_PER_SECOND; - /* if UF is clear, reprogram to next second */ next_update_time = qemu_clock_get_ns(rtc_clock) + NANOSECONDS_PER_SECOND - guest_nsec; @@ -321,7 +309,17 @@ static void check_update_timer(RTCState *s) s->next_alarm_time = next_update_time + (next_alarm_sec - 1) * NANOSECONDS_PER_SECOND; + /* If UF is already set, we might be able to optimize. */ if (s->cmos_data[RTC_REG_C] & REG_C_UF) { + /* If AF cannot change (i.e. either it is set already, or + * SET=1 and then the time is not updated), nothing to do. + */ + if ((s->cmos_data[RTC_REG_B] & REG_B_SET) || + (s->cmos_data[RTC_REG_C] & REG_C_AF)) { + timer_del(s->update_timer); + return; + } + /* UF is set, but AF is clear. Program the timer to target * the alarm time. */ next_update_time = s->next_alarm_time; -- cgit v1.2.1 From 33f21e4f044ac1c37f60edc1f1aee628be8f463b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 25 Jul 2017 14:55:38 +0200 Subject: mc146818rtc: implement UIP latching as intended In some cases, the guest can observe the wrong ordering of UIP and interrupts. This can happen if the VCPU exit is timed like this: iothread VCPU ... wait for interrupt ... t-100ns read register A t wake up, take BQL t+100ns update_in_progress return false return UIP=0 trigger interrupt The interrupt is late; the VCPU expected the falling edge of UIP to happen after the interrupt. update_in_progress is already trying to cover this case by latching UIP if the timer is going to fire soon, and the fix is documented in the commit message for commit 56038ef623 ("RTC: Update the RTC clock only when reading it", 2012-09-10). It cannot be tested with qtest, because its timing of interrupts vs. reads is exact. However, the implementation was incorrect because UIP cmos_ioport_read cleared register A instead of leaving that to rtc_update_timer. Fixing the implementation of cmos_ioport_read to match the commit message, however, breaks the "uip-stuck" test case from the previous patch. To fix it, skip update timer optimizations if UIP has been latched. Signed-off-by: Paolo Bonzini --- hw/timer/mc146818rtc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index ffb2c6a33e..82843ed03f 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -294,6 +294,7 @@ static void check_update_timer(RTCState *s) * them to occur. */ if ((s->cmos_data[RTC_REG_A] & 0x60) == 0x60) { + assert((s->cmos_data[RTC_REG_A] & REG_A_UIP) == 0); timer_del(s->update_timer); return; } @@ -309,8 +310,12 @@ static void check_update_timer(RTCState *s) s->next_alarm_time = next_update_time + (next_alarm_sec - 1) * NANOSECONDS_PER_SECOND; - /* If UF is already set, we might be able to optimize. */ - if (s->cmos_data[RTC_REG_C] & REG_C_UF) { + /* If update_in_progress latched the UIP bit, we must keep the timer + * programmed to the next second, so that UIP is cleared. Otherwise, + * if UF is already set, we might be able to optimize. + */ + if (!(s->cmos_data[RTC_REG_A] & REG_A_UIP) && + (s->cmos_data[RTC_REG_C] & REG_C_UF)) { /* If AF cannot change (i.e. either it is set already, or * SET=1 and then the time is not updated), nothing to do. */ @@ -725,12 +730,10 @@ static uint64_t cmos_ioport_read(void *opaque, hwaddr addr, ret = s->cmos_data[s->cmos_index]; break; case RTC_REG_A: + ret = s->cmos_data[s->cmos_index]; if (update_in_progress(s)) { - s->cmos_data[s->cmos_index] |= REG_A_UIP; - } else { - s->cmos_data[s->cmos_index] &= ~REG_A_UIP; + ret |= REG_A_UIP; } - ret = s->cmos_data[s->cmos_index]; break; case RTC_REG_C: ret = s->cmos_data[s->cmos_index]; -- cgit v1.2.1