author     Anthony Liguori <aliguori@amazon.com>    2013-10-18 10:01:49 -0700
committer  Anthony Liguori <aliguori@amazon.com>    2013-10-18 10:01:49 -0700
commit     989644915c281ac83f06f65923d716272ede1ed8 (patch)
tree       574ac7adcfdb572fd735b7f509c6ba95aabe4b95
parent     1cb9b64df380f232bcd142ab27c085cff0add1d8 (diff)
parent     041603fe5d4537cd165941f96bd76a31f7f662fd (diff)
download   qemu-989644915c281ac83f06f65923d716272ede1ed8.tar.gz
Merge remote-tracking branch 'bonzini/iommu-for-anthony' into staging
# By Paolo Bonzini (10) and others
# Via Paolo Bonzini
* bonzini/iommu-for-anthony:
  exec: remove qemu_safe_ram_ptr
  icount: make it thread-safe
  icount: document (future) locking rules for icount
  icount: prepare the code for future races in calling qemu_clock_warp
  icount: reorganize icount_warp_rt
  icount: use cpu_get_icount() directly
  timer: add timer_mod_anticipate and timer_mod_anticipate_ns
  timer: extract timer_mod_ns_locked and timerlist_rearm
  timer: make qemu_clock_enable sync between disable and timer's cb
  qemu-thread: add QemuEvent
  timer: protect timers_state's clock with seqlock
  seqlock: introduce read-write seqlock
  vga: Mark relevant portio lists regions as coalesced MMIO flushing
  cirrus: Mark vga io region as coalesced MMIO flushing
  portio: Allow to mark portio lists as coalesced MMIO flushing
  compatfd: switch to QemuThread
  memory: fix 128 arithmetic in info mtree

Message-id: 1382024935-28297-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
-rw-r--r--  cpus.c                       | 144
-rw-r--r--  exec.c                       |  97
-rw-r--r--  hw/display/cirrus_vga.c      |   3
-rw-r--r--  hw/display/qxl.c             |   1
-rw-r--r--  hw/display/vga.c             |   5
-rw-r--r--  include/exec/ioport.h        |   2
-rw-r--r--  include/qemu/seqlock.h       |  72
-rw-r--r--  include/qemu/thread-posix.h  |   8
-rw-r--r--  include/qemu/thread-win32.h  |   4
-rw-r--r--  include/qemu/thread.h        |   7
-rw-r--r--  include/qemu/timer.h         |  34
-rw-r--r--  ioport.c                     |   9
-rw-r--r--  memory.c                     |   4
-rw-r--r--  qemu-timer.c                 |  97
-rw-r--r--  util/compatfd.c              |  16
-rw-r--r--  util/qemu-thread-posix.c     | 116
-rw-r--r--  util/qemu-thread-win32.c     |  26
17 files changed, 502 insertions(+), 143 deletions(-)
diff --git a/cpus.c b/cpus.c
index e566297bd3..398229ecbd 100644
--- a/cpus.c
+++ b/cpus.c
@@ -37,6 +37,7 @@
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
+#include "qemu/seqlock.h"
#ifndef _WIN32
#include "qemu/compatfd.h"
@@ -97,21 +98,32 @@ static bool all_cpu_threads_idle(void)
/***********************************************************/
/* guest cycle counter */
+/* Protected by TimersState seqlock */
+
+/* Compensate for varying guest execution speed. */
+static int64_t qemu_icount_bias;
+static int64_t vm_clock_warp_start;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
-/* Compensate for varying guest execution speed. */
-static int64_t qemu_icount_bias;
+
+/* Only written by TCG thread */
+static int64_t qemu_icount;
+
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
-static int64_t vm_clock_warp_start;
-static int64_t qemu_icount;
typedef struct TimersState {
+ /* Protected by BQL. */
int64_t cpu_ticks_prev;
int64_t cpu_ticks_offset;
+
+ /* cpu_clock_offset can be read out of BQL, so protect it with
+ * this lock.
+ */
+ QemuSeqLock vm_clock_seqlock;
int64_t cpu_clock_offset;
int32_t cpu_ticks_enabled;
int64_t dummy;
@@ -120,7 +132,7 @@ typedef struct TimersState {
static TimersState timers_state;
/* Return the virtual CPU time, based on the instruction counter. */
-int64_t cpu_get_icount(void)
+static int64_t cpu_get_icount_locked(void)
{
int64_t icount;
CPUState *cpu = current_cpu;
@@ -136,7 +148,21 @@ int64_t cpu_get_icount(void)
return qemu_icount_bias + (icount << icount_time_shift);
}
+int64_t cpu_get_icount(void)
+{
+ int64_t icount;
+ unsigned start;
+
+ do {
+ start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+ icount = cpu_get_icount_locked();
+ } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+ return icount;
+}
+
/* return the host CPU cycle counter and handle stop/restart */
+/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
if (use_icount) {
@@ -157,37 +183,63 @@ int64_t cpu_get_ticks(void)
}
}
-/* return the host CPU monotonic timer and handle stop/restart */
-int64_t cpu_get_clock(void)
+static int64_t cpu_get_clock_locked(void)
{
int64_t ti;
+
if (!timers_state.cpu_ticks_enabled) {
- return timers_state.cpu_clock_offset;
+ ti = timers_state.cpu_clock_offset;
} else {
ti = get_clock();
- return ti + timers_state.cpu_clock_offset;
+ ti += timers_state.cpu_clock_offset;
}
+
+ return ti;
}
-/* enable cpu_get_ticks() */
+/* return the host CPU monotonic timer and handle stop/restart */
+int64_t cpu_get_clock(void)
+{
+ int64_t ti;
+ unsigned start;
+
+ do {
+ start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+ ti = cpu_get_clock_locked();
+ } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+ return ti;
+}
+
+/* enable cpu_get_ticks()
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
void cpu_enable_ticks(void)
{
+ /* Here, what the seqlock really protects is cpu_clock_offset. */
+ seqlock_write_lock(&timers_state.vm_clock_seqlock);
if (!timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
timers_state.cpu_clock_offset -= get_clock();
timers_state.cpu_ticks_enabled = 1;
}
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
/* disable cpu_get_ticks() : the clock is stopped. You must not call
- cpu_get_ticks() after that. */
+ * cpu_get_ticks() after that.
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
void cpu_disable_ticks(void)
{
+ /* Here, what the seqlock really protects is cpu_clock_offset. */
+ seqlock_write_lock(&timers_state.vm_clock_seqlock);
if (timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset = cpu_get_ticks();
- timers_state.cpu_clock_offset = cpu_get_clock();
+ timers_state.cpu_clock_offset = cpu_get_clock_locked();
timers_state.cpu_ticks_enabled = 0;
}
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
/* Correlation between real and virtual time is always going to be
@@ -201,13 +253,19 @@ static void icount_adjust(void)
int64_t cur_time;
int64_t cur_icount;
int64_t delta;
+
+ /* Protected by TimersState mutex. */
static int64_t last_delta;
+
/* If the VM is not running, then do nothing. */
if (!runstate_is_running()) {
return;
}
- cur_time = cpu_get_clock();
- cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+ seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ cur_time = cpu_get_clock_locked();
+ cur_icount = cpu_get_icount_locked();
+
delta = cur_icount - cur_time;
/* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
if (delta > 0
@@ -224,6 +282,7 @@ static void icount_adjust(void)
}
last_delta = delta;
qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
static void icount_adjust_rt(void *opaque)
@@ -248,30 +307,37 @@ static int64_t qemu_icount_round(int64_t count)
static void icount_warp_rt(void *opaque)
{
- if (vm_clock_warp_start == -1) {
+ /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
+ * changes from -1 to another value, so the race here is okay.
+ */
+ if (atomic_read(&vm_clock_warp_start) == -1) {
return;
}
+ seqlock_write_lock(&timers_state.vm_clock_seqlock);
if (runstate_is_running()) {
int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- int64_t warp_delta = clock - vm_clock_warp_start;
- if (use_icount == 1) {
- qemu_icount_bias += warp_delta;
- } else {
+ int64_t warp_delta;
+
+ warp_delta = clock - vm_clock_warp_start;
+ if (use_icount == 2) {
/*
* In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
* far ahead of real time.
*/
- int64_t cur_time = cpu_get_clock();
- int64_t cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ int64_t cur_time = cpu_get_clock_locked();
+ int64_t cur_icount = cpu_get_icount_locked();
int64_t delta = cur_time - cur_icount;
- qemu_icount_bias += MIN(warp_delta, delta);
- }
- if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+ warp_delta = MIN(warp_delta, delta);
}
+ qemu_icount_bias += warp_delta;
}
vm_clock_warp_start = -1;
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+
+ if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
+ qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+ }
}
void qtest_clock_warp(int64_t dest)
@@ -281,7 +347,10 @@ void qtest_clock_warp(int64_t dest)
while (clock < dest) {
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
int64_t warp = MIN(dest - clock, deadline);
+ seqlock_write_lock(&timers_state.vm_clock_seqlock);
qemu_icount_bias += warp;
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
}
@@ -290,6 +359,7 @@ void qtest_clock_warp(int64_t dest)
void qemu_clock_warp(QEMUClockType type)
{
+ int64_t clock;
int64_t deadline;
/*
@@ -309,8 +379,8 @@ void qemu_clock_warp(QEMUClockType type)
* the earliest QEMU_CLOCK_VIRTUAL timer.
*/
icount_warp_rt(NULL);
- if (!all_cpu_threads_idle() || !qemu_clock_has_timers(QEMU_CLOCK_VIRTUAL)) {
- timer_del(icount_warp_timer);
+ timer_del(icount_warp_timer);
+ if (!all_cpu_threads_idle()) {
return;
}
@@ -319,17 +389,11 @@ void qemu_clock_warp(QEMUClockType type)
return;
}
- vm_clock_warp_start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
/* We want to use the earliest deadline from ALL vm_clocks */
+ clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
-
- /* Maintain prior (possibly buggy) behaviour where if no deadline
- * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
- * INT32_MAX nanoseconds ahead, we still use INT32_MAX
- * nanoseconds.
- */
- if ((deadline < 0) || (deadline > INT32_MAX)) {
- deadline = INT32_MAX;
+ if (deadline < 0) {
+ return;
}
if (deadline > 0) {
@@ -350,7 +414,12 @@ void qemu_clock_warp(QEMUClockType type)
* you will not be sending network packets continuously instead of
* every 100ms.
*/
- timer_mod(icount_warp_timer, vm_clock_warp_start + deadline);
+ seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
+ vm_clock_warp_start = clock;
+ }
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ timer_mod_anticipate(icount_warp_timer, clock + deadline);
} else if (deadline == 0) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
@@ -371,6 +440,7 @@ static const VMStateDescription vmstate_timers = {
void configure_icount(const char *option)
{
+ seqlock_init(&timers_state.vm_clock_seqlock, NULL);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
if (!option) {
return;
diff --git a/exec.c b/exec.c
index bea2cffd94..2e31ffcb2c 100644
--- a/exec.c
+++ b/exec.c
@@ -129,7 +129,6 @@ static PhysPageMap next_map;
static void io_mem_init(void);
static void memory_map_init(void);
-static void *qemu_safe_ram_ptr(ram_addr_t addr);
static MemoryRegion io_mem_watch;
#endif
@@ -626,22 +625,39 @@ void cpu_abort(CPUArchState *env, const char *fmt, ...)
}
#if !defined(CONFIG_USER_ONLY)
+static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
+{
+ RAMBlock *block;
+
+ /* The list is protected by the iothread lock here. */
+ block = ram_list.mru_block;
+ if (block && addr - block->offset < block->length) {
+ goto found;
+ }
+ QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+ if (addr - block->offset < block->length) {
+ goto found;
+ }
+ }
+
+ fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+ abort();
+
+found:
+ ram_list.mru_block = block;
+ return block;
+}
+
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
uintptr_t length)
{
- uintptr_t start1;
+ RAMBlock *block;
+ ram_addr_t start1;
- /* we modify the TLB cache so that the dirty bit will be set again
- when accessing the range */
- start1 = (uintptr_t)qemu_safe_ram_ptr(start);
- /* Check that we don't span multiple blocks - this breaks the
- address comparisons below. */
- if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
- != (end - 1) - start) {
- abort();
- }
+ block = qemu_get_ram_block(start);
+ assert(block == qemu_get_ram_block(end - 1));
+ start1 = (uintptr_t)block->host + (start - block->offset);
cpu_tlb_reset_dirty_all(start1, length);
-
}
/* Note: start and end must be within the same ram block. */
@@ -1269,29 +1285,6 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
}
#endif /* !_WIN32 */
-static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
-{
- RAMBlock *block;
-
- /* The list is protected by the iothread lock here. */
- block = ram_list.mru_block;
- if (block && addr - block->offset < block->length) {
- goto found;
- }
- QTAILQ_FOREACH(block, &ram_list.blocks, next) {
- if (addr - block->offset < block->length) {
- goto found;
- }
- }
-
- fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
- abort();
-
-found:
- ram_list.mru_block = block;
- return block;
-}
-
/* Return a host pointer to ram allocated with qemu_ram_alloc.
With the exception of the softmmu code in this file, this should
only be used for local memory (e.g. video ram) that the device owns,
@@ -1319,40 +1312,6 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
return block->host + (addr - block->offset);
}
-/* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
- * qemu_get_ram_ptr but do not touch ram_list.mru_block.
- *
- * ??? Is this still necessary?
- */
-static void *qemu_safe_ram_ptr(ram_addr_t addr)
-{
- RAMBlock *block;
-
- /* The list is protected by the iothread lock here. */
- QTAILQ_FOREACH(block, &ram_list.blocks, next) {
- if (addr - block->offset < block->length) {
- if (xen_enabled()) {
- /* We need to check if the requested address is in the RAM
- * because we don't want to map the entire memory in QEMU.
- * In that case just map until the end of the page.
- */
- if (block->offset == 0) {
- return xen_map_cache(addr, 0, 0);
- } else if (block->host == NULL) {
- block->host =
- xen_map_cache(block->offset, block->length, 1);
- }
- }
- return block->host + (addr - block->offset);
- }
- }
-
- fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
- abort();
-
- return NULL;
-}
-
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
* but takes a size argument */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
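
The qemu_get_ram_block() helper above replaces qemu_safe_ram_ptr() with an MRU-cached range lookup: the common case of repeated accesses to the same block hits a one-entry cache before the list is walked. A minimal sketch of the same pattern, with simplified stand-in types rather than the real RAMBlock/ram_list structures:

    /* Sketch of the MRU-cached range lookup; Block and the list head are
     * simplified stand-ins for RAMBlock and ram_list. */
    #include <stdint.h>
    #include <stdlib.h>

    typedef struct Block {
        uint64_t offset;         /* start of the range */
        uint64_t length;         /* length of the range */
        struct Block *next;
    } Block;

    static Block *blocks;        /* head of the block list */
    static Block *mru_block;     /* most recently matched block */

    static Block *get_block(uint64_t addr)
    {
        Block *b = mru_block;

        /* Fast path: repeated lookups usually hit the same block.  The
         * unsigned subtraction also rejects addr < b->offset, because
         * it wraps around to a huge value. */
        if (b && addr - b->offset < b->length) {
            return b;
        }
        for (b = blocks; b; b = b->next) {
            if (addr - b->offset < b->length) {
                mru_block = b;   /* remember the hit for next time */
                return b;
            }
        }
        abort();                 /* bad offset, as in exec.c */
    }
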
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index dbd1f4a47b..e4c345fa82 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -2447,7 +2447,6 @@ static uint64_t cirrus_vga_ioport_read(void *opaque, hwaddr addr,
VGACommonState *s = &c->vga;
int val, index;
- qemu_flush_coalesced_mmio_buffer();
addr += 0x3b0;
if (vga_ioport_invalid(s, addr)) {
@@ -2544,7 +2543,6 @@ static void cirrus_vga_ioport_write(void *opaque, hwaddr addr, uint64_t val,
VGACommonState *s = &c->vga;
int index;
- qemu_flush_coalesced_mmio_buffer();
addr += 0x3b0;
/* check port range access depending on color/monochrome mode */
@@ -2843,6 +2841,7 @@ static void cirrus_init_common(CirrusVGAState *s, Object *owner,
/* Register ioport 0x3b0 - 0x3df */
memory_region_init_io(&s->cirrus_vga_io, owner, &cirrus_vga_io_ops, s,
"cirrus-io", 0x30);
+ memory_region_set_flush_coalesced(&s->cirrus_vga_io);
memory_region_add_subregion(system_io, 0x3b0, &s->cirrus_vga_io);
memory_region_init(&s->low_mem_container, owner,
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index c2cea1ce88..de835d6af8 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2073,6 +2073,7 @@ static int qxl_init_primary(PCIDevice *dev)
pci_address_space(dev), pci_address_space_io(dev), false);
portio_list_init(qxl_vga_port_list, OBJECT(dev), qxl_vga_portio_list,
vga, "vga");
+ portio_list_set_flush_coalesced(qxl_vga_port_list);
portio_list_add(qxl_vga_port_list, pci_address_space_io(dev), 0x3b0);
vga->con = graphic_console_init(DEVICE(dev), &qxl_ops, qxl);
diff --git a/hw/display/vga.c b/hw/display/vga.c
index 7b91d9c54e..b5e22849ab 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -359,8 +359,6 @@ uint32_t vga_ioport_read(void *opaque, uint32_t addr)
VGACommonState *s = opaque;
int val, index;
- qemu_flush_coalesced_mmio_buffer();
-
if (vga_ioport_invalid(s, addr)) {
val = 0xff;
} else {
@@ -453,8 +451,6 @@ void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
VGACommonState *s = opaque;
int index;
- qemu_flush_coalesced_mmio_buffer();
-
/* check port range access depending on color/monochrome mode */
if (vga_ioport_invalid(s, addr)) {
return;
@@ -2373,6 +2369,7 @@ void vga_init(VGACommonState *s, Object *obj, MemoryRegion *address_space,
memory_region_set_coalescing(vga_io_memory);
if (init_vga_ports) {
portio_list_init(vga_port_list, obj, vga_ports, s, "vga");
+ portio_list_set_flush_coalesced(vga_port_list);
portio_list_add(vga_port_list, address_space_io, 0x3b0);
}
if (vbe_ports) {
diff --git a/include/exec/ioport.h b/include/exec/ioport.h
index b3848be684..3bd6722627 100644
--- a/include/exec/ioport.h
+++ b/include/exec/ioport.h
@@ -64,11 +64,13 @@ typedef struct PortioList {
struct MemoryRegion **regions;
void *opaque;
const char *name;
+ bool flush_coalesced_mmio;
} PortioList;
void portio_list_init(PortioList *piolist, Object *owner,
const struct MemoryRegionPortio *callbacks,
void *opaque, const char *name);
+void portio_list_set_flush_coalesced(PortioList *piolist);
void portio_list_destroy(PortioList *piolist);
void portio_list_add(PortioList *piolist,
struct MemoryRegion *address_space,
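
The new flush_coalesced_mmio flag is consumed in the ioport.c hunk further down, where portio_list_add_1() checks it while creating the regions; the flag must therefore be set before portio_list_add() is called. A hedged call-sequence sketch, with identifiers mirroring the qxl.c hunk above rather than a complete device model:

    /* Illustrative usage of the new API (names follow the qxl.c hunk). */
    portio_list_init(port_list, OBJECT(dev), port_entries, state, "vga");
    portio_list_set_flush_coalesced(port_list);   /* must precede _add */
    portio_list_add(port_list, pci_address_space_io(dev), 0x3b0);
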
diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
new file mode 100644
index 0000000000..3ff118a1a1
--- /dev/null
+++ b/include/qemu/seqlock.h
@@ -0,0 +1,72 @@
+/*
+ * Seqlock implementation for QEMU
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Author:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_SEQLOCK_H
+#define QEMU_SEQLOCK_H 1
+
+#include <qemu/atomic.h>
+#include <qemu/thread.h>
+
+typedef struct QemuSeqLock QemuSeqLock;
+
+struct QemuSeqLock {
+ QemuMutex *mutex;
+ unsigned sequence;
+};
+
+static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
+{
+ sl->mutex = mutex;
+ sl->sequence = 0;
+}
+
+/* Lock out other writers and update the count. */
+static inline void seqlock_write_lock(QemuSeqLock *sl)
+{
+ if (sl->mutex) {
+ qemu_mutex_lock(sl->mutex);
+ }
+ ++sl->sequence;
+
+ /* Write sequence before updating other fields. */
+ smp_wmb();
+}
+
+static inline void seqlock_write_unlock(QemuSeqLock *sl)
+{
+ /* Write other fields before finalizing sequence. */
+ smp_wmb();
+
+ ++sl->sequence;
+ if (sl->mutex) {
+ qemu_mutex_unlock(sl->mutex);
+ }
+}
+
+static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
+{
+ /* Always fail if a write is in progress. */
+ unsigned ret = sl->sequence & ~1;
+
+ /* Read sequence before reading other fields. */
+ smp_rmb();
+ return ret;
+}
+
+static inline int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
+{
+ /* Read other fields before reading final sequence. */
+ smp_rmb();
+ return unlikely(sl->sequence != start);
+}
+
+#endif
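
Writers bump the sequence to an odd value, update the protected fields, then bump it back to even; readers retry until they observe the same even sequence before and after their reads. A minimal usage sketch of the reader/writer pair, with an illustrative stand-in structure in place of the real TimersState (initialize with seqlock_init(&state.lock, NULL)):

    /* Illustrative reader/writer pair for the seqlock above. */
    static struct {
        QemuSeqLock lock;
        int64_t offset;          /* field protected by the seqlock */
    } state;

    static int64_t read_offset(void)
    {
        int64_t val;
        unsigned start;

        do {
            start = seqlock_read_begin(&state.lock);
            val = state.offset;
        } while (seqlock_read_retry(&state.lock, start));

        return val;
    }

    static void write_offset(int64_t val)
    {
        /* With a NULL mutex the callers must already be serialized,
         * e.g. by the BQL, as in the cpus.c hunks above. */
        seqlock_write_lock(&state.lock);
        state.offset = val;
        seqlock_write_unlock(&state.lock);
    }
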
diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index 361566abc4..eb5c7a1da1 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -21,6 +21,14 @@ struct QemuSemaphore {
#endif
};
+struct QemuEvent {
+#ifndef __linux__
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+#endif
+ unsigned value;
+};
+
struct QemuThread {
pthread_t thread;
};
diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
index 13adb958f0..3d58081bed 100644
--- a/include/qemu/thread-win32.h
+++ b/include/qemu/thread-win32.h
@@ -17,6 +17,10 @@ struct QemuSemaphore {
HANDLE sema;
};
+struct QemuEvent {
+ HANDLE event;
+};
+
typedef struct QemuThreadData QemuThreadData;
struct QemuThread {
QemuThreadData *data;
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index c02404b9fb..3e32c6531c 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -7,6 +7,7 @@
typedef struct QemuMutex QemuMutex;
typedef struct QemuCond QemuCond;
typedef struct QemuSemaphore QemuSemaphore;
+typedef struct QemuEvent QemuEvent;
typedef struct QemuThread QemuThread;
#ifdef _WIN32
@@ -45,6 +46,12 @@ void qemu_sem_wait(QemuSemaphore *sem);
int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
void qemu_sem_destroy(QemuSemaphore *sem);
+void qemu_event_init(QemuEvent *ev, bool init);
+void qemu_event_set(QemuEvent *ev);
+void qemu_event_reset(QemuEvent *ev);
+void qemu_event_wait(QemuEvent *ev);
+void qemu_event_destroy(QemuEvent *ev);
+
void qemu_thread_create(QemuThread *thread,
void *(*start_routine)(void *),
void *arg, int mode);
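
QemuEvent is a binary latch: qemu_event_wait() blocks while the event is reset and returns once it is set. A minimal sketch of the pattern this series relies on in qemu-timer.c; the names (done_ev, worker_pass, controller_drain) are illustrative and not part of the commit:

    /* A worker brackets each pass with reset/set, and a controller
     * waits for the current pass to finish.  Assumes a prior
     * qemu_event_init(&done_ev, false). */
    static QemuEvent done_ev;

    static void worker_pass(void)
    {
        qemu_event_reset(&done_ev);  /* pass in progress */
        /* ... run one batch of work ... */
        qemu_event_set(&done_ev);    /* pass complete */
    }

    static void controller_drain(void)
    {
        qemu_event_wait(&done_ev);   /* returns once no pass is running */
    }
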
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index b58903bef5..5afcffc3f9 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -189,6 +189,12 @@ void qemu_clock_notify(QEMUClockType type);
* @enabled: true to enable, false to disable
*
* Enable or disable a clock
+ * Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers. Thus, this function should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ *
+ * Caller should hold BQL.
*/
void qemu_clock_enable(QEMUClockType type, bool enabled);
@@ -539,6 +545,19 @@ void timer_del(QEMUTimer *ts);
void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
/**
+ * timer_mod_anticipate_ns:
+ * @ts: the timer
+ * @expire_time: the expiry time in nanoseconds
+ *
+ * Modify a timer to expire at @expire_time or the current time,
+ * whichever comes earlier.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
+ */
+void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time);
+
+/**
* timer_mod:
* @ts: the timer
* @expire_time: the expire time in the units associated with the timer
@@ -552,6 +571,19 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
void timer_mod(QEMUTimer *ts, int64_t expire_timer);
/**
+ * timer_mod_anticipate:
+ * @ts: the timer
+ * @expire_time: the expiry time in nanoseconds
+ *
+ * Modify a timer to expire at @expire_time or the current time, whichever
+ * comes earlier, taking into account the scale associated with the timer.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
+ */
+void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time);
+
+/**
* timer_pending:
* @ts: the timer
*
@@ -653,7 +685,9 @@ static inline int64_t qemu_soonest_timeout(int64_t timeout1, int64_t timeout2)
void init_clocks(void);
int64_t cpu_get_ticks(void);
+/* Caller must hold BQL */
void cpu_enable_ticks(void);
+/* Caller must hold BQL */
void cpu_disable_ticks(void);
static inline int64_t get_ticks_per_sec(void)
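
timer_mod_anticipate() only ever moves the deadline earlier, so concurrent callers racing to arm the same wakeup can only tighten it. A condensed sketch, assuming a `warp_timer` and a computed `deadline` in scope, mirroring the qemu_clock_warp() hunk in cpus.c above:

    /* Ensure a wakeup fires no later than `deadline` ns from now; if
     * the timer is already pending earlier, the call is a no-op. */
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    timer_mod_anticipate(warp_timer, clock + deadline);
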
diff --git a/ioport.c b/ioport.c
index 707cce88ab..3d91e79edc 100644
--- a/ioport.c
+++ b/ioport.c
@@ -139,6 +139,12 @@ void portio_list_init(PortioList *piolist,
piolist->opaque = opaque;
piolist->owner = owner;
piolist->name = name;
+ piolist->flush_coalesced_mmio = false;
+}
+
+void portio_list_set_flush_coalesced(PortioList *piolist)
+{
+ piolist->flush_coalesced_mmio = true;
}
void portio_list_destroy(PortioList *piolist)
@@ -231,6 +237,9 @@ static void portio_list_add_1(PortioList *piolist,
*/
memory_region_init_io(&mrpio->mr, piolist->owner, &portio_ops, mrpio,
piolist->name, off_high - off_low);
+ if (piolist->flush_coalesced_mmio) {
+ memory_region_set_flush_coalesced(&mrpio->mr);
+ }
memory_region_add_subregion(piolist->address_space,
start + off_low, &mrpio->mr);
piolist->regions[piolist->nr] = &mrpio->mr;
diff --git a/memory.c b/memory.c
index 5a10fd0bde..7f1f2661a5 100644
--- a/memory.c
+++ b/memory.c
@@ -1809,7 +1809,9 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
mr->alias->name,
mr->alias_offset,
mr->alias_offset
- + (hwaddr)int128_get64(mr->size) - 1);
+ + (int128_nz(mr->size) ?
+ (hwaddr)int128_get64(int128_sub(mr->size,
+ int128_one())) : 0));
} else {
mon_printf(f,
TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): %s\n",
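
The memory.c fix avoids truncating mr->size before subtracting: a region spanning the whole 64-bit address space has size 2^64, which does not fit in a hwaddr. A worked illustration, assuming int128_2_64() from qemu/int128.h:

    /* Why the subtraction must happen in 128-bit arithmetic. */
    Int128 size = int128_2_64();     /* 2^64: exceeds any hwaddr */
    hwaddr end = int128_get64(int128_sub(size, int128_one()));
    /* end == 0xffffffffffffffff; calling int128_get64(size) directly
     * would be invalid because the value exceeds 64 bits.  The
     * int128_nz() guard in the hunk handles the zero-sized case. */
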
diff --git a/qemu-timer.c b/qemu-timer.c
index 6b62e88669..e15ce477cc 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -45,6 +45,7 @@
/* timers */
typedef struct QEMUClock {
+ /* We rely on BQL to protect the timerlists */
QLIST_HEAD(, QEMUTimerList) timerlists;
NotifierList reset_notifiers;
@@ -71,6 +72,9 @@ struct QEMUTimerList {
QLIST_ENTRY(QEMUTimerList) list;
QEMUTimerListNotifyCB *notify_cb;
void *notify_opaque;
+
+ /* lightweight way to mark that the timerlist has finished running */
+ QemuEvent timers_done_ev;
};
/**
@@ -99,6 +103,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
QEMUClock *clock = qemu_clock_ptr(type);
timer_list = g_malloc0(sizeof(QEMUTimerList));
+ qemu_event_init(&timer_list->timers_done_ev, false);
timer_list->clock = clock;
timer_list->notify_cb = cb;
timer_list->notify_opaque = opaque;
@@ -143,13 +148,25 @@ void qemu_clock_notify(QEMUClockType type)
}
}
+/* Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers. Thus, this function should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ *
+ * Caller should hold BQL.
+ */
void qemu_clock_enable(QEMUClockType type, bool enabled)
{
QEMUClock *clock = qemu_clock_ptr(type);
+ QEMUTimerList *tl;
bool old = clock->enabled;
clock->enabled = enabled;
if (enabled && !old) {
qemu_clock_notify(type);
+ } else if (!enabled && old) {
+ QLIST_FOREACH(tl, &clock->timerlists, list) {
+ qemu_event_wait(&tl->timers_done_ev);
+ }
}
}
@@ -338,6 +355,34 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
}
}
+static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
+ QEMUTimer *ts, int64_t expire_time)
+{
+ QEMUTimer **pt, *t;
+
+ /* add the timer in the sorted list */
+ pt = &timer_list->active_timers;
+ for (;;) {
+ t = *pt;
+ if (!timer_expired_ns(t, expire_time)) {
+ break;
+ }
+ pt = &t->next;
+ }
+ ts->expire_time = MAX(expire_time, 0);
+ ts->next = *pt;
+ *pt = ts;
+
+ return pt == &timer_list->active_timers;
+}
+
+static void timerlist_rearm(QEMUTimerList *timer_list)
+{
+ /* Interrupt execution to force deadline recalculation. */
+ qemu_clock_warp(timer_list->clock->type);
+ timerlist_notify(timer_list);
+}
+
/* stop a timer, but do not dealloc it */
void timer_del(QEMUTimer *ts)
{
@@ -353,30 +398,39 @@ void timer_del(QEMUTimer *ts)
void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
{
QEMUTimerList *timer_list = ts->timer_list;
- QEMUTimer **pt, *t;
+ bool rearm;
qemu_mutex_lock(&timer_list->active_timers_lock);
timer_del_locked(timer_list, ts);
+ rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
+ qemu_mutex_unlock(&timer_list->active_timers_lock);
- /* add the timer in the sorted list */
- pt = &timer_list->active_timers;
- for(;;) {
- t = *pt;
- if (!timer_expired_ns(t, expire_time)) {
- break;
+ if (rearm) {
+ timerlist_rearm(timer_list);
+ }
+}
+
+/* modify the current timer so that it fires when current_time
+ >= expire_time or at its current deadline, whichever comes earlier.
+ The corresponding callback will be called. */
+void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
+{
+ QEMUTimerList *timer_list = ts->timer_list;
+ bool rearm;
+
+ qemu_mutex_lock(&timer_list->active_timers_lock);
+ if (ts->expire_time == -1 || ts->expire_time > expire_time) {
+ if (ts->expire_time != -1) {
+ timer_del_locked(timer_list, ts);
}
- pt = &t->next;
+ rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
+ } else {
+ rearm = false;
}
- ts->expire_time = MAX(expire_time, 0);
- ts->next = *pt;
- *pt = ts;
qemu_mutex_unlock(&timer_list->active_timers_lock);
- /* Rearm if necessary */
- if (pt == &timer_list->active_timers) {
- /* Interrupt execution to force deadline recalculation. */
- qemu_clock_warp(timer_list->clock->type);
- timerlist_notify(timer_list);
+ if (rearm) {
+ timerlist_rearm(timer_list);
}
}
@@ -385,6 +439,11 @@ void timer_mod(QEMUTimer *ts, int64_t expire_time)
timer_mod_ns(ts, expire_time * ts->scale);
}
+void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
+{
+ timer_mod_anticipate_ns(ts, expire_time * ts->scale);
+}
+
bool timer_pending(QEMUTimer *ts)
{
return ts->expire_time >= 0;
@@ -403,8 +462,9 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
QEMUTimerCB *cb;
void *opaque;
+ qemu_event_reset(&timer_list->timers_done_ev);
if (!timer_list->clock->enabled) {
- return progress;
+ goto out;
}
current_time = qemu_clock_get_ns(timer_list->clock->type);
@@ -428,6 +488,9 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
cb(opaque);
progress = true;
}
+
+out:
+ qemu_event_set(&timer_list->timers_done_ev);
return progress;
}
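
Taken together, the reset/set bracket in timerlist_run_timers() and the wait loop in qemu_clock_enable() form a lightweight barrier. A condensed view, summarizing only the hunks above:

    /* Condensed view of the synchronization added above:
     *
     *   timerlist_run_timers()             qemu_clock_enable(type, false)
     *   ----------------------             ------------------------------
     *   qemu_event_reset(&timers_done_ev)
     *   run expired timer callbacks        clock->enabled = false
     *   qemu_event_set(&timers_done_ev)    qemu_event_wait(&timers_done_ev)
     *
     * qemu_clock_enable(false) returns only after every timer list of
     * the clock has finished its current pass, so no callback of a
     * disabled clock can still be running.  This is also why it must
     * not be called from such a callback: the wait would never end. */
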
diff --git a/util/compatfd.c b/util/compatfd.c
index 9cf3f2834d..430a41c855 100644
--- a/util/compatfd.c
+++ b/util/compatfd.c
@@ -15,9 +15,9 @@
#include "qemu-common.h"
#include "qemu/compatfd.h"
+#include "qemu/thread.h"
#include <sys/syscall.h>
-#include <pthread.h>
struct sigfd_compat_info
{
@@ -28,10 +28,6 @@ struct sigfd_compat_info
static void *sigwait_compat(void *opaque)
{
struct sigfd_compat_info *info = opaque;
- sigset_t all;
-
- sigfillset(&all);
- pthread_sigmask(SIG_BLOCK, &all, NULL);
while (1) {
int sig;
@@ -71,9 +67,8 @@ static void *sigwait_compat(void *opaque)
static int qemu_signalfd_compat(const sigset_t *mask)
{
- pthread_attr_t attr;
- pthread_t tid;
struct sigfd_compat_info *info;
+ QemuThread thread;
int fds[2];
info = malloc(sizeof(*info));
@@ -93,12 +88,7 @@ static int qemu_signalfd_compat(const sigset_t *mask)
memcpy(&info->mask, mask, sizeof(*mask));
info->fd = fds[1];
- pthread_attr_init(&attr);
- pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-
- pthread_create(&tid, &attr, sigwait_compat, info);
-
- pthread_attr_destroy(&attr);
+ qemu_thread_create(&thread, sigwait_compat, info, QEMU_THREAD_DETACHED);
return fds[0];
}
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 4de133e7b2..37dd298631 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -20,7 +20,12 @@
#include <limits.h>
#include <unistd.h>
#include <sys/time.h>
+#ifdef __linux__
+#include <sys/syscall.h>
+#include <linux/futex.h>
+#endif
#include "qemu/thread.h"
+#include "qemu/atomic.h"
static void error_exit(int err, const char *msg)
{
@@ -272,6 +277,117 @@ void qemu_sem_wait(QemuSemaphore *sem)
#endif
}
+#ifdef __linux__
+#define futex(...) syscall(__NR_futex, __VA_ARGS__)
+
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+ futex(ev, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+ futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
+}
+#else
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+ if (n == 1) {
+ pthread_cond_signal(&ev->cond);
+ } else {
+ pthread_cond_broadcast(&ev->cond);
+ }
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+ pthread_mutex_lock(&ev->lock);
+ if (ev->value == val) {
+ pthread_cond_wait(&ev->cond, &ev->lock);
+ }
+ pthread_mutex_unlock(&ev->lock);
+}
+#endif
+
+/* Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy.
+ */
+
+#define EV_SET 0
+#define EV_FREE 1
+#define EV_BUSY -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef __linux__
+ pthread_mutex_init(&ev->lock, NULL);
+ pthread_cond_init(&ev->cond, NULL);
+#endif
+
+ ev->value = (init ? EV_SET : EV_FREE);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+#ifndef __linux__
+ pthread_mutex_destroy(&ev->lock);
+ pthread_cond_destroy(&ev->cond);
+#endif
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+ if (atomic_mb_read(&ev->value) != EV_SET) {
+ if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+ /* There were waiters, wake them up. */
+ futex_wake(ev, INT_MAX);
+ }
+ }
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+ if (atomic_mb_read(&ev->value) == EV_SET) {
+ /*
+ * If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ atomic_or(&ev->value, EV_FREE);
+ }
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+ unsigned value;
+
+ value = atomic_mb_read(&ev->value);
+ if (value != EV_SET) {
+ if (value == EV_FREE) {
+ /*
+ * Leave the event reset and tell qemu_event_set that there
+ * are waiters. No need to retry, because there cannot be
+ * a concurrent busy->free transition. After the CAS, the
+ * event will be either set or busy.
+ */
+ if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+ return;
+ }
+ }
+ futex_wait(ev, EV_BUSY);
+ }
+}
+
+
void qemu_thread_create(QemuThread *thread,
void *(*start_routine)(void*),
void *arg, int mode)
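
On Linux the whole event is packed into a single word so the futex syscalls can operate on it directly; the third EV_BUSY state lets qemu_event_set() skip the wake syscall when nobody is waiting. A small hedged smoke test (hypothetical, not part of this commit) that exercises the free->busy->set transitions:

    /* Hypothetical smoke test: one thread blocks in qemu_event_wait()
     * while another sets the event. */
    #include "qemu/thread.h"

    static QemuEvent ev;

    static void *waiter(void *arg)
    {
        qemu_event_wait(&ev);            /* free->busy, then futex_wait */
        return NULL;
    }

    int main(void)
    {
        QemuThread t;

        qemu_event_init(&ev, false);     /* start in the free state */
        qemu_thread_create(&t, waiter, NULL, QEMU_THREAD_JOINABLE);
        qemu_event_set(&ev);             /* busy->set plus futex_wake */
        qemu_thread_join(&t);
        qemu_event_destroy(&ev);
        return 0;
    }
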
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 517878dcc1..27a5217769 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -227,6 +227,32 @@ void qemu_sem_wait(QemuSemaphore *sem)
}
}
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+ /* Manual reset. */
+ ev->event = CreateEvent(NULL, TRUE, init, NULL);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+ CloseHandle(ev->event);
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+ SetEvent(ev->event);
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+ ResetEvent(ev->event);
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+ WaitForSingleObject(ev->event, INFINITE);
+}
+
struct QemuThreadData {
/* Passed to win32_start_routine. */
void *(*start_routine)(void *);