author     Peter Maydell <peter.maydell@linaro.org>    2016-06-13 10:12:44 +0100
committer  Peter Maydell <peter.maydell@linaro.org>    2016-06-13 10:12:44 +0100
commit     da2fdd0bd1514a44309dd5be162ebfb6c166a716 (patch)
tree       6c7a1856e3889fd91bcaedd2c89f6cebb702e805
parent     a93c1bdf0bd4689287094ddb2aae3dc907da3535 (diff)
parent     329844d4bc3d5a11f1e63938d66f74c9584c7abc (diff)
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160611' into staging
TB hashing improvements

# gpg: Signature made Sun 12 Jun 2016 01:12:50 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC 16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20160611:
  translate-all: add tb hash bucket info to 'info jit' dump
  tb hash: track translated blocks with qht
  qht: add test-qht-par to invoke qht-bench from 'check' target
  qht: add qht-bench, a performance benchmark
  qht: add test program
  qht: QEMU's fast, resizable and scalable Hash Table
  qdist: add test program
  qdist: add module to represent frequency distributions of data
  tb hash: hash phys_pc, pc, and flags with xxhash
  exec: add tb_hash_func5, derived from xxhash
  qemu-thread: add simple test-and-set spinlock
  include/processor.h: define cpu_relax()
  seqlock: rename write_lock/unlock to write_begin/end
  seqlock: remove optional mutex
  compiler.h: add QEMU_ALIGNED() to enforce struct alignment

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--  cpu-exec.c                 |  92
-rw-r--r--  cpus.c                     |  30
-rw-r--r--  include/exec/exec-all.h    |   2
-rw-r--r--  include/exec/tb-context.h  |   7
-rw-r--r--  include/exec/tb-hash-xx.h  |  94
-rw-r--r--  include/exec/tb-hash.h     |   7
-rw-r--r--  include/qemu/compiler.h    |   2
-rw-r--r--  include/qemu/processor.h   |  30
-rw-r--r--  include/qemu/qdist.h       |  63
-rw-r--r--  include/qemu/qht.h         | 183
-rw-r--r--  include/qemu/seqlock.h     |  14
-rw-r--r--  include/qemu/thread.h      |  35
-rw-r--r--  tests/.gitignore           |   4
-rw-r--r--  tests/Makefile.include     |  14
-rw-r--r--  tests/qht-bench.c          | 488
-rw-r--r--  tests/test-qdist.c         | 384
-rw-r--r--  tests/test-qht-par.c       |  56
-rw-r--r--  tests/test-qht.c           | 159
-rw-r--r--  translate-all.c            | 131
-rw-r--r--  util/Makefile.objs         |   2
-rw-r--r--  util/qdist.c               | 395
-rw-r--r--  util/qht.c                 | 833
22 files changed, 2893 insertions(+), 132 deletions(-)
diff --git a/cpu-exec.c b/cpu-exec.c
index f7c642f4a9..b840e1d2dd 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -225,57 +225,57 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
}
#endif
-static TranslationBlock *tb_find_physical(CPUState *cpu,
- target_ulong pc,
- target_ulong cs_base,
- uint32_t flags)
+struct tb_desc {
+ target_ulong pc;
+ target_ulong cs_base;
+ CPUArchState *env;
+ tb_page_addr_t phys_page1;
+ uint32_t flags;
+};
+
+static bool tb_cmp(const void *p, const void *d)
{
- CPUArchState *env = (CPUArchState *)cpu->env_ptr;
- TranslationBlock *tb, **tb_hash_head, **ptb1;
- unsigned int h;
- tb_page_addr_t phys_pc, phys_page1;
-
- /* find translated block using physical mappings */
- phys_pc = get_page_addr_code(env, pc);
- phys_page1 = phys_pc & TARGET_PAGE_MASK;
- h = tb_phys_hash_func(phys_pc);
-
- /* Start at head of the hash entry */
- ptb1 = tb_hash_head = &tcg_ctx.tb_ctx.tb_phys_hash[h];
- tb = *ptb1;
-
- while (tb) {
- if (tb->pc == pc &&
- tb->page_addr[0] == phys_page1 &&
- tb->cs_base == cs_base &&
- tb->flags == flags) {
-
- if (tb->page_addr[1] == -1) {
- /* done, we have a match */
- break;
- } else {
- /* check next page if needed */
- target_ulong virt_page2 = (pc & TARGET_PAGE_MASK) +
- TARGET_PAGE_SIZE;
- tb_page_addr_t phys_page2 = get_page_addr_code(env, virt_page2);
-
- if (tb->page_addr[1] == phys_page2) {
- break;
- }
+ const TranslationBlock *tb = p;
+ const struct tb_desc *desc = d;
+
+ if (tb->pc == desc->pc &&
+ tb->page_addr[0] == desc->phys_page1 &&
+ tb->cs_base == desc->cs_base &&
+ tb->flags == desc->flags) {
+ /* check next page if needed */
+ if (tb->page_addr[1] == -1) {
+ return true;
+ } else {
+ tb_page_addr_t phys_page2;
+ target_ulong virt_page2;
+
+ virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+ phys_page2 = get_page_addr_code(desc->env, virt_page2);
+ if (tb->page_addr[1] == phys_page2) {
+ return true;
}
}
-
- ptb1 = &tb->phys_hash_next;
- tb = *ptb1;
}
+ return false;
+}
- if (tb) {
- /* Move the TB to the head of the list */
- *ptb1 = tb->phys_hash_next;
- tb->phys_hash_next = *tb_hash_head;
- *tb_hash_head = tb;
- }
- return tb;
+static TranslationBlock *tb_find_physical(CPUState *cpu,
+ target_ulong pc,
+ target_ulong cs_base,
+ uint32_t flags)
+{
+ tb_page_addr_t phys_pc;
+ struct tb_desc desc;
+ uint32_t h;
+
+ desc.env = (CPUArchState *)cpu->env_ptr;
+ desc.cs_base = cs_base;
+ desc.flags = flags;
+ desc.pc = pc;
+ phys_pc = get_page_addr_code(desc.env, pc);
+ desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
+ h = tb_hash_func(phys_pc, pc, flags);
+ return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
}
static TranslationBlock *tb_find_slow(CPUState *cpu,
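The rewrite above replaces the open-coded hash-chain walk with a single qht_lookup() call: the caller packs its match criteria into a descriptor, computes the hash once, and the comparison callback decides whether a candidate entry matches. A minimal sketch of that pattern, using hypothetical item/key names that are not part of this patch:

#include "qemu/qht.h"

struct item {
    long key;
};

/* comparison callback: @obj is a stored entry, @userp the search key */
static bool item_cmp(const void *obj, const void *userp)
{
    const struct item *it = obj;
    const long *key = userp;

    return it->key == *key;
}

static struct item *item_find(struct qht *table, long key, uint32_t hash)
{
    return qht_lookup(table, item_cmp, &key, hash);
}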
diff --git a/cpus.c b/cpus.c
index 326742f445..84c3520d44 100644
--- a/cpus.c
+++ b/cpus.c
@@ -249,13 +249,13 @@ int64_t cpu_get_clock(void)
void cpu_enable_ticks(void)
{
/* Here, the real thing protected by seqlock is cpu_clock_offset. */
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (!timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
timers_state.cpu_clock_offset -= get_clock();
timers_state.cpu_ticks_enabled = 1;
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* disable cpu_get_ticks() : the clock is stopped. You must not call
@@ -265,13 +265,13 @@ void cpu_enable_ticks(void)
void cpu_disable_ticks(void)
{
/* Here, the real thing protected by seqlock is cpu_clock_offset. */
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset += cpu_get_host_ticks();
timers_state.cpu_clock_offset = cpu_get_clock_locked();
timers_state.cpu_ticks_enabled = 0;
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* Correlation between real and virtual time is always going to be
@@ -294,7 +294,7 @@ static void icount_adjust(void)
return;
}
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
cur_time = cpu_get_clock_locked();
cur_icount = cpu_get_icount_locked();
@@ -315,7 +315,7 @@ static void icount_adjust(void)
last_delta = delta;
timers_state.qemu_icount_bias = cur_icount
- (timers_state.qemu_icount << icount_time_shift);
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
}
static void icount_adjust_rt(void *opaque)
@@ -355,7 +355,7 @@ static void icount_warp_rt(void)
return;
}
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (runstate_is_running()) {
int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
cpu_get_clock_locked());
@@ -374,7 +374,7 @@ static void icount_warp_rt(void)
timers_state.qemu_icount_bias += warp_delta;
}
vm_clock_warp_start = -1;
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
@@ -399,9 +399,9 @@ void qtest_clock_warp(int64_t dest)
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
timers_state.qemu_icount_bias += warp;
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
@@ -468,9 +468,9 @@ void qemu_start_warp_timer(void)
* It is useful when we want a deterministic execution time,
* isolated from host latencies.
*/
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
timers_state.qemu_icount_bias += deadline;
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
} else {
/*
@@ -481,11 +481,11 @@ void qemu_start_warp_timer(void)
* you will not be sending network packets continuously instead of
* every 100ms.
*/
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
vm_clock_warp_start = clock;
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
timer_mod_anticipate(icount_warp_timer, clock + deadline);
}
} else if (deadline == 0) {
@@ -621,7 +621,7 @@ int cpu_throttle_get_percentage(void)
void cpu_ticks_init(void)
{
- seqlock_init(&timers_state.vm_clock_seqlock, NULL);
+ seqlock_init(&timers_state.vm_clock_seqlock);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
cpu_throttle_timer_tick, NULL);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index e076397e2f..c1f59fa59d 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -215,8 +215,6 @@ struct TranslationBlock {
void *tc_ptr; /* pointer to the translated code */
uint8_t *tc_search; /* pointer to search data */
- /* next matching tb for physical address. */
- struct TranslationBlock *phys_hash_next;
/* original tb when cflags has CF_NOCACHE */
struct TranslationBlock *orig_tb;
/* first and second physical page containing code. The lower bit
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index 5efe3d9087..e209c1c28c 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -21,9 +21,10 @@
#define QEMU_TB_CONTEXT_H_
#include "qemu/thread.h"
+#include "qemu/qht.h"
-#define CODE_GEN_PHYS_HASH_BITS 15
-#define CODE_GEN_PHYS_HASH_SIZE (1 << CODE_GEN_PHYS_HASH_BITS)
+#define CODE_GEN_HTABLE_BITS 15
+#define CODE_GEN_HTABLE_SIZE (1 << CODE_GEN_HTABLE_BITS)
typedef struct TranslationBlock TranslationBlock;
typedef struct TBContext TBContext;
@@ -31,7 +32,7 @@ typedef struct TBContext TBContext;
struct TBContext {
TranslationBlock *tbs;
- TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+ struct qht htable;
int nb_tbs;
/* any access to the tbs or the page table must use this lock */
QemuMutex tb_lock;
diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h
new file mode 100644
index 0000000000..9f3fc05636
--- /dev/null
+++ b/include/exec/tb-hash-xx.h
@@ -0,0 +1,94 @@
+/*
+ * xxHash - Fast Hash algorithm
+ * Copyright (C) 2012-2016, Yann Collet
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * + Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - xxHash source repository : https://github.com/Cyan4973/xxHash
+ */
+#ifndef EXEC_TB_HASH_XX
+#define EXEC_TB_HASH_XX
+
+#include "qemu/bitops.h"
+
+#define PRIME32_1 2654435761U
+#define PRIME32_2 2246822519U
+#define PRIME32_3 3266489917U
+#define PRIME32_4 668265263U
+#define PRIME32_5 374761393U
+
+#define TB_HASH_XX_SEED 1
+
+/*
+ * xxhash32, customized for input variables that are not guaranteed to be
+ * contiguous in memory.
+ */
+static inline
+uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e)
+{
+ uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
+ uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
+ uint32_t v3 = TB_HASH_XX_SEED + 0;
+ uint32_t v4 = TB_HASH_XX_SEED - PRIME32_1;
+ uint32_t a = a0 >> 32;
+ uint32_t b = a0;
+ uint32_t c = b0 >> 32;
+ uint32_t d = b0;
+ uint32_t h32;
+
+ v1 += a * PRIME32_2;
+ v1 = rol32(v1, 13);
+ v1 *= PRIME32_1;
+
+ v2 += b * PRIME32_2;
+ v2 = rol32(v2, 13);
+ v2 *= PRIME32_1;
+
+ v3 += c * PRIME32_2;
+ v3 = rol32(v3, 13);
+ v3 *= PRIME32_1;
+
+ v4 += d * PRIME32_2;
+ v4 = rol32(v4, 13);
+ v4 *= PRIME32_1;
+
+ h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
+ h32 += 20;
+
+ h32 += e * PRIME32_3;
+ h32 = rol32(h32, 17) * PRIME32_4;
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
+
+#endif /* EXEC_TB_HASH_XX */
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 0f4e8a08af..1d0200bc91 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -20,6 +20,8 @@
#ifndef EXEC_TB_HASH
#define EXEC_TB_HASH
+#include "exec/tb-hash-xx.h"
+
/* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
addresses on the same page. The top bits are the same. This allows
TLB invalidation to quickly clear a subset of the hash table. */
@@ -43,9 +45,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
| (tmp & TB_JMP_ADDR_MASK));
}
-static inline unsigned int tb_phys_hash_func(tb_page_addr_t pc)
+static inline
+uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags)
{
- return (pc >> 2) & (CODE_GEN_PHYS_HASH_SIZE - 1);
+ return tb_hash_func5(phys_pc, pc, flags);
}
#endif
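Taken together, tb_hash_func() now feeds all three fields that identify a TB (phys_pc, pc, flags) into xxhash, instead of deriving the bucket from phys_pc bits alone. A minimal sketch of computing such a hash directly, with made-up addresses, assuming the headers above are on the include path:

#include <stdint.h>
#include "exec/tb-hash-xx.h"

static uint32_t example_tb_hash(void)
{
    uint64_t phys_pc = 0x40001234;  /* hypothetical physical PC */
    uint64_t pc      = 0x1234;      /* hypothetical virtual PC */
    uint32_t flags   = 0;

    return tb_hash_func5(phys_pc, pc, flags);
}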
diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
index 8f1cc7ba67..b64f899870 100644
--- a/include/qemu/compiler.h
+++ b/include/qemu/compiler.h
@@ -41,6 +41,8 @@
# define QEMU_PACKED __attribute__((packed))
#endif
+#define QEMU_ALIGNED(X) __attribute__((aligned(X)))
+
#ifndef glue
#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)
diff --git a/include/qemu/processor.h b/include/qemu/processor.h
new file mode 100644
index 0000000000..8b2570283a
--- /dev/null
+++ b/include/qemu/processor.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_PROCESSOR_H
+#define QEMU_PROCESSOR_H
+
+#include "qemu/atomic.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+# define cpu_relax() asm volatile("rep; nop" ::: "memory")
+
+#elif defined(__ia64__)
+# define cpu_relax() asm volatile("hint @pause" ::: "memory")
+
+#elif defined(__aarch64__)
+# define cpu_relax() asm volatile("yield" ::: "memory")
+
+#elif defined(__powerpc64__)
+/* set Hardware Multi-Threading (HMT) priority to low; then back to medium */
+# define cpu_relax() asm volatile("or 1, 1, 1;" \
+ "or 2, 2, 2;" ::: "memory")
+
+#else
+# define cpu_relax() barrier()
+#endif
+
+#endif /* QEMU_PROCESSOR_H */
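cpu_relax() hints to the core that the caller is in a spin-wait loop (e.g. PAUSE on x86), which reduces power draw and eases contention with an SMT sibling. A sketch of the intended busy-wait usage, polling a hypothetical flag:

#include "qemu/atomic.h"
#include "qemu/processor.h"

static void wait_for_flag(int *flag)
{
    while (!atomic_read(flag)) {
        cpu_relax();   /* ease pipeline/SMT pressure while spinning */
    }
}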
diff --git a/include/qemu/qdist.h b/include/qemu/qdist.h
new file mode 100644
index 0000000000..f30050c2d1
--- /dev/null
+++ b/include/qemu/qdist.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_QDIST_H
+#define QEMU_QDIST_H
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/bitops.h"
+
+/*
+ * Samples with the same 'x value' end up in the same qdist_entry,
+ * e.g. inc(0.1) and inc(0.1) end up as {x=0.1, count=2}.
+ *
+ * Binning happens only at print time, so that we retain the flexibility to
+ * choose the binning. This might not be ideal for workloads that do not care
+ * much about precision and insert many samples all with different x values;
+ * in that case, pre-binning (e.g. entering both 0.115 and 0.097 as 0.1)
+ * should be considered.
+ */
+struct qdist_entry {
+ double x;
+ unsigned long count;
+};
+
+struct qdist {
+ struct qdist_entry *entries;
+ size_t n;
+ size_t size;
+};
+
+#define QDIST_PR_BORDER BIT(0)
+#define QDIST_PR_LABELS BIT(1)
+/* the remaining options only work if PR_LABELS is set */
+#define QDIST_PR_NODECIMAL BIT(2)
+#define QDIST_PR_PERCENT BIT(3)
+#define QDIST_PR_100X BIT(4)
+#define QDIST_PR_NOBINRANGE BIT(5)
+
+void qdist_init(struct qdist *dist);
+void qdist_destroy(struct qdist *dist);
+
+void qdist_add(struct qdist *dist, double x, long count);
+void qdist_inc(struct qdist *dist, double x);
+double qdist_xmin(const struct qdist *dist);
+double qdist_xmax(const struct qdist *dist);
+double qdist_avg(const struct qdist *dist);
+unsigned long qdist_sample_count(const struct qdist *dist);
+size_t qdist_unique_entries(const struct qdist *dist);
+
+/* callers must free the returned string with g_free() */
+char *qdist_pr_plain(const struct qdist *dist, size_t n_groups);
+
+/* callers must free the returned string with g_free() */
+char *qdist_pr(const struct qdist *dist, size_t n_groups, uint32_t opt);
+
+/* Only qdist code and test code should ever call this function */
+void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n);
+
+#endif /* QEMU_QDIST_H */
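Since binning happens only at print time, the same distribution can be rendered at several granularities without re-inserting samples. A minimal usage sketch (names local to this example; g_free() assumes glib is pulled in via the QEMU headers):

#include "qemu/qdist.h"

static void qdist_example(void)
{
    struct qdist dist;
    char *hist;

    qdist_init(&dist);
    qdist_inc(&dist, 0.25);       /* one sample at x = 0.25 */
    qdist_add(&dist, 0.75, 3);    /* three samples at x = 0.75 */

    /* bin into two groups at print time; caller frees the string */
    hist = qdist_pr_plain(&dist, 2);
    g_free(hist);

    qdist_destroy(&dist);
}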
diff --git a/include/qemu/qht.h b/include/qemu/qht.h
new file mode 100644
index 0000000000..aec60aa534
--- /dev/null
+++ b/include/qemu/qht.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_QHT_H
+#define QEMU_QHT_H
+
+#include "qemu/osdep.h"
+#include "qemu/seqlock.h"
+#include "qemu/thread.h"
+#include "qemu/qdist.h"
+
+struct qht {
+ struct qht_map *map;
+ QemuMutex lock; /* serializes setters of ht->map */
+ unsigned int mode;
+};
+
+/**
+ * struct qht_stats - Statistics of a QHT
+ * @head_buckets: number of head buckets
+ * @used_head_buckets: number of non-empty head buckets
+ * @entries: total number of entries
+ * @chain: frequency distribution representing the number of buckets in each
+ * chain, excluding empty chains.
+ * @occupancy: frequency distribution representing chain occupancy rate.
+ * Valid range: from 0.0 (empty) to 1.0 (full occupancy).
+ *
+ * An entry is a pointer-hash pair.
+ * Each bucket can host several entries.
+ * Chains are chains of buckets, whose first link is always a head bucket.
+ */
+struct qht_stats {
+ size_t head_buckets;
+ size_t used_head_buckets;
+ size_t entries;
+ struct qdist chain;
+ struct qdist occupancy;
+};
+
+typedef bool (*qht_lookup_func_t)(const void *obj, const void *userp);
+typedef void (*qht_iter_func_t)(struct qht *ht, void *p, uint32_t h, void *up);
+
+#define QHT_MODE_AUTO_RESIZE 0x1 /* auto-resize when heavily loaded */
+
+/**
+ * qht_init - Initialize a QHT
+ * @ht: QHT to be initialized
+ * @n_elems: number of entries the hash table should be optimized for.
+ * @mode: bitmask with OR'ed QHT_MODE_*
+ */
+void qht_init(struct qht *ht, size_t n_elems, unsigned int mode);
+
+/**
+ * qht_destroy - destroy a previously initialized QHT
+ * @ht: QHT to be destroyed
+ *
+ * Call only when there are no readers/writers left.
+ */
+void qht_destroy(struct qht *ht);
+
+/**
+ * qht_insert - Insert a pointer into the hash table
+ * @ht: QHT to insert to
+ * @p: pointer to be inserted
+ * @hash: hash corresponding to @p
+ *
+ * Attempting to insert a NULL @p is a bug.
+ * Inserting the same pointer @p with different @hash values is a bug.
+ *
+ * Returns true on success.
+ * Returns false if the @p-@hash pair already exists in the hash table.
+ */
+bool qht_insert(struct qht *ht, void *p, uint32_t hash);
+
+/**
+ * qht_lookup - Look up a pointer in a QHT
+ * @ht: QHT to be looked up
+ * @func: function to compare existing pointers against @userp
+ * @userp: pointer to pass to @func
+ * @hash: hash of the pointer to be looked up
+ *
+ * Needs to be called under an RCU read-critical section.
+ *
+ * The user-provided @func compares pointers in QHT against @userp.
+ * If the function returns true, a match has been found.
+ *
+ * Returns the corresponding pointer when a match is found.
+ * Returns NULL otherwise.
+ */
+void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp,
+ uint32_t hash);
+
+/**
+ * qht_remove - remove a pointer from the hash table
+ * @ht: QHT to remove from
+ * @p: pointer to be removed
+ * @hash: hash corresponding to @p
+ *
+ * Attempting to remove a NULL @p is a bug.
+ *
+ * Just-removed @p pointers cannot be immediately freed; they need to remain
+ * valid until the end of the RCU grace period in which qht_remove() is called.
+ * This guarantees that concurrent lookups will always compare against valid
+ * data.
+ *
+ * Returns true on success.
+ * Returns false if the @p-@hash pair was not found.
+ */
+bool qht_remove(struct qht *ht, const void *p, uint32_t hash);
+
+/**
+ * qht_reset - reset a QHT
+ * @ht: QHT to be reset
+ *
+ * All entries in the hash table are reset. No resizing is performed.
+ *
+ * If concurrent readers may exist, the objects pointed to by the hash table
+ * must remain valid for the existing RCU grace period -- see qht_remove().
+ * See also: qht_reset_size()
+ */
+void qht_reset(struct qht *ht);
+
+/**
+ * qht_reset_size - reset and resize a QHT
+ * @ht: QHT to be reset and resized
+ * @n_elems: number of entries the resized hash table should be optimized for.
+ *
+ * Returns true if the resize was necessary and therefore performed.
+ * Returns false otherwise.
+ *
+ * If concurrent readers may exist, the objects pointed to by the hash table
+ * must remain valid for the existing RCU grace period -- see qht_remove().
+ * See also: qht_reset(), qht_resize().
+ */
+bool qht_reset_size(struct qht *ht, size_t n_elems);
+
+/**
+ * qht_resize - resize a QHT
+ * @ht: QHT to be resized
+ * @n_elems: number of entries the resized hash table should be optimized for
+ *
+ * Returns true on success.
+ * Returns false if the resize was not necessary and therefore not performed.
+ * See also: qht_reset_size().
+ */
+bool qht_resize(struct qht *ht, size_t n_elems);
+
+/**
+ * qht_iter - Iterate over a QHT
+ * @ht: QHT to be iterated over
+ * @func: function to be called for each entry in QHT
+ * @userp: additional pointer to be passed to @func
+ *
+ * Each time it is called, user-provided @func is passed a pointer-hash pair,
+ * plus @userp.
+ */
+void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp);
+
+/**
+ * qht_statistics_init - Gather statistics from a QHT
+ * @ht: QHT to gather statistics from
+ * @stats: pointer to a struct qht_stats to be filled in
+ *
+ * Does NOT need to be called under an RCU read-critical section,
+ * since it does not dereference any pointers stored in the hash table.
+ *
+ * When done with @stats, pass the struct to qht_statistics_destroy().
+ * Failing to do this will leak memory.
+ */
+void qht_statistics_init(struct qht *ht, struct qht_stats *stats);
+
+/**
+ * qht_statistics_destroy - Destroy a struct qht_stats
+ * @stats: struct qht_stats to be destroyed
+ *
+ * See also: qht_statistics_init().
+ */
+void qht_statistics_destroy(struct qht_stats *stats);
+
+#endif /* QEMU_QHT_H */
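The API above, end to end: insert, look up under RCU, remove, destroy. A sketch with hypothetical names, using the key value itself as its hash purely for brevity (a real caller would use something like tb_hash_func5()):

#include "qemu/qht.h"
#include "qemu/rcu.h"

static bool long_cmp(const void *obj, const void *userp)
{
    return *(const long *)obj == *(const long *)userp;
}

static void qht_example(void)
{
    static long val = 42;
    struct qht ht;
    long key = 42;
    long *p;

    qht_init(&ht, 128, QHT_MODE_AUTO_RESIZE);
    qht_insert(&ht, &val, (uint32_t)val);

    rcu_read_lock();              /* lookups need an RCU read section */
    p = qht_lookup(&ht, long_cmp, &key, (uint32_t)key);
    rcu_read_unlock();

    qht_remove(&ht, &val, (uint32_t)val);
    qht_destroy(&ht);
    (void)p;
}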
diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
index 70b01fd60d..4dfc055353 100644
--- a/include/qemu/seqlock.h
+++ b/include/qemu/seqlock.h
@@ -19,37 +19,29 @@
typedef struct QemuSeqLock QemuSeqLock;
struct QemuSeqLock {
- QemuMutex *mutex;
unsigned sequence;
};
-static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
+static inline void seqlock_init(QemuSeqLock *sl)
{
- sl->mutex = mutex;
sl->sequence = 0;
}
/* Lock out other writers and update the count. */
-static inline void seqlock_write_lock(QemuSeqLock *sl)
+static inline void seqlock_write_begin(QemuSeqLock *sl)
{
- if (sl->mutex) {
- qemu_mutex_lock(sl->mutex);
- }
++sl->sequence;
/* Write sequence before updating other fields. */
smp_wmb();
}
-static inline void seqlock_write_unlock(QemuSeqLock *sl)
+static inline void seqlock_write_end(QemuSeqLock *sl)
{
/* Write other fields before finalizing sequence. */
smp_wmb();
++sl->sequence;
- if (sl->mutex) {
- qemu_mutex_unlock(sl->mutex);
- }
}
static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
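With the optional mutex removed, serializing writers is now the caller's job; readers keep the usual retry loop. A sketch of both sides, assuming writers are serialized externally and using seqlock_read_retry() from further down in this header:

#include "qemu/seqlock.h"

static QemuSeqLock sl;     /* seqlock_init(&sl) called once at startup */
static int shared_value;

static void writer(int v)  /* callers must serialize writers themselves */
{
    seqlock_write_begin(&sl);
    shared_value = v;
    seqlock_write_end(&sl);
}

static int reader(void)
{
    unsigned start;
    int v;

    do {
        start = seqlock_read_begin(&sl);
        v = shared_value;
    } while (seqlock_read_retry(&sl, start));
    return v;
}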
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index bdae6dfdbe..c5d71cf8fc 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -1,6 +1,8 @@
#ifndef __QEMU_THREAD_H
#define __QEMU_THREAD_H 1
+#include "qemu/processor.h"
+#include "qemu/atomic.h"
typedef struct QemuMutex QemuMutex;
typedef struct QemuCond QemuCond;
@@ -60,4 +62,37 @@ struct Notifier;
void qemu_thread_atexit_add(struct Notifier *notifier);
void qemu_thread_atexit_remove(struct Notifier *notifier);
+typedef struct QemuSpin {
+ int value;
+} QemuSpin;
+
+static inline void qemu_spin_init(QemuSpin *spin)
+{
+ __sync_lock_release(&spin->value);
+}
+
+static inline void qemu_spin_lock(QemuSpin *spin)
+{
+ while (unlikely(__sync_lock_test_and_set(&spin->value, true))) {
+ while (atomic_read(&spin->value)) {
+ cpu_relax();
+ }
+ }
+}
+
+static inline bool qemu_spin_trylock(QemuSpin *spin)
+{
+ return __sync_lock_test_and_set(&spin->value, true);
+}
+
+static inline bool qemu_spin_locked(QemuSpin *spin)
+{
+ return atomic_read(&spin->value);
+}
+
+static inline void qemu_spin_unlock(QemuSpin *spin)
+{
+ __sync_lock_release(&spin->value);
+}
+
#endif
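The spinlock builds on __sync_lock_test_and_set (an acquire operation) and __sync_lock_release (a release operation), spinning with cpu_relax() while contended. A minimal usage sketch with a hypothetical counter:

#include "qemu/thread.h"

static QemuSpin lock;         /* qemu_spin_init(&lock) called once */
static unsigned long counter;

static void counter_inc(void)
{
    qemu_spin_lock(&lock);    /* spins with cpu_relax() if contended */
    counter++;
    qemu_spin_unlock(&lock);
}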
diff --git a/tests/.gitignore b/tests/.gitignore
index a06a8ba26f..840ea396ab 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -7,6 +7,7 @@ check-qnull
check-qstring
check-qom-interface
check-qom-proplist
+qht-bench
rcutorture
test-aio
test-base64
@@ -48,7 +49,10 @@ test-qapi-types.[ch]
test-qapi-visit.[ch]
test-qdev-global-props
test-qemu-opts
+test-qdist
test-qga
+test-qht
+test-qht-par
test-qmp-commands
test-qmp-commands.h
test-qmp-event
diff --git a/tests/Makefile.include b/tests/Makefile.include
index a3e20e39ec..7d63d16c3f 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -70,6 +70,12 @@ check-unit-y += tests/rcutorture$(EXESUF)
gcov-files-rcutorture-y = util/rcu.c
check-unit-y += tests/test-rcu-list$(EXESUF)
gcov-files-test-rcu-list-y = util/rcu.c
+check-unit-y += tests/test-qdist$(EXESUF)
+gcov-files-test-qdist-y = util/qdist.c
+check-unit-y += tests/test-qht$(EXESUF)
+gcov-files-test-qht-y = util/qht.c
+check-unit-y += tests/test-qht-par$(EXESUF)
+gcov-files-test-qht-par-y = util/qht.c
check-unit-y += tests/test-bitops$(EXESUF)
check-unit-$(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) += tests/test-qdev-global-props$(EXESUF)
check-unit-y += tests/check-qom-interface$(EXESUF)
@@ -394,7 +400,9 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \
tests/test-qmp-commands.o tests/test-visitor-serialization.o \
tests/test-x86-cpuid.o tests/test-mul64.o tests/test-int128.o \
tests/test-opts-visitor.o tests/test-qmp-event.o \
- tests/rcutorture.o tests/test-rcu-list.o
+ tests/rcutorture.o tests/test-rcu-list.o \
+ tests/test-qdist.o \
+ tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o
$(test-obj-y): QEMU_INCLUDES += -Itests
QEMU_CFLAGS += -I$(SRC_PATH)/tests
@@ -433,6 +441,10 @@ tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o
tests/test-int128$(EXESUF): tests/test-int128.o
tests/rcutorture$(EXESUF): tests/rcutorture.o $(test-util-obj-y)
tests/test-rcu-list$(EXESUF): tests/test-rcu-list.o $(test-util-obj-y)
+tests/test-qdist$(EXESUF): tests/test-qdist.o $(test-util-obj-y)
+tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y)
+tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y)
+tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
diff --git a/tests/qht-bench.c b/tests/qht-bench.c
new file mode 100644
index 0000000000..ad8efbca95
--- /dev/null
+++ b/tests/qht-bench.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include <glib.h>
+#include "qemu/processor.h"
+#include "qemu/atomic.h"
+#include "qemu/qht.h"
+#include "qemu/rcu.h"
+#include "exec/tb-hash-xx.h"
+
+struct thread_stats {
+ size_t rd;
+ size_t not_rd;
+ size_t in;
+ size_t not_in;
+ size_t rm;
+ size_t not_rm;
+ size_t rz;
+ size_t not_rz;
+};
+
+struct thread_info {
+ void (*func)(struct thread_info *);
+ struct thread_stats stats;
+ uint64_t r;
+ bool write_op; /* writes alternate between insertions and removals */
+ bool resize_down;
+} QEMU_ALIGNED(64); /* avoid false sharing among threads */
+
+static struct qht ht;
+static QemuThread *rw_threads;
+
+#define DEFAULT_RANGE (4096)
+#define DEFAULT_QHT_N_ELEMS DEFAULT_RANGE
+
+static unsigned int duration = 1;
+static unsigned int n_rw_threads = 1;
+static unsigned long lookup_range = DEFAULT_RANGE;
+static unsigned long update_range = DEFAULT_RANGE;
+static size_t init_range = DEFAULT_RANGE;
+static size_t init_size = DEFAULT_RANGE;
+static size_t n_ready_threads;
+static long populate_offset;
+static long *keys;
+
+static size_t resize_min;
+static size_t resize_max;
+static struct thread_info *rz_info;
+static unsigned long resize_delay = 1000;
+static double resize_rate; /* 0.0 to 1.0 */
+static unsigned int n_rz_threads = 1;
+static QemuThread *rz_threads;
+
+static double update_rate; /* 0.0 to 1.0 */
+static uint64_t update_threshold;
+static uint64_t resize_threshold;
+
+static size_t qht_n_elems = DEFAULT_QHT_N_ELEMS;
+static int qht_mode;
+
+static bool test_start;
+static bool test_stop;
+
+static struct thread_info *rw_info;
+
+static const char commands_string[] =
+ " -d = duration, in seconds\n"
+ " -n = number of threads\n"
+ "\n"
+ " -o = offset at which keys start\n"
+ "\n"
+ " -g = set -s,-k,-K,-l,-r to the same value\n"
+ " -s = initial size hint\n"
+ " -k = initial number of keys\n"
+ " -K = initial range of keys (will be rounded up to pow2)\n"
+ " -l = lookup range of keys (will be rounded up to pow2)\n"
+ " -r = update range of keys (will be rounded up to pow2)\n"
+ "\n"
+ " -u = update rate (0.0 to 100.0), 50/50 split of insertions/removals\n"
+ "\n"
+ " -R = enable auto-resize\n"
+ " -S = resize rate (0.0 to 100.0)\n"
+ " -D = delay (in us) between potential resizes\n"
+ " -N = number of resize threads";
+
+static void usage_complete(int argc, char *argv[])
+{
+ fprintf(stderr, "Usage: %s [options]\n", argv[0]);
+ fprintf(stderr, "options:\n%s\n", commands_string);
+ exit(-1);
+}
+
+static bool is_equal(const void *obj, const void *userp)
+{
+ const long *a = obj;
+ const long *b = userp;
+
+ return *a == *b;
+}
+
+static inline uint32_t h(unsigned long v)
+{
+ return tb_hash_func5(v, 0, 0);
+}
+
+/*
+ * From: https://en.wikipedia.org/wiki/Xorshift
+ * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
+ * guaranteed to be >= INT_MAX).
+ */
+static uint64_t xorshift64star(uint64_t x)
+{
+ x ^= x >> 12; /* a */
+ x ^= x << 25; /* b */
+ x ^= x >> 27; /* c */
+ return x * UINT64_C(2685821657736338717);
+}
+
+static void do_rz(struct thread_info *info)
+{
+ struct thread_stats *stats = &info->stats;
+
+ if (info->r < resize_threshold) {
+ size_t size = info->resize_down ? resize_min : resize_max;
+ bool resized;
+
+ resized = qht_resize(&ht, size);
+ info->resize_down = !info->resize_down;
+
+ if (resized) {
+ stats->rz++;
+ } else {
+ stats->not_rz++;
+ }
+ }
+ g_usleep(resize_delay);
+}
+
+static void do_rw(struct thread_info *info)
+{
+ struct thread_stats *stats = &info->stats;
+ uint32_t hash;
+ long *p;
+
+ if (info->r >= update_threshold) {
+ bool read;
+
+ p = &keys[info->r & (lookup_range - 1)];
+ hash = h(*p);
+ read = qht_lookup(&ht, is_equal, p, hash);
+ if (read) {
+ stats->rd++;
+ } else {
+ stats->not_rd++;
+ }
+ } else {
+ p = &keys[info->r & (update_range - 1)];
+ hash = h(*p);
+ if (info->write_op) {
+ bool written = false;
+
+ if (qht_lookup(&ht, is_equal, p, hash) == NULL) {
+ written = qht_insert(&ht, p, hash);
+ }
+ if (written) {
+ stats->in++;
+ } else {
+ stats->not_in++;
+ }
+ } else {
+ bool removed = false;
+
+ if (qht_lookup(&ht, is_equal, p, hash)) {
+ removed = qht_remove(&ht, p, hash);
+ }
+ if (removed) {
+ stats->rm++;
+ } else {
+ stats->not_rm++;
+ }
+ }
+ info->write_op = !info->write_op;
+ }
+}
+
+static void *thread_func(void *p)
+{
+ struct thread_info *info = p;
+
+ rcu_register_thread();
+
+ atomic_inc(&n_ready_threads);
+ while (!atomic_mb_read(&test_start)) {
+ cpu_relax();
+ }
+
+ rcu_read_lock();
+ while (!atomic_read(&test_stop)) {
+ info->r = xorshift64star(info->r);
+ info->func(info);
+ }
+ rcu_read_unlock();
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
+/* sets everything except info->func */
+static void prepare_thread_info(struct thread_info *info, int i)
+{
+ /* seed for the RNG; each thread should have a different one */
+ info->r = (i + 1) ^ time(NULL);
+ /* the first update will be a write */
+ info->write_op = true;
+ /* the first resize will be down */
+ info->resize_down = true;
+
+ memset(&info->stats, 0, sizeof(info->stats));
+}
+
+static void
+th_create_n(QemuThread **threads, struct thread_info **infos, const char *name,
+ void (*func)(struct thread_info *), int offset, int n)
+{
+ struct thread_info *info;
+ QemuThread *th;
+ int i;
+
+ th = g_malloc(sizeof(*th) * n);
+ *threads = th;
+
+ info = qemu_memalign(64, sizeof(*info) * n);
+ *infos = info;
+
+ for (i = 0; i < n; i++) {
+ prepare_thread_info(&info[i], offset + i);
+ info[i].func = func;
+ qemu_thread_create(&th[i], name, thread_func, &info[i],
+ QEMU_THREAD_JOINABLE);
+ }
+}
+
+static void create_threads(void)
+{
+ th_create_n(&rw_threads, &rw_info, "rw", do_rw, 0, n_rw_threads);
+ th_create_n(&rz_threads, &rz_info, "rz", do_rz, n_rw_threads, n_rz_threads);
+}
+
+static void pr_params(void)
+{
+ printf("Parameters:\n");
+ printf(" duration: %d s\n", duration);
+ printf(" # of threads: %u\n", n_rw_threads);
+ printf(" initial # of keys: %zu\n", init_size);
+ printf(" initial size hint: %zu\n", qht_n_elems);
+ printf(" auto-resize: %s\n",
+ qht_mode & QHT_MODE_AUTO_RESIZE ? "on" : "off");
+ if (resize_rate) {
+ printf(" resize_rate: %f%%\n", resize_rate * 100.0);
+ printf(" resize range: %zu-%zu\n", resize_min, resize_max);
+ printf(" # resize threads %u\n", n_rz_threads);
+ }
+ printf(" update rate: %f%%\n", update_rate * 100.0);
+ printf(" offset: %ld\n", populate_offset);
+ printf(" initial key range: %zu\n", init_range);
+ printf(" lookup range: %lu\n", lookup_range);
+ printf(" update range: %lu\n", update_range);
+}
+
+static void do_threshold(double rate, uint64_t *threshold)
+{
+ if (rate == 1.0) {
+ *threshold = UINT64_MAX;
+ } else {
+ *threshold = rate * UINT64_MAX;
+ }
+}
+
+static void htable_init(void)
+{
+ unsigned long n = MAX(init_range, update_range);
+ uint64_t r = time(NULL);
+ size_t retries = 0;
+ size_t i;
+
+ /* avoid allocating memory later by allocating all the keys now */
+ keys = g_malloc(sizeof(*keys) * n);
+ for (i = 0; i < n; i++) {
+ keys[i] = populate_offset + i;
+ }
+
+ /* some sanity checks */
+ g_assert_cmpuint(lookup_range, <=, n);
+
+ /* compute thresholds */
+ do_threshold(update_rate, &update_threshold);
+ do_threshold(resize_rate, &resize_threshold);
+
+ if (resize_rate) {
+ resize_min = n / 2;
+ resize_max = n;
+ assert(resize_min < resize_max);
+ } else {
+ n_rz_threads = 0;
+ }
+
+ /* initialize the hash table */
+ qht_init(&ht, qht_n_elems, qht_mode);
+ assert(init_size <= init_range);
+
+ pr_params();
+
+ fprintf(stderr, "Initialization: populating %zu items...", init_size);
+ for (i = 0; i < init_size; i++) {
+ for (;;) {
+ uint32_t hash;
+ long *p;
+
+ r = xorshift64star(r);
+ p = &keys[r & (init_range - 1)];
+ hash = h(*p);
+ if (qht_insert(&ht, p, hash)) {
+ break;
+ }
+ retries++;
+ }
+ }
+ fprintf(stderr, " populated after %zu retries\n", retries);
+}
+
+static void add_stats(struct thread_stats *s, struct thread_info *info, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++) {
+ struct thread_stats *stats = &info[i].stats;
+
+ s->rd += stats->rd;
+ s->not_rd += stats->not_rd;
+
+ s->in += stats->in;
+ s->not_in += stats->not_in;
+
+ s->rm += stats->rm;
+ s->not_rm += stats->not_rm;
+
+ s->rz += stats->rz;
+ s->not_rz += stats->not_rz;
+ }
+}
+
+static void pr_stats(void)
+{
+ struct thread_stats s = {};
+ double tx;
+
+ add_stats(&s, rw_info, n_rw_threads);
+ add_stats(&s, rz_info, n_rz_threads);
+
+ printf("Results:\n");
+
+ if (resize_rate) {
+ printf(" Resizes: %zu (%.2f%% of %zu)\n",
+ s.rz, (double)s.rz / (s.rz + s.not_rz) * 100, s.rz + s.not_rz);
+ }
+
+ printf(" Read: %.2f M (%.2f%% of %.2fM)\n",
+ (double)s.rd / 1e6,
+ (double)s.rd / (s.rd + s.not_rd) * 100,
+ (double)(s.rd + s.not_rd) / 1e6);
+ printf(" Inserted: %.2f M (%.2f%% of %.2fM)\n",
+ (double)s.in / 1e6,
+ (double)s.in / (s.in + s.not_in) * 100,
+ (double)(s.in + s.not_in) / 1e6);
+ printf(" Removed: %.2f M (%.2f%% of %.2fM)\n",
+ (double)s.rm / 1e6,
+ (double)s.rm / (s.rm + s.not_rm) * 100,
+ (double)(s.rm + s.not_rm) / 1e6);
+
+ tx = (s.rd + s.not_rd + s.in + s.not_in + s.rm + s.not_rm) / 1e6 / duration;
+ printf(" Throughput: %.2f MT/s\n", tx);
+ printf(" Throughput/thread: %.2f MT/s/thread\n", tx / n_rw_threads);
+}
+
+static void run_test(void)
+{
+ unsigned int remaining;
+ int i;
+
+ while (atomic_read(&n_ready_threads) != n_rw_threads + n_rz_threads) {
+ cpu_relax();
+ }
+ atomic_mb_set(&test_start, true);
+ do {
+ remaining = sleep(duration);
+ } while (remaining);
+ atomic_mb_set(&test_stop, true);
+
+ for (i = 0; i < n_rw_threads; i++) {
+ qemu_thread_join(&rw_threads[i]);
+ }
+ for (i = 0; i < n_rz_threads; i++) {
+ qemu_thread_join(&rz_threads[i]);
+ }
+}
+
+static void parse_args(int argc, char *argv[])
+{
+ int c;
+
+ for (;;) {
+ c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:r:Rs:S:u:");
+ if (c < 0) {
+ break;
+ }
+ switch (c) {
+ case 'd':
+ duration = atoi(optarg);
+ break;
+ case 'D':
+ resize_delay = atol(optarg);
+ break;
+ case 'g':
+ init_range = pow2ceil(atol(optarg));
+ lookup_range = pow2ceil(atol(optarg));
+ update_range = pow2ceil(atol(optarg));
+ qht_n_elems = atol(optarg);
+ init_size = atol(optarg);
+ break;
+ case 'h':
+ usage_complete(argc, argv);
+ exit(0);
+ case 'k':
+ init_size = atol(optarg);
+ break;
+ case 'K':
+ init_range = pow2ceil(atol(optarg));
+ break;
+ case 'l':
+ lookup_range = pow2ceil(atol(optarg));
+ break;
+ case 'n':
+ n_rw_threads = atoi(optarg);
+ break;
+ case 'N':
+ n_rz_threads = atoi(optarg);
+ break;
+ case 'o':
+ populate_offset = atol(optarg);
+ break;
+ case 'r':
+ update_range = pow2ceil(atol(optarg));
+ break;
+ case 'R':
+ qht_mode |= QHT_MODE_AUTO_RESIZE;
+ break;
+ case 's':
+ qht_n_elems = atol(optarg);
+ break;
+ case 'S':
+ resize_rate = atof(optarg) / 100.0;
+ if (resize_rate > 1.0) {
+ resize_rate = 1.0;
+ }
+ break;
+ case 'u':
+ update_rate = atof(optarg) / 100.0;
+ if (update_rate > 1.0) {
+ update_rate = 1.0;
+ }
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ parse_args(argc, argv);
+ htable_init();
+ create_threads();
+ run_test();
+ pr_stats();
+ return 0;
+}
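A hypothetical invocation, matching the options documented in commands_string above — four read/write threads for ten seconds with a 20% update rate and auto-resize enabled:

  $ tests/qht-bench -n 4 -d 10 -u 20 -R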
diff --git a/tests/test-qdist.c b/tests/test-qdist.c
new file mode 100644
index 0000000000..a67f26057e
--- /dev/null
+++ b/tests/test-qdist.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include <glib.h>
+#include "qemu/qdist.h"
+
+#include <math.h>
+
+struct entry_desc {
+ double x;
+ unsigned long count;
+
+ /* 0 prints a space, 1-8 prints from qdist_blocks[] */
+ int fill_code;
+};
+
+/* See: https://en.wikipedia.org/wiki/Block_Elements */
+static const gunichar qdist_blocks[] = {
+ 0x2581,
+ 0x2582,
+ 0x2583,
+ 0x2584,
+ 0x2585,
+ 0x2586,
+ 0x2587,
+ 0x2588
+};
+
+#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
+
+static char *pr_hist(const struct entry_desc *darr, size_t n)
+{
+ GString *s = g_string_new("");
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ int fill = darr[i].fill_code;
+
+ if (fill) {
+ assert(fill <= QDIST_NR_BLOCK_CODES);
+ g_string_append_unichar(s, qdist_blocks[fill - 1]);
+ } else {
+ g_string_append_c(s, ' ');
+ }
+ }
+ return g_string_free(s, FALSE);
+}
+
+static void
+histogram_check(const struct qdist *dist, const struct entry_desc *darr,
+ size_t n, size_t n_bins)
+{
+ char *pr = qdist_pr_plain(dist, n_bins);
+ char *str = pr_hist(darr, n);
+
+ g_assert_cmpstr(pr, ==, str);
+ g_free(pr);
+ g_free(str);
+}
+
+static void histogram_check_single_full(const struct qdist *dist, size_t n_bins)
+{
+ struct entry_desc desc = { .fill_code = 8 };
+
+ histogram_check(dist, &desc, 1, n_bins);
+}
+
+static void
+entries_check(const struct qdist *dist, const struct entry_desc *darr, size_t n)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ struct qdist_entry *e = &dist->entries[i];
+
+ g_assert_cmpuint(e->count, ==, darr[i].count);
+ }
+}
+
+static void
+entries_insert(struct qdist *dist, const struct entry_desc *darr, size_t n)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ qdist_add(dist, darr[i].x, darr[i].count);
+ }
+}
+
+static void do_test_bin(const struct entry_desc *a, size_t n_a,
+ const struct entry_desc *b, size_t n_b)
+{
+ struct qdist qda;
+ struct qdist qdb;
+
+ qdist_init(&qda);
+
+ entries_insert(&qda, a, n_a);
+ qdist_inc(&qda, a[0].x);
+ qdist_add(&qda, a[0].x, -1);
+
+ g_assert_cmpuint(qdist_unique_entries(&qda), ==, n_a);
+ g_assert_cmpfloat(qdist_xmin(&qda), ==, a[0].x);
+ g_assert_cmpfloat(qdist_xmax(&qda), ==, a[n_a - 1].x);
+ histogram_check(&qda, a, n_a, 0);
+ histogram_check(&qda, a, n_a, n_a);
+
+ qdist_bin__internal(&qdb, &qda, n_b);
+ g_assert_cmpuint(qdb.n, ==, n_b);
+ entries_check(&qdb, b, n_b);
+ g_assert_cmpuint(qdist_sample_count(&qda), ==, qdist_sample_count(&qdb));
+ /*
+ * No histogram_check() for $qdb, since we'd rebin it and that is a bug.
+ * Instead, regenerate it from $qda.
+ */
+ histogram_check(&qda, b, n_b, n_b);
+
+ qdist_destroy(&qdb);
+ qdist_destroy(&qda);
+}
+
+static void do_test_pr(uint32_t opt)
+{
+ static const struct entry_desc desc[] = {
+ [0] = { 1, 900, 8 },
+ [1] = { 2, 1, 1 },
+ [2] = { 3, 2, 1 }
+ };
+ static const char border[] = "|";
+ const char *llabel = NULL;
+ const char *rlabel = NULL;
+ struct qdist dist;
+ GString *s;
+ char *str;
+ char *pr;
+ size_t n;
+
+ n = ARRAY_SIZE(desc);
+ qdist_init(&dist);
+
+ entries_insert(&dist, desc, n);
+ histogram_check(&dist, desc, n, 0);
+
+ s = g_string_new("");
+
+ if (opt & QDIST_PR_LABELS) {
+ unsigned int lopts = opt & (QDIST_PR_NODECIMAL |
+ QDIST_PR_PERCENT |
+ QDIST_PR_100X |
+ QDIST_PR_NOBINRANGE);
+
+ if (lopts == 0) {
+ llabel = "[1.0,1.7)";
+ rlabel = "[2.3,3.0]";
+ } else if (lopts == QDIST_PR_NODECIMAL) {
+ llabel = "[1,2)";
+ rlabel = "[2,3]";
+ } else if (lopts == (QDIST_PR_PERCENT | QDIST_PR_NODECIMAL)) {
+ llabel = "[1,2)%";
+ rlabel = "[2,3]%";
+ } else if (lopts == QDIST_PR_100X) {
+ llabel = "[100.0,166.7)";
+ rlabel = "[233.3,300.0]";
+ } else if (lopts == (QDIST_PR_NOBINRANGE | QDIST_PR_NODECIMAL)) {
+ llabel = "1";
+ rlabel = "3";
+ } else {
+ g_assert_cmpstr("BUG", ==, "This is not meant to be exhaustive");
+ }
+ }
+
+ if (llabel) {
+ g_string_append(s, llabel);
+ }
+ if (opt & QDIST_PR_BORDER) {
+ g_string_append(s, border);
+ }
+
+ str = pr_hist(desc, n);
+ g_string_append(s, str);
+ g_free(str);
+
+ if (opt & QDIST_PR_BORDER) {
+ g_string_append(s, border);
+ }
+ if (rlabel) {
+ g_string_append(s, rlabel);
+ }
+
+ str = g_string_free(s, FALSE);
+ pr = qdist_pr(&dist, n, opt);
+ g_assert_cmpstr(pr, ==, str);
+ g_free(pr);
+ g_free(str);
+
+ qdist_destroy(&dist);
+}
+
+static inline void do_test_pr_label(uint32_t opt)
+{
+ opt |= QDIST_PR_LABELS;
+ do_test_pr(opt);
+}
+
+static void test_pr(void)
+{
+ do_test_pr(0);
+
+ do_test_pr(QDIST_PR_BORDER);
+
+ /* 100X should be ignored because we're not setting LABELS */
+ do_test_pr(QDIST_PR_100X);
+
+ do_test_pr_label(0);
+ do_test_pr_label(QDIST_PR_NODECIMAL);
+ do_test_pr_label(QDIST_PR_PERCENT | QDIST_PR_NODECIMAL);
+ do_test_pr_label(QDIST_PR_100X);
+ do_test_pr_label(QDIST_PR_NOBINRANGE | QDIST_PR_NODECIMAL);
+}
+
+static void test_bin_shrink(void)
+{
+ static const struct entry_desc a[] = {
+ [0] = { 0.0, 42922, 7 },
+ [1] = { 0.25, 47834, 8 },
+ [2] = { 0.50, 26628, 0 },
+ [3] = { 0.625, 597, 4 },
+ [4] = { 0.75, 10298, 1 },
+ [5] = { 0.875, 22, 2 },
+ [6] = { 1.0, 2771, 1 }
+ };
+ static const struct entry_desc b[] = {
+ [0] = { 0.0, 42922, 7 },
+ [1] = { 0.25, 47834, 8 },
+ [2] = { 0.50, 27225, 3 },
+ [3] = { 0.75, 13091, 1 }
+ };
+
+ return do_test_bin(a, ARRAY_SIZE(a), b, ARRAY_SIZE(b));
+}
+
+static void test_bin_expand(void)
+{
+ static const struct entry_desc a[] = {
+ [0] = { 0.0, 11713, 5 },
+ [1] = { 0.25, 20294, 0 },
+ [2] = { 0.50, 17266, 8 },
+ [3] = { 0.625, 1506, 0 },
+ [4] = { 0.75, 10355, 6 },
+ [5] = { 0.833, 2, 1 },
+ [6] = { 0.875, 99, 4 },
+ [7] = { 1.0, 4301, 2 }
+ };
+ static const struct entry_desc b[] = {
+ [0] = { 0.0, 11713, 5 },
+ [1] = { 0.0, 0, 0 },
+ [2] = { 0.0, 20294, 8 },
+ [3] = { 0.0, 0, 0 },
+ [4] = { 0.0, 0, 0 },
+ [5] = { 0.0, 17266, 6 },
+ [6] = { 0.0, 1506, 1 },
+ [7] = { 0.0, 10355, 4 },
+ [8] = { 0.0, 101, 1 },
+ [9] = { 0.0, 4301, 2 }
+ };
+
+ return do_test_bin(a, ARRAY_SIZE(a), b, ARRAY_SIZE(b));
+}
+
+static void test_bin_precision(void)
+{
+ static const struct entry_desc a[] = {
+ [0] = { 0, 213549, 8 },
+ [1] = { 1, 70, 1 },
+ };
+ static const struct entry_desc b[] = {
+ [0] = { 0, 213549, 8 },
+ [1] = { 0, 70, 1 },
+ };
+
+ return do_test_bin(a, ARRAY_SIZE(a), b, ARRAY_SIZE(b));
+}
+
+static void test_bin_simple(void)
+{
+ static const struct entry_desc a[] = {
+ [0] = { 10, 101, 8 },
+ [1] = { 11, 0, 0 },
+ [2] = { 12, 2, 1 }
+ };
+ static const struct entry_desc b[] = {
+ [0] = { 0, 101, 8 },
+ [1] = { 0, 0, 0 },
+ [2] = { 0, 0, 0 },
+ [3] = { 0, 0, 0 },
+ [4] = { 0, 2, 1 }
+ };
+
+ return do_test_bin(a, ARRAY_SIZE(a), b, ARRAY_SIZE(b));
+}
+
+static void test_single_full(void)
+{
+ struct qdist dist;
+
+ qdist_init(&dist);
+
+ qdist_add(&dist, 3, 102);
+ g_assert_cmpfloat(qdist_avg(&dist), ==, 3);
+ g_assert_cmpfloat(qdist_xmin(&dist), ==, 3);
+ g_assert_cmpfloat(qdist_xmax(&dist), ==, 3);
+
+ histogram_check_single_full(&dist, 0);
+ histogram_check_single_full(&dist, 1);
+ histogram_check_single_full(&dist, 10);
+
+ qdist_destroy(&dist);
+}
+
+static void test_single_empty(void)
+{
+ struct qdist dist;
+ char *pr;
+
+ qdist_init(&dist);
+
+ qdist_add(&dist, 3, 0);
+ g_assert_cmpuint(qdist_sample_count(&dist), ==, 0);
+ g_assert(isnan(qdist_avg(&dist)));
+ g_assert_cmpfloat(qdist_xmin(&dist), ==, 3);
+ g_assert_cmpfloat(qdist_xmax(&dist), ==, 3);
+
+ pr = qdist_pr_plain(&dist, 0);
+ g_assert_cmpstr(pr, ==, " ");
+ g_free(pr);
+
+ pr = qdist_pr_plain(&dist, 1);
+ g_assert_cmpstr(pr, ==, " ");
+ g_free(pr);
+
+ pr = qdist_pr_plain(&dist, 2);
+ g_assert_cmpstr(pr, ==, " ");
+ g_free(pr);
+
+ qdist_destroy(&dist);
+}
+
+static void test_none(void)
+{
+ struct qdist dist;
+ char *pr;
+
+ qdist_init(&dist);
+
+ g_assert(isnan(qdist_avg(&dist)));
+ g_assert(isnan(qdist_xmin(&dist)));
+ g_assert(isnan(qdist_xmax(&dist)));
+
+ pr = qdist_pr_plain(&dist, 0);
+ g_assert(pr == NULL);
+
+ pr = qdist_pr_plain(&dist, 2);
+ g_assert(pr == NULL);
+
+ qdist_destroy(&dist);
+}
+
+int main(int argc, char *argv[])
+{
+ g_test_init(&argc, &argv, NULL);
+ g_test_add_func("/qdist/none", test_none);
+ g_test_add_func("/qdist/single/empty", test_single_empty);
+ g_test_add_func("/qdist/single/full", test_single_full);
+ g_test_add_func("/qdist/binning/simple", test_bin_simple);
+ g_test_add_func("/qdist/binning/precision", test_bin_precision);
+ g_test_add_func("/qdist/binning/expand", test_bin_expand);
+ g_test_add_func("/qdist/binning/shrink", test_bin_shrink);
+ g_test_add_func("/qdist/pr", test_pr);
+ return g_test_run();
+}
diff --git a/tests/test-qht-par.c b/tests/test-qht-par.c
new file mode 100644
index 0000000000..f09e004ec6
--- /dev/null
+++ b/tests/test-qht-par.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include <glib.h>
+
+#define TEST_QHT_STRING "tests/qht-bench 1>/dev/null 2>&1 -R -S0.1 -D10000 -N1 "
+
+static void test_qht(int n_threads, int update_rate, int duration)
+{
+ char *str;
+ int rc;
+
+ str = g_strdup_printf(TEST_QHT_STRING "-n %d -u %d -d %d",
+ n_threads, update_rate, duration);
+ rc = system(str);
+ g_free(str);
+ g_assert_cmpint(rc, ==, 0);
+}
+
+static void test_2th0u1s(void)
+{
+ test_qht(2, 0, 1);
+}
+
+static void test_2th20u1s(void)
+{
+ test_qht(2, 20, 1);
+}
+
+static void test_2th0u5s(void)
+{
+ test_qht(2, 0, 5);
+}
+
+static void test_2th20u5s(void)
+{
+ test_qht(2, 20, 5);
+}
+
+int main(int argc, char *argv[])
+{
+ g_test_init(&argc, &argv, NULL);
+
+ if (g_test_quick()) {
+ g_test_add_func("/qht/parallel/2threads-0%updates-1s", test_2th0u1s);
+ g_test_add_func("/qht/parallel/2threads-20%updates-1s", test_2th20u1s);
+ } else {
+ g_test_add_func("/qht/parallel/2threads-0%updates-5s", test_2th0u5s);
+ g_test_add_func("/qht/parallel/2threads-20%updates-5s", test_2th20u5s);
+ }
+ return g_test_run();
+}
diff --git a/tests/test-qht.c b/tests/test-qht.c
new file mode 100644
index 0000000000..c8eb9305ed
--- /dev/null
+++ b/tests/test-qht.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include <glib.h>
+#include "qemu/qht.h"
+
+#define N 5000
+
+static struct qht ht;
+static int32_t arr[N * 2];
+
+static bool is_equal(const void *obj, const void *userp)
+{
+ const int32_t *a = obj;
+ const int32_t *b = userp;
+
+ return *a == *b;
+}
+
+static void insert(int a, int b)
+{
+ int i;
+
+ for (i = a; i < b; i++) {
+ uint32_t hash;
+
+ arr[i] = i;
+ hash = i;
+
+ qht_insert(&ht, &arr[i], hash);
+ }
+}
+
+static void rm(int init, int end)
+{
+ int i;
+
+ for (i = init; i < end; i++) {
+ uint32_t hash;
+
+ hash = arr[i];
+ g_assert_true(qht_remove(&ht, &arr[i], hash));
+ }
+}
+
+static void check(int a, int b, bool expected)
+{
+ struct qht_stats stats;
+ int i;
+
+ for (i = a; i < b; i++) {
+ void *p;
+ uint32_t hash;
+ int32_t val;
+
+ val = i;
+ hash = i;
+ p = qht_lookup(&ht, is_equal, &val, hash);
+ g_assert_true(!!p == expected);
+ }
+ qht_statistics_init(&ht, &stats);
+ if (stats.used_head_buckets) {
+ g_assert_cmpfloat(qdist_avg(&stats.chain), >=, 1.0);
+ }
+ g_assert_cmpuint(stats.head_buckets, >, 0);
+ qht_statistics_destroy(&stats);
+}
+
+static void count_func(struct qht *ht, void *p, uint32_t hash, void *userp)
+{
+ unsigned int *curr = userp;
+
+ (*curr)++;
+}
+
+static void check_n(size_t expected)
+{
+ struct qht_stats stats;
+
+ qht_statistics_init(&ht, &stats);
+ g_assert_cmpuint(stats.entries, ==, expected);
+ qht_statistics_destroy(&stats);
+}
+
+static void iter_check(unsigned int count)
+{
+ unsigned int curr = 0;
+
+ qht_iter(&ht, count_func, &curr);
+ g_assert_cmpuint(curr, ==, count);
+}
+
+static void qht_do_test(unsigned int mode, size_t init_entries)
+{
+ qht_init(&ht, 0, mode);
+
+ insert(0, N);
+ check(0, N, true);
+ check_n(N);
+ check(-N, -1, false);
+ iter_check(N);
+
+ rm(101, 102);
+ check_n(N - 1);
+ insert(N, N * 2);
+ check_n(N + N - 1);
+ rm(N, N * 2);
+ check_n(N - 1);
+ insert(101, 102);
+ check_n(N);
+
+ rm(10, 200);
+ check_n(N - 190);
+ insert(150, 200);
+ check_n(N - 190 + 50);
+ insert(10, 150);
+ check_n(N);
+
+ rm(1, 2);
+ check_n(N - 1);
+ qht_reset_size(&ht, 0);
+ check_n(0);
+ check(0, N, false);
+
+ qht_destroy(&ht);
+}
+
+static void qht_test(unsigned int mode)
+{
+ qht_do_test(mode, 0);
+ qht_do_test(mode, 1);
+ qht_do_test(mode, 2);
+ qht_do_test(mode, 8);
+ qht_do_test(mode, 16);
+ qht_do_test(mode, 8192);
+ qht_do_test(mode, 16384);
+}
+
+static void test_default(void)
+{
+ qht_test(0);
+}
+
+static void test_resize(void)
+{
+ qht_test(QHT_MODE_AUTO_RESIZE);
+}
+
+int main(int argc, char *argv[])
+{
+ g_test_init(&argc, &argv, NULL);
+ g_test_add_func("/qht/mode/default", test_default);
+ g_test_add_func("/qht/mode/resize", test_resize);
+ return g_test_run();
+}
diff --git a/translate-all.c b/translate-all.c
index 118e7d3c84..e8b88b4485 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -735,6 +735,13 @@ static inline void code_gen_alloc(size_t tb_size)
qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
}
+static void tb_htable_init(void)
+{
+ unsigned int mode = QHT_MODE_AUTO_RESIZE;
+
+ qht_init(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE, mode);
+}
+
/* Must be called before using the QEMU cpus. 'tb_size' is the size
(in bytes) allocated to the translation buffer. Zero means default
size. */
@@ -742,6 +749,7 @@ void tcg_exec_init(unsigned long tb_size)
{
cpu_gen_init();
page_init();
+ tb_htable_init();
code_gen_alloc(tb_size);
#if defined(CONFIG_SOFTMMU)
/* There's no guest base to take into account, so go ahead and
@@ -846,7 +854,7 @@ void tb_flush(CPUState *cpu)
cpu->tb_flushed = true;
}
- memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
+ qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
page_flush_tb();
tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
@@ -857,60 +865,46 @@ void tb_flush(CPUState *cpu)
#ifdef DEBUG_TB_CHECK
-static void tb_invalidate_check(target_ulong address)
+static void
+do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp)
{
- TranslationBlock *tb;
- int i;
+ TranslationBlock *tb = p;
+ target_ulong addr = *(target_ulong *)userp;
- address &= TARGET_PAGE_MASK;
- for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
- for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
- tb = tb->phys_hash_next) {
- if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
- address >= tb->pc + tb->size)) {
- printf("ERROR invalidate: address=" TARGET_FMT_lx
- " PC=%08lx size=%04x\n",
- address, (long)tb->pc, tb->size);
- }
- }
+ if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
+ printf("ERROR invalidate: address=" TARGET_FMT_lx
+ " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
}
}
-/* verify that all the pages have correct rights for code */
-static void tb_page_check(void)
+static void tb_invalidate_check(target_ulong address)
{
- TranslationBlock *tb;
- int i, flags1, flags2;
-
- for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
- for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
- tb = tb->phys_hash_next) {
- flags1 = page_get_flags(tb->pc);
- flags2 = page_get_flags(tb->pc + tb->size - 1);
- if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
- printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
- (long)tb->pc, tb->size, flags1, flags2);
- }
- }
- }
+ address &= TARGET_PAGE_MASK;
+ qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_invalidate_check, &address);
}
-#endif
-
-static inline void tb_hash_remove(TranslationBlock **ptb, TranslationBlock *tb)
+static void
+do_tb_page_check(struct qht *ht, void *p, uint32_t hash, void *userp)
{
- TranslationBlock *tb1;
+ TranslationBlock *tb = p;
+ int flags1, flags2;
- for (;;) {
- tb1 = *ptb;
- if (tb1 == tb) {
- *ptb = tb1->phys_hash_next;
- break;
- }
- ptb = &tb1->phys_hash_next;
+ flags1 = page_get_flags(tb->pc);
+ flags2 = page_get_flags(tb->pc + tb->size - 1);
+ if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
+ printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
+ (long)tb->pc, tb->size, flags1, flags2);
}
}
+/* verify that all the pages have correct rights for code */
+static void tb_page_check(void)
+{
+ qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_page_check, NULL);
+}
+
+#endif
+
static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
{
TranslationBlock *tb1;
@@ -992,13 +986,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
CPUState *cpu;
PageDesc *p;
- unsigned int h;
+ uint32_t h;
tb_page_addr_t phys_pc;
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
- h = tb_phys_hash_func(phys_pc);
- tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
+ h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+ qht_remove(&tcg_ctx.tb_ctx.htable, tb, h);
/* remove the TB from the page list */
if (tb->page_addr[0] != page_addr) {
@@ -1127,14 +1121,11 @@ static inline void tb_alloc_page(TranslationBlock *tb,
static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2)
{
- unsigned int h;
- TranslationBlock **ptb;
+ uint32_t h;
- /* add in the physical hash table */
- h = tb_phys_hash_func(phys_pc);
- ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
- tb->phys_hash_next = *ptb;
- *ptb = tb;
+ /* add in the hash table */
+ h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+ qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
/* add in the page list */
tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
@@ -1677,6 +1668,10 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
int i, target_code_size, max_target_code_size;
int direct_jmp_count, direct_jmp2_count, cross_page;
TranslationBlock *tb;
+ struct qht_stats hst;
+ uint32_t hgram_opts;
+ size_t hgram_bins;
+ char *hgram;
target_code_size = 0;
max_target_code_size = 0;
@@ -1727,6 +1722,38 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
direct_jmp2_count,
tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
tcg_ctx.tb_ctx.nb_tbs : 0);
+
+ qht_statistics_init(&tcg_ctx.tb_ctx.htable, &hst);
+
+ cpu_fprintf(f, "TB hash buckets %zu/%zu (%0.2f%% head buckets used)\n",
+ hst.used_head_buckets, hst.head_buckets,
+ (double)hst.used_head_buckets / hst.head_buckets * 100);
+
+ hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
+ hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
+ if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
+ hgram_opts |= QDIST_PR_NODECIMAL;
+ }
+ hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
+ cpu_fprintf(f, "TB hash occupancy %0.2f%% avg chain occ. Histogram: %s\n",
+ qdist_avg(&hst.occupancy) * 100, hgram);
+ g_free(hgram);
+
+ hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
+ hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
+ if (hgram_bins > 10) {
+ hgram_bins = 10;
+ } else {
+ hgram_bins = 0;
+ hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
+ }
+ hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
+ cpu_fprintf(f, "TB hash avg chain %0.3f buckets. Histogram: %s\n",
+ qdist_avg(&hst.chain), hgram);
+ g_free(hgram);
+
+ qht_statistics_destroy(&hst);
+
cpu_fprintf(f, "\nStatistics:\n");
cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count);
cpu_fprintf(f, "TB invalidate count %d\n",
diff --git a/util/Makefile.objs b/util/Makefile.objs
index a8a777ec40..45f8794864 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -32,3 +32,5 @@ util-obj-y += buffer.o
util-obj-y += timed-average.o
util-obj-y += base64.o
util-obj-y += log.o
+util-obj-y += qdist.o
+util-obj-y += qht.o
diff --git a/util/qdist.c b/util/qdist.c
new file mode 100644
index 0000000000..4ea2e34fc2
--- /dev/null
+++ b/util/qdist.c
@@ -0,0 +1,395 @@
+/*
+ * qdist.c - QEMU helpers for handling frequency distributions of data.
+ *
+ * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/qdist.h"
+
+#include <math.h>
+#ifndef NAN
+#define NAN (0.0 / 0.0)
+#endif
+
+void qdist_init(struct qdist *dist)
+{
+ dist->entries = g_malloc(sizeof(*dist->entries));
+ dist->size = 1;
+ dist->n = 0;
+}
+
+void qdist_destroy(struct qdist *dist)
+{
+ g_free(dist->entries);
+}
+
+static inline int qdist_cmp_double(double a, double b)
+{
+ if (a > b) {
+ return 1;
+ } else if (a < b) {
+ return -1;
+ }
+ return 0;
+}
+
+static int qdist_cmp(const void *ap, const void *bp)
+{
+ const struct qdist_entry *a = ap;
+ const struct qdist_entry *b = bp;
+
+ return qdist_cmp_double(a->x, b->x);
+}
+
+void qdist_add(struct qdist *dist, double x, long count)
+{
+ struct qdist_entry *entry = NULL;
+
+ if (dist->n) {
+ struct qdist_entry e;
+
+ e.x = x;
+ entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
+ }
+
+ if (entry) {
+ entry->count += count;
+ return;
+ }
+
+ if (unlikely(dist->n == dist->size)) {
+ dist->size *= 2;
+ dist->entries = g_realloc(dist->entries,
+ sizeof(*dist->entries) * (dist->size));
+ }
+ dist->n++;
+ entry = &dist->entries[dist->n - 1];
+ entry->x = x;
+ entry->count = count;
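+ /* keep the array sorted by x so that the bsearch() above remains valid */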
+ qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
+}
+
+void qdist_inc(struct qdist *dist, double x)
+{
+ qdist_add(dist, x, 1);
+}
+
+/*
+ * Unicode for block elements. See:
+ * https://en.wikipedia.org/wiki/Block_Elements
+ */
+static const gunichar qdist_blocks[] = {
+ 0x2581,
+ 0x2582,
+ 0x2583,
+ 0x2584,
+ 0x2585,
+ 0x2586,
+ 0x2587,
+ 0x2588
+};
+
+#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
+
+/*
+ * Print a distribution into a string.
+ *
+ * This function assumes that appropriate binning has been done on the input;
+ * see qdist_bin__internal() and qdist_pr_plain().
+ *
+ * Callers must free the returned string with g_free().
+ */
+static char *qdist_pr_internal(const struct qdist *dist)
+{
+ double min, max;
+ GString *s = g_string_new("");
+ size_t i;
+
+ /* if only one entry, its printout will be either full or empty */
+ if (dist->n == 1) {
+ if (dist->entries[0].count) {
+ g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
+ } else {
+ g_string_append_c(s, ' ');
+ }
+ goto out;
+ }
+
+ /* get min and max counts */
+ min = dist->entries[0].count;
+ max = min;
+ for (i = 0; i < dist->n; i++) {
+ struct qdist_entry *e = &dist->entries[i];
+
+ if (e->count < min) {
+ min = e->count;
+ }
+ if (e->count > max) {
+ max = e->count;
+ }
+ }
+
+ for (i = 0; i < dist->n; i++) {
+ struct qdist_entry *e = &dist->entries[i];
+ int index;
+
+ /* special-case 0: instead of using block[0], print a space */
+ if (e->count) {
+ if (max == min) {
+ /* all counts are equal: print the tallest block */
+ index = QDIST_NR_BLOCK_CODES - 1;
+ } else {
+ /* divide first to avoid loss of precision when e->count == max */
+ index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
+ }
+ g_string_append_unichar(s, qdist_blocks[index]);
+ } else {
+ g_string_append_c(s, ' ');
+ }
+ }
+ out:
+ return g_string_free(s, FALSE);
+}
+
+/*
+ * Bin the distribution in @from into @n bins of consecutive, non-overlapping
+ * intervals, copying the result to @to.
+ *
+ * This function is internal to qdist: only this file and test code should
+ * ever call it.
+ *
+ * Note: calling this function on an already-binned qdist is a bug.
+ *
+ * If @n == 0 or @from->n == 1, use @from->n.
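+ *
+ * Example: binning @from with x = {0, 1, 2, 3} (count 1 each) into @n = 2
+ * bins yields the intervals [0, 1.5) and [1.5, 3], each with count 2.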
+ */
+void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
+{
+ double xmin, xmax;
+ double step;
+ size_t i, j;
+
+ qdist_init(to);
+
+ if (from->n == 0) {
+ return;
+ }
+ if (n == 0 || from->n == 1) {
+ n = from->n;
+ }
+
+ /* set equally-sized bins between @from's left and right */
+ xmin = qdist_xmin(from);
+ xmax = qdist_xmax(from);
+ step = (xmax - xmin) / n;
+
+ if (n == from->n) {
+ /* if @from's entries are equally spaced, no need to re-bin */
+ for (i = 0; i < from->n; i++) {
+ if (from->entries[i].x != xmin + i * step) {
+ goto rebin;
+ }
+ }
+ /* they're equally spaced, so copy the dist and bail out */
+ g_free(to->entries); /* drop the initial allocation from qdist_init() */
+ to->entries = g_new(struct qdist_entry, from->n);
+ to->n = from->n;
+ memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
+ return;
+ }
+
+ rebin:
+ j = 0;
+ for (i = 0; i < n; i++) {
+ double x;
+ double left, right;
+
+ left = xmin + i * step;
+ right = xmin + (i + 1) * step;
+
+ /* Add x, even if it might not get any counts later */
+ x = left;
+ qdist_add(to, x, 0);
+
+ /*
+ * To avoid double-counting we capture [left, right) ranges, except for
+ * the rightmost bin, which captures a [left, right] range.
+ */
+ while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
+ struct qdist_entry *o = &from->entries[j];
+
+ qdist_add(to, x, o->count);
+ j++;
+ }
+ }
+}
+
+/*
+ * Print @dist into a string, after re-binning it into @n bins of consecutive,
+ * non-overlapping intervals.
+ *
+ * If @n == 0, use @dist->n.
+ *
+ * Callers must free the returned string with g_free().
+ */
+char *qdist_pr_plain(const struct qdist *dist, size_t n)
+{
+ struct qdist binned;
+ char *ret;
+
+ if (dist->n == 0) {
+ return NULL;
+ }
+ qdist_bin__internal(&binned, dist, n);
+ ret = qdist_pr_internal(&binned);
+ qdist_destroy(&binned);
+ return ret;
+}
+
+static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
+ uint32_t opt, bool is_left)
+{
+ const char *percent;
+ const char *lparen;
+ const char *rparen;
+ GString *s;
+ double x1, x2, step;
+ double x;
+ double n;
+ int dec;
+
+ s = g_string_new("");
+ if (!(opt & QDIST_PR_LABELS)) {
+ goto out;
+ }
+
+ dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
+ percent = opt & QDIST_PR_PERCENT ? "%" : "";
+
+ n = n_bins ? n_bins : dist->n;
+ x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
+ step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
+
+ if (opt & QDIST_PR_100X) {
+ x *= 100.0;
+ step *= 100.0;
+ }
+ if (opt & QDIST_PR_NOBINRANGE) {
+ lparen = rparen = "";
+ x1 = x;
+ x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
+ } else {
+ lparen = "[";
+ rparen = is_left ? ")" : "]";
+ if (is_left) {
+ x1 = x;
+ x2 = x + step;
+ } else {
+ x1 = x - step;
+ x2 = x;
+ }
+ }
+ g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
+ if (!(opt & QDIST_PR_NOBINRANGE)) {
+ g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
+ }
+ g_string_append(s, percent);
+ out:
+ return g_string_free(s, FALSE);
+}
+
+/*
+ * Print the distribution's histogram into a string.
+ *
+ * See also: qdist_pr_plain().
+ *
+ * Callers must free the returned string with g_free().
+ */
+char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
+{
+ const char *border = opt & QDIST_PR_BORDER ? "|" : "";
+ char *llabel, *rlabel;
+ char *hgram;
+ GString *s;
+
+ if (dist->n == 0) {
+ return NULL;
+ }
+
+ s = g_string_new("");
+
+ llabel = qdist_pr_label(dist, n_bins, opt, true);
+ rlabel = qdist_pr_label(dist, n_bins, opt, false);
+ hgram = qdist_pr_plain(dist, n_bins);
+ g_string_append_printf(s, "%s%s%s%s%s",
+ llabel, border, hgram, border, rlabel);
+ g_free(llabel);
+ g_free(rlabel);
+ g_free(hgram);
+
+ return g_string_free(s, FALSE);
+}
+
+static inline double qdist_x(const struct qdist *dist, int index)
+{
+ if (dist->n == 0) {
+ return NAN;
+ }
+ return dist->entries[index].x;
+}
+
+double qdist_xmin(const struct qdist *dist)
+{
+ return qdist_x(dist, 0);
+}
+
+double qdist_xmax(const struct qdist *dist)
+{
+ return qdist_x(dist, dist->n - 1);
+}
+
+size_t qdist_unique_entries(const struct qdist *dist)
+{
+ return dist->n;
+}
+
+unsigned long qdist_sample_count(const struct qdist *dist)
+{
+ unsigned long count = 0;
+ size_t i;
+
+ for (i = 0; i < dist->n; i++) {
+ struct qdist_entry *e = &dist->entries[i];
+
+ count += e->count;
+ }
+ return count;
+}
+
+static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
+ size_t n, unsigned long count)
+{
+ /* amortize the recursion by using a base case > 2 */
+ if (n <= 8) {
+ size_t i;
+ double ret = 0;
+
+ for (i = 0; i < n; i++) {
+ struct qdist_entry *e = &dist->entries[index + i];
+
+ ret += e->x * e->count / count;
+ }
+ return ret;
+ } else {
+ size_t n2 = n / 2;
+
+ return qdist_pairwise_avg(dist, index, n2, count) +
+ qdist_pairwise_avg(dist, index + n2, n - n2, count);
+ }
+}
+
+double qdist_avg(const struct qdist *dist)
+{
+ unsigned long count;
+
+ count = qdist_sample_count(dist);
+ if (!count) {
+ return NAN;
+ }
+ return qdist_pairwise_avg(dist, 0, dist->n, count);
+}
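For orientation, here is a minimal usage sketch of the qdist API added above. It is an editor's illustration rather than part of the patch: qdist_example() is an invented name, and the sketch assumes the declarations from include/qemu/qdist.h plus glib for g_free().

    #include "qemu/qdist.h"
    #include <stdio.h>

    static void qdist_example(void)
    {
        struct qdist dist;
        char *hgram;
        int i;

        qdist_init(&dist);
        /* qdist_inc(d, x) is shorthand for qdist_add(d, x, 1) */
        for (i = 0; i < 100; i++) {
            qdist_inc(&dist, i % 10);
        }
        /* re-bin into 5 bins and render the block-element histogram */
        hgram = qdist_pr_plain(&dist, 5);
        printf("samples=%lu avg=%.2f histogram=%s\n",
               qdist_sample_count(&dist), qdist_avg(&dist), hgram);
        g_free(hgram);
        qdist_destroy(&dist);
    }

Each histogram character is one of the eight Unicode block elements that qdist_pr_internal() selects from the bin's count relative to the minimum and maximum counts.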
diff --git a/util/qht.c b/util/qht.c
new file mode 100644
index 0000000000..6f749098f4
--- /dev/null
+++ b/util/qht.c
@@ -0,0 +1,833 @@
+/*
+ * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads.
+ *
+ * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * Assumptions:
+ * - NULL cannot be inserted/removed as a pointer value.
+ * - Trying to insert an already-existing hash-pointer pair is OK. However,
+ *   it is not OK to insert into the same hash table hash-pointer pairs that
+ *   share the same pointer value but have different hashes.
+ * - Lookups are performed under an RCU read-critical section; removals
+ * must wait for a grace period to elapse before freeing removed objects.
+ *
+ * Features:
+ * - Reads (i.e. lookups and iterators) can be concurrent with other reads.
+ * Lookups that are concurrent with writes to the same bucket will retry
+ * via a seqlock; iterators acquire all bucket locks and therefore can be
+ * concurrent with lookups and are serialized wrt writers.
+ * - Writes (i.e. insertions/removals) can be concurrent with writes to
+ * different buckets; writes to the same bucket are serialized through a lock.
+ * - Optional auto-resizing: the hash table resizes up if the load surpasses
+ * a certain threshold. Resizing is done concurrently with readers; writes
+ * are serialized with the resize operation.
+ *
+ * The key structure is the bucket, which is cacheline-sized. Buckets
+ * contain a few hash values and pointers; the u32 hash values are stored in
+ * full so that resizing is fast. Having this structure instead of directly
+ * chaining items has two advantages:
+ * - Failed lookups fail fast, and touch a minimum number of cache lines.
+ * - Resizing the hash table with concurrent lookups is easy.
+ *
+ * There are two types of buckets:
+ * 1. "head" buckets are the ones allocated in the array of buckets in qht_map.
+ * 2. all "non-head" buckets (i.e. all others) are members of a chain that
+ * starts from a head bucket.
+ * Note that the seqlock and spinlock of a head bucket apply to all buckets
+ * chained to it; these two fields are unused in non-head buckets.
+ *
+ * On removals, we move the last valid item in the chain to the position of the
+ * just-removed entry. This makes lookups slightly faster, since the moment an
+ * invalid entry is found, the (failed) lookup is over.
+ *
+ * Resizing is done by taking all bucket spinlocks (so that no other writers can
+ * race with us) and then copying all entries into a new hash map. Then, the
+ * ht->map pointer is set, and the old map is freed once no RCU readers can see
+ * it anymore.
+ *
+ * Writers check for concurrent resizes by comparing ht->map before and after
+ * acquiring their bucket lock. If they don't match, a resize has occurred
+ * while the bucket spinlock was being acquired.
+ *
+ * Related Work:
+ * - Idea of cacheline-sized buckets with full hashes taken from:
+ * David, Guerraoui & Trigonakis, "Asynchronized Concurrency:
+ * The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15.
+ * - Why not RCU-based hash tables? They would allow us to get rid of the
+ * seqlock, but resizing would take forever since RCU read critical
+ * sections in QEMU take quite a long time.
+ * More info on relativistic hash tables:
+ * + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash
+ * Tables via Relativistic Programming", USENIX ATC'11.
+ * + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
+ * https://lwn.net/Articles/612021/
+ */
+#include "qemu/qht.h"
+#include "qemu/atomic.h"
+#include "qemu/rcu.h"
+
+//#define QHT_DEBUG
+
+/*
+ * We want to avoid false sharing of cache lines. Most systems have 64-byte
+ * cache lines so we go with it for simplicity.
+ *
+ * Note that systems with smaller cache lines will be fine (the struct is
+ * almost 64 bytes); systems with larger cache lines might suffer from
+ * some false sharing.
+ */
+#define QHT_BUCKET_ALIGN 64
+
+/* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */
+#if HOST_LONG_BITS == 32
+#define QHT_BUCKET_ENTRIES 6
+#else /* 64-bit */
+#define QHT_BUCKET_ENTRIES 4
+#endif
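+
+/*
+ * Size sketch, assuming a 4-byte QemuSpin and a 4-byte QemuSeqLock:
+ * 64-bit: 4 + 4 + 4 * 4 (hashes) + 4 * 8 (pointers) + 8 (next) = 64 bytes.
+ * 32-bit: 4 + 4 + 6 * 4 (hashes) + 6 * 4 (pointers) + 4 (next) = 60 bytes.
+ */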
+
+/*
+ * Note: reading partially-updated pointers in @pointers could lead to
+ * segfaults. We thus access them with atomic_read/set; this guarantees
+ * that the compiler makes all those accesses atomic. We also need the
+ * volatile-like behavior in atomic_read, since otherwise the compiler
+ * might refetch the pointer.
+ * atomic_read's are of course not necessary when the bucket lock is held.
+ *
+ * If both ht->lock and b->lock are grabbed, ht->lock should always
+ * be grabbed first.
+ */
+struct qht_bucket {
+ QemuSpin lock;
+ QemuSeqLock sequence;
+ uint32_t hashes[QHT_BUCKET_ENTRIES];
+ void *pointers[QHT_BUCKET_ENTRIES];
+ struct qht_bucket *next;
+} QEMU_ALIGNED(QHT_BUCKET_ALIGN);
+
+QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
+
+/**
+ * struct qht_map - structure to track an array of buckets
+ * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
+ * find the whole struct.
+ * @buckets: array of head buckets. It is constant once the map is created.
+ * @n_buckets: number of head buckets. It is constant once the map is created.
+ * @n_added_buckets: number of added (i.e. "non-head") buckets
+ * @n_added_buckets_threshold: threshold to trigger an upward resize once the
+ * number of added buckets surpasses it.
+ *
+ * Buckets are tracked in what we call a "map", i.e. this structure.
+ */
+struct qht_map {
+ struct rcu_head rcu;
+ struct qht_bucket *buckets;
+ size_t n_buckets;
+ size_t n_added_buckets;
+ size_t n_added_buckets_threshold;
+};
+
+/* trigger a resize when n_added_buckets > n_buckets / div */
+#define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8
+
+static void qht_do_resize(struct qht *ht, struct qht_map *new);
+static void qht_grow_maybe(struct qht *ht);
+
+#ifdef QHT_DEBUG
+
+#define qht_debug_assert(X) do { assert(X); } while (0)
+
+static void qht_bucket_debug__locked(struct qht_bucket *b)
+{
+ bool seen_empty = false;
+ bool corrupt = false;
+ int i;
+
+ do {
+ for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+ if (b->pointers[i] == NULL) {
+ seen_empty = true;
+ continue;
+ }
+ if (seen_empty) {
+ fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
+ __func__, b, i, b->hashes[i], b->pointers[i]);
+ corrupt = true;
+ }
+ }
+ b = b->next;
+ } while (b);
+ qht_debug_assert(!corrupt);
+}
+
+static void qht_map_debug__all_locked(struct qht_map *map)
+{
+ int i;
+
+ for (i = 0; i < map->n_buckets; i++) {
+ qht_bucket_debug__locked(&map->buckets[i]);
+ }
+}
+#else
+
+#define qht_debug_assert(X) do { (void)(X); } while (0)
+
+static inline void qht_bucket_debug__locked(struct qht_bucket *b)
+{ }
+
+static inline void qht_map_debug__all_locked(struct qht_map *map)
+{ }
+#endif /* QHT_DEBUG */
+
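+/* e.g. on a 64-bit host, n_elems = 6000 -> 6000 / 4 = 1500 -> pow2ceil() = 2048 */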
+static inline size_t qht_elems_to_buckets(size_t n_elems)
+{
+ return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
+}
+
+static inline void qht_head_init(struct qht_bucket *b)
+{
+ memset(b, 0, sizeof(*b));
+ qemu_spin_init(&b->lock);
+ seqlock_init(&b->sequence);
+}
+
+static inline
+struct qht_bucket *qht_map_to_bucket(struct qht_map *map, uint32_t hash)
+{
+ return &map->buckets[hash & (map->n_buckets - 1)];
+}
+
+/* acquire all bucket locks from a map */
+static void qht_map_lock_buckets(struct qht_map *map)
+{
+ size_t i;
+
+ for (i = 0; i < map->n_buckets; i++) {
+ struct qht_bucket *b = &map->buckets[i];
+
+ qemu_spin_lock(&b->lock);
+ }
+}
+
+static void qht_map_unlock_buckets(struct qht_map *map)
+{
+ size_t i;
+
+ for (i = 0; i < map->n_buckets; i++) {
+ struct qht_bucket *b = &map->buckets[i];
+
+ qemu_spin_unlock(&b->lock);
+ }
+}
+
+/*
+ * Call with at least a bucket lock held.
+ * @map should be the value read before acquiring the lock (or locks).
+ */
+static inline bool qht_map_is_stale__locked(struct qht *ht, struct qht_map *map)
+{
+ return map != ht->map;
+}
+
+/*
+ * Grab all bucket locks, and set @pmap after making sure the map isn't stale.
+ *
+ * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference.
+ *
+ * Note: callers cannot have ht->lock held.
+ */
+static inline
+void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
+{
+ struct qht_map *map;
+
+ map = atomic_rcu_read(&ht->map);
+ qht_map_lock_buckets(map);
+ if (likely(!qht_map_is_stale__locked(ht, map))) {
+ *pmap = map;
+ return;
+ }
+ qht_map_unlock_buckets(map);
+
+ /* we raced with a resize; acquire ht->lock to see the updated ht->map */
+ qemu_mutex_lock(&ht->lock);
+ map = ht->map;
+ qht_map_lock_buckets(map);
+ qemu_mutex_unlock(&ht->lock);
+ *pmap = map;
+ return;
+}
+
+/*
+ * Get a head bucket and lock it, making sure its parent map is not stale.
+ * @pmap is filled with a pointer to the bucket's parent map.
+ *
+ * Unlock with qemu_spin_unlock(&b->lock).
+ *
+ * Note: callers cannot have ht->lock held.
+ */
+static inline
+struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
+ struct qht_map **pmap)
+{
+ struct qht_bucket *b;
+ struct qht_map *map;
+
+ map = atomic_rcu_read(&ht->map);
+ b = qht_map_to_bucket(map, hash);
+
+ qemu_spin_lock(&b->lock);
+ if (likely(!qht_map_is_stale__locked(ht, map))) {
+ *pmap = map;
+ return b;
+ }
+ qemu_spin_unlock(&b->lock);
+
+ /* we raced with a resize; acquire ht->lock to see the updated ht->map */
+ qemu_mutex_lock(&ht->lock);
+ map = ht->map;
+ b = qht_map_to_bucket(map, hash);
+ qemu_spin_lock(&b->lock);
+ qemu_mutex_unlock(&ht->lock);
+ *pmap = map;
+ return b;
+}
+
+static inline bool qht_map_needs_resize(struct qht_map *map)
+{
+ return atomic_read(&map->n_added_buckets) > map->n_added_buckets_threshold;
+}
+
+static inline void qht_chain_destroy(struct qht_bucket *head)
+{
+ struct qht_bucket *curr = head->next;
+ struct qht_bucket *prev;
+
+ while (curr) {
+ prev = curr;
+ curr = curr->next;
+ qemu_vfree(prev);
+ }
+}
+
+/* pass only an orphan map */
+static void qht_map_destroy(struct qht_map *map)
+{
+ size_t i;
+
+ for (i = 0; i < map->n_buckets; i++) {
+ qht_chain_destroy(&map->buckets[i]);
+ }
+ qemu_vfree(map->buckets);
+ g_free(map);
+}
+
+static struct qht_map *qht_map_create(size_t n_buckets)
+{
+ struct qht_map *map;
+ size_t i;
+
+ map = g_malloc(sizeof(*map));
+ map->n_buckets = n_buckets;
+
+ map->n_added_buckets = 0;
+ map->n_added_buckets_threshold = n_buckets /
+ QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV;
+
+ /* let tiny hash tables add at least one non-head bucket */
+ if (unlikely(map->n_added_buckets_threshold == 0)) {
+ map->n_added_buckets_threshold = 1;
+ }
+
+ map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
+ sizeof(*map->buckets) * n_buckets);
+ for (i = 0; i < n_buckets; i++) {
+ qht_head_init(&map->buckets[i]);
+ }
+ return map;
+}
+
+void qht_init(struct qht *ht, size_t n_elems, unsigned int mode)
+{
+ struct qht_map *map;
+ size_t n_buckets = qht_elems_to_buckets(n_elems);
+
+ ht->mode = mode;
+ qemu_mutex_init(&ht->lock);
+ map = qht_map_create(n_buckets);
+ atomic_rcu_set(&ht->map, map);
+}
+
+/* call only when there are no readers/writers left */
+void qht_destroy(struct qht *ht)
+{
+ qht_map_destroy(ht->map);
+ memset(ht, 0, sizeof(*ht));
+}
+
+static void qht_bucket_reset__locked(struct qht_bucket *head)
+{
+ struct qht_bucket *b = head;
+ int i;
+
+ seqlock_write_begin(&head->sequence);
+ do {
+ for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+ if (b->pointers[i] == NULL) {
+ goto done;
+ }
+ b->hashes[i] = 0;
+ atomic_set(&b->pointers[i], NULL);
+ }
+ b = b->next;
+ } while (b);
+ done:
+ seqlock_write_end(&head->sequence);
+}
+
+/* call with all bucket locks held */
+static void qht_map_reset__all_locked(struct qht_map *map)
+{
+ size_t i;
+
+ for (i = 0; i < map->n_buckets; i++) {
+ qht_bucket_reset__locked(&map->buckets[i]);
+ }
+ qht_map_debug__all_locked(map);
+}
+
+void qht_reset(struct qht *ht)
+{
+ struct qht_map *map;
+
+ qht_map_lock_buckets__no_stale(ht, &map);
+ qht_map_reset__all_locked(map);
+ qht_map_unlock_buckets(map);
+}
+
+bool qht_reset_size(struct qht *ht, size_t n_elems)
+{
+ struct qht_map *new;
+ struct qht_map *map;
+ size_t n_buckets;
+ bool resize = false;
+
+ n_buckets = qht_elems_to_buckets(n_elems);
+
+ qemu_mutex_lock(&ht->lock);
+ map = ht->map;
+ if (n_buckets != map->n_buckets) {
+ new = qht_map_create(n_buckets);
+ resize = true;
+ }
+
+ qht_map_lock_buckets(map);
+ qht_map_reset__all_locked(map);
+ if (resize) {
+ qht_do_resize(ht, new);
+ }
+ qht_map_unlock_buckets(map);
+ qemu_mutex_unlock(&ht->lock);
+
+ return resize;
+}
+
+static inline
+void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func,
+ const void *userp, uint32_t hash)
+{
+ struct qht_bucket *b = head;
+ int i;
+
+ do {
+ for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+ if (b->hashes[i] == hash) {
+ void *p = atomic_read(&b->pointers[i]);
+
+ if (likely(p) && likely(func(p, userp))) {
+ return p;
+ }
+ }
+ }
+ b = atomic_rcu_read(&b->next);
+ } while (b);
+
+ return NULL;
+}
+
+static __attribute__((noinline))
+void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func,
+ const void *userp, uint32_t hash)
+{
+ unsigned int version;
+ void *ret;
+
+ do {
+ version = seqlock_read_begin(&b->sequence);
+ ret = qht_do_lookup(b, func, userp, hash);
+ } while (seqlock_read_retry(&b->sequence, version));
+ return ret;
+}
+
+void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp,
+ uint32_t hash)
+{
+ struct qht_bucket *b;
+ struct qht_map *map;
+ unsigned int version;
+ void *ret;
+
+ map = atomic_rcu_read(&ht->map);
+ b = qht_map_to_bucket(map, hash);
+
+ version = seqlock_read_begin(&b->sequence);
+ ret = qht_do_lookup(b, func, userp, hash);
+ if (likely(!seqlock_read_retry(&b->sequence, version))) {
+ return ret;
+ }
+ /*
+ * Removing the do/while from the fastpath gives a 4% perf. increase when
+ * running a 100%-lookup microbenchmark.
+ */
+ return qht_lookup__slowpath(b, func, userp, hash);
+}
+
+/* call with head->lock held */
+static bool qht_insert__locked(struct qht *ht, struct qht_map *map,
+ struct qht_bucket *head, void *p, uint32_t hash,
+ bool *needs_resize)
+{
+ struct qht_bucket *b = head;
+ struct qht_bucket *prev = NULL;
+ struct qht_bucket *new = NULL;
+ int i;
+
+ do {
+ for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+ if (b->pointers[i]) {
+ if (unlikely(b->pointers[i] == p)) {
+ return false;
+ }
+ } else {
+ goto found;
+ }
+ }
+ prev = b;
+ b = b->next;
+ } while (b);
+
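+ /* every bucket in the chain is full: append a new bucket */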
+ b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b));
+ memset(b, 0, sizeof(*b));
+ new = b;
+ i = 0;
+ atomic_inc(&map->n_added_buckets);
+ if (unlikely(qht_map_needs_resize(map)) && needs_resize) {
+ *needs_resize = true;
+ }
+
+ found:
+ /* found an empty entry: acquire the seqlock and write */
+ seqlock_write_begin(&head->sequence);
+ if (new) {
+ atomic_rcu_set(&prev->next, b);
+ }
+ b->hashes[i] = hash;
+ atomic_set(&b->pointers[i], p);
+ seqlock_write_end(&head->sequence);
+ return true;
+}
+
+static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
+{
+ struct qht_map *map;
+
+ /*
+ * If the lock is taken it probably means there's an ongoing resize,
+ * so bail out.
+ */
+ if (qemu_mutex_trylock(&ht->lock)) {
+ return;
+ }
+ map = ht->map;
+ /* another thread might have just performed the resize we were after */
+ if (qht_map_needs_resize(map)) {
+ struct qht_map *new = qht_map_create(map->n_buckets * 2);
+
+ qht_map_lock_buckets(map);
+ qht_do_resize(ht, new);
+ qht_map_unlock_buckets(map);
+ }
+ qemu_mutex_unlock(&ht->lock);
+}
+
+bool qht_insert(struct qht *ht, void *p, uint32_t hash)
+{
+ struct qht_bucket *b;
+ struct qht_map *map;
+ bool needs_resize = false;
+ bool ret;
+
+ /* NULL pointers are not supported */
+ qht_debug_assert(p);
+
+ b = qht_bucket_lock__no_stale(ht, hash, &map);
+ ret = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
+ qht_bucket_debug__locked(b);
+ qemu_spin_unlock(&b->lock);
+
+ if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
+ qht_grow_maybe(ht);
+ }
+ return ret;
+}
+
+static inline bool qht_entry_is_last(struct qht_bucket *b, int pos)
+{
+ if (pos == QHT_BUCKET_ENTRIES - 1) {
+ if (b->next == NULL) {
+ return true;
+ }
+ return b->next->pointers[0] == NULL;
+ }
+ return b->pointers[pos + 1] == NULL;
+}
+
+static void
+qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j)
+{
+ qht_debug_assert(!(to == from && i == j));
+ qht_debug_assert(to->pointers[i]);
+ qht_debug_assert(from->pointers[j]);
+
+ to->hashes[i] = from->hashes[j];
+ atomic_set(&to->pointers[i], from->pointers[j]);
+
+ from->hashes[j] = 0;
+ atomic_set(&from->pointers[j], NULL);
+}
+
+/*
+ * Find the last valid entry in the chain starting at @orig, and swap it with
+ * @orig[pos], which has just been invalidated.
+ */
+static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
+{
+ struct qht_bucket *b = orig;
+ struct qht_bucket *prev = NULL;
+ int i;
+
+ if (qht_entry_is_last(orig, pos)) {
+ orig->hashes[pos] = 0;
+ atomic_set(&orig->pointers[pos], NULL);
+ return;
+ }
+ do {
+ for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+ if (b->pointers[i]) {
+ continue;
+ }
+ if (i > 0) {
+ return qht_entry_move(orig, pos, b, i - 1);
+ }
+ qht_debug_assert(prev);
+ return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
+ }
+ prev = b;
+ b = b->next;
+ } while (b);
+ /* no free entries other than orig[pos], so swap it with the last one */
+ qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
+}
+
+/* call with b->lock held */
+static inline
+bool qht_remove__locked(struct qht_map *map, struct qht_bucket *head,
+ const void *p, uint32_t hash)
+{
+ struct qht_bucket *b = head;
+ int i;
+
+ do {
+ for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+ void *q = b->pointers[i];
+
+ if (unlikely(q == NULL)) {
+ return false;
+ }
+ if (q == p) {
+ qht_debug_assert(b->hashes[i] == hash);
+ seqlock_write_begin(&head->sequence);
+ qht_bucket_remove_entry(b, i);
+ seqlock_write_end(&head->sequence);
+ return true;
+ }
+ }
+ b = b->next;
+ } while (b);
+ return false;
+}
+
+bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
+{
+ struct qht_bucket *b;
+ struct qht_map *map;
+ bool ret;
+
+ /* NULL pointers are not supported */
+ qht_debug_assert(p);
+
+ b = qht_bucket_lock__no_stale(ht, hash, &map);
+ ret = qht_remove__locked(map, b, p, hash);
+ qht_bucket_debug__locked(b);
+ qemu_spin_unlock(&b->lock);
+ return ret;
+}
+
+static inline void qht_bucket_iter(struct qht *ht, struct qht_bucket *b,
+ qht_iter_func_t func, void *userp)
+{
+ int i;
+
+ do {
+ for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
+ if (b->pointers[i] == NULL) {
+ return;
+ }
+ func(ht, b->pointers[i], b->hashes[i], userp);
+ }
+ b = b->next;
+ } while (b);
+}
+
+/* call with all of the map's locks held */
+static inline void qht_map_iter__all_locked(struct qht *ht, struct qht_map *map,
+ qht_iter_func_t func, void *userp)
+{
+ size_t i;
+
+ for (i = 0; i < map->n_buckets; i++) {
+ qht_bucket_iter(ht, &map->buckets[i], func, userp);
+ }
+}
+
+void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp)
+{
+ struct qht_map *map;
+
+ map = atomic_rcu_read(&ht->map);
+ qht_map_lock_buckets(map);
+ /* Note: ht here is merely for carrying ht->mode; ht->map won't be read */
+ qht_map_iter__all_locked(ht, map, func, userp);
+ qht_map_unlock_buckets(map);
+}
+
+static void qht_map_copy(struct qht *ht, void *p, uint32_t hash, void *userp)
+{
+ struct qht_map *new = userp;
+ struct qht_bucket *b = qht_map_to_bucket(new, hash);
+
+ /* no need to acquire b->lock because no thread has seen this map yet */
+ qht_insert__locked(ht, new, b, p, hash, NULL);
+}
+
+/*
+ * Call with ht->lock and all bucket locks held.
+ *
+ * Creating the @new map here would add unnecessary delay while all the locks
+ * are held; holding the bucket locks is particularly bad, since no writes
+ * can occur while they are held. Thus, we let callers create the new map,
+ * hopefully without the bucket locks held.
+ */
+static void qht_do_resize(struct qht *ht, struct qht_map *new)
+{
+ struct qht_map *old;
+
+ old = ht->map;
+ g_assert_cmpuint(new->n_buckets, !=, old->n_buckets);
+
+ qht_map_iter__all_locked(ht, old, qht_map_copy, new);
+ qht_map_debug__all_locked(new);
+
+ atomic_rcu_set(&ht->map, new);
+ call_rcu(old, qht_map_destroy, rcu);
+}
+
+bool qht_resize(struct qht *ht, size_t n_elems)
+{
+ size_t n_buckets = qht_elems_to_buckets(n_elems);
+ bool ret = false;
+
+ qemu_mutex_lock(&ht->lock);
+ if (n_buckets != ht->map->n_buckets) {
+ struct qht_map *new;
+ struct qht_map *old = ht->map;
+
+ new = qht_map_create(n_buckets);
+ qht_map_lock_buckets(old);
+ qht_do_resize(ht, new);
+ qht_map_unlock_buckets(old);
+ ret = true;
+ }
+ qemu_mutex_unlock(&ht->lock);
+
+ return ret;
+}
+
+/* pass @stats to qht_statistics_destroy() when done */
+void qht_statistics_init(struct qht *ht, struct qht_stats *stats)
+{
+ struct qht_map *map;
+ int i;
+
+ map = atomic_rcu_read(&ht->map);
+
+ stats->head_buckets = map->n_buckets;
+ stats->used_head_buckets = 0;
+ stats->entries = 0;
+ qdist_init(&stats->chain);
+ qdist_init(&stats->occupancy);
+
+ for (i = 0; i < map->n_buckets; i++) {
+ struct qht_bucket *head = &map->buckets[i];
+ struct qht_bucket *b;
+ unsigned int version;
+ size_t buckets;
+ size_t entries;
+ int j;
+
+ do {
+ version = seqlock_read_begin(&head->sequence);
+ buckets = 0;
+ entries = 0;
+ b = head;
+ do {
+ for (j = 0; j < QHT_BUCKET_ENTRIES; j++) {
+ if (atomic_read(&b->pointers[j]) == NULL) {
+ break;
+ }
+ entries++;
+ }
+ buckets++;
+ b = atomic_rcu_read(&b->next);
+ } while (b);
+ } while (seqlock_read_retry(&head->sequence, version));
+
+ if (entries) {
+ qdist_inc(&stats->chain, buckets);
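+ /* occupancy: fraction of slots in use across the whole chain */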
+ qdist_inc(&stats->occupancy,
+ (double)entries / QHT_BUCKET_ENTRIES / buckets);
+ stats->used_head_buckets++;
+ stats->entries += entries;
+ } else {
+ qdist_inc(&stats->occupancy, 0);
+ }
+ }
+}
+
+void qht_statistics_destroy(struct qht_stats *stats)
+{
+ qdist_destroy(&stats->occupancy);
+ qdist_destroy(&stats->chain);
+}
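Finally, a minimal usage sketch of the qht API, mirroring how translate-all.c pairs tb_hash_func() with qht_insert() and qht_remove(). Again this is an editor's illustration, not part of the patch: struct item, item_cmp() and qht_example() are invented names, the hash below is a trivial stand-in for a real hash function, and the declarations are assumed to come from include/qemu/qht.h.

    #include "qemu/qht.h"
    #include <assert.h>

    struct item {
        uint32_t key;
        int value;
    };

    /* lookup callback: only called for entries whose stored hash matches */
    static bool item_cmp(const void *obj, const void *userp)
    {
        const struct item *it = obj;
        const uint32_t *key = userp;

        return it->key == *key;
    }

    static void qht_example(void)
    {
        struct qht ht;
        struct item it = { .key = 42, .value = 1 };
        uint32_t hash = it.key; /* stand-in for a real hash function */
        struct item *p;

        qht_init(&ht, 1 << 16, QHT_MODE_AUTO_RESIZE);
        qht_insert(&ht, &it, hash);
        p = qht_lookup(&ht, item_cmp, &it.key, hash);
        assert(p == &it);
        qht_remove(&ht, &it, hash);
        qht_destroy(&ht);
    }

Note that in production code lookups run inside an RCU read-side critical section, and removed objects may only be freed after a grace period has elapsed, as stated in the header comment above.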