summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2017-06-20 16:01:15 +0100
committerPeter Maydell <peter.maydell@linaro.org>2017-06-20 16:01:15 +0100
commit65a0e3e842df7f73ff2e4a61948f992a41e570a8 (patch)
treeb1364b41d6f6b270c589433478297e2a91289af6 /include
parent7e56accdaf35234b69c33c85e4a44a5d56325e53 (diff)
parent5b50bf77ce6e773b04a303a2912876c9a1bfca43 (diff)
downloadqemu-65a0e3e842df7f73ff2e4a61948f992a41e570a8.tar.gz
Merge remote-tracking branch 'remotes/famz/tags/docker-and-block-pull-request' into staging
# gpg: Signature made Fri 16 Jun 2017 01:18:46 BST # gpg: using RSA key 0xCA35624C6A9171C6 # gpg: Good signature from "Fam Zheng <famz@redhat.com>" # gpg: WARNING: This key is not certified with sufficiently trusted signatures! # gpg: It is not certain that the signature belongs to the owner. # Primary key fingerprint: 5003 7CB7 9706 0F76 F021 AD56 CA35 624C 6A91 71C6 * remotes/famz/tags/docker-and-block-pull-request: (23 commits) block: make accounting thread-safe block: split BlockAcctStats creation and setup block: introduce block_account_one_io block: protect modification of dirty bitmaps with a mutex migration/block: reset dirty bitmap before reading block: introduce dirty_bitmap_mutex block: protect tracked_requests and flush_queue with reqs_lock block: access write_gen with atomics block: use Stat64 for wr_highest_offset util: add stats64 module throttle-groups: protect throttled requests with a CoMutex throttle-groups: do not use qemu_co_enter_next throttle-groups: only start one coroutine from drained_begin block: access io_plugged with atomic ops block: access wakeup with atomic ops block: access serialising_in_flight with atomic ops block: access io_limits_disabled with atomic ops block: access quiesce_counter with atomic ops block: access copy_on_read with atomic ops docker: Add flex and bison to centos6 image ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'include')
-rw-r--r--include/block/accounting.h11
-rw-r--r--include/block/block.h5
-rw-r--r--include/block/block_int.h61
-rw-r--r--include/block/dirty-bitmap.h25
-rw-r--r--include/qemu/stats64.h193
-rw-r--r--include/sysemu/block-backend.h10
6 files changed, 266 insertions, 39 deletions
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 20891639d5..b833d26d6c 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -26,8 +26,10 @@
#define BLOCK_ACCOUNTING_H
#include "qemu/timed-average.h"
+#include "qemu/thread.h"
typedef struct BlockAcctTimedStats BlockAcctTimedStats;
+typedef struct BlockAcctStats BlockAcctStats;
enum BlockAcctType {
BLOCK_ACCT_READ,
@@ -37,12 +39,14 @@ enum BlockAcctType {
};
struct BlockAcctTimedStats {
+ BlockAcctStats *stats;
TimedAverage latency[BLOCK_MAX_IOTYPE];
unsigned interval_length; /* in seconds */
QSLIST_ENTRY(BlockAcctTimedStats) entries;
};
-typedef struct BlockAcctStats {
+struct BlockAcctStats {
+ QemuMutex lock;
uint64_t nr_bytes[BLOCK_MAX_IOTYPE];
uint64_t nr_ops[BLOCK_MAX_IOTYPE];
uint64_t invalid_ops[BLOCK_MAX_IOTYPE];
@@ -53,7 +57,7 @@ typedef struct BlockAcctStats {
QSLIST_HEAD(, BlockAcctTimedStats) intervals;
bool account_invalid;
bool account_failed;
-} BlockAcctStats;
+};
typedef struct BlockAcctCookie {
int64_t bytes;
@@ -61,7 +65,8 @@ typedef struct BlockAcctCookie {
enum BlockAcctType type;
} BlockAcctCookie;
-void block_acct_init(BlockAcctStats *stats, bool account_invalid,
+void block_acct_init(BlockAcctStats *stats);
+void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
bool account_failed);
void block_acct_cleanup(BlockAcctStats *stats);
void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length);
diff --git a/include/block/block.h b/include/block/block.h
index 9b355e92d8..a4f09df95a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -402,7 +402,8 @@ void bdrv_drain_all(void);
* block_job_defer_to_main_loop for how to do it). \
*/ \
assert(!bs_->wakeup); \
- bs_->wakeup = true; \
+ /* Set bs->wakeup before evaluating cond. */ \
+ atomic_mb_set(&bs_->wakeup, true); \
while (busy_) { \
if ((cond)) { \
waited_ = busy_ = true; \
@@ -414,7 +415,7 @@ void bdrv_drain_all(void);
waited_ |= busy_; \
} \
} \
- bs_->wakeup = false; \
+ atomic_set(&bs_->wakeup, false); \
} \
waited_; })
diff --git a/include/block/block_int.h b/include/block/block_int.h
index cb78c4fa82..748970055e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -29,6 +29,7 @@
#include "qemu/option.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
+#include "qemu/stats64.h"
#include "qemu/timer.h"
#include "qapi-types.h"
#include "qemu/hbitmap.h"
@@ -595,11 +596,6 @@ struct BlockDriverState {
/* Protected by AioContext lock */
- /* If true, copy read backing sectors into image. Can be >1 if more
- * than one client has requested copy-on-read.
- */
- int copy_on_read;
-
/* If we are reading a disk image, give its size in sectors.
* Generally read-only; it is written to by load_snapshot and
* save_snaphost, but the block layer is quiescent during those.
@@ -609,34 +605,57 @@ struct BlockDriverState {
/* Callback before write request is processed */
NotifierWithReturnList before_write_notifiers;
- /* number of in-flight requests; overall and serialising */
- unsigned int in_flight;
- unsigned int serialising_in_flight;
+ /* threshold limit for writes, in bytes. "High water mark". */
+ uint64_t write_threshold_offset;
+ NotifierWithReturn write_threshold_notifier;
- bool wakeup;
+ /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
+ * Reading from the list can be done with either the BQL or the
+ * dirty_bitmap_mutex. Modifying a bitmap only requires
+ * dirty_bitmap_mutex. */
+ QemuMutex dirty_bitmap_mutex;
+ QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
/* Offset after the highest byte written to */
- uint64_t wr_highest_offset;
+ Stat64 wr_highest_offset;
- /* threshold limit for writes, in bytes. "High water mark". */
- uint64_t write_threshold_offset;
- NotifierWithReturn write_threshold_notifier;
+ /* If true, copy read backing sectors into image. Can be >1 if more
+ * than one client has requested copy-on-read. Accessed with atomic
+ * ops.
+ */
+ int copy_on_read;
- /* counter for nested bdrv_io_plug */
- unsigned io_plugged;
+ /* number of in-flight requests; overall and serialising.
+ * Accessed with atomic ops.
+ */
+ unsigned int in_flight;
+ unsigned int serialising_in_flight;
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
- CoQueue flush_queue; /* Serializing flush queue */
- bool active_flush_req; /* Flush request in flight? */
- unsigned int write_gen; /* Current data generation */
- unsigned int flushed_gen; /* Flushed write generation */
+ /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic
+ * ops.
+ */
+ bool wakeup;
- QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
+ /* counter for nested bdrv_io_plug.
+ * Accessed with atomic ops.
+ */
+ unsigned io_plugged;
/* do we need to tell the quest if we have a volatile write cache? */
int enable_write_cache;
+ /* Accessed with atomic ops. */
int quiesce_counter;
+ unsigned int write_gen; /* Current data generation */
+
+ /* Protected by reqs_lock. */
+ CoMutex reqs_lock;
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+ CoQueue flush_queue; /* Serializing flush queue */
+ bool active_flush_req; /* Flush request in flight? */
+
+ /* Only read/written by whoever has set active_flush_req to true. */
+ unsigned int flushed_gen; /* Flushed write generation */
};
struct BlockBackendRootState {
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 9dea14ba03..ad6558af56 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -36,8 +36,6 @@ bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
- int64_t sector);
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int64_t nr_sectors);
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
@@ -45,6 +43,9 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors);
+int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, int64_t sector,
+ int nb_sectors);
void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors);
@@ -52,11 +53,6 @@ BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap);
BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
uint64_t first_sector);
void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter);
-int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
-void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
-int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
-void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
uint64_t start, uint64_t count);
@@ -72,4 +68,19 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
bool finish);
void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
+/* Functions that require manual locking. */
+void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap);
+int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t sector);
+void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int64_t nr_sectors);
+void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int64_t nr_sectors);
+int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
+void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
+int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
+
#endif
diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h
new file mode 100644
index 0000000000..4a357b3e9d
--- /dev/null
+++ b/include/qemu/stats64.h
@@ -0,0 +1,193 @@
+/*
+ * Atomic operations on 64-bit quantities.
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_STATS64_H
+#define QEMU_STATS64_H 1
+
+#include "qemu/atomic.h"
+
+/* This provides atomic operations on 64-bit type, using a reader-writer
+ * spinlock on architectures that do not have 64-bit accesses. Even on
+ * those architectures, it tries hard not to take the lock.
+ */
+
+typedef struct Stat64 {
+#ifdef CONFIG_ATOMIC64
+ uint64_t value;
+#else
+ uint32_t low, high;
+ uint32_t lock;
+#endif
+} Stat64;
+
+#ifdef CONFIG_ATOMIC64
+static inline void stat64_init(Stat64 *s, uint64_t value)
+{
+ /* This is not guaranteed to be atomic! */
+ *s = (Stat64) { value };
+}
+
+static inline uint64_t stat64_get(const Stat64 *s)
+{
+ return atomic_read__nocheck(&s->value);
+}
+
+static inline void stat64_add(Stat64 *s, uint64_t value)
+{
+ atomic_add(&s->value, value);
+}
+
+static inline void stat64_min(Stat64 *s, uint64_t value)
+{
+ uint64_t orig = atomic_read__nocheck(&s->value);
+ while (orig > value) {
+ orig = atomic_cmpxchg__nocheck(&s->value, orig, value);
+ }
+}
+
+static inline void stat64_max(Stat64 *s, uint64_t value)
+{
+ uint64_t orig = atomic_read__nocheck(&s->value);
+ while (orig < value) {
+ orig = atomic_cmpxchg__nocheck(&s->value, orig, value);
+ }
+}
+#else
+uint64_t stat64_get(const Stat64 *s);
+bool stat64_min_slow(Stat64 *s, uint64_t value);
+bool stat64_max_slow(Stat64 *s, uint64_t value);
+bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high);
+
+static inline void stat64_init(Stat64 *s, uint64_t value)
+{
+ /* This is not guaranteed to be atomic! */
+ *s = (Stat64) { .low = value, .high = value >> 32, .lock = 0 };
+}
+
+static inline void stat64_add(Stat64 *s, uint64_t value)
+{
+ uint32_t low, high;
+ high = value >> 32;
+ low = (uint32_t) value;
+ if (!low) {
+ if (high) {
+ atomic_add(&s->high, high);
+ }
+ return;
+ }
+
+ for (;;) {
+ uint32_t orig = s->low;
+ uint32_t result = orig + low;
+ uint32_t old;
+
+ if (result < low || high) {
+ /* If the high part is affected, take the lock. */
+ if (stat64_add32_carry(s, low, high)) {
+ return;
+ }
+ continue;
+ }
+
+ /* No carry, try with a 32-bit cmpxchg. The result is independent of
+ * the high 32 bits, so it can race just fine with stat64_add32_carry
+ * and even stat64_get!
+ */
+ old = atomic_cmpxchg(&s->low, orig, result);
+ if (orig == old) {
+ return;
+ }
+ }
+}
+
+static inline void stat64_min(Stat64 *s, uint64_t value)
+{
+ uint32_t low, high;
+ uint32_t orig_low, orig_high;
+
+ high = value >> 32;
+ low = (uint32_t) value;
+ do {
+ orig_high = atomic_read(&s->high);
+ if (orig_high < high) {
+ return;
+ }
+
+ if (orig_high == high) {
+ /* High 32 bits are equal. Read low after high, otherwise we
+ * can get a false positive (e.g. 0x1235,0x0000 changes to
+ * 0x1234,0x8000 and we read it as 0x1234,0x0000). Pairs with
+ * the write barrier in stat64_min_slow.
+ */
+ smp_rmb();
+ orig_low = atomic_read(&s->low);
+ if (orig_low <= low) {
+ return;
+ }
+
+ /* See if we were lucky and a writer raced against us. The
+ * barrier is theoretically unnecessary, but if we remove it
+ * we may miss being lucky.
+ */
+ smp_rmb();
+ orig_high = atomic_read(&s->high);
+ if (orig_high < high) {
+ return;
+ }
+ }
+
+ /* If the value changes in any way, we have to take the lock. */
+ } while (!stat64_min_slow(s, value));
+}
+
+static inline void stat64_max(Stat64 *s, uint64_t value)
+{
+ uint32_t low, high;
+ uint32_t orig_low, orig_high;
+
+ high = value >> 32;
+ low = (uint32_t) value;
+ do {
+ orig_high = atomic_read(&s->high);
+ if (orig_high > high) {
+ return;
+ }
+
+ if (orig_high == high) {
+ /* High 32 bits are equal. Read low after high, otherwise we
+ * can get a false positive (e.g. 0x1234,0x8000 changes to
+ * 0x1235,0x0000 and we read it as 0x1235,0x8000). Pairs with
+ * the write barrier in stat64_max_slow.
+ */
+ smp_rmb();
+ orig_low = atomic_read(&s->low);
+ if (orig_low >= low) {
+ return;
+ }
+
+ /* See if we were lucky and a writer raced against us. The
+ * barrier is theoretically unnecessary, but if we remove it
+ * we may miss being lucky.
+ */
+ smp_rmb();
+ orig_high = atomic_read(&s->high);
+ if (orig_high > high) {
+ return;
+ }
+ }
+
+ /* If the value changes in any way, we have to take the lock. */
+ } while (!stat64_max_slow(s, value));
+}
+
+#endif
+
+#endif
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 840ad6134c..999eb2333a 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -72,15 +72,13 @@ typedef struct BlockDevOps {
* fields that must be public. This is in particular for QLIST_ENTRY() and
* friends so that BlockBackends can be kept in lists outside block-backend.c */
typedef struct BlockBackendPublic {
- /* I/O throttling has its own locking, but also some fields are
- * protected by the AioContext lock.
- */
-
- /* Protected by AioContext lock. */
+ /* throttled_reqs_lock protects the CoQueues for throttled requests. */
+ CoMutex throttled_reqs_lock;
CoQueue throttled_reqs[2];
/* Nonzero if the I/O limits are currently being ignored; generally
- * it is zero. */
+ * it is zero. Accessed with atomic operations.
+ */
unsigned int io_limits_disabled;
/* The following fields are protected by the ThrottleGroup lock.