diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2017-06-20 16:01:15 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2017-06-20 16:01:15 +0100 |
commit | 65a0e3e842df7f73ff2e4a61948f992a41e570a8 (patch) | |
tree | b1364b41d6f6b270c589433478297e2a91289af6 /include | |
parent | 7e56accdaf35234b69c33c85e4a44a5d56325e53 (diff) | |
parent | 5b50bf77ce6e773b04a303a2912876c9a1bfca43 (diff) | |
download | qemu-65a0e3e842df7f73ff2e4a61948f992a41e570a8.tar.gz |
Merge remote-tracking branch 'remotes/famz/tags/docker-and-block-pull-request' into staging
# gpg: Signature made Fri 16 Jun 2017 01:18:46 BST
# gpg: using RSA key 0xCA35624C6A9171C6
# gpg: Good signature from "Fam Zheng <famz@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg: It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 5003 7CB7 9706 0F76 F021 AD56 CA35 624C 6A91 71C6
* remotes/famz/tags/docker-and-block-pull-request: (23 commits)
block: make accounting thread-safe
block: split BlockAcctStats creation and setup
block: introduce block_account_one_io
block: protect modification of dirty bitmaps with a mutex
migration/block: reset dirty bitmap before reading
block: introduce dirty_bitmap_mutex
block: protect tracked_requests and flush_queue with reqs_lock
block: access write_gen with atomics
block: use Stat64 for wr_highest_offset
util: add stats64 module
throttle-groups: protect throttled requests with a CoMutex
throttle-groups: do not use qemu_co_enter_next
throttle-groups: only start one coroutine from drained_begin
block: access io_plugged with atomic ops
block: access wakeup with atomic ops
block: access serialising_in_flight with atomic ops
block: access io_limits_disabled with atomic ops
block: access quiesce_counter with atomic ops
block: access copy_on_read with atomic ops
docker: Add flex and bison to centos6 image
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/block/accounting.h | 11 | ||||
-rw-r--r-- | include/block/block.h | 5 | ||||
-rw-r--r-- | include/block/block_int.h | 61 | ||||
-rw-r--r-- | include/block/dirty-bitmap.h | 25 | ||||
-rw-r--r-- | include/qemu/stats64.h | 193 | ||||
-rw-r--r-- | include/sysemu/block-backend.h | 10 |
6 files changed, 266 insertions, 39 deletions
diff --git a/include/block/accounting.h b/include/block/accounting.h index 20891639d5..b833d26d6c 100644 --- a/include/block/accounting.h +++ b/include/block/accounting.h @@ -26,8 +26,10 @@ #define BLOCK_ACCOUNTING_H #include "qemu/timed-average.h" +#include "qemu/thread.h" typedef struct BlockAcctTimedStats BlockAcctTimedStats; +typedef struct BlockAcctStats BlockAcctStats; enum BlockAcctType { BLOCK_ACCT_READ, @@ -37,12 +39,14 @@ enum BlockAcctType { }; struct BlockAcctTimedStats { + BlockAcctStats *stats; TimedAverage latency[BLOCK_MAX_IOTYPE]; unsigned interval_length; /* in seconds */ QSLIST_ENTRY(BlockAcctTimedStats) entries; }; -typedef struct BlockAcctStats { +struct BlockAcctStats { + QemuMutex lock; uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; uint64_t nr_ops[BLOCK_MAX_IOTYPE]; uint64_t invalid_ops[BLOCK_MAX_IOTYPE]; @@ -53,7 +57,7 @@ typedef struct BlockAcctStats { QSLIST_HEAD(, BlockAcctTimedStats) intervals; bool account_invalid; bool account_failed; -} BlockAcctStats; +}; typedef struct BlockAcctCookie { int64_t bytes; @@ -61,7 +65,8 @@ typedef struct BlockAcctCookie { enum BlockAcctType type; } BlockAcctCookie; -void block_acct_init(BlockAcctStats *stats, bool account_invalid, +void block_acct_init(BlockAcctStats *stats); +void block_acct_setup(BlockAcctStats *stats, bool account_invalid, bool account_failed); void block_acct_cleanup(BlockAcctStats *stats); void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length); diff --git a/include/block/block.h b/include/block/block.h index 9b355e92d8..a4f09df95a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -402,7 +402,8 @@ void bdrv_drain_all(void); * block_job_defer_to_main_loop for how to do it). \ */ \ assert(!bs_->wakeup); \ - bs_->wakeup = true; \ + /* Set bs->wakeup before evaluating cond. */ \ + atomic_mb_set(&bs_->wakeup, true); \ while (busy_) { \ if ((cond)) { \ waited_ = busy_ = true; \ @@ -414,7 +415,7 @@ void bdrv_drain_all(void); waited_ |= busy_; \ } \ } \ - bs_->wakeup = false; \ + atomic_set(&bs_->wakeup, false); \ } \ waited_; }) diff --git a/include/block/block_int.h b/include/block/block_int.h index cb78c4fa82..748970055e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -29,6 +29,7 @@ #include "qemu/option.h" #include "qemu/queue.h" #include "qemu/coroutine.h" +#include "qemu/stats64.h" #include "qemu/timer.h" #include "qapi-types.h" #include "qemu/hbitmap.h" @@ -595,11 +596,6 @@ struct BlockDriverState { /* Protected by AioContext lock */ - /* If true, copy read backing sectors into image. Can be >1 if more - * than one client has requested copy-on-read. - */ - int copy_on_read; - /* If we are reading a disk image, give its size in sectors. * Generally read-only; it is written to by load_snapshot and * save_snaphost, but the block layer is quiescent during those. @@ -609,34 +605,57 @@ struct BlockDriverState { /* Callback before write request is processed */ NotifierWithReturnList before_write_notifiers; - /* number of in-flight requests; overall and serialising */ - unsigned int in_flight; - unsigned int serialising_in_flight; + /* threshold limit for writes, in bytes. "High water mark". */ + uint64_t write_threshold_offset; + NotifierWithReturn write_threshold_notifier; - bool wakeup; + /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. + * Reading from the list can be done with either the BQL or the + * dirty_bitmap_mutex. Modifying a bitmap only requires + * dirty_bitmap_mutex. */ + QemuMutex dirty_bitmap_mutex; + QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; /* Offset after the highest byte written to */ - uint64_t wr_highest_offset; + Stat64 wr_highest_offset; - /* threshold limit for writes, in bytes. "High water mark". */ - uint64_t write_threshold_offset; - NotifierWithReturn write_threshold_notifier; + /* If true, copy read backing sectors into image. Can be >1 if more + * than one client has requested copy-on-read. Accessed with atomic + * ops. + */ + int copy_on_read; - /* counter for nested bdrv_io_plug */ - unsigned io_plugged; + /* number of in-flight requests; overall and serialising. + * Accessed with atomic ops. + */ + unsigned int in_flight; + unsigned int serialising_in_flight; - QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; - CoQueue flush_queue; /* Serializing flush queue */ - bool active_flush_req; /* Flush request in flight? */ - unsigned int write_gen; /* Current data generation */ - unsigned int flushed_gen; /* Flushed write generation */ + /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic + * ops. + */ + bool wakeup; - QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; + /* counter for nested bdrv_io_plug. + * Accessed with atomic ops. + */ + unsigned io_plugged; /* do we need to tell the quest if we have a volatile write cache? */ int enable_write_cache; + /* Accessed with atomic ops. */ int quiesce_counter; + unsigned int write_gen; /* Current data generation */ + + /* Protected by reqs_lock. */ + CoMutex reqs_lock; + QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; + CoQueue flush_queue; /* Serializing flush queue */ + bool active_flush_req; /* Flush request in flight? */ + + /* Only read/written by whoever has set active_flush_req to true. */ + unsigned int flushed_gen; /* Flushed write generation */ }; struct BlockBackendRootState { diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 9dea14ba03..ad6558af56 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -36,8 +36,6 @@ bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap); DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); -int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, - int64_t sector); void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, @@ -45,6 +43,9 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector, int nb_sectors); +int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors); void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector, int nb_sectors); @@ -52,11 +53,6 @@ BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap); BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector); void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); -int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); -void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); -int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); -int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); -void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, uint64_t start, uint64_t count); @@ -72,4 +68,19 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, bool finish); void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap); +/* Functions that require manual locking. */ +void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap); +int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t sector); +void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int64_t nr_sectors); +void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int64_t nr_sectors); +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); +int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); + #endif diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h new file mode 100644 index 0000000000..4a357b3e9d --- /dev/null +++ b/include/qemu/stats64.h @@ -0,0 +1,193 @@ +/* + * Atomic operations on 64-bit quantities. + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_STATS64_H +#define QEMU_STATS64_H 1 + +#include "qemu/atomic.h" + +/* This provides atomic operations on 64-bit type, using a reader-writer + * spinlock on architectures that do not have 64-bit accesses. Even on + * those architectures, it tries hard not to take the lock. + */ + +typedef struct Stat64 { +#ifdef CONFIG_ATOMIC64 + uint64_t value; +#else + uint32_t low, high; + uint32_t lock; +#endif +} Stat64; + +#ifdef CONFIG_ATOMIC64 +static inline void stat64_init(Stat64 *s, uint64_t value) +{ + /* This is not guaranteed to be atomic! */ + *s = (Stat64) { value }; +} + +static inline uint64_t stat64_get(const Stat64 *s) +{ + return atomic_read__nocheck(&s->value); +} + +static inline void stat64_add(Stat64 *s, uint64_t value) +{ + atomic_add(&s->value, value); +} + +static inline void stat64_min(Stat64 *s, uint64_t value) +{ + uint64_t orig = atomic_read__nocheck(&s->value); + while (orig > value) { + orig = atomic_cmpxchg__nocheck(&s->value, orig, value); + } +} + +static inline void stat64_max(Stat64 *s, uint64_t value) +{ + uint64_t orig = atomic_read__nocheck(&s->value); + while (orig < value) { + orig = atomic_cmpxchg__nocheck(&s->value, orig, value); + } +} +#else +uint64_t stat64_get(const Stat64 *s); +bool stat64_min_slow(Stat64 *s, uint64_t value); +bool stat64_max_slow(Stat64 *s, uint64_t value); +bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high); + +static inline void stat64_init(Stat64 *s, uint64_t value) +{ + /* This is not guaranteed to be atomic! */ + *s = (Stat64) { .low = value, .high = value >> 32, .lock = 0 }; +} + +static inline void stat64_add(Stat64 *s, uint64_t value) +{ + uint32_t low, high; + high = value >> 32; + low = (uint32_t) value; + if (!low) { + if (high) { + atomic_add(&s->high, high); + } + return; + } + + for (;;) { + uint32_t orig = s->low; + uint32_t result = orig + low; + uint32_t old; + + if (result < low || high) { + /* If the high part is affected, take the lock. */ + if (stat64_add32_carry(s, low, high)) { + return; + } + continue; + } + + /* No carry, try with a 32-bit cmpxchg. The result is independent of + * the high 32 bits, so it can race just fine with stat64_add32_carry + * and even stat64_get! + */ + old = atomic_cmpxchg(&s->low, orig, result); + if (orig == old) { + return; + } + } +} + +static inline void stat64_min(Stat64 *s, uint64_t value) +{ + uint32_t low, high; + uint32_t orig_low, orig_high; + + high = value >> 32; + low = (uint32_t) value; + do { + orig_high = atomic_read(&s->high); + if (orig_high < high) { + return; + } + + if (orig_high == high) { + /* High 32 bits are equal. Read low after high, otherwise we + * can get a false positive (e.g. 0x1235,0x0000 changes to + * 0x1234,0x8000 and we read it as 0x1234,0x0000). Pairs with + * the write barrier in stat64_min_slow. + */ + smp_rmb(); + orig_low = atomic_read(&s->low); + if (orig_low <= low) { + return; + } + + /* See if we were lucky and a writer raced against us. The + * barrier is theoretically unnecessary, but if we remove it + * we may miss being lucky. + */ + smp_rmb(); + orig_high = atomic_read(&s->high); + if (orig_high < high) { + return; + } + } + + /* If the value changes in any way, we have to take the lock. */ + } while (!stat64_min_slow(s, value)); +} + +static inline void stat64_max(Stat64 *s, uint64_t value) +{ + uint32_t low, high; + uint32_t orig_low, orig_high; + + high = value >> 32; + low = (uint32_t) value; + do { + orig_high = atomic_read(&s->high); + if (orig_high > high) { + return; + } + + if (orig_high == high) { + /* High 32 bits are equal. Read low after high, otherwise we + * can get a false positive (e.g. 0x1234,0x8000 changes to + * 0x1235,0x0000 and we read it as 0x1235,0x8000). Pairs with + * the write barrier in stat64_max_slow. + */ + smp_rmb(); + orig_low = atomic_read(&s->low); + if (orig_low >= low) { + return; + } + + /* See if we were lucky and a writer raced against us. The + * barrier is theoretically unnecessary, but if we remove it + * we may miss being lucky. + */ + smp_rmb(); + orig_high = atomic_read(&s->high); + if (orig_high > high) { + return; + } + } + + /* If the value changes in any way, we have to take the lock. */ + } while (!stat64_max_slow(s, value)); +} + +#endif + +#endif diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 840ad6134c..999eb2333a 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -72,15 +72,13 @@ typedef struct BlockDevOps { * fields that must be public. This is in particular for QLIST_ENTRY() and * friends so that BlockBackends can be kept in lists outside block-backend.c */ typedef struct BlockBackendPublic { - /* I/O throttling has its own locking, but also some fields are - * protected by the AioContext lock. - */ - - /* Protected by AioContext lock. */ + /* throttled_reqs_lock protects the CoQueues for throttled requests. */ + CoMutex throttled_reqs_lock; CoQueue throttled_reqs[2]; /* Nonzero if the I/O limits are currently being ignored; generally - * it is zero. */ + * it is zero. Accessed with atomic operations. + */ unsigned int io_limits_disabled; /* The following fields are protected by the ThrottleGroup lock. |