summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--audio/audio.c11
-rw-r--r--audio/audio.h5
-rw-r--r--audio/mixeng.c32
-rw-r--r--audio/sdlaudio.c48
-rw-r--r--block.c583
-rw-r--r--block/backup.c22
-rw-r--r--block/blkdebug.c2
-rwxr-xr-xblock/blkreplay.c1
-rw-r--r--block/blkverify.c1
-rw-r--r--block/block-backend.c116
-rw-r--r--block/bochs.c1
-rw-r--r--block/cloop.c1
-rw-r--r--block/commit.c176
-rw-r--r--block/crypto.c1
-rw-r--r--block/dmg.c1
-rw-r--r--block/io.c41
-rw-r--r--block/mirror.c237
-rw-r--r--block/parallels.c4
-rw-r--r--block/qcow.c4
-rw-r--r--block/qcow2.c19
-rw-r--r--block/qed.c4
-rw-r--r--block/quorum.c11
-rw-r--r--block/raw-format.c1
-rw-r--r--block/rbd.c553
-rw-r--r--block/replication.c3
-rw-r--r--block/sheepdog.c2
-rw-r--r--block/stream.c47
-rw-r--r--block/vdi.c4
-rw-r--r--block/vhdx.c4
-rw-r--r--block/vmdk.c7
-rw-r--r--block/vpc.c4
-rw-r--r--block/vvfat.c24
-rw-r--r--blockdev.c74
-rw-r--r--blockjob.c62
-rw-r--r--docs/mach-virt-graphical.cfg281
-rw-r--r--docs/mach-virt-serial.cfg243
-rw-r--r--docs/migration.txt71
-rw-r--r--docs/q35-chipset.cfg152
-rw-r--r--docs/q35-emulated.cfg288
-rw-r--r--docs/q35-virtio-graphical.cfg248
-rw-r--r--docs/q35-virtio-serial.cfg193
-rw-r--r--docs/replay.txt7
-rw-r--r--exec.c83
-rw-r--r--hmp.c33
-rw-r--r--hw/acpi/pcihp.c11
-rw-r--r--hw/acpi/piix4.c2
-rw-r--r--hw/block/block.c24
-rw-r--r--hw/block/fdc.c28
-rw-r--r--hw/block/m25p80.c8
-rw-r--r--hw/block/nand.c7
-rw-r--r--hw/block/nvme.c8
-rw-r--r--hw/block/onenand.c7
-rw-r--r--hw/block/pflash_cfi01.c18
-rw-r--r--hw/block/pflash_cfi02.c19
-rw-r--r--hw/block/virtio-blk.c8
-rw-r--r--hw/core/bus.c2
-rw-r--r--hw/core/ptimer.c8
-rw-r--r--hw/core/qdev-properties-system.c9
-rw-r--r--hw/core/qdev.c7
-rw-r--r--hw/i386/acpi-build.c4
-rw-r--r--hw/ide/core.c2
-rw-r--r--hw/ide/qdev.c9
-rw-r--r--hw/intc/xics.c461
-rw-r--r--hw/intc/xics_kvm.c184
-rw-r--r--hw/intc/xics_spapr.c128
-rw-r--r--hw/nvram/spapr_nvram.c8
-rw-r--r--hw/pci/pci.c28
-rw-r--r--hw/ppc/spapr.c196
-rw-r--r--hw/ppc/spapr_cpu_core.c24
-rw-r--r--hw/ppc/spapr_events.c10
-rw-r--r--hw/ppc/spapr_hcall.c89
-rw-r--r--hw/ppc/spapr_pci.c312
-rw-r--r--hw/ppc/spapr_vio.c2
-rw-r--r--hw/scsi/scsi-disk.c12
-rw-r--r--hw/sd/sd.c8
-rw-r--r--hw/usb/bus.c19
-rw-r--r--hw/usb/dev-storage.c22
-rw-r--r--hw/usb/dev-uas.c2
-rw-r--r--include/block/block.h46
-rw-r--r--include/block/block_int.h126
-rw-r--r--include/block/blockjob.h14
-rw-r--r--include/block/blockjob_int.h4
-rw-r--r--include/exec/cpu-common.h2
-rw-r--r--include/glib-compat.h21
-rw-r--r--include/hw/block/block.h8
-rw-r--r--include/hw/pci-host/spapr.h2
-rw-r--r--include/hw/pci/pci.h4
-rw-r--r--include/hw/pci/pci_ids.h112
-rw-r--r--include/hw/ppc/spapr.h5
-rw-r--r--include/hw/ppc/spapr_vio.h2
-rw-r--r--include/hw/ppc/xics.h97
-rw-r--r--include/hw/ptimer.h1
-rw-r--r--include/migration/migration.h6
-rw-r--r--include/migration/postcopy-ram.h13
-rw-r--r--include/migration/vmstate.h4
-rw-r--r--include/qemu-io.h1
-rw-r--r--include/qemu/timer.h5
-rw-r--r--include/sysemu/block-backend.h9
-rw-r--r--include/sysemu/replay.h7
-rw-r--r--migration/block.c21
-rw-r--r--migration/migration.c36
-rw-r--r--migration/postcopy-ram.c142
-rw-r--r--migration/ram.c109
-rw-r--r--migration/savevm.c38
-rw-r--r--migration/trace-events2
-rw-r--r--migration/vmstate.c97
-rw-r--r--monitor.c4
-rw-r--r--nbd/server.c16
-rw-r--r--pc-bios/bios-256k.binbin262144 -> 262144 bytes
-rw-r--r--pc-bios/bios.binbin131072 -> 131072 bytes
-rw-r--r--pc-bios/openbios-ppcbin750840 -> 750840 bytes
-rw-r--r--pc-bios/openbios-sparc32bin382048 -> 382048 bytes
-rw-r--r--pc-bios/openbios-sparc64bin1593408 -> 1593408 bytes
-rw-r--r--pc-bios/vgabios-cirrus.binbin38400 -> 38400 bytes
-rw-r--r--pc-bios/vgabios-qxl.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios-stdvga.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios-virtio.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios-vmware.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios.binbin38400 -> 38400 bytes
-rw-r--r--qapi-schema.json9
-rw-r--r--qapi/block-core.json78
-rw-r--r--qdev-monitor.c9
-rw-r--r--qemu-img-cmds.hx4
-rw-r--r--qemu-img.c334
-rw-r--r--qemu-img.texi16
-rw-r--r--qemu-io-cmds.c28
-rw-r--r--qtest.c1
-rw-r--r--replay/Makefile.objs1
-rw-r--r--replay/replay-audio.c79
-rw-r--r--replay/replay-internal.h4
m---------roms/openbios0
m---------roms/seabios0
-rwxr-xr-xscripts/vmstate-static-checker.py5
-rw-r--r--stubs/vmstate.c6
-rw-r--r--target/i386/cpu-qom.h8
-rw-r--r--target/i386/cpu.c455
-rw-r--r--target/i386/cpu.h2
-rw-r--r--target/ppc/Makefile.objs5
-rw-r--r--target/ppc/arch_dump.c154
-rw-r--r--target/ppc/cpu.c47
-rw-r--r--target/ppc/cpu.h50
-rw-r--r--target/ppc/int_helper.c34
-rw-r--r--target/ppc/kvm.c128
-rw-r--r--target/ppc/kvm_ppc.h20
-rw-r--r--target/ppc/machine.c5
-rw-r--r--target/ppc/misc_helper.c8
-rw-r--r--target/ppc/mmu-hash32.c14
-rw-r--r--target/ppc/mmu-hash32.h34
-rw-r--r--target/ppc/mmu-hash64.c193
-rw-r--r--target/ppc/mmu-hash64.h65
-rw-r--r--target/ppc/mmu_helper.c51
-rw-r--r--target/ppc/translate.c115
-rw-r--r--target/ppc/translate_init.c39
-rw-r--r--target/sparc/translate.c27
-rw-r--r--tcg/aarch64/tcg-target.inc.c4
-rw-r--r--tests/bios-tables-test.c2
-rw-r--r--tests/e1000-test.c1
-rw-r--r--tests/e1000e-test.c6
-rw-r--r--tests/eepro100-test.c1
-rw-r--r--tests/endianness-test.c3
-rw-r--r--tests/hd-geo-test.c53
-rw-r--r--tests/i440fx-test.c5
-rw-r--r--tests/ide-test.c12
-rw-r--r--tests/ipmi-bt-test.c1
-rw-r--r--tests/ipmi-kcs-test.c1
-rw-r--r--tests/libqos/usb.c6
-rw-r--r--tests/libqos/usb.h1
-rw-r--r--tests/libqos/virtio-pci.c38
-rw-r--r--tests/libqos/virtio-pci.h6
-rw-r--r--tests/libqtest.c10
-rw-r--r--tests/postcopy-test.c2
-rw-r--r--tests/ptimer-test-stubs.c5
-rw-r--r--tests/ptimer-test.c122
-rw-r--r--tests/pvpanic-test.c1
-rw-r--r--tests/q35-test.c3
-rw-r--r--tests/qemu-iotests/049.out14
-rw-r--r--tests/qemu-iotests/051.pc.out6
-rwxr-xr-xtests/qemu-iotests/05511
-rw-r--r--tests/qemu-iotests/085.out2
-rwxr-xr-xtests/qemu-iotests/1412
-rw-r--r--tests/qemu-iotests/141.out4
-rw-r--r--tests/qemu-iotests/172.out53
-rw-r--r--tests/tco-test.c35
-rw-r--r--tests/test-blockjob-txn.c6
-rw-r--r--tests/test-blockjob.c10
-rw-r--r--tests/test-filter-mirror.c2
-rw-r--r--tests/test-filter-redirector.c4
-rw-r--r--tests/test-io-channel-command.c6
-rw-r--r--tests/test-throttle.c7
-rw-r--r--tests/test-vmstate.c122
-rw-r--r--tests/usb-hcd-ehci-test.c19
-rw-r--r--tests/usb-hcd-uhci-test.c1
-rw-r--r--tests/vhost-user-test.c11
-rw-r--r--tests/virtio-9p-test.c2
-rw-r--r--tests/virtio-blk-test.c29
-rw-r--r--tests/virtio-scsi-test.c2
-rw-r--r--util/qemu-option.c2
-rw-r--r--util/qemu-timer.c5
198 files changed, 6697 insertions, 2314 deletions
diff --git a/audio/audio.c b/audio/audio.c
index c845a44f0a..c8898d8422 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -28,6 +28,7 @@
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "qemu/cutils.h"
+#include "sysemu/replay.h"
#define AUDIO_CAP "audio"
#include "audio_int.h"
@@ -1112,7 +1113,7 @@ static int audio_is_timer_needed (void)
static void audio_reset_timer (AudioState *s)
{
if (audio_is_timer_needed ()) {
- timer_mod (s->ts,
+ timer_mod_anticipate_ns(s->ts,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
}
else {
@@ -1387,6 +1388,7 @@ static void audio_run_out (AudioState *s)
prev_rpos = hw->rpos;
played = hw->pcm_ops->run_out (hw, live);
+ replay_audio_out(&played);
if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) {
dolog ("hw->rpos=%d hw->samples=%d played=%d\n",
hw->rpos, hw->samples, played);
@@ -1450,9 +1452,12 @@ static void audio_run_in (AudioState *s)
while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) {
SWVoiceIn *sw;
- int captured, min;
+ int captured = 0, min;
- captured = hw->pcm_ops->run_in (hw);
+ if (replay_mode != REPLAY_MODE_PLAY) {
+ captured = hw->pcm_ops->run_in(hw);
+ }
+ replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples);
min = audio_pcm_hw_find_min_in (hw);
hw->total_samples_captured += captured - min;
diff --git a/audio/audio.h b/audio/audio.h
index c3c51988f5..f4339a185e 100644
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -166,4 +166,9 @@ int wav_start_capture (CaptureState *s, const char *path, int freq,
bool audio_is_cleaning_up(void);
void audio_cleanup(void);
+void audio_sample_to_uint64(void *samples, int pos,
+ uint64_t *left, uint64_t *right);
+void audio_sample_from_uint64(void *samples, int pos,
+ uint64_t left, uint64_t right);
+
#endif /* QEMU_AUDIO_H */
diff --git a/audio/mixeng.c b/audio/mixeng.c
index 66c0328d42..0bf9b5360f 100644
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/bswap.h"
+#include "qemu/error-report.h"
#include "audio.h"
#define AUDIO_CAP "mixeng"
@@ -267,6 +268,37 @@ f_sample *mixeng_clip[2][2][2][3] = {
}
};
+
+void audio_sample_to_uint64(void *samples, int pos,
+ uint64_t *left, uint64_t *right)
+{
+ struct st_sample *sample = samples;
+ sample += pos;
+#ifdef FLOAT_MIXENG
+ error_report(
+ "Coreaudio and floating point samples are not supported by replay yet");
+ abort();
+#else
+ *left = sample->l;
+ *right = sample->r;
+#endif
+}
+
+void audio_sample_from_uint64(void *samples, int pos,
+ uint64_t left, uint64_t right)
+{
+ struct st_sample *sample = samples;
+ sample += pos;
+#ifdef FLOAT_MIXENG
+ error_report(
+ "Coreaudio and floating point samples are not supported by replay yet");
+ abort();
+#else
+ sample->l = left;
+ sample->r = right;
+#endif
+}
+
/*
* August 21, 1998
* Copyright 1998 Fabrice Bellard.
diff --git a/audio/sdlaudio.c b/audio/sdlaudio.c
index db69fe1416..e8d91d22af 100644
--- a/audio/sdlaudio.c
+++ b/audio/sdlaudio.c
@@ -38,10 +38,14 @@
#define AUDIO_CAP "sdl"
#include "audio_int.h"
+#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2)
+
typedef struct SDLVoiceOut {
HWVoiceOut hw;
int live;
+#if USE_SEMAPHORE
int rpos;
+#endif
int decr;
} SDLVoiceOut;
@@ -53,8 +57,10 @@ static struct {
static struct SDLAudioState {
int exit;
+#if USE_SEMAPHORE
SDL_mutex *mutex;
SDL_sem *sem;
+#endif
int initialized;
bool driver_created;
} glob_sdl;
@@ -73,31 +79,45 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...)
static int sdl_lock (SDLAudioState *s, const char *forfn)
{
+#if USE_SEMAPHORE
if (SDL_LockMutex (s->mutex)) {
sdl_logerr ("SDL_LockMutex for %s failed\n", forfn);
return -1;
}
+#else
+ SDL_LockAudio();
+#endif
+
return 0;
}
static int sdl_unlock (SDLAudioState *s, const char *forfn)
{
+#if USE_SEMAPHORE
if (SDL_UnlockMutex (s->mutex)) {
sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn);
return -1;
}
+#else
+ SDL_UnlockAudio();
+#endif
+
return 0;
}
static int sdl_post (SDLAudioState *s, const char *forfn)
{
+#if USE_SEMAPHORE
if (SDL_SemPost (s->sem)) {
sdl_logerr ("SDL_SemPost for %s failed\n", forfn);
return -1;
}
+#endif
+
return 0;
}
+#if USE_SEMAPHORE
static int sdl_wait (SDLAudioState *s, const char *forfn)
{
if (SDL_SemWait (s->sem)) {
@@ -106,6 +126,7 @@ static int sdl_wait (SDLAudioState *s, const char *forfn)
}
return 0;
}
+#endif
static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn)
{
@@ -246,6 +267,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
int to_mix, decr;
/* dolog ("in callback samples=%d\n", samples); */
+#if USE_SEMAPHORE
sdl_wait (s, "sdl_callback");
if (s->exit) {
return;
@@ -264,6 +286,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
if (!sdl->live) {
goto again;
}
+#else
+ if (s->exit || !sdl->live) {
+ break;
+ }
+#endif
/* dolog ("in callback live=%d\n", live); */
to_mix = audio_MIN (samples, sdl->live);
@@ -274,7 +301,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
/* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */
hw->clip (buf, src, chunk);
+#if USE_SEMAPHORE
sdl->rpos = (sdl->rpos + chunk) % hw->samples;
+#else
+ hw->rpos = (hw->rpos + chunk) % hw->samples;
+#endif
to_mix -= chunk;
buf += chunk << hw->info.shift;
}
@@ -282,12 +313,21 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
sdl->live -= decr;
sdl->decr += decr;
+#if USE_SEMAPHORE
again:
if (sdl_unlock (s, "sdl_callback")) {
return;
}
+#endif
}
/* dolog ("done len=%d\n", len); */
+
+#if (SDL_MAJOR_VERSION >= 2)
+ /* SDL2 does not clear the remaining buffer for us, so do it on our own */
+ if (samples) {
+ memset(buf, 0, samples << hw->info.shift);
+ }
+#endif
}
static int sdl_write_out (SWVoiceOut *sw, void *buf, int len)
@@ -315,8 +355,12 @@ static int sdl_run_out (HWVoiceOut *hw, int live)
decr = audio_MIN (sdl->decr, live);
sdl->decr -= decr;
+#if USE_SEMAPHORE
sdl->live = live - decr;
hw->rpos = sdl->rpos;
+#else
+ sdl->live = live;
+#endif
if (sdl->live > 0) {
sdl_unlock_and_post (s, "sdl_run_out");
@@ -405,6 +449,7 @@ static void *sdl_audio_init (void)
return NULL;
}
+#if USE_SEMAPHORE
s->mutex = SDL_CreateMutex ();
if (!s->mutex) {
sdl_logerr ("Failed to create SDL mutex\n");
@@ -419,6 +464,7 @@ static void *sdl_audio_init (void)
SDL_QuitSubSystem (SDL_INIT_AUDIO);
return NULL;
}
+#endif
s->driver_created = true;
return s;
@@ -428,8 +474,10 @@ static void sdl_audio_fini (void *opaque)
{
SDLAudioState *s = opaque;
sdl_close (s);
+#if USE_SEMAPHORE
SDL_DestroySemaphore (s->sem);
SDL_DestroyMutex (s->mutex);
+#endif
SDL_QuitSubSystem (SDL_INIT_AUDIO);
s->driver_created = false;
}
diff --git a/block.c b/block.c
index b663204f3f..f293ccb5af 100644
--- a/block.c
+++ b/block.c
@@ -707,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
return 0;
}
+static char *bdrv_child_get_parent_desc(BdrvChild *c)
+{
+ BlockDriverState *parent = c->opaque;
+ return g_strdup(bdrv_get_device_or_node_name(parent));
+}
+
static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
@@ -774,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
}
const BdrvChildRole child_file = {
+ .get_parent_desc = bdrv_child_get_parent_desc,
.inherit_options = bdrv_inherited_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
@@ -794,11 +801,63 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
}
const BdrvChildRole child_format = {
+ .get_parent_desc = bdrv_child_get_parent_desc,
.inherit_options = bdrv_inherited_fmt_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
};
+static void bdrv_backing_attach(BdrvChild *c)
+{
+ BlockDriverState *parent = c->opaque;
+ BlockDriverState *backing_hd = c->bs;
+
+ assert(!parent->backing_blocker);
+ error_setg(&parent->backing_blocker,
+ "node is used as backing hd of '%s'",
+ bdrv_get_device_or_node_name(parent));
+
+ parent->open_flags &= ~BDRV_O_NO_BACKING;
+ pstrcpy(parent->backing_file, sizeof(parent->backing_file),
+ backing_hd->filename);
+ pstrcpy(parent->backing_format, sizeof(parent->backing_format),
+ backing_hd->drv ? backing_hd->drv->format_name : "");
+
+ bdrv_op_block_all(backing_hd, parent->backing_blocker);
+ /* Otherwise we won't be able to commit or stream */
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
+ parent->backing_blocker);
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
+ parent->backing_blocker);
+ /*
+ * We do backup in 3 ways:
+ * 1. drive backup
+ * The target bs is new opened, and the source is top BDS
+ * 2. blockdev backup
+ * Both the source and the target are top BDSes.
+ * 3. internal backup(used for block replication)
+ * Both the source and the target are backing file
+ *
+ * In case 1 and 2, neither the source nor the target is the backing file.
+ * In case 3, we will block the top BDS, so there is only one block job
+ * for the top BDS and its backing chain.
+ */
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
+ parent->backing_blocker);
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
+ parent->backing_blocker);
+}
+
+static void bdrv_backing_detach(BdrvChild *c)
+{
+ BlockDriverState *parent = c->opaque;
+
+ assert(parent->backing_blocker);
+ bdrv_op_unblock_all(c->bs, parent->backing_blocker);
+ error_free(parent->backing_blocker);
+ parent->backing_blocker = NULL;
+}
+
/*
* Returns the options and flags that bs->backing should get, based on the
* given options and flags for the parent BDS
@@ -823,7 +882,10 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
*child_flags = flags;
}
-static const BdrvChildRole child_backing = {
+const BdrvChildRole child_backing = {
+ .get_parent_desc = bdrv_child_get_parent_desc,
+ .attach = bdrv_backing_attach,
+ .detach = bdrv_backing_detach,
.inherit_options = bdrv_backing_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
@@ -1326,15 +1388,352 @@ static int bdrv_fill_options(QDict **options, const char *filename,
return 0;
}
-static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
+/*
+ * Check whether permissions on this node can be changed in a way that
+ * @cumulative_perms and @cumulative_shared_perms are the new cumulative
+ * permissions of all its parents. This involves checking whether all necessary
+ * permission changes to child nodes can be performed.
+ *
+ * A call to this function must always be followed by a call to bdrv_set_perm()
+ * or bdrv_abort_perm_update().
+ */
+static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms,
+ uint64_t cumulative_shared_perms, Error **errp)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvChild *c;
+ int ret;
+
+ /* Write permissions never work with read-only images */
+ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
+ bdrv_is_read_only(bs))
+ {
+ error_setg(errp, "Block node is read-only");
+ return -EPERM;
+ }
+
+ /* Check this node */
+ if (!drv) {
+ return 0;
+ }
+
+ if (drv->bdrv_check_perm) {
+ return drv->bdrv_check_perm(bs, cumulative_perms,
+ cumulative_shared_perms, errp);
+ }
+
+ /* Drivers that never have children can omit .bdrv_child_perm() */
+ if (!drv->bdrv_child_perm) {
+ assert(QLIST_EMPTY(&bs->children));
+ return 0;
+ }
+
+ /* Check all children */
+ QLIST_FOREACH(c, &bs->children, next) {
+ uint64_t cur_perm, cur_shared;
+ drv->bdrv_child_perm(bs, c, c->role,
+ cumulative_perms, cumulative_shared_perms,
+ &cur_perm, &cur_shared);
+ ret = bdrv_child_check_perm(c, cur_perm, cur_shared, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Notifies drivers that after a previous bdrv_check_perm() call, the
+ * permission update is not performed and any preparations made for it (e.g.
+ * taken file locks) need to be undone.
+ *
+ * This function recursively notifies all child nodes.
+ */
+static void bdrv_abort_perm_update(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvChild *c;
+
+ if (!drv) {
+ return;
+ }
+
+ if (drv->bdrv_abort_perm_update) {
+ drv->bdrv_abort_perm_update(bs);
+ }
+
+ QLIST_FOREACH(c, &bs->children, next) {
+ bdrv_child_abort_perm_update(c);
+ }
+}
+
+static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
+ uint64_t cumulative_shared_perms)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvChild *c;
+
+ if (!drv) {
+ return;
+ }
+
+ /* Update this node */
+ if (drv->bdrv_set_perm) {
+ drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
+ }
+
+ /* Drivers that never have children can omit .bdrv_child_perm() */
+ if (!drv->bdrv_child_perm) {
+ assert(QLIST_EMPTY(&bs->children));
+ return;
+ }
+
+ /* Update all children */
+ QLIST_FOREACH(c, &bs->children, next) {
+ uint64_t cur_perm, cur_shared;
+ drv->bdrv_child_perm(bs, c, c->role,
+ cumulative_perms, cumulative_shared_perms,
+ &cur_perm, &cur_shared);
+ bdrv_child_set_perm(c, cur_perm, cur_shared);
+ }
+}
+
+static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
+ uint64_t *shared_perm)
+{
+ BdrvChild *c;
+ uint64_t cumulative_perms = 0;
+ uint64_t cumulative_shared_perms = BLK_PERM_ALL;
+
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ cumulative_perms |= c->perm;
+ cumulative_shared_perms &= c->shared_perm;
+ }
+
+ *perm = cumulative_perms;
+ *shared_perm = cumulative_shared_perms;
+}
+
+static char *bdrv_child_user_desc(BdrvChild *c)
+{
+ if (c->role->get_parent_desc) {
+ return c->role->get_parent_desc(c);
+ }
+
+ return g_strdup("another user");
+}
+
+static char *bdrv_perm_names(uint64_t perm)
+{
+ struct perm_name {
+ uint64_t perm;
+ const char *name;
+ } permissions[] = {
+ { BLK_PERM_CONSISTENT_READ, "consistent read" },
+ { BLK_PERM_WRITE, "write" },
+ { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
+ { BLK_PERM_RESIZE, "resize" },
+ { BLK_PERM_GRAPH_MOD, "change children" },
+ { 0, NULL }
+ };
+
+ char *result = g_strdup("");
+ struct perm_name *p;
+
+ for (p = permissions; p->name; p++) {
+ if (perm & p->perm) {
+ char *old = result;
+ result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name);
+ g_free(old);
+ }
+ }
+
+ return result;
+}
+
+/*
+ * Checks whether a new reference to @bs can be added if the new user requires
+ * @new_used_perm/@new_shared_perm as its permissions. If @ignore_child is set,
+ * this old reference is ignored in the calculations; this allows checking
+ * permission updates for an existing reference.
+ *
+ * Needs to be followed by a call to either bdrv_set_perm() or
+ * bdrv_abort_perm_update(). */
+static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm,
+ uint64_t new_shared_perm,
+ BdrvChild *ignore_child, Error **errp)
+{
+ BdrvChild *c;
+ uint64_t cumulative_perms = new_used_perm;
+ uint64_t cumulative_shared_perms = new_shared_perm;
+
+ /* There is no reason why anyone couldn't tolerate write_unchanged */
+ assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
+
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c == ignore_child) {
+ continue;
+ }
+
+ if ((new_used_perm & c->shared_perm) != new_used_perm) {
+ char *user = bdrv_child_user_desc(c);
+ char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
+ error_setg(errp, "Conflicts with use by %s as '%s', which does not "
+ "allow '%s' on %s",
+ user, c->name, perm_names, bdrv_get_node_name(c->bs));
+ g_free(user);
+ g_free(perm_names);
+ return -EPERM;
+ }
+
+ if ((c->perm & new_shared_perm) != c->perm) {
+ char *user = bdrv_child_user_desc(c);
+ char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
+ error_setg(errp, "Conflicts with use by %s as '%s', which uses "
+ "'%s' on %s",
+ user, c->name, perm_names, bdrv_get_node_name(c->bs));
+ g_free(user);
+ g_free(perm_names);
+ return -EPERM;
+ }
+
+ cumulative_perms |= c->perm;
+ cumulative_shared_perms &= c->shared_perm;
+ }
+
+ return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, errp);
+}
+
+/* Needs to be followed by a call to either bdrv_child_set_perm() or
+ * bdrv_child_abort_perm_update(). */
+int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp)
+{
+ return bdrv_check_update_perm(c->bs, perm, shared, c, errp);
+}
+
+void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
+{
+ uint64_t cumulative_perms, cumulative_shared_perms;
+
+ c->perm = perm;
+ c->shared_perm = shared;
+
+ bdrv_get_cumulative_perm(c->bs, &cumulative_perms,
+ &cumulative_shared_perms);
+ bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
+}
+
+void bdrv_child_abort_perm_update(BdrvChild *c)
+{
+ bdrv_abort_perm_update(c->bs);
+}
+
+int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp)
+{
+ int ret;
+
+ ret = bdrv_child_check_perm(c, perm, shared, errp);
+ if (ret < 0) {
+ bdrv_child_abort_perm_update(c);
+ return ret;
+ }
+
+ bdrv_child_set_perm(c, perm, shared);
+
+ return 0;
+}
+
+#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
+ | BLK_PERM_WRITE \
+ | BLK_PERM_WRITE_UNCHANGED \
+ | BLK_PERM_RESIZE)
+#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
+
+void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ if (c == NULL) {
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+ *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
+ return;
+ }
+
+ *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
+ (c->perm & DEFAULT_PERM_UNCHANGED);
+ *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
+ (c->shared_perm & DEFAULT_PERM_UNCHANGED);
+}
+
+void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ bool backing = (role == &child_backing);
+ assert(role == &child_backing || role == &child_file);
+
+ if (!backing) {
+ /* Apart from the modifications below, the same permissions are
+ * forwarded and left alone as for filters */
+ bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared);
+
+ /* Format drivers may touch metadata even if the guest doesn't write */
+ if (!bdrv_is_read_only(bs)) {
+ perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
+ }
+
+ /* bs->file always needs to be consistent because of the metadata. We
+ * can never allow other users to resize or write to it. */
+ perm |= BLK_PERM_CONSISTENT_READ;
+ shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+ } else {
+ /* We want consistent read from backing files if the parent needs it.
+ * No other operations are performed on backing files. */
+ perm &= BLK_PERM_CONSISTENT_READ;
+
+ /* If the parent can deal with changing data, we're okay with a
+ * writable and resizable backing file. */
+ /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */
+ if (shared & BLK_PERM_WRITE) {
+ shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
+ } else {
+ shared = 0;
+ }
+
+ shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
+ BLK_PERM_WRITE_UNCHANGED;
+ }
+
+ *nperm = perm;
+ *nshared = shared;
+}
+
+static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs,
+ bool check_new_perm)
{
BlockDriverState *old_bs = child->bs;
+ uint64_t perm, shared_perm;
if (old_bs) {
if (old_bs->quiesce_counter && child->role->drained_end) {
child->role->drained_end(child);
}
+ if (child->role->detach) {
+ child->role->detach(child);
+ }
QLIST_REMOVE(child, next_parent);
+
+ /* Update permissions for old node. This is guaranteed to succeed
+ * because we're just taking a parent away, so we're loosening
+ * restrictions. */
+ bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
+ bdrv_check_perm(old_bs, perm, shared_perm, &error_abort);
+ bdrv_set_perm(old_bs, perm, shared_perm);
}
child->bs = new_bs;
@@ -1344,23 +1743,46 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
if (new_bs->quiesce_counter && child->role->drained_begin) {
child->role->drained_begin(child);
}
+
+ bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm);
+ if (check_new_perm) {
+ bdrv_check_perm(new_bs, perm, shared_perm, &error_abort);
+ }
+ bdrv_set_perm(new_bs, perm, shared_perm);
+
+ if (child->role->attach) {
+ child->role->attach(child);
+ }
}
}
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role,
- void *opaque)
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, Error **errp)
{
- BdrvChild *child = g_new(BdrvChild, 1);
+ BdrvChild *child;
+ int ret;
+
+ ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp);
+ if (ret < 0) {
+ bdrv_abort_perm_update(child_bs);
+ return NULL;
+ }
+
+ child = g_new(BdrvChild, 1);
*child = (BdrvChild) {
- .bs = NULL,
- .name = g_strdup(child_name),
- .role = child_role,
- .opaque = opaque,
+ .bs = NULL,
+ .name = g_strdup(child_name),
+ .role = child_role,
+ .perm = perm,
+ .shared_perm = shared_perm,
+ .opaque = opaque,
};
- bdrv_replace_child(child, child_bs);
+ /* This performs the matching bdrv_set_perm() for the above check. */
+ bdrv_replace_child(child, child_bs, false);
return child;
}
@@ -1368,10 +1790,24 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
- const BdrvChildRole *child_role)
+ const BdrvChildRole *child_role,
+ Error **errp)
{
- BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role,
- parent_bs);
+ BdrvChild *child;
+ uint64_t perm, shared_perm;
+
+ bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
+
+ assert(parent_bs->drv);
+ parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
+ perm, shared_perm, &perm, &shared_perm);
+
+ child = bdrv_root_attach_child(child_bs, child_name, child_role,
+ perm, shared_perm, parent_bs, errp);
+ if (child == NULL) {
+ return NULL;
+ }
+
QLIST_INSERT_HEAD(&parent_bs->children, child, next);
return child;
}
@@ -1383,7 +1819,7 @@ static void bdrv_detach_child(BdrvChild *child)
child->next.le_prev = NULL;
}
- bdrv_replace_child(child, NULL);
+ bdrv_replace_child(child, NULL, false);
g_free(child->name);
g_free(child);
@@ -1447,57 +1883,28 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
* Sets the backing file link of a BDS. A new reference is created; callers
* which don't need their own reference any more must call bdrv_unref().
*/
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp)
{
if (backing_hd) {
bdrv_ref(backing_hd);
}
if (bs->backing) {
- assert(bs->backing_blocker);
- bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
bdrv_unref_child(bs, bs->backing);
- } else if (backing_hd) {
- error_setg(&bs->backing_blocker,
- "node is used as backing hd of '%s'",
- bdrv_get_device_or_node_name(bs));
}
if (!backing_hd) {
- error_free(bs->backing_blocker);
- bs->backing_blocker = NULL;
bs->backing = NULL;
goto out;
}
- bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
- bs->open_flags &= ~BDRV_O_NO_BACKING;
- pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
- pstrcpy(bs->backing_format, sizeof(bs->backing_format),
- backing_hd->drv ? backing_hd->drv->format_name : "");
- bdrv_op_block_all(backing_hd, bs->backing_blocker);
- /* Otherwise we won't be able to commit or stream */
- bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
- bs->backing_blocker);
- bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
- bs->backing_blocker);
- /*
- * We do backup in 3 ways:
- * 1. drive backup
- * The target bs is new opened, and the source is top BDS
- * 2. blockdev backup
- * Both the source and the target are top BDSes.
- * 3. internal backup(used for block replication)
- * Both the source and the target are backing file
- *
- * In case 1 and 2, neither the source nor the target is the backing file.
- * In case 3, we will block the top BDS, so there is only one block job
- * for the top BDS and its backing chain.
- */
- bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
- bs->backing_blocker);
- bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
- bs->backing_blocker);
+ bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing,
+ errp);
+ if (!bs->backing) {
+ bdrv_unref(backing_hd);
+ }
+
out:
bdrv_refresh_limits(bs, NULL);
}
@@ -1580,8 +1987,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
/* Hook up the backing file link; drop our reference, bs owns the
* backing_hd reference now */
- bdrv_set_backing_hd(bs, backing_hd);
+ bdrv_set_backing_hd(bs, backing_hd, &local_err);
bdrv_unref(backing_hd);
+ if (local_err) {
+ ret = -EINVAL;
+ goto free_exit;
+ }
qdict_del(parent_options, bdref_key);
@@ -1648,6 +2059,7 @@ BdrvChild *bdrv_open_child(const char *filename,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
+ BdrvChild *c;
BlockDriverState *bs;
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@@ -1656,7 +2068,13 @@ BdrvChild *bdrv_open_child(const char *filename,
return NULL;
}
- return bdrv_attach_child(parent, bs, bdref_key, child_role);
+ c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
+ if (!c) {
+ bdrv_unref(bs);
+ return NULL;
+ }
+
+ return c;
}
static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
@@ -1669,6 +2087,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
int64_t total_size;
QemuOpts *opts = NULL;
BlockDriverState *bs_snapshot;
+ Error *local_err = NULL;
int ret;
/* if snapshot, we create a temporary backing file and open it
@@ -1718,7 +2137,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
* call bdrv_unref() on it), so in order to be able to return one, we have
* to increase bs_snapshot's refcount here */
bdrv_ref(bs_snapshot);
- bdrv_append(bs_snapshot, bs);
+ bdrv_append(bs_snapshot, bs, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto out;
+ }
g_free(tmp_filename);
return bs_snapshot;
@@ -1862,9 +2286,12 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
goto fail;
}
if (file_bs != NULL) {
- file = blk_new();
- blk_insert_bs(file, file_bs);
+ file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+ blk_insert_bs(file, file_bs, &local_err);
bdrv_unref(file_bs);
+ if (local_err) {
+ goto fail;
+ }
qdict_put(options, "file",
qstring_from_str(bdrv_get_node_name(file_bs)));
@@ -2405,7 +2832,7 @@ static void bdrv_close(BlockDriverState *bs)
bs->drv->bdrv_close(bs);
bs->drv = NULL;
- bdrv_set_backing_hd(bs, NULL);
+ bdrv_set_backing_hd(bs, NULL, &error_abort);
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
@@ -2465,10 +2892,13 @@ static void change_parent_backing_link(BlockDriverState *from,
BdrvChild *c, *next, *to_c;
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+ if (c->role->stay_at_node) {
+ continue;
+ }
if (c->role == &child_backing) {
- /* @from is generally not allowed to be a backing file, except for
- * when @to is the overlay. In that case, @from may not be replaced
- * by @to as @to's backing node. */
+ /* If @from is a backing file of @to, ignore the child to avoid
+ * creating a loop. We only want to change the pointer of other
+ * parents. */
QLIST_FOREACH(to_c, &to->children, next) {
if (to_c == c) {
break;
@@ -2479,9 +2909,10 @@ static void change_parent_backing_link(BlockDriverState *from,
}
}
- assert(c->role != &child_backing);
bdrv_ref(to);
- bdrv_replace_child(c, to);
+ /* FIXME Are we sure that bdrv_replace_child() can't run into
+ * &error_abort because of permissions? */
+ bdrv_replace_child(c, to, true);
bdrv_unref(from);
}
}
@@ -2502,19 +2933,25 @@ static void change_parent_backing_link(BlockDriverState *from,
* parents of bs_top after bdrv_append() returns. If the caller needs to keep a
* reference of its own, it must call bdrv_ref().
*/
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+ Error **errp)
{
- assert(!bdrv_requests_pending(bs_top));
- assert(!bdrv_requests_pending(bs_new));
+ Error *local_err = NULL;
- bdrv_ref(bs_top);
+ assert(!atomic_read(&bs_top->in_flight));
+ assert(!atomic_read(&bs_new->in_flight));
+
+ bdrv_set_backing_hd(bs_new, bs_top, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto out;
+ }
change_parent_backing_link(bs_top, bs_new);
- bdrv_set_backing_hd(bs_new, bs_top);
- bdrv_unref(bs_top);
/* bs_new is now referenced by its new parents, we don't need the
* additional reference any more. */
+out:
bdrv_unref(bs_new);
}
@@ -2658,6 +3095,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
BlockDriverState *base, const char *backing_file_str)
{
BlockDriverState *new_top_bs = NULL;
+ Error *local_err = NULL;
int ret = -EIO;
if (!top->drv || !base->drv) {
@@ -2690,7 +3128,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
if (ret) {
goto exit;
}
- bdrv_set_backing_hd(new_top_bs, base);
+
+ bdrv_set_backing_hd(new_top_bs, base, &local_err);
+ if (local_err) {
+ ret = -EPERM;
+ error_report_err(local_err);
+ goto exit;
+ }
ret = 0;
exit:
@@ -2705,6 +3149,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset)
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
int ret;
+
+ assert(child->perm & BLK_PERM_RESIZE);
+
if (!drv)
return -ENOMEDIUM;
if (!drv->bdrv_truncate)
diff --git a/block/backup.c b/block/backup.c
index fe010e78e3..d1ab617c7e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -618,14 +618,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
- job = block_job_create(job_id, &backup_job_driver, bs, speed,
- creation_flags, cb, opaque, errp);
+ /* job->common.len is fixed, so we can't allow resize */
+ job = block_job_create(job_id, &backup_job_driver, bs,
+ BLK_PERM_CONSISTENT_READ,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
+ speed, creation_flags, cb, opaque, errp);
if (!job) {
goto error;
}
- job->target = blk_new();
- blk_insert_bs(job->target, target);
+ /* The target must match the source in size, so no resize here either */
+ job->target = blk_new(BLK_PERM_WRITE,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
+ ret = blk_insert_bs(job->target, target, errp);
+ if (ret < 0) {
+ goto error;
+ }
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
@@ -652,7 +662,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
- block_job_add_bdrv(&job->common, target);
+ /* Required permissions are already taken with target's blk_new() */
+ block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
+ &error_abort);
job->common.len = len;
block_job_txn_add_job(txn, &job->common);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 6117ce5fca..67e8024e36 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
.bdrv_reopen_prepare = blkdebug_reopen_prepare,
+ .bdrv_child_perm = bdrv_filter_default_perms,
+
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,
diff --git a/block/blkreplay.c b/block/blkreplay.c
index cfc8c5be02..e1102119fb 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = {
.bdrv_file_open = blkreplay_open,
.bdrv_close = blkreplay_close,
+ .bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkreplay_getlength,
.bdrv_co_preadv = blkreplay_co_preadv,
diff --git a/block/blkverify.c b/block/blkverify.c
index 43a940c2f5..9a1e21c6ad 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = {
.bdrv_parse_filename = blkverify_parse_filename,
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
+ .bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkverify_getlength,
.bdrv_refresh_filename = blkverify_refresh_filename,
diff --git a/block/block-backend.c b/block/block-backend.c
index 492e71e41f..daa7908d01 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -59,6 +59,9 @@ struct BlockBackend {
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
+ uint64_t perm;
+ uint64_t shared_perm;
+
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
@@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
+static char *blk_get_attached_dev_id(BlockBackend *blk);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
+static char *blk_root_get_parent_desc(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+ char *dev_id;
+
+ if (blk->name) {
+ return g_strdup(blk->name);
+ }
+
+ dev_id = blk_get_attached_dev_id(blk);
+ if (*dev_id) {
+ return dev_id;
+ } else {
+ /* TODO Callback into the BB owner for something more detailed */
+ g_free(dev_id);
+ return g_strdup("a block device");
+ }
+}
+
static const char *blk_root_get_name(BdrvChild *child)
{
return blk_name(child->opaque);
@@ -110,6 +133,7 @@ static const BdrvChildRole child_root = {
.change_media = blk_root_change_media,
.resize = blk_root_resize,
.get_name = blk_root_get_name,
+ .get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
.drained_end = blk_root_drained_end,
@@ -117,15 +141,23 @@ static const BdrvChildRole child_root = {
/*
* Create a new BlockBackend with a reference count of one.
- * Store an error through @errp on failure, unless it's null.
+ *
+ * @perm is a bitmask of BLK_PERM_* constants which describes the permissions
+ * to request for a block driver node that is attached to this BlockBackend.
+ * @shared_perm is a bitmask which describes which permissions may be granted
+ * to other users of the attached node.
+ * Both sets of permissions can be changed later using blk_set_perm().
+ *
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(void)
+BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
+ blk->perm = perm;
+ blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
@@ -155,15 +187,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
{
BlockBackend *blk;
BlockDriverState *bs;
+ uint64_t perm;
+
+ /* blk_new_open() is mainly used in .bdrv_create implementations and the
+ * tools where sharing isn't a concern because the BDS stays private, so we
+ * just request permission according to the flags.
+ *
+ * The exceptions are xen_disk and blockdev_init(); in these cases, the
+ * caller of blk_new_open() doesn't make use of the permissions, but they
+ * shouldn't hurt either. We can still share everything here because the
+ * guest devices will add their own blockers if they can't share. */
+ perm = BLK_PERM_CONSISTENT_READ;
+ if (flags & BDRV_O_RDWR) {
+ perm |= BLK_PERM_WRITE;
+ }
+ if (flags & BDRV_O_RESIZE) {
+ perm |= BLK_PERM_RESIZE;
+ }
- blk = blk_new();
+ blk = blk_new(perm, BLK_PERM_ALL);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
blk_unref(blk);
return NULL;
}
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ perm, BLK_PERM_ALL, blk, &error_abort);
return blk;
}
@@ -495,16 +545,49 @@ void blk_remove_bs(BlockBackend *blk)
/*
* Associates a new BlockDriverState with @blk.
*/
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ blk->perm, blk->shared_perm, blk, errp);
+ if (blk->root == NULL) {
+ return -EPERM;
+ }
bdrv_ref(bs);
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (blk->public.throttle_state) {
throttle_timers_attach_aio_context(
&blk->public.throttle_timers, bdrv_get_aio_context(bs));
}
+
+ return 0;
+}
+
+/*
+ * Sets the permission bitmasks that the user of the BlockBackend needs.
+ */
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp)
+{
+ int ret;
+
+ if (blk->root) {
+ ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ blk->perm = perm;
+ blk->shared_perm = shared_perm;
+
+ return 0;
+}
+
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
+{
+ *perm = blk->perm;
+ *shared_perm = blk->shared_perm;
}
static int blk_do_attach_dev(BlockBackend *blk, void *dev)
@@ -553,6 +636,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
blk->guest_block_size = 512;
+ blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
blk_unref(blk);
}
@@ -620,19 +704,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
/*
* Notify @blk's attached device model of media change.
- * If @load is true, notify of media load.
- * Else, notify of media eject.
+ *
+ * If @load is true, notify of media load. This action can fail, meaning that
+ * the medium cannot be loaded. @errp is set then.
+ *
+ * If @load is false, notify of media eject. This can never fail.
+ *
* Also send DEVICE_TRAY_MOVED events as appropriate.
*/
-void blk_dev_change_media_cb(BlockBackend *blk, bool load)
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
+ Error *local_err = NULL;
assert(!blk->legacy_dev);
tray_was_open = blk_dev_is_tray_open(blk);
- blk->dev_ops->change_media_cb(blk->dev_opaque, load);
+ blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
+ if (local_err) {
+ assert(load == true);
+ error_propagate(errp, local_err);
+ return;
+ }
tray_is_open = blk_dev_is_tray_open(blk);
if (tray_was_open != tray_is_open) {
@@ -646,7 +740,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
static void blk_root_change_media(BdrvChild *child, bool load)
{
- blk_dev_change_media_cb(child->opaque, load);
+ blk_dev_change_media_cb(child->opaque, load, NULL);
}
/*
diff --git a/block/bochs.c b/block/bochs.c
index 7dd2ac4f51..516da56c3b 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -293,6 +293,7 @@ static BlockDriver bdrv_bochs = {
.instance_size = sizeof(BDRVBochsState),
.bdrv_probe = bochs_probe,
.bdrv_open = bochs_open,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = bochs_refresh_limits,
.bdrv_co_preadv = bochs_co_preadv,
.bdrv_close = bochs_close,
diff --git a/block/cloop.c b/block/cloop.c
index 877c9b0d1b..a6c7b9dbe6 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -290,6 +290,7 @@ static BlockDriver bdrv_cloop = {
.instance_size = sizeof(BDRVCloopState),
.bdrv_probe = cloop_probe,
.bdrv_open = cloop_open,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_refresh_limits = cloop_refresh_limits,
.bdrv_co_preadv = cloop_co_preadv,
.bdrv_close = cloop_close,
diff --git a/block/commit.c b/block/commit.c
index c284e8535d..22a0a4db98 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,6 +36,7 @@ typedef struct CommitBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *active;
+ BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
BlockdevOnError on_error;
@@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque)
BlockDriverState *active = s->active;
BlockDriverState *top = blk_bs(s->top);
BlockDriverState *base = blk_bs(s->base);
- BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
+ BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
int ret = data->ret;
+ bool remove_commit_top_bs = false;
+
+ /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
+ * the normal backing chain can be restored. */
+ blk_unref(s->base);
if (!block_job_is_cancelled(&s->common) && ret == 0) {
/* success */
- ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
+ ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
+ s->backing_file_str);
+ } else if (overlay_bs) {
+ /* XXX Can (or should) we somehow keep 'consistent read' blocked even
+ * after the failed/cancelled commit job is gone? If we already wrote
+ * something to base, the intermediate images aren't valid any more. */
+ remove_commit_top_bs = true;
}
/* restore base open flags here if appropriate (e.g., change the base back
@@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque)
}
g_free(s->backing_file_str);
blk_unref(s->top);
- blk_unref(s->base);
block_job_completed(&s->common, ret);
g_free(data);
+
+ /* If bdrv_drop_intermediate() didn't already do that, remove the commit
+ * filter driver from the backing chain. Do this as the final step so that
+ * the 'consistent read' permission can be granted. */
+ if (remove_commit_top_bs) {
+ bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+ }
}
static void coroutine_fn commit_run(void *opaque)
@@ -208,10 +226,38 @@ static const BlockJobDriver commit_job_driver = {
.start = commit_run,
};
+static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+static void bdrv_commit_top_close(BlockDriverState *bs)
+{
+}
+
+static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ *nperm = 0;
+ *nshared = BLK_PERM_ALL;
+}
+
+/* Dummy node that provides consistent read to its users without requiring it
+ * from its backing file and that allows writes on the backing file chain. */
+static BlockDriver bdrv_commit_top = {
+ .format_name = "commit_top",
+ .bdrv_co_preadv = bdrv_commit_top_preadv,
+ .bdrv_close = bdrv_commit_top_close,
+ .bdrv_child_perm = bdrv_commit_top_child_perm,
+};
+
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
- Error **errp)
+ const char *filter_node_name, Error **errp)
{
CommitBlockJob *s;
BlockReopenQueue *reopen_queue = NULL;
@@ -219,7 +265,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int orig_base_flags;
BlockDriverState *iter;
BlockDriverState *overlay_bs;
+ BlockDriverState *commit_top_bs = NULL;
Error *local_err = NULL;
+ int ret;
assert(top != bs);
if (top == base) {
@@ -234,8 +282,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
return;
}
- s = block_job_create(job_id, &commit_job_driver, bs, speed,
- BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+ s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
+ speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
@@ -256,30 +304,70 @@ void commit_start(const char *job_id, BlockDriverState *bs,
bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
- block_job_unref(&s->common);
- return;
+ goto fail;
}
}
+ /* Insert commit_top block node above top, so we can block consistent read
+ * on the backing chain below it */
+ commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
+ errp);
+ if (commit_top_bs == NULL) {
+ goto fail;
+ }
+
+ bdrv_set_backing_hd(commit_top_bs, top, &error_abort);
+ bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort);
+
+ s->commit_top_bs = commit_top_bs;
+ bdrv_unref(commit_top_bs);
/* Block all nodes between top and base, because they will
* disappear from the chain after this operation. */
assert(bdrv_chain_contains(top, base));
- for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
- block_job_add_bdrv(&s->common, iter);
+ for (iter = top; iter != base; iter = backing_bs(iter)) {
+ /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
+ * at s->base (if writes are blocked for a node, they are also blocked
+ * for its backing file). The other option would be a second filter
+ * driver above s->base. */
+ ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
+ errp);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ goto fail;
}
+
/* overlay_bs must be blocked because it needs to be modified to
- * update the backing image string, but if it's the root node then
- * don't block it again */
- if (bs != overlay_bs) {
- block_job_add_bdrv(&s->common, overlay_bs);
+ * update the backing image string. */
+ ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
+ BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ goto fail;
}
- s->base = blk_new();
- blk_insert_bs(s->base, base);
+ s->base = blk_new(BLK_PERM_CONSISTENT_READ
+ | BLK_PERM_WRITE
+ | BLK_PERM_RESIZE,
+ BLK_PERM_CONSISTENT_READ
+ | BLK_PERM_GRAPH_MOD
+ | BLK_PERM_WRITE_UNCHANGED);
+ ret = blk_insert_bs(s->base, base, errp);
+ if (ret < 0) {
+ goto fail;
+ }
- s->top = blk_new();
- blk_insert_bs(s->top, top);
+ /* Required permissions are already taken with block_job_add_bdrv() */
+ s->top = blk_new(0, BLK_PERM_ALL);
+ ret = blk_insert_bs(s->top, top, errp);
+ if (ret < 0) {
+ goto fail;
+ }
s->active = bs;
@@ -292,6 +380,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
trace_commit_start(bs, base, top, s);
block_job_start(&s->common);
+ return;
+
+fail:
+ if (s->base) {
+ blk_unref(s->base);
+ }
+ if (s->top) {
+ blk_unref(s->top);
+ }
+ if (commit_top_bs) {
+ bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+ }
+ block_job_unref(&s->common);
}
@@ -301,11 +402,14 @@ void commit_start(const char *job_id, BlockDriverState *bs,
int bdrv_commit(BlockDriverState *bs)
{
BlockBackend *src, *backing;
+ BlockDriverState *backing_file_bs = NULL;
+ BlockDriverState *commit_top_bs = NULL;
BlockDriver *drv = bs->drv;
int64_t sector, total_sectors, length, backing_length;
int n, ro, open_flags;
int ret = 0;
uint8_t *buf = NULL;
+ Error *local_err = NULL;
if (!drv)
return -ENOMEDIUM;
@@ -328,11 +432,33 @@ int bdrv_commit(BlockDriverState *bs)
}
}
- src = blk_new();
- blk_insert_bs(src, bs);
+ src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+ backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
- backing = blk_new();
- blk_insert_bs(backing, bs->backing->bs);
+ ret = blk_insert_bs(src, bs, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ goto ro_cleanup;
+ }
+
+ /* Insert commit_top block node above backing, so we can write to it */
+ backing_file_bs = backing_bs(bs);
+
+ commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
+ &local_err);
+ if (commit_top_bs == NULL) {
+ error_report_err(local_err);
+ goto ro_cleanup;
+ }
+
+ bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
+ bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
+
+ ret = blk_insert_bs(backing, backing_file_bs, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ goto ro_cleanup;
+ }
length = blk_getlength(src);
if (length < 0) {
@@ -404,8 +530,12 @@ int bdrv_commit(BlockDriverState *bs)
ro_cleanup:
qemu_vfree(buf);
- blk_unref(src);
blk_unref(backing);
+ if (backing_file_bs) {
+ bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
+ }
+ bdrv_unref(commit_top_bs);
+ blk_unref(src);
if (ro) {
/* ignoring error return here */
diff --git a/block/crypto.c b/block/crypto.c
index 7cb2ff2946..4a2038888d 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -628,6 +628,7 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_probe = block_crypto_probe_luks,
.bdrv_open = block_crypto_open_luks,
.bdrv_close = block_crypto_close,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = block_crypto_create_luks,
.bdrv_truncate = block_crypto_truncate,
.create_opts = &block_crypto_create_opts_luks,
diff --git a/block/dmg.c b/block/dmg.c
index 8e387cdfe5..a7d25fc47b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -697,6 +697,7 @@ static BlockDriver bdrv_dmg = {
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_refresh_limits = dmg_refresh_limits,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = dmg_co_preadv,
.bdrv_close = dmg_close,
};
diff --git a/block/io.c b/block/io.c
index d5c45447fd..8f38d46de0 100644
--- a/block/io.c
+++ b/block/io.c
@@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
}
-static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
+ BlockDriverState *bs = child->bs;
+
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
* modifying the image file. This is critical for zero-copy guest I/O
@@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
size_t skip_bytes;
int ret;
+ assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
@@ -1001,10 +1005,11 @@ err:
* handles copy on read, zeroing after EOF, and fragmentation of large
* reads; any other features must be implemented by the caller.
*/
-static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
+static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
+ BlockDriverState *bs = child->bs;
int64_t total_bytes, max_bytes;
int ret = 0;
uint64_t bytes_remaining = bytes;
@@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
+ ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
goto out;
}
}
@@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
}
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
- ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
+ ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
tracked_request_end(&req);
@@ -1306,10 +1311,11 @@ fail:
* Forwards an already correctly aligned write request to the BlockDriver,
* after possibly fragmenting it.
*/
-static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
+static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
+ BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
bool waited;
int ret;
@@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
+ assert(child->perm & BLK_PERM_WRITE);
+ assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
@@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
return ret;
}
-static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
int64_t offset,
unsigned int bytes,
BdrvRequestFlags flags,
BdrvTrackedRequest *req)
{
+ BlockDriverState *bs = child->bs;
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
@@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
- ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
+ ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
memset(buf + head_padding_bytes, 0, zero_bytes);
- ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
+ ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
align, &local_qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
if (ret < 0) {
@@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
if (bytes >= align) {
/* Write the aligned part in the middle. */
uint64_t aligned_bytes = bytes & ~(align - 1);
- ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
+ ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
NULL, flags);
if (ret < 0) {
goto fail;
@@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
- ret = bdrv_aligned_preadv(bs, req, offset, align,
+ ret = bdrv_aligned_preadv(child, req, offset, align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
memset(buf, 0, bytes);
- ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
+ ret = bdrv_aligned_pwritev(child, req, offset, align, align,
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
}
fail:
@@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
if (!qiov) {
- ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
+ ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
goto out;
}
@@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&head_qiov, &head_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
- ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+ ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
align, &head_qiov, 0);
if (ret < 0) {
goto fail;
@@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
- ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
- align, &tail_qiov, 0);
+ ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
+ align, align, &tail_qiov, 0);
if (ret < 0) {
goto fail;
}
@@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
bytes = ROUND_UP(bytes, align);
}
- ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
+ ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
diff --git a/block/mirror.c b/block/mirror.c
index 1b34b366d0..57f26c33a4 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -38,7 +38,10 @@ typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
BlockBackend *target;
+ BlockDriverState *mirror_top_bs;
+ BlockDriverState *source;
BlockDriverState *base;
+
/* The name of the graph node to replace */
char *replaces;
/* The BDS to replace */
@@ -327,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
- BlockDriverState *source = blk_bs(s->common.blk);
+ BlockDriverState *source = s->source;
int64_t sector_num, first_chunk;
uint64_t delay_ns = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
@@ -497,12 +500,30 @@ static void mirror_exit(BlockJob *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
- BlockDriverState *src = blk_bs(s->common.blk);
+ BlockDriverState *src = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
+ BlockDriverState *mirror_top_bs = s->mirror_top_bs;
+ Error *local_err = NULL;
/* Make sure that the source BDS doesn't go away before we called
* block_job_completed(). */
bdrv_ref(src);
+ bdrv_ref(mirror_top_bs);
+
+ /* We don't access the source any more. Dropping any WRITE/RESIZE is
+ * required before it could become a backing file of target_bs. */
+ bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
+ &error_abort);
+ if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+ BlockDriverState *backing = s->is_none_mode ? src : s->base;
+ if (backing_bs(target_bs) != backing) {
+ bdrv_set_backing_hd(target_bs, backing, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ data->ret = -EPERM;
+ }
+ }
+ }
if (s->to_replace) {
replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -524,10 +545,6 @@ static void mirror_exit(BlockJob *job, void *opaque)
bdrv_drained_begin(target_bs);
bdrv_replace_in_backing_chain(to_replace, target_bs);
bdrv_drained_end(target_bs);
-
- /* We just changed the BDS the job BB refers to */
- blk_remove_bs(job->blk);
- blk_insert_bs(job->blk, src);
}
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -540,9 +557,26 @@ static void mirror_exit(BlockJob *job, void *opaque)
g_free(s->replaces);
blk_unref(s->target);
s->target = NULL;
+
+ /* Remove the mirror filter driver from the graph. Before this, get rid of
+ * the blockers on the intermediate nodes so that the resulting state is
+ * valid. */
+ block_job_remove_all_bdrv(job);
+ bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
+
+ /* We just changed the BDS the job BB refers to (with either or both of the
+ * bdrv_replace_in_backing_chain() calls), so switch the BB back so the
+ * cleanup does the right thing. We don't need any permissions any more
+ * now. */
+ blk_remove_bs(job->blk);
+ blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
+ blk_insert_bs(job->blk, mirror_top_bs, &error_abort);
+
block_job_completed(&s->common, data->ret);
+
g_free(data);
bdrv_drained_end(src);
+ bdrv_unref(mirror_top_bs);
bdrv_unref(src);
}
@@ -562,7 +596,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t sector_num, end;
BlockDriverState *base = s->base;
- BlockDriverState *bs = blk_bs(s->common.blk);
+ BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
int ret, n;
@@ -644,7 +678,7 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
- BlockDriverState *bs = blk_bs(s->common.blk);
+ BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
bool need_drain = true;
int64_t length;
@@ -876,9 +910,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
- BlockDriverState *src, *target;
+ BlockDriverState *target;
- src = blk_bs(job->blk);
target = blk_bs(s->target);
if (!s->synced) {
@@ -910,6 +943,10 @@ static void mirror_complete(BlockJob *job, Error **errp)
replace_aio_context = bdrv_get_aio_context(s->to_replace);
aio_context_acquire(replace_aio_context);
+ /* TODO Translate this into permission system. Current definition of
+ * GRAPH_MOD would require to request it for the parents; they might
+ * not even be BlockDriverStates, however, so a BdrvChild can't address
+ * them. May need redefinition of GRAPH_MOD. */
error_setg(&s->replace_blocker,
"block device is in use by block-job-complete");
bdrv_op_block_all(s->to_replace, s->replace_blocker);
@@ -918,13 +955,6 @@ static void mirror_complete(BlockJob *job, Error **errp)
aio_context_release(replace_aio_context);
}
- if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
- BlockDriverState *backing = s->is_none_mode ? src : s->base;
- if (backing_bs(target) != backing) {
- bdrv_set_backing_hd(target, backing);
- }
- }
-
s->should_complete = true;
block_job_enter(&s->common);
}
@@ -980,6 +1010,77 @@ static const BlockJobDriver commit_active_job_driver = {
.drain = mirror_drain,
};
+/* Read path of the mirror_top filter node: the request is handed unchanged
+ * to the node's backing child and that call's result is returned. */
+static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    BdrvChild *below = bs->backing;
+
+    return bdrv_co_preadv(below, offset, bytes, qiov, flags);
+}
+
+/* Write path of the mirror_top filter node: the request is handed unchanged
+ * to the node's backing child and that call's result is returned. */
+static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    BdrvChild *below = bs->backing;
+
+    return bdrv_co_pwritev(below, offset, bytes, qiov, flags);
+}
+
+/* Flush for the mirror_top filter node: forwards the flush to the node
+ * below (the backing child's BlockDriverState).
+ *
+ * Returns 0 without doing anything when no backing child is attached;
+ * otherwise returns whatever bdrv_co_flush() returns for the backing node. */
+static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
+{
+    if (bs->backing == NULL) {
+        /* Nothing below us to flush.
+         * NOTE(review): presumably reachable when mirror_start_job() drops
+         * the node after a failed bdrv_append() and the node is flushed
+         * while being torn down - confirm against bdrv_close(). */
+        return 0;
+    }
+
+    return bdrv_co_flush(bs->backing->bs);
+}
+
+/* Block status for the mirror_top filter node.
+ *
+ * Reports the whole queried range (*pnum = nb_sectors) as raw data located
+ * at the same offset in the backing node, which is returned through *file. */
+static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
+    BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
+    BlockDriverState **file)
+{
+    int64_t status = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA;
+
+    *file = bs->backing->bs;
+    *pnum = nb_sectors;
+
+    /* The byte offset of the range is encoded alongside the flag bits */
+    status |= sector_num << BDRV_SECTOR_BITS;
+    return status;
+}
+
+/* Write-zeroes path of the mirror_top filter node: the request is handed
+ * unchanged to the node's backing child. */
+static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
+    int64_t offset, int count, BdrvRequestFlags flags)
+{
+    BdrvChild *below = bs->backing;
+
+    return bdrv_co_pwrite_zeroes(below, offset, count, flags);
+}
+
+/* Discard path of the mirror_top filter node: the request is applied to the
+ * BlockDriverState behind the backing child. */
+static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
+    int64_t offset, int count)
+{
+    BlockDriverState *below = bs->backing->bs;
+
+    return bdrv_co_pdiscard(below, offset, count);
+}
+
+/* Close callback of the mirror_top filter node. Intentionally empty: this
+ * function itself releases nothing. */
+static void bdrv_mirror_top_close(BlockDriverState *bs)
+{
+}
+
+/* Permission callback of the mirror_top filter node.
+ *
+ * @perm:    permissions requested on this node by its parents
+ * @nperm:   out: permissions this node requests on child @c
+ * @nshared: out: permissions this node shares on child @c
+ *
+ * Only BLK_PERM_WRITE is passed down, and only if a parent asked for it;
+ * everything is shared. @role and @shared are not consulted. */
+static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                       const BdrvChildRole *role,
+                                       uint64_t perm, uint64_t shared,
+                                       uint64_t *nperm, uint64_t *nshared)
+{
+    /* Must be able to forward guest writes to the real image */
+    *nperm = (perm & BLK_PERM_WRITE) ? BLK_PERM_WRITE : 0;
+    *nshared = BLK_PERM_ALL;
+}
+
+/* Driver table for the "mirror_top" dummy node. Its users get consistent
+ * reads from it without that permission being required from its backing
+ * file, and writes on the backing file chain remain allowed. */
+static BlockDriver bdrv_mirror_top = {
+    .format_name                = "mirror_top",
+
+    /* lifecycle and permissions */
+    .bdrv_close                 = bdrv_mirror_top_close,
+    .bdrv_child_perm            = bdrv_mirror_top_child_perm,
+
+    /* I/O, all forwarded to the backing child */
+    .bdrv_co_preadv             = bdrv_mirror_top_preadv,
+    .bdrv_co_pwritev            = bdrv_mirror_top_pwritev,
+    .bdrv_co_pwrite_zeroes      = bdrv_mirror_top_pwrite_zeroes,
+    .bdrv_co_pdiscard           = bdrv_mirror_top_pdiscard,
+    .bdrv_co_flush              = bdrv_mirror_top_flush,
+    .bdrv_co_get_block_status   = bdrv_mirror_top_get_block_status,
+};
+
static void mirror_start_job(const char *job_id, BlockDriverState *bs,
int creation_flags, BlockDriverState *target,
const char *replaces, int64_t speed,
@@ -992,9 +1093,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
void *opaque, Error **errp,
const BlockJobDriver *driver,
bool is_none_mode, BlockDriverState *base,
- bool auto_complete)
+ bool auto_complete, const char *filter_node_name)
{
MirrorBlockJob *s;
+ BlockDriverState *mirror_top_bs;
+ bool target_graph_mod;
+ bool target_is_backing;
+ Error *local_err = NULL;
+ int ret;
if (granularity == 0) {
granularity = bdrv_get_default_bitmap_granularity(target);
@@ -1011,14 +1117,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
buf_size = DEFAULT_MIRROR_BUF_SIZE;
}
- s = block_job_create(job_id, driver, bs, speed, creation_flags,
- cb, opaque, errp);
- if (!s) {
+ /* In the case of active commit, add dummy driver to provide consistent
+ * reads on the top, while disabling it in the intermediate nodes, and make
+ * the backing chain writable. */
+ mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
+ BDRV_O_RDWR, errp);
+ if (mirror_top_bs == NULL) {
+ return;
+ }
+ mirror_top_bs->total_sectors = bs->total_sectors;
+
+ /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
+ * it alive until block_job_create() even if bs has no parent. */
+ bdrv_ref(mirror_top_bs);
+ bdrv_drained_begin(bs);
+ bdrv_append(mirror_top_bs, bs, &local_err);
+ bdrv_drained_end(bs);
+
+ if (local_err) {
+ bdrv_unref(mirror_top_bs);
+ error_propagate(errp, local_err);
return;
}
- s->target = blk_new();
- blk_insert_bs(s->target, target);
+ /* Make sure that the source is not resized while the job is running */
+ s = block_job_create(job_id, driver, mirror_top_bs,
+ BLK_PERM_CONSISTENT_READ,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
+ creation_flags, cb, opaque, errp);
+ bdrv_unref(mirror_top_bs);
+ if (!s) {
+ goto fail;
+ }
+ s->source = bs;
+ s->mirror_top_bs = mirror_top_bs;
+
+ /* No resize for the target either; while the mirror is still running, a
+ * consistent read isn't necessarily possible. We could possibly allow
+ * writes and graph modifications, though it would likely defeat the
+ * purpose of a mirror, so leave them blocked for now.
+ *
+ * In the case of active commit, things look a bit different, though,
+ * because the target is an already populated backing file in active use.
+ * We can allow anything except resize there. */
+ target_is_backing = bdrv_chain_contains(bs, target);
+ target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
+ s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
+ (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
+ BLK_PERM_WRITE_UNCHANGED |
+ (target_is_backing ? BLK_PERM_CONSISTENT_READ |
+ BLK_PERM_WRITE |
+ BLK_PERM_GRAPH_MOD : 0));
+ ret = blk_insert_bs(s->target, target, errp);
+ if (ret < 0) {
+ goto fail;
+ }
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
@@ -1041,18 +1195,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
return;
}
- block_job_add_bdrv(&s->common, target);
+ /* Required permissions are already taken with blk_new() */
+ block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
+ &error_abort);
+
/* In commit_active_start() all intermediate nodes disappear, so
* any jobs in them must be blocked */
- if (bdrv_chain_contains(bs, target)) {
+ if (target_is_backing) {
BlockDriverState *iter;
for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
- block_job_add_bdrv(&s->common, iter);
+ /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
+ * ourselves at s->base (if writes are blocked for a node, they are
+ * also blocked for its backing file). The other options would be a
+ * second filter driver above s->base (== target). */
+ ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+ BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
+ errp);
+ if (ret < 0) {
+ goto fail;
+ }
}
}
trace_mirror_start(bs, s, opaque);
block_job_start(&s->common);
+ return;
+
+fail:
+ if (s) {
+ g_free(s->replaces);
+ blk_unref(s->target);
+ block_job_unref(&s->common);
+ }
+
+ bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
}
void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1061,7 +1237,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
- bool unmap, Error **errp)
+ bool unmap, const char *filter_node_name, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
@@ -1075,12 +1251,14 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, NULL, NULL, errp,
- &mirror_job_driver, is_none_mode, base, false);
+ &mirror_job_driver, is_none_mode, base, false,
+ filter_node_name);
}
void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, int creation_flags,
int64_t speed, BlockdevOnError on_error,
+ const char *filter_node_name,
BlockCompletionFunc *cb, void *opaque, Error **errp,
bool auto_complete)
{
@@ -1096,7 +1274,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, true, cb, opaque, &local_err,
- &commit_active_job_driver, false, base, auto_complete);
+ &commit_active_job_driver, false, base, auto_complete,
+ filter_node_name);
if (local_err) {
error_propagate(errp, local_err);
goto error_restore_flags;
diff --git a/block/parallels.c b/block/parallels.c
index b2ec09f7e6..19935e29a9 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
}
file = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (file == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -762,6 +763,7 @@ static BlockDriver bdrv_parallels = {
.bdrv_probe = parallels_probe,
.bdrv_open = parallels_open,
.bdrv_close = parallels_close,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_get_block_status = parallels_co_get_block_status,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_flush_to_os = parallels_co_flush_to_os,
diff --git a/block/qcow.c b/block/qcow.c
index 038b05ab1b..9d6ac83959 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -823,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
qcow_blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (qcow_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1052,6 +1053,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_probe = qcow_probe,
.bdrv_open = qcow_open,
.bdrv_close = qcow_close,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_reopen_prepare = qcow_reopen_prepare,
.bdrv_create = qcow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
diff --git a/block/qcow2.c b/block/qcow2.c
index 21e61427eb..6a92d2ef3f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2202,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -2266,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
blk = blk_new_open(filename, NULL, options,
- BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -3113,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
uint64_t cluster_size = s->cluster_size;
bool encrypt;
int refcount_bits = s->refcount_bits;
+ Error *local_err = NULL;
int ret;
QemuOptDesc *desc = opts->list->desc;
Qcow2AmendHelperCBInfo helper_cb_info;
@@ -3262,11 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
if (new_size) {
- BlockBackend *blk = blk_new();
- blk_insert_bs(blk, bs);
+ BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+ ret = blk_insert_bs(blk, bs, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ blk_unref(blk);
+ return ret;
+ }
+
ret = blk_truncate(blk, new_size);
blk_unref(blk);
-
if (ret < 0) {
return ret;
}
@@ -3403,6 +3411,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_reopen_commit = qcow2_reopen_commit,
.bdrv_reopen_abort = qcow2_reopen_abort,
.bdrv_join_options = qcow2_join_options,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = qcow2_co_get_block_status,
diff --git a/block/qed.c b/block/qed.c
index 62a0a09326..5ec7fd83f2 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -625,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
return -EIO;
@@ -1704,6 +1705,7 @@ static BlockDriver bdrv_qed = {
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
diff --git a/block/quorum.c b/block/quorum.c
index 86e2072dce..40205fb1b3 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
/* We can safely add the child now */
bdrv_ref(child_bs);
- child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
+
+ child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
+ if (child == NULL) {
+ s->next_child_index--;
+ bdrv_unref(child_bs);
+ goto out;
+ }
s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
s->children[s->num_children++] = child;
+out:
bdrv_drained_end(bs);
}
@@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = {
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
+ .bdrv_child_perm = bdrv_filter_default_perms,
+
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};
diff --git a/block/raw-format.c b/block/raw-format.c
index ce34d1b1cd..86fbc657eb 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -467,6 +467,7 @@ BlockDriver bdrv_raw = {
.bdrv_reopen_abort = &raw_reopen_abort,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
+ .bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_create = &raw_create,
.bdrv_co_preadv = &raw_co_preadv,
.bdrv_co_pwritev = &raw_co_pwritev,
diff --git a/block/rbd.c b/block/rbd.c
index 22e8e69cbd..ee13f3d9d3 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -18,6 +18,7 @@
#include "block/block_int.h"
#include "crypto/secret.h"
#include "qemu/cutils.h"
+#include "qapi/qmp/qstring.h"
#include <rbd/librbd.h>
@@ -102,10 +103,10 @@ typedef struct BDRVRBDState {
char *snap;
} BDRVRBDState;
-static int qemu_rbd_next_tok(char *dst, int dst_len,
- char *src, char delim,
- const char *name,
- char **p, Error **errp)
+static char *qemu_rbd_next_tok(int max_len,
+ char *src, char delim,
+ const char *name,
+ char **p, Error **errp)
{
int l;
char *end;
@@ -127,17 +128,15 @@ static int qemu_rbd_next_tok(char *dst, int dst_len,
}
}
l = strlen(src);
- if (l >= dst_len) {
+ if (l >= max_len) {
error_setg(errp, "%s too long", name);
- return -EINVAL;
+ return NULL;
} else if (l == 0) {
error_setg(errp, "%s too short", name);
- return -EINVAL;
+ return NULL;
}
- pstrcpy(dst, dst_len, src);
-
- return 0;
+ return src;
}
static void qemu_rbd_unescape(char *src)
@@ -153,87 +152,134 @@ static void qemu_rbd_unescape(char *src)
*p = '\0';
}
-static int qemu_rbd_parsename(const char *filename,
- char *pool, int pool_len,
- char *snap, int snap_len,
- char *name, int name_len,
- char *conf, int conf_len,
- Error **errp)
+static void qemu_rbd_parse_filename(const char *filename, QDict *options,
+ Error **errp)
{
const char *start;
- char *p, *buf;
- int ret;
+ char *p, *buf, *keypairs;
+ char *found_str;
+ size_t max_keypair_size;
+ Error *local_err = NULL;
if (!strstart(filename, "rbd:", &start)) {
error_setg(errp, "File name must start with 'rbd:'");
- return -EINVAL;
+ return;
}
+ max_keypair_size = strlen(start) + 1;
buf = g_strdup(start);
+ keypairs = g_malloc0(max_keypair_size);
p = buf;
- *snap = '\0';
- *conf = '\0';
- ret = qemu_rbd_next_tok(pool, pool_len, p,
- '/', "pool name", &p, errp);
- if (ret < 0 || !p) {
- ret = -EINVAL;
+ found_str = qemu_rbd_next_tok(RBD_MAX_POOL_NAME_SIZE, p,
+ '/', "pool name", &p, &local_err);
+ if (local_err) {
+ goto done;
+ }
+ if (!p) {
+ error_setg(errp, "Pool name is required");
goto done;
}
- qemu_rbd_unescape(pool);
+ qemu_rbd_unescape(found_str);
+ qdict_put(options, "pool", qstring_from_str(found_str));
if (strchr(p, '@')) {
- ret = qemu_rbd_next_tok(name, name_len, p,
- '@', "object name", &p, errp);
- if (ret < 0) {
+ found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
+ '@', "object name", &p, &local_err);
+ if (local_err) {
goto done;
}
- ret = qemu_rbd_next_tok(snap, snap_len, p,
- ':', "snap name", &p, errp);
- qemu_rbd_unescape(snap);
+ qemu_rbd_unescape(found_str);
+ qdict_put(options, "image", qstring_from_str(found_str));
+
+ found_str = qemu_rbd_next_tok(RBD_MAX_SNAP_NAME_SIZE, p,
+ ':', "snap name", &p, &local_err);
+ if (local_err) {
+ goto done;
+ }
+ qemu_rbd_unescape(found_str);
+ qdict_put(options, "snapshot", qstring_from_str(found_str));
} else {
- ret = qemu_rbd_next_tok(name, name_len, p,
- ':', "object name", &p, errp);
+ found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
+ ':', "object name", &p, &local_err);
+ if (local_err) {
+ goto done;
+ }
+ qemu_rbd_unescape(found_str);
+ qdict_put(options, "image", qstring_from_str(found_str));
}
- qemu_rbd_unescape(name);
- if (ret < 0 || !p) {
+ if (!p) {
goto done;
}
- ret = qemu_rbd_next_tok(conf, conf_len, p,
- '\0', "configuration", &p, errp);
-
-done:
- g_free(buf);
- return ret;
-}
-
-static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
-{
- const char *p = conf;
+ found_str = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
+ '\0', "configuration", &p, &local_err);
+ if (local_err) {
+ goto done;
+ }
- while (*p) {
- int len;
- const char *end = strchr(p, ':');
+ p = found_str;
- if (end) {
- len = end - p;
- } else {
- len = strlen(p);
+ /* The following are essentially all key/value pairs, and we treat
+ * 'id' and 'conf' a bit special. Key/value pairs may be in any order. */
+ while (p) {
+ char *name, *value;
+ name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
+ '=', "conf option name", &p, &local_err);
+ if (local_err) {
+ break;
}
- if (strncmp(p, "id=", 3) == 0) {
- len -= 3;
- strncpy(clientname, p + 3, len);
- clientname[len] = '\0';
- return clientname;
+ if (!p) {
+ error_setg(errp, "conf option %s has no value", name);
+ break;
}
- if (end == NULL) {
+
+ qemu_rbd_unescape(name);
+
+ value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p,
+ ':', "conf option value", &p, &local_err);
+ if (local_err) {
break;
}
- p = end + 1;
+ qemu_rbd_unescape(value);
+
+ if (!strcmp(name, "conf")) {
+ qdict_put(options, "conf", qstring_from_str(value));
+ } else if (!strcmp(name, "id")) {
+ qdict_put(options, "user" , qstring_from_str(value));
+ } else {
+ /* FIXME: This is pretty ugly, and not the right way to do this.
+ * These should be contained in a structure, and then
+ * passed explicitly as individual key/value pairs to
+ * rados. Consider this legacy code that needs to be
+ * updated. */
+ char *tmp = g_malloc0(max_keypair_size);
+ /* only use a delimiter if it is not the first keypair found */
+ /* These are sets of unknown key/value pairs we'll pass along
+ * to ceph */
+ if (keypairs[0]) {
+ snprintf(tmp, max_keypair_size, ":%s=%s", name, value);
+ pstrcat(keypairs, max_keypair_size, tmp);
+ } else {
+ snprintf(keypairs, max_keypair_size, "%s=%s", name, value);
+ }
+ g_free(tmp);
+ }
}
- return NULL;
+
+ if (keypairs[0]) {
+ qdict_put(options, "keyvalue-pairs", qstring_from_str(keypairs));
+ }
+
+
+done:
+ if (local_err) {
+ error_propagate(errp, local_err);
+ }
+ g_free(buf);
+ g_free(keypairs);
+ return;
}
@@ -256,26 +302,24 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
return 0;
}
-
-static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
- bool only_read_conf_file,
- Error **errp)
+static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs,
+ Error **errp)
{
char *p, *buf;
- char name[RBD_MAX_CONF_NAME_SIZE];
- char value[RBD_MAX_CONF_VAL_SIZE];
+ char *name;
+ char *value;
+ Error *local_err = NULL;
int ret = 0;
- buf = g_strdup(conf);
+ buf = g_strdup(keypairs);
p = buf;
while (p) {
- ret = qemu_rbd_next_tok(name, sizeof(name), p,
- '=', "conf option name", &p, errp);
- if (ret < 0) {
+ name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
+ '=', "conf option name", &p, &local_err);
+ if (local_err) {
break;
}
- qemu_rbd_unescape(name);
if (!p) {
error_setg(errp, "conf option %s has no value", name);
@@ -283,36 +327,24 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
break;
}
- ret = qemu_rbd_next_tok(value, sizeof(value), p,
- ':', "conf option value", &p, errp);
- if (ret < 0) {
+ value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p,
+ ':', "conf option value", &p, &local_err);
+ if (local_err) {
break;
}
- qemu_rbd_unescape(value);
- if (strcmp(name, "conf") == 0) {
- /* read the conf file alone, so it doesn't override more
- specific settings for a particular device */
- if (only_read_conf_file) {
- ret = rados_conf_read_file(cluster, value);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "error reading conf file %s",
- value);
- break;
- }
- }
- } else if (strcmp(name, "id") == 0) {
- /* ignore, this is parsed by qemu_rbd_parse_clientname() */
- } else if (!only_read_conf_file) {
- ret = rados_conf_set(cluster, name, value);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "invalid conf option %s", name);
- ret = -EINVAL;
- break;
- }
+ ret = rados_conf_set(cluster, name, value);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "invalid conf option %s", name);
+ ret = -EINVAL;
+ break;
}
}
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ }
g_free(buf);
return ret;
}
@@ -328,33 +360,84 @@ static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
}
}
+/* Runtime options accepted by the rbd driver. The same list is used for the
+ * top-level options and for the per-entry sub-dicts parsed by
+ * qemu_rbd_array_opts() ("host"/"port" and "auth"). */
+static QemuOptsList runtime_opts = {
+    .name = "rbd",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        { .name = "filename", .type = QEMU_OPT_STRING,
+          .help = "Specification of the rbd image" },
+        { .name = "password-secret", .type = QEMU_OPT_STRING,
+          .help = "ID of secret providing the password" },
+        { .name = "conf", .type = QEMU_OPT_STRING,
+          .help = "Rados config file location" },
+        { .name = "pool", .type = QEMU_OPT_STRING,
+          .help = "Rados pool name" },
+        { .name = "image", .type = QEMU_OPT_STRING,
+          .help = "Image name in the pool" },
+        { .name = "snapshot", .type = QEMU_OPT_STRING,
+          .help = "Ceph snapshot name" },
+        /* maps to 'id' in rados_create() */
+        { .name = "user", .type = QEMU_OPT_STRING,
+          .help = "Rados id name" },
+        { .name = "keyvalue-pairs", .type = QEMU_OPT_STRING,
+          .help = "Legacy rados key/value option parameters" },
+        { .name = "host", .type = QEMU_OPT_STRING },
+        { .name = "port", .type = QEMU_OPT_STRING },
+        { .name = "auth", .type = QEMU_OPT_STRING,
+          .help = "Supported authentication method, either cephx or none" },
+        { /* end of list */ }
+    },
+};
+
static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
{
Error *local_err = NULL;
int64_t bytes = 0;
int64_t objsize;
int obj_order = 0;
- char pool[RBD_MAX_POOL_NAME_SIZE];
- char name[RBD_MAX_IMAGE_NAME_SIZE];
- char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
- char conf[RBD_MAX_CONF_SIZE];
- char clientname_buf[RBD_MAX_CONF_SIZE];
- char *clientname;
+ const char *pool, *name, *conf, *clientname, *keypairs;
const char *secretid;
rados_t cluster;
rados_ioctx_t io_ctx;
- int ret;
+ QDict *options = NULL;
+ QemuOpts *rbd_opts = NULL;
+ int ret = 0;
secretid = qemu_opt_get(opts, "password-secret");
- if (qemu_rbd_parsename(filename, pool, sizeof(pool),
- snap_buf, sizeof(snap_buf),
- name, sizeof(name),
- conf, sizeof(conf), &local_err) < 0) {
- error_propagate(errp, local_err);
- return -EINVAL;
- }
-
/* Read out options */
bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
@@ -362,35 +445,55 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
if (objsize) {
if ((objsize - 1) & objsize) { /* not a power of 2? */
error_setg(errp, "obj size needs to be power of 2");
- return -EINVAL;
+ ret = -EINVAL;
+ goto exit;
}
if (objsize < 4096) {
error_setg(errp, "obj size too small");
- return -EINVAL;
+ ret = -EINVAL;
+ goto exit;
}
obj_order = ctz32(objsize);
}
- clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+ options = qdict_new();
+ qemu_rbd_parse_filename(filename, options, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto exit;
+ }
+
+ rbd_opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(rbd_opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ pool = qemu_opt_get(rbd_opts, "pool");
+ conf = qemu_opt_get(rbd_opts, "conf");
+ clientname = qemu_opt_get(rbd_opts, "user");
+ name = qemu_opt_get(rbd_opts, "image");
+ keypairs = qemu_opt_get(rbd_opts, "keyvalue-pairs");
+
ret = rados_create(&cluster, clientname);
if (ret < 0) {
error_setg_errno(errp, -ret, "error initializing");
- return ret;
+ goto exit;
}
- if (strstr(conf, "conf=") == NULL) {
- /* try default location, but ignore failure */
- rados_conf_read_file(cluster, NULL);
- } else if (conf[0] != '\0' &&
- qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
- error_propagate(errp, local_err);
+ /* try default location when conf=NULL, but ignore failure */
+ ret = rados_conf_read_file(cluster, conf);
+ if (conf && ret < 0) {
+ error_setg_errno(errp, -ret, "error reading conf file %s", conf);
ret = -EIO;
goto shutdown;
}
- if (conf[0] != '\0' &&
- qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
- error_propagate(errp, local_err);
+ ret = qemu_rbd_set_keypairs(cluster, keypairs, errp);
+ if (ret < 0) {
ret = -EIO;
goto shutdown;
}
@@ -421,6 +524,10 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
shutdown:
rados_shutdown(cluster);
+
+exit:
+ QDECREF(options);
+ qemu_opts_del(rbd_opts);
return ret;
}
@@ -471,38 +578,104 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
qemu_aio_unref(acb);
}
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
- .name = "rbd",
- .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
- .desc = {
- {
- .name = "filename",
- .type = QEMU_OPT_STRING,
- .help = "Specification of the rbd image",
- },
- {
- .name = "password-secret",
- .type = QEMU_OPT_STRING,
- .help = "ID of secret providing the password",
- },
- { /* end of list */ }
- },
-};
+#define RBD_MON_HOST 0
+#define RBD_AUTH_SUPPORTED 1
+
+/*
+ * Collect the entries of the QDict array "<prefix>N." in @options into one
+ * ';'-separated string.
+ *
+ * @options: dict the array entries are extracted from (they are removed
+ *           from it by qdict_extract_subqdict())
+ * @prefix:  array prefix including the trailing '.', e.g. "server."
+ * @type:    RBD_MON_HOST       - each entry contributes "host", "host:port",
+ *                                "[host]" or "[host]:port" (brackets when
+ *                                the host contains ':')
+ *           RBD_AUTH_SUPPORTED - each entry contributes its "auth" value
+ * @errp:    set on a malformed array, on an option-absorb error, or when an
+ *           entry lacks its mandatory "host" / "auth" value
+ *
+ * Returns a newly allocated string (built with g_strdup*(); presumably the
+ * caller releases it with g_free()), or NULL if the array is empty or an
+ * error occurred.
+ */
+static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type,
+                                 Error **errp)
+{
+    int num_entries;
+    QemuOpts *opts = NULL;
+    QDict *sub_options;
+    const char *host;
+    const char *port;
+    char *str;
+    char *rados_str = NULL;
+    Error *local_err = NULL;
+    int i;
+
+    assert(type == RBD_MON_HOST || type == RBD_AUTH_SUPPORTED);
+
+    num_entries = qdict_array_entries(options, prefix);
+
+    if (num_entries < 0) {
+        error_setg(errp, "Parse error on RBD QDict array");
+        return NULL;
+    }
+
+    for (i = 0; i < num_entries; i++) {
+        char *strbuf = NULL;
+        const char *value;
+        char *rados_str_tmp;
+
+        str = g_strdup_printf("%s%d.", prefix, i);
+        qdict_extract_subqdict(options, &sub_options, str);
+        g_free(str);
+
+        opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+        qemu_opts_absorb_qdict(opts, sub_options, &local_err);
+        QDECREF(sub_options);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            g_free(rados_str);
+            rados_str = NULL;
+            goto exit;
+        }
+
+        if (type == RBD_MON_HOST) {
+            host = qemu_opt_get(opts, "host");
+            port = qemu_opt_get(opts, "port");
+
+            /* "host" is mandatory; without this check the strchr() calls
+             * below would dereference NULL */
+            if (!host) {
+                error_setg(errp, "Parameter '%s%d.host' is missing",
+                           prefix, i);
+                g_free(rados_str);
+                rados_str = NULL;
+                goto exit;
+            }
+
+            value = host;
+            if (port) {
+                /* check for ipv6 */
+                if (strchr(host, ':')) {
+                    strbuf = g_strdup_printf("[%s]:%s", host, port);
+                } else {
+                    strbuf = g_strdup_printf("%s:%s", host, port);
+                }
+                value = strbuf;
+            } else if (strchr(host, ':')) {
+                strbuf = g_strdup_printf("[%s]", host);
+                value = strbuf;
+            }
+        } else {
+            value = qemu_opt_get(opts, "auth");
+
+            /* A NULL value must never reach the "%s" formatting below */
+            if (!value) {
+                error_setg(errp, "Parameter '%s%d.auth' is missing",
+                           prefix, i);
+                g_free(rados_str);
+                rados_str = NULL;
+                goto exit;
+            }
+        }
+
+        /* each iteration in the for loop will build upon the string, and if
+         * rados_str is NULL then it is our first pass */
+        if (rados_str) {
+            /* separate options with ';', as that is what rados_conf_set()
+             * requires */
+            rados_str_tmp = rados_str;
+            rados_str = g_strdup_printf("%s;%s", rados_str_tmp, value);
+            g_free(rados_str_tmp);
+        } else {
+            rados_str = g_strdup(value);
+        }
+
+        g_free(strbuf);
+        qemu_opts_del(opts);
+        opts = NULL;
+    }
+
+exit:
+    /* opts is non-NULL only when we bailed out in the middle of an entry */
+    qemu_opts_del(opts);
+    return rados_str;
+}
static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVRBDState *s = bs->opaque;
- char pool[RBD_MAX_POOL_NAME_SIZE];
- char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
- char conf[RBD_MAX_CONF_SIZE];
- char clientname_buf[RBD_MAX_CONF_SIZE];
- char *clientname;
+ const char *pool, *snap, *conf, *clientname, *name, *keypairs;
const char *secretid;
QemuOpts *opts;
Error *local_err = NULL;
- const char *filename;
+ char *mon_host = NULL;
+ char *auth_supported = NULL;
int r;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
@@ -513,41 +686,63 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
- filename = qemu_opt_get(opts, "filename");
- secretid = qemu_opt_get(opts, "password-secret");
+ auth_supported = qemu_rbd_array_opts(options, "auth-supported.",
+ RBD_AUTH_SUPPORTED, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ r = -EINVAL;
+ goto failed_opts;
+ }
- if (qemu_rbd_parsename(filename, pool, sizeof(pool),
- snap_buf, sizeof(snap_buf),
- s->name, sizeof(s->name),
- conf, sizeof(conf), errp) < 0) {
+ mon_host = qemu_rbd_array_opts(options, "server.",
+ RBD_MON_HOST, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
r = -EINVAL;
goto failed_opts;
}
- clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+ secretid = qemu_opt_get(opts, "password-secret");
+
+ pool = qemu_opt_get(opts, "pool");
+ conf = qemu_opt_get(opts, "conf");
+ snap = qemu_opt_get(opts, "snapshot");
+ clientname = qemu_opt_get(opts, "user");
+ name = qemu_opt_get(opts, "image");
+ keypairs = qemu_opt_get(opts, "keyvalue-pairs");
+
r = rados_create(&s->cluster, clientname);
if (r < 0) {
error_setg_errno(errp, -r, "error initializing");
goto failed_opts;
}
- s->snap = NULL;
- if (snap_buf[0] != '\0') {
- s->snap = g_strdup(snap_buf);
+ s->snap = g_strdup(snap);
+ if (name) {
+ pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name);
+ }
+
+ /* try default location when conf=NULL, but ignore failure */
+ r = rados_conf_read_file(s->cluster, conf);
+ if (conf && r < 0) {
+ error_setg_errno(errp, -r, "error reading conf file %s", conf);
+ goto failed_shutdown;
+ }
+
+ r = qemu_rbd_set_keypairs(s->cluster, keypairs, errp);
+ if (r < 0) {
+ goto failed_shutdown;
}
- if (strstr(conf, "conf=") == NULL) {
- /* try default location, but ignore failure */
- rados_conf_read_file(s->cluster, NULL);
- } else if (conf[0] != '\0') {
- r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
+ if (mon_host) {
+ r = rados_conf_set(s->cluster, "mon_host", mon_host);
if (r < 0) {
goto failed_shutdown;
}
}
- if (conf[0] != '\0') {
- r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
+ if (auth_supported) {
+ r = rados_conf_set(s->cluster, "auth_supported", auth_supported);
if (r < 0) {
goto failed_shutdown;
}
@@ -601,6 +796,8 @@ failed_shutdown:
g_free(s->snap);
failed_opts:
qemu_opts_del(opts);
+ g_free(mon_host);
+ g_free(auth_supported);
return r;
}
@@ -1004,18 +1201,18 @@ static QemuOptsList qemu_rbd_create_opts = {
};
static BlockDriver bdrv_rbd = {
- .format_name = "rbd",
- .instance_size = sizeof(BDRVRBDState),
- .bdrv_needs_filename = true,
- .bdrv_file_open = qemu_rbd_open,
- .bdrv_close = qemu_rbd_close,
- .bdrv_create = qemu_rbd_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_get_info = qemu_rbd_getinfo,
- .create_opts = &qemu_rbd_create_opts,
- .bdrv_getlength = qemu_rbd_getlength,
- .bdrv_truncate = qemu_rbd_truncate,
- .protocol_name = "rbd",
+ .format_name = "rbd",
+ .instance_size = sizeof(BDRVRBDState),
+ .bdrv_parse_filename = qemu_rbd_parse_filename,
+ .bdrv_file_open = qemu_rbd_open,
+ .bdrv_close = qemu_rbd_close,
+ .bdrv_create = qemu_rbd_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_get_info = qemu_rbd_getinfo,
+ .create_opts = &qemu_rbd_create_opts,
+ .bdrv_getlength = qemu_rbd_getlength,
+ .bdrv_truncate = qemu_rbd_truncate,
+ .protocol_name = "rbd",
.bdrv_aio_readv = qemu_rbd_aio_readv,
.bdrv_aio_writev = qemu_rbd_aio_writev,
diff --git a/block/replication.c b/block/replication.c
index eff85c77ba..22f170fd33 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -644,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
s->replication_state = BLOCK_REPLICATION_FAILOVER;
commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
- replication_done, bs, errp, true);
+ NULL, replication_done, bs, errp, true);
break;
default:
aio_context_release(aio_context);
@@ -660,6 +660,7 @@ BlockDriver bdrv_replication = {
.bdrv_open = replication_open,
.bdrv_close = replication_close,
+ .bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = replication_getlength,
.bdrv_co_readv = replication_co_readv,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 860ba61502..743471043e 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1609,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp)
int ret;
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (blk == NULL) {
ret = -EIO;
goto out_with_err_set;
diff --git a/block/stream.c b/block/stream.c
index 1523ba7dfb..0113710845 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque)
StreamCompleteData *data = opaque;
BlockDriverState *bs = blk_bs(job->blk);
BlockDriverState *base = s->base;
+ Error *local_err = NULL;
if (!block_job_is_cancelled(&s->common) && data->reached_end &&
data->ret == 0) {
@@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque)
}
}
data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
- bdrv_set_backing_hd(bs, base);
+ bdrv_set_backing_hd(bs, base, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ data->ret = -EPERM;
+ goto out;
+ }
}
+out:
/* Reopen the image back in read-only mode if necessary */
if (s->bs_flags != bdrv_get_flags(bs)) {
+ /* Give up write permissions before making it read-only */
+ blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
bdrv_reopen(bs, s->bs_flags, NULL);
}
@@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *iter;
int orig_bs_flags;
- s = block_job_create(job_id, &stream_job_driver, bs, speed,
- BLOCK_JOB_DEFAULT, NULL, NULL, errp);
- if (!s) {
- return;
- }
-
/* Make sure that the image is opened in read-write mode */
orig_bs_flags = bdrv_get_flags(bs);
if (!(orig_bs_flags & BDRV_O_RDWR)) {
if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
- block_job_unref(&s->common);
return;
}
}
- /* Block all intermediate nodes between bs and base, because they
- * will disappear from the chain after this operation */
+ /* Prevent concurrent jobs trying to modify the graph structure here, we
+ * already have our own plans. Also don't allow resize as the image size is
+ * queried only at the job start and then cached. */
+ s = block_job_create(job_id, &stream_job_driver, bs,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_GRAPH_MOD,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_WRITE,
+ speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+ if (!s) {
+ goto fail;
+ }
+
+ /* Block all intermediate nodes between bs and base, because they will
+ * disappear from the chain after this operation. The streaming job reads
+ * every block only once, assuming that it doesn't change, so block writes
+ * and resizes. */
for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
- block_job_add_bdrv(&s->common, iter);
+ block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
+ &error_abort);
}
s->base = base;
@@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
s->on_error = on_error;
trace_stream_start(bs, base, s);
block_job_start(&s->common);
+ return;
+
+fail:
+    /* The job was never created on this path (s is NULL), so restore the
+     * flags saved before the read-write reopen instead of reading
+     * s->bs_flags. */
+    if (orig_bs_flags != bdrv_get_flags(bs)) {
+        bdrv_reopen(bs, orig_bs_flags, NULL);
+    }
}
diff --git a/block/vdi.c b/block/vdi.c
index 18b4773aac..9b4f70e977 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -763,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -891,6 +892,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_open = vdi_open,
.bdrv_close = vdi_close,
.bdrv_reopen_prepare = vdi_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vdi_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = vdi_co_get_block_status,
diff --git a/block/vhdx.c b/block/vhdx.c
index 9918ee98ff..052a753159 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1859,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1983,6 +1984,7 @@ static BlockDriver bdrv_vhdx = {
.bdrv_open = vhdx_open,
.bdrv_close = vhdx_close,
.bdrv_reopen_prepare = vhdx_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_readv = vhdx_co_readv,
.bdrv_co_writev = vhdx_co_writev,
.bdrv_create = vhdx_create,
diff --git a/block/vmdk.c b/block/vmdk.c
index 9d68ec5a4e..a9bd22bf93 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1703,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2071,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
}
new_blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (new_blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -2359,6 +2361,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_open = vmdk_open,
.bdrv_check = vmdk_check,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_preadv = vmdk_co_preadv,
.bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,
diff --git a/block/vpc.c b/block/vpc.c
index d0df2a1c54..f591d4be38 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -915,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
}
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ &local_err);
if (blk == NULL) {
error_propagate(errp, local_err);
ret = -EIO;
@@ -1067,6 +1068,7 @@ static BlockDriver bdrv_vpc = {
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
+ .bdrv_child_perm = bdrv_format_default_perms,
.bdrv_create = vpc_create,
.bdrv_co_preadv = vpc_co_preadv,
diff --git a/block/vvfat.c b/block/vvfat.c
index 7f230be006..aa61c329e7 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3041,7 +3041,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
&error_abort);
*(void**) backing->opaque = s;
- bdrv_set_backing_hd(s->bs, backing);
+ bdrv_set_backing_hd(s->bs, backing, &error_abort);
bdrv_unref(backing);
return 0;
@@ -3052,6 +3052,27 @@ err:
return ret;
}
+/*
+ * Compute the permissions vvfat needs on (*nperm) and shares for (*nshared)
+ * its child @c. Only the private qcow node and a backing child are expected.
+ */
+static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
+                             const BdrvChildRole *role,
+                             uint64_t perm, uint64_t shared,
+                             uint64_t *nperm, uint64_t *nshared)
+{
+    BDRVVVFATState *s = bs->opaque;
+
+    assert(c == s->qcow || role == &child_backing);
+
+    if (c != s->qcow) {
+        /* The backing file is there so 'commit' can use it. vvfat doesn't
+         * access it in any way. */
+        *nperm = 0;
+        *nshared = BLK_PERM_ALL;
+        return;
+    }
+
+    /* This is a private node, nobody should try to attach to it */
+    *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+    *nshared = BLK_PERM_WRITE_UNCHANGED;
+}
+
static void vvfat_close(BlockDriverState *bs)
{
BDRVVVFATState *s = bs->opaque;
@@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_file_open = vvfat_open,
.bdrv_refresh_limits = vvfat_refresh_limits,
.bdrv_close = vvfat_close,
+ .bdrv_child_perm = vvfat_child_perm,
.bdrv_co_preadv = vvfat_co_preadv,
.bdrv_co_pwritev = vvfat_co_pwritev,
diff --git a/blockdev.c b/blockdev.c
index 8682bd81d8..8eb4e84fe0 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -558,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
if ((!file || !*file) && !qdict_size(bs_opts)) {
BlockBackendRootState *blk_rs;
- blk = blk_new();
+ blk = blk_new(0, BLK_PERM_ALL);
blk_rs = blk_get_root_state(blk);
blk_rs->open_flags = bdrv_flags;
blk_rs->read_only = read_only;
@@ -1768,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common,
if (!state->new_bs->drv->supports_backing) {
error_setg(errp, "The snapshot does not support backing images");
+ return;
+ }
+
+ /* This removes our old bs and adds the new bs. This is an operation that
+ * can fail, so we need to do it in .prepare; undoing it for abort is
+ * always possible. */
+ bdrv_ref(state->new_bs);
+ bdrv_append(state->new_bs, state->old_bs, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
}
}
@@ -1778,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common)
bdrv_set_aio_context(state->new_bs, state->aio_context);
- /* This removes our old bs and adds the new bs */
- bdrv_append(state->new_bs, state->old_bs);
/* We don't need (or want) to use the transactional
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
@@ -1794,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common)
ExternalSnapshotState *state =
DO_UPCAST(ExternalSnapshotState, common, common);
if (state->new_bs) {
- bdrv_unref(state->new_bs);
+ if (state->new_bs->backing) {
+ bdrv_replace_in_backing_chain(state->new_bs, state->old_bs);
+ }
}
}
@@ -1805,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common)
if (state->aio_context) {
bdrv_drained_end(state->old_bs);
aio_context_release(state->aio_context);
+ bdrv_unref(state->new_bs);
}
}
@@ -2311,7 +2323,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id,
}
if (!locked || force) {
- blk_dev_change_media_cb(blk, false);
+ blk_dev_change_media_cb(blk, false, &error_abort);
}
if (locked && !force) {
@@ -2349,6 +2361,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
Error **errp)
{
BlockBackend *blk;
+ Error *local_err = NULL;
device = has_device ? device : NULL;
id = has_id ? id : NULL;
@@ -2372,7 +2385,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
return;
}
- blk_dev_change_media_cb(blk, true);
+ blk_dev_change_media_cb(blk, true, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
}
void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
@@ -2425,7 +2442,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
* called at all); therefore, the medium needs to be ejected here.
* Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. false). */
- blk_dev_change_media_cb(blk, false);
+ blk_dev_change_media_cb(blk, false, &error_abort);
}
out:
@@ -2435,7 +2452,9 @@ out:
static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
BlockDriverState *bs, Error **errp)
{
+ Error *local_err = NULL;
bool has_device;
+ int ret;
/* For BBs without a device, we can exchange the BDS tree at will */
has_device = blk_get_attached_dev(blk);
@@ -2455,7 +2474,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
return;
}
- blk_insert_bs(blk, bs);
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ return;
+ }
if (!blk_dev_has_tray(blk)) {
/* For tray-less devices, blockdev-close-tray is a no-op (or may not be
@@ -2463,7 +2485,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
* slot here.
* Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load
* value passed here (i.e. true). */
- blk_dev_change_media_cb(blk, true);
+ blk_dev_change_media_cb(blk, true, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ blk_remove_bs(blk);
+ return;
+ }
}
}
@@ -2890,8 +2917,11 @@ void qmp_block_resize(bool has_device, const char *device,
goto out;
}
- blk = blk_new();
- blk_insert_bs(blk, bs);
+ blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ goto out;
+ }
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
@@ -3014,6 +3044,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
bool has_top, const char *top,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
+ bool has_filter_node_name, const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3029,6 +3060,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
if (!has_speed) {
speed = 0;
}
+ if (!has_filter_node_name) {
+ filter_node_name = NULL;
+ }
/* Important Note:
* libvirt relies on the DeviceNotFound error class in order to probe for
@@ -3103,8 +3137,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
goto out;
}
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
- BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL,
- &local_err, false);
+ BLOCK_JOB_DEFAULT, speed, on_error,
+ filter_node_name, NULL, NULL, &local_err, false);
} else {
BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3112,7 +3146,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
}
commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
on_error, has_backing_file ? backing_file : NULL,
- &local_err);
+ filter_node_name, &local_err);
}
if (local_err != NULL) {
error_propagate(errp, local_err);
@@ -3348,6 +3382,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
bool has_on_target_error,
BlockdevOnError on_target_error,
bool has_unmap, bool unmap,
+ bool has_filter_node_name,
+ const char *filter_node_name,
Error **errp)
{
@@ -3369,6 +3405,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_unmap) {
unmap = true;
}
+ if (!has_filter_node_name) {
+ filter_node_name = NULL;
+ }
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -3398,7 +3437,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
mirror_start(job_id, bs, target,
has_replaces ? replaces : NULL,
speed, granularity, buf_size, sync, backing_mode,
- on_source_error, on_target_error, unmap, errp);
+ on_source_error, on_target_error, unmap, filter_node_name,
+ errp);
}
void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3536,6 +3576,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
arg->has_on_source_error, arg->on_source_error,
arg->has_on_target_error, arg->on_target_error,
arg->has_unmap, arg->unmap,
+ false, NULL,
&local_err);
bdrv_unref(target_bs);
error_propagate(errp, local_err);
@@ -3554,6 +3595,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
BlockdevOnError on_source_error,
bool has_on_target_error,
BlockdevOnError on_target_error,
+ bool has_filter_node_name,
+ const char *filter_node_name,
Error **errp)
{
BlockDriverState *bs;
@@ -3585,6 +3628,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
has_on_source_error, on_source_error,
has_on_target_error, on_target_error,
true, true,
+ has_filter_node_name, filter_node_name,
&local_err);
error_propagate(errp, local_err);
diff --git a/blockjob.c b/blockjob.c
index abee11bb08..69126af97f 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -55,6 +55,19 @@ struct BlockJobTxn {
static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
+/*
+ * Return a newly allocated description of the block job stored in
+ * c->opaque, formatted as "<job type> job '<job id>'".
+ */
+static char *child_job_get_parent_desc(BdrvChild *c)
+{
+    BlockJob *owner = c->opaque;
+    const char *type_name = BlockJobType_lookup[owner->driver->job_type];
+
+    return g_strdup_printf("%s job '%s'", type_name, owner->id);
+}
+
+static const BdrvChildRole child_job = {
+ .get_parent_desc = child_job_get_parent_desc,
+ .stay_at_node = true,
+};
+
BlockJob *block_job_next(BlockJob *job)
{
if (!job) {
@@ -115,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque)
block_job_unref(job);
}
-void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
+/*
+ * Detach every node recorded in job->nodes: lift the job's op blocker from
+ * the node, drop the job's child reference, then free and clear the list.
+ */
+void block_job_remove_all_bdrv(BlockJob *job)
+{
+    GSList *entry = job->nodes;
+
+    while (entry) {
+        BdrvChild *child = entry->data;
+
+        bdrv_op_unblock_all(child->bs, job->blocker);
+        bdrv_root_unref_child(child);
+        entry = entry->next;
+    }
+
+    g_slist_free(job->nodes);
+    job->nodes = NULL;
+}
+
+int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
+ uint64_t perm, uint64_t shared_perm, Error **errp)
{
- job->nodes = g_slist_prepend(job->nodes, bs);
+ BdrvChild *c;
+
+ c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
+ job, errp);
+ if (c == NULL) {
+ return -EPERM;
+ }
+
+ job->nodes = g_slist_prepend(job->nodes, c);
bdrv_ref(bs);
bdrv_op_block_all(bs, job->blocker);
+
+ return 0;
}
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
- BlockDriverState *bs, int64_t speed, int flags,
+ BlockDriverState *bs, uint64_t perm,
+ uint64_t shared_perm, int64_t speed, int flags,
BlockCompletionFunc *cb, void *opaque, Error **errp)
{
BlockBackend *blk;
BlockJob *job;
+ int ret;
if (bs->job) {
error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
@@ -159,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
}
}
- blk = blk_new();
- blk_insert_bs(blk, bs);
+ blk = blk_new(perm, shared_perm);
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ blk_unref(blk);
+ return NULL;
+ }
job = g_malloc0(driver->instance_size);
error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
- block_job_add_bdrv(job, bs);
+ block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
job->driver = driver;
@@ -228,15 +270,9 @@ void block_job_ref(BlockJob *job)
void block_job_unref(BlockJob *job)
{
if (--job->refcnt == 0) {
- GSList *l;
BlockDriverState *bs = blk_bs(job->blk);
bs->job = NULL;
- for (l = job->nodes; l; l = l->next) {
- bs = l->data;
- bdrv_op_unblock_all(bs, job->blocker);
- bdrv_unref(bs);
- }
- g_slist_free(job->nodes);
+ block_job_remove_all_bdrv(job);
blk_remove_aio_context_notifier(job->blk,
block_job_attached_aio_context,
block_job_detach_aio_context, job);
diff --git a/docs/mach-virt-graphical.cfg b/docs/mach-virt-graphical.cfg
new file mode 100644
index 0000000000..0fdf6846dd
--- /dev/null
+++ b/docs/mach-virt-graphical.cfg
@@ -0,0 +1,281 @@
+# mach-virt - VirtIO guest (graphical console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-aarch64 \
+# -nodefaults \
+# -readconfig mach-virt-graphical.cfg \
+# -cpu host
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through a graphical console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals,
+# such as the PL011 UART, plus a PCI Express Root Bus; the
+# user will then have to explicitly add further devices.
+#
+# The PCI Express Root Bus shows up in the guest as:
+#
+# 00:00.0 Host bridge
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+# 00:01.0 Display controller
+# 00:1c.* PCI bridge (PCI Express Root Ports)
+# 01:00.0 SCSI storage controller
+# 02:00.0 Ethernet controller
+# 03:00.0 USB controller
+#
+# More information about these devices is available below.
+
+
+# Machine options
+# =========================================================
+#
+# We use the virt machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+#
+# Unfortunately, there is no way to configure the CPU model
+# in this file, so it will have to be provided on the
+# command line, but we can configure the guest to use the
+# same GIC version as the host.
+
+[machine]
+ type = "virt"
+ accel = "kvm"
+ gic-version = "host"
+
+[memory]
+ size = "1024"
+
+
+# Firmware configuration
+# =========================================================
+#
+# There are two parts to the firmware: a read-only image
+# containing the executable code, which is shared between
+# guests, and a read/write variable store that is owned
+# by one specific guest, exclusively, and is used to
+# record information such as the UEFI boot order.
+#
+# For any new guest, its permanent, private variable store
+# should initially be copied from the template file
+# provided along with the firmware binary.
+#
+# Depending on the OS distribution you're using on the
+# host, the name of the package containing the firmware
+# binary and variable store template, as well as the paths
+# to the files themselves, will be different. For example:
+#
+# Fedora
+# edk2-aarch64 (pkg)
+# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin)
+# /usr/share/edk2/aarch64/vars-template-pflash.raw (var)
+#
+# RHEL
+# AAVMF (pkg)
+# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
+# /usr/share/AAVMF/AAVMF_VARS.fd (var)
+#
+# Debian/Ubuntu
+# qemu-efi (pkg)
+# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
+# /usr/share/AAVMF/AAVMF_VARS.fd (var)
+
+[drive "uefi-binary"]
+ file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME
+ format = "raw"
+ if = "pflash"
+ unit = "0"
+ readonly = "on"
+
+[drive "uefi-varstore"]
+ file = "guest_VARS.fd" # CHANGE ME
+ format = "raw"
+ if = "pflash"
+ unit = "1"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+ multifunction = "on"
+
+[device "pcie.2"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "pcie.3"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "pcie.4"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+[device "pcie.5"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.4"
+ port = "5"
+ chassis = "5"
+
+[device "pcie.6"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.5"
+ port = "6"
+ chassis = "6"
+
+[device "pcie.7"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.6"
+ port = "7"
+ chassis = "7"
+
+[device "pcie.8"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.7"
+ port = "8"
+ chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been successfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+ driver = "virtio-scsi-pci"
+ bus = "pcie.1"
+ addr = "00.0"
+
+[device "scsi-disk"]
+ driver = "scsi-hd"
+ bus = "scsi.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "scsi-optical-disk"]
+ driver = "scsi-cd"
+ bus = "scsi.0"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "virtio-net-pci"
+ netdev = "hostnet"
+ bus = "pcie.2"
+ addr = "00.0"
+
+
+# USB controller (and input devices)
+# =========================================================
+#
+# We add a virtualization-friendly USB 3.0 controller and
+# a USB keyboard / USB tablet combo so that graphical
+# guests can be controlled appropriately.
+
+[device "usb"]
+ driver = "nec-usb-xhci"
+ bus = "pcie.3"
+ addr = "00.0"
+
+[device "keyboard"]
+ driver = "usb-kbd"
+ bus = "usb.0"
+
+[device "tablet"]
+ driver = "usb-tablet"
+ bus = "usb.0"
+
+
+# Display controller
+# =========================================================
+#
+# We use virtio-gpu because the legacy VGA framebuffer is
+# very troublesome on aarch64, and virtio-gpu is the only
+# video device that doesn't implement it.
+#
+# If you're running the guest on a remote, potentially
+# headless host, you will probably want to append something
+# like
+#
+# -display vnc=127.0.0.1:0
+#
+# to the command line in order to prevent QEMU from
+# creating a graphical display window on the host and
+# enable remote access instead.
+
+[device "video"]
+ driver = "virtio-gpu"
+ bus = "pcie.0"
+ addr = "01.0"
diff --git a/docs/mach-virt-serial.cfg b/docs/mach-virt-serial.cfg
new file mode 100644
index 0000000000..aee9f1c5a1
--- /dev/null
+++ b/docs/mach-virt-serial.cfg
@@ -0,0 +1,243 @@
+# mach-virt - VirtIO guest (serial console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-aarch64 \
+# -nodefaults \
+# -readconfig mach-virt-serial.cfg \
+# -display none -serial mon:stdio \
+# -cpu host
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through the serial console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals,
+# such as the PL011 UART, plus a PCI Express Root Bus; the
+# user will then have to explicitly add further devices.
+#
+# The PCI Express Root Bus shows up in the guest as:
+#
+# 00:00.0 Host bridge
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+# 00:1c.* PCI bridge (PCI Express Root Ports)
+# 01:00.0 SCSI storage controller
+# 02:00.0 Ethernet controller
+#
+# More information about these devices is available below.
+#
+# We use '-display none' to prevent QEMU from creating a
+# graphical display window, which would serve no use in
+# this specific configuration, and '-serial mon:stdio' to
+# multiplex the guest's serial console and the QEMU monitor
+# to the host's stdio; use 'Ctrl+A h' to learn how to
+# switch between the two and more.
+
+
+# Machine options
+# =========================================================
+#
+# We use the virt machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+#
+# Unfortunately, there is no way to configure the CPU model
+# in this file, so it will have to be provided on the
+# command line, but we can configure the guest to use the
+# same GIC version as the host.
+
+[machine]
+ type = "virt"
+ accel = "kvm"
+ gic-version = "host"
+
+[memory]
+ size = "1024"
+
+
+# Firmware configuration
+# =========================================================
+#
+# There are two parts to the firmware: a read-only image
+# containing the executable code, which is shared between
+# guests, and a read/write variable store that is owned
+# by one specific guest, exclusively, and is used to
+# record information such as the UEFI boot order.
+#
+# For any new guest, its permanent, private variable store
+# should initially be copied from the template file
+# provided along with the firmware binary.
+#
+# Depending on the OS distribution you're using on the
+# host, the name of the package containing the firmware
+# binary and variable store template, as well as the paths
+# to the files themselves, will be different. For example:
+#
+# Fedora
+# edk2-aarch64 (pkg)
+# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin)
+# /usr/share/edk2/aarch64/vars-template-pflash.raw (var)
+#
+# RHEL
+# AAVMF (pkg)
+# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
+# /usr/share/AAVMF/AAVMF_VARS.fd (var)
+#
+# Debian/Ubuntu
+# qemu-efi (pkg)
+# /usr/share/AAVMF/AAVMF_CODE.fd (bin)
+# /usr/share/AAVMF/AAVMF_VARS.fd (var)
+
+[drive "uefi-binary"]
+ file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME
+ format = "raw"
+ if = "pflash"
+ unit = "0"
+ readonly = "on"
+
+[drive "uefi-varstore"]
+ file = "guest_VARS.fd" # CHANGE ME
+ format = "raw"
+ if = "pflash"
+ unit = "1"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+ multifunction = "on"
+
+[device "pcie.2"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "pcie.3"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "pcie.4"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+[device "pcie.5"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.4"
+ port = "5"
+ chassis = "5"
+
+[device "pcie.6"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.5"
+ port = "6"
+ chassis = "6"
+
+[device "pcie.7"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.6"
+ port = "7"
+ chassis = "7"
+
+[device "pcie.8"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.7"
+ port = "8"
+ chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been successfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+ driver = "virtio-scsi-pci"
+ bus = "pcie.1"
+ addr = "00.0"
+
+[device "scsi-disk"]
+ driver = "scsi-hd"
+ bus = "scsi.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "scsi-optical-disk"]
+ driver = "scsi-cd"
+ bus = "scsi.0"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "virtio-net-pci"
+ netdev = "hostnet"
+ bus = "pcie.2"
+ addr = "00.0"
diff --git a/docs/migration.txt b/docs/migration.txt
index 6503c17685..1b940a829b 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -161,6 +161,11 @@ include/hw/hw.h.
=== More about versions ===
+Version numbers are intended for major incompatible changes to the
+migration of a device, and using them breaks backwards-migration
+compatibility; in general most changes can be made by adding Subsections
+(see below) or _TEST macros (see below) which won't break compatibility.
+
You can see that there are several version fields:
- version_id: the maximum version_id supported by VMState for that device.
@@ -175,6 +180,9 @@ version_id. And the function load_state_old() (if present) is able to
load state from minimum_version_id_old to minimum_version_id. This
function is deprecated and will be removed when no more users are left.
+Saving state will always create a section with the 'version_id' value
+and thus can't be loaded by any older QEMU.
+
=== Massaging functions ===
Sometimes, it is not enough to be able to save the state directly
@@ -292,6 +300,56 @@ save/send this state when we are in the middle of a pio operation
not enabled, the values on that fields are garbage and don't need to
be sent.
+Using a condition function that checks a 'property' to determine whether
+to send a subsection allows backwards migration compatibility when
+new subsections are added.
+
+For example:
+ a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and
+ default it to true.
+ b) Add an entry to the HW_COMPAT_ for the previous version
+ that sets the property to false.
+ c) Add a static bool support_foo function that tests the property.
+ d) Add a subsection with a .needed set to the support_foo function
+ e) (potentially) Add a pre_load that sets up a default value for 'foo'
+ to be used if the subsection isn't loaded.
+
+Now that subsection will not be generated when using an older
+machine type and the migration stream will be accepted by older
+QEMU versions. pre-load functions can be used to initialise state
+on the newer version so that they default to suitable values
+when loading streams created by older QEMU versions that do not
+generate the subsection.
+
+In some cases subsections are added for data that had been accidentally
+omitted by earlier versions; if the missing data causes the migration
+process to succeed but the guest to behave badly then it may be better
+to send the subsection and cause the migration to explicitly fail
+with the unknown subsection error. If the bad behaviour only happens
+with certain data values, making the subsection conditional on
+the data value (rather than the machine type) allows migrations to succeed
+in most cases. In general the preference is to tie the subsection to
+the machine type, and allow reliable migrations, unless the behaviour
+from omission of the subsection is really bad.
+
+= Not sending existing elements =
+
+Sometimes members of the VMState are no longer needed:
+ - removing them will break migration compatibility;
+ - making them version dependent and bumping the version will break backwards
+   migration compatibility.
+
+The best way is to:
+ a) Add a new property/compatibility/function in the same way as for
+ subsections above.
+ b) replace the VMSTATE macro with the _TEST version of the macro, e.g.:
+ VMSTATE_UINT32(foo, barstruct)
+ becomes
+ VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz)
+
+ Sometime in the future when we no longer care about the ancient
+versions these can be killed off.
+
= Return path =
In most migration scenarios there is only a single data path that runs
@@ -482,3 +540,16 @@ request for a page that has already been sent is ignored. Duplicate requests
such as this can happen as a page is sent at about the same time the
destination accesses it.
+=== Postcopy with hugepages ===
+
+Postcopy now works with hugetlbfs backed memory:
+ a) The Linux kernel on the destination must support userfault on hugepages.
+ b) The huge-page configuration on the source and destination VMs must be
+ identical; i.e. RAMBlocks on both sides must use the same page size.
+ c) Note that -mem-path /dev/hugepages will fall back to allocating normal
+ RAM if it doesn't have enough hugepages, triggering (b) to fail.
+ Using -mem-prealloc enforces the allocation using hugepages.
+ d) Care should be taken with the size of hugepage used; postcopy with 2MB
+ hugepages works well, however 1GB hugepages are likely to be problematic
+ since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link,
+ and until the full page is transferred the destination thread is blocked.
diff --git a/docs/q35-chipset.cfg b/docs/q35-chipset.cfg
deleted file mode 100644
index e4ddb7d9cc..0000000000
--- a/docs/q35-chipset.cfg
+++ /dev/null
@@ -1,152 +0,0 @@
-################################################################
-#
-# qemu -M q35 creates a bare machine with just the very essential
-# chipset devices being present:
-#
-# 00.0 - Host bridge
-# 1f.0 - ISA bridge / LPC
-# 1f.2 - SATA (AHCI) controller
-# 1f.3 - SMBus controller
-#
-# This config file documents the other devices and how they are
-# created. You can simply use "-readconfig $thisfile" to create
-# them all. Here is a overview:
-#
-# 19.0 - Ethernet controller (not created, our e1000 emulation
-# doesn't emulate the ich9 device).
-# 1a.* - USB Controller #2 (ehci + uhci companions)
-# 1b.0 - HD Audio Controller
-# 1c.* - PCI Express Ports
-# 1d.* - USB Controller #1 (ehci + uhci companions,
-# "qemu -M q35 -usb" creates these too)
-# 1e.0 - PCI Bridge
-#
-
-[device "ich9-ehci-2"]
- driver = "ich9-usb-ehci2"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1a.7"
-
-[device "ich9-uhci-4"]
- driver = "ich9-usb-uhci4"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1a.0"
- masterbus = "ich9-ehci-2.0"
- firstport = "0"
-
-[device "ich9-uhci-5"]
- driver = "ich9-usb-uhci5"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1a.1"
- masterbus = "ich9-ehci-2.0"
- firstport = "2"
-
-[device "ich9-uhci-6"]
- driver = "ich9-usb-uhci6"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1a.2"
- masterbus = "ich9-ehci-2.0"
- firstport = "4"
-
-
-[device "ich9-hda-audio"]
- driver = "ich9-intel-hda"
- bus = "pcie.0"
- addr = "1b.0"
-
-
-[device "ich9-pcie-port-1"]
- driver = "ioh3420"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1c.0"
- port = "1"
- chassis = "1"
-
-[device "ich9-pcie-port-2"]
- driver = "ioh3420"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1c.1"
- port = "2"
- chassis = "2"
-
-[device "ich9-pcie-port-3"]
- driver = "ioh3420"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1c.2"
- port = "3"
- chassis = "3"
-
-[device "ich9-pcie-port-4"]
- driver = "ioh3420"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1c.3"
- port = "4"
- chassis = "4"
-
-##
-# Example PCIe switch with two downstream ports
-#
-#[device "pcie-switch-upstream-port-1"]
-# driver = "x3130-upstream"
-# bus = "ich9-pcie-port-4"
-# addr = "00.0"
-#
-#[device "pcie-switch-downstream-port-1-1"]
-# driver = "xio3130-downstream"
-# multifunction = "on"
-# bus = "pcie-switch-upstream-port-1"
-# addr = "00.0"
-# port = "1"
-# chassis = "5"
-#
-#[device "pcie-switch-downstream-port-1-2"]
-# driver = "xio3130-downstream"
-# multifunction = "on"
-# bus = "pcie-switch-upstream-port-1"
-# addr = "00.1"
-# port = "1"
-# chassis = "6"
-
-[device "ich9-ehci-1"]
- driver = "ich9-usb-ehci1"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1d.7"
-
-[device "ich9-uhci-1"]
- driver = "ich9-usb-uhci1"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1d.0"
- masterbus = "ich9-ehci-1.0"
- firstport = "0"
-
-[device "ich9-uhci-2"]
- driver = "ich9-usb-uhci2"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1d.1"
- masterbus = "ich9-ehci-1.0"
- firstport = "2"
-
-[device "ich9-uhci-3"]
- driver = "ich9-usb-uhci3"
- multifunction = "on"
- bus = "pcie.0"
- addr = "1d.2"
- masterbus = "ich9-ehci-1.0"
- firstport = "4"
-
-
-[device "ich9-pci-bridge"]
- driver = "i82801b11-bridge"
- bus = "pcie.0"
- addr = "1e.0"
diff --git a/docs/q35-emulated.cfg b/docs/q35-emulated.cfg
new file mode 100644
index 0000000000..c6416d6545
--- /dev/null
+++ b/docs/q35-emulated.cfg
@@ -0,0 +1,288 @@
+# q35 - Emulated guest (graphical console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-x86_64 \
+# -nodefaults \
+# -readconfig q35-emulated.cfg
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of emulated devices that
+# closely resembles that of a physical machine, and will be
+# accessed through a graphical console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals
+# plus a small selection of core PCI devices and
+# controllers; the user will then have to explicitly add
+# further devices.
+#
+# The core PCI devices show up in the guest as:
+#
+# 00:00.0 Host bridge
+# 00:1f.0 ISA bridge / LPC
+# 00:1f.2 SATA (AHCI) controller
+# 00:1f.3 SMBus controller
+#
+# This configuration file adds a number of devices that
+# are pretty much guaranteed to be present in every single
+# physical machine based on q35, more specifically:
+#
+# 00:01.0 VGA compatible controller
+# 00:19.0 Ethernet controller
+# 00:1a.* USB controller (#2)
+# 00:1b.0 Audio device
+# 00:1c.* PCI bridge (PCI Express Root Ports)
+# 00:1d.* USB controller (#1)
+# 00:1e.0 PCI bridge (legacy PCI bridge)
+#
+# More information about these devices is available below.
+
+
+# Machine options
+# =========================================================
+#
+# We use the q35 machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+#
+# Unfortunately, there is no way to configure the CPU model
+# in this file, so it will have to be provided on the
+# command line.
+
+[machine]
+ type = "q35"
+ accel = "kvm"
+
+[memory]
+ size = "1024"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We add four PCI Express Root Ports, all sharing the same
+# slot on the PCI Express Root Bus. These ports support
+# hotplug.
+
+[device "ich9-pcie-port-1"]
+ driver = "ioh3420"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+
+[device "ich9-pcie-port-2"]
+ driver = "ioh3420"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "ich9-pcie-port-3"]
+ driver = "ioh3420"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "ich9-pcie-port-4"]
+ driver = "ioh3420"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+
+# PCI bridge (legacy PCI bridge)
+# =========================================================
+#
+# This bridge can be used to build an independent topology
+# for legacy PCI devices. PCI Express devices should be
+# plugged into PCI Express slots instead, so ideally there
+# will be no devices connected to this bridge.
+
+[device "ich9-pci-bridge"]
+ driver = "i82801b11-bridge"
+ bus = "pcie.0"
+ addr = "1e.0"
+
+
+# SATA storage
+# =========================================================
+#
+# An implicit SATA controller is created automatically for
+# every single q35 guest; here we create a disk, backed by
+# a qcow2 disk image on the host's filesystem, and attach
+# it to that controller so that the guest can use it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been successfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "sata-disk"]
+ driver = "ide-hd"
+ bus = "ide.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "sata-optical-disk"]
+ driver = "ide-cd"
+ bus = "ide.1"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# USB controller (#1)
+# =========================================================
+#
+# EHCI controller + UHCI companion controllers.
+
+[device "ich9-ehci-1"]
+ driver = "ich9-usb-ehci1"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1d.7"
+
+[device "ich9-uhci-1"]
+ driver = "ich9-usb-uhci1"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1d.0"
+ masterbus = "ich9-ehci-1.0"
+ firstport = "0"
+
+[device "ich9-uhci-2"]
+ driver = "ich9-usb-uhci2"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1d.1"
+ masterbus = "ich9-ehci-1.0"
+ firstport = "2"
+
+[device "ich9-uhci-3"]
+ driver = "ich9-usb-uhci3"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1d.2"
+ masterbus = "ich9-ehci-1.0"
+ firstport = "4"
+
+
+# USB controller (#2)
+# =========================================================
+#
+# EHCI controller + UHCI companion controllers.
+
+[device "ich9-ehci-2"]
+ driver = "ich9-usb-ehci2"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1a.7"
+
+[device "ich9-uhci-4"]
+ driver = "ich9-usb-uhci4"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1a.0"
+ masterbus = "ich9-ehci-2.0"
+ firstport = "0"
+
+[device "ich9-uhci-5"]
+ driver = "ich9-usb-uhci5"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1a.1"
+ masterbus = "ich9-ehci-2.0"
+ firstport = "2"
+
+[device "ich9-uhci-6"]
+ driver = "ich9-usb-uhci6"
+ multifunction = "on"
+ bus = "pcie.0"
+ addr = "1a.2"
+ masterbus = "ich9-ehci-2.0"
+ firstport = "4"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We add a Gigabit Ethernet interface to the guest; on the
+# host side, we take advantage of user networking so that
+# the QEMU process doesn't require any additional
+# privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "e1000"
+ netdev = "hostnet"
+ bus = "pcie.0"
+ addr = "19.0"
+
+
+# VGA compatible controller
+# =========================================================
+#
+# We use stdvga instead of Cirrus as it supports more video
+# modes and is closer to what actual hardware looks like.
+#
+# If you're running the guest on a remote, potentially
+# headless host, you will probably want to append something
+# like
+#
+# -display vnc=127.0.0.1:0
+#
+# to the command line in order to prevent QEMU from
+# creating a graphical display window on the host and
+# enable remote access instead.
+
+[device "video"]
+ driver = "VGA"
+ bus = "pcie.0"
+ addr = "01.0"
+
+
+# Audio device
+# =========================================================
+#
+# The sound card is a legacy PCI device that is plugged
+# directly into the PCI Express Root Bus.
+
+[device "ich9-hda-audio"]
+ driver = "ich9-intel-hda"
+ bus = "pcie.0"
+ addr = "1b.0"
+
+[device "ich9-hda-duplex"]
+ driver = "hda-duplex"
+ bus = "ich9-hda-audio.0"
+ cad = "0"
diff --git a/docs/q35-virtio-graphical.cfg b/docs/q35-virtio-graphical.cfg
new file mode 100644
index 0000000000..28bde2fc57
--- /dev/null
+++ b/docs/q35-virtio-graphical.cfg
@@ -0,0 +1,248 @@
+# q35 - VirtIO guest (graphical console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-x86_64 \
+# -nodefaults \
+# -readconfig q35-virtio-graphical.cfg
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through a graphical console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals
+# plus a small selection of core PCI devices and
+# controllers; the user will then have to explicitly add
+# further devices.
+#
+# The core PCI devices show up in the guest as:
+#
+# 00:00.0 Host bridge
+# 00:1f.0 ISA bridge / LPC
+# 00:1f.2 SATA (AHCI) controller
+# 00:1f.3 SMBus controller
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+# 00:01.0 VGA compatible controller
+# 00:1b.0 Audio device
+# 00:1c.* PCI bridge (PCI Express Root Ports)
+# 01:00.0 SCSI storage controller
+# 02:00.0 Ethernet controller
+# 03:00.0 USB controller
+#
+# More information about these devices is available below.
+
+
+# Machine options
+# =========================================================
+#
+# We use the q35 machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+
+[machine]
+ type = "q35"
+ accel = "kvm"
+
+[memory]
+ size = "1024"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+ multifunction = "on"
+
+[device "pcie.2"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "pcie.3"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "pcie.4"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+[device "pcie.5"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.4"
+ port = "5"
+ chassis = "5"
+
+[device "pcie.6"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.5"
+ port = "6"
+ chassis = "6"
+
+[device "pcie.7"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.6"
+ port = "7"
+ chassis = "7"
+
+[device "pcie.8"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.7"
+ port = "8"
+ chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been successfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+ driver = "virtio-scsi-pci"
+ bus = "pcie.1"
+ addr = "00.0"
+
+[device "scsi-disk"]
+ driver = "scsi-hd"
+ bus = "scsi.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "scsi-optical-disk"]
+ driver = "scsi-cd"
+ bus = "scsi.0"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "virtio-net-pci"
+ netdev = "hostnet"
+ bus = "pcie.2"
+ addr = "00.0"
+
+
+# USB controller (and input devices)
+# =========================================================
+#
+# We add a virtualization-friendly USB 3.0 controller and
+# a USB tablet so that graphical guests can be controlled
+# appropriately. A USB keyboard is not needed, as q35
+# guests get a PS/2 one added automatically.
+
+[device "usb"]
+ driver = "nec-usb-xhci"
+ bus = "pcie.3"
+ addr = "00.0"
+
+[device "tablet"]
+ driver = "usb-tablet"
+ bus = "usb.0"
+
+
+# VGA compatible controller
+# =========================================================
+#
+# We plug the QXL video card directly into the PCI Express
+# Root Bus as it is a legacy PCI device; this way, we can
+# reduce the number of PCI Express controllers in the
+# guest.
+#
+# If you're running the guest on a remote, potentially
+# headless host, you will probably want to append something
+# like
+#
+# -display vnc=127.0.0.1:0
+#
+# to the command line in order to prevent QEMU from
+# creating a graphical display window on the host and
+# enable remote access instead.
+
+[device "video"]
+ driver = "qxl-vga"
+ bus = "pcie.0"
+ addr = "01.0"
+
+
+# Audio device
+# =========================================================
+#
+# Like the video card, the sound card is a legacy PCI
+# device and as such can be plugged directly into the PCI
+# Express Root Bus.
+
+[device "sound"]
+ driver = "ich9-intel-hda"
+ bus = "pcie.0"
+ addr = "1b.0"
+
+[device "duplex"]
+ driver = "hda-duplex"
+ bus = "sound.0"
+ cad = "0"
diff --git a/docs/q35-virtio-serial.cfg b/docs/q35-virtio-serial.cfg
new file mode 100644
index 0000000000..c33c9cc07a
--- /dev/null
+++ b/docs/q35-virtio-serial.cfg
@@ -0,0 +1,193 @@
+# q35 - VirtIO guest (serial console)
+# =========================================================
+#
+# Usage:
+#
+# $ qemu-system-x86_64 \
+# -nodefaults \
+# -readconfig q35-virtio-serial.cfg \
+# -display none -serial mon:stdio
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through the serial console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals
+# plus a small selection of core PCI devices and
+# controllers; the user will then have to explicitly add
+# further devices.
+#
+# The core PCI devices show up in the guest as:
+#
+# 00:00.0 Host bridge
+# 00:1f.0 ISA bridge / LPC
+# 00:1f.2 SATA (AHCI) controller
+# 00:1f.3 SMBus controller
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+# 00:1c.* PCI bridge (PCI Express Root Ports)
+# 01:00.0 SCSI storage controller
+# 02:00.0 Ethernet controller
+#
+# More information about these devices is available below.
+#
+# We use '-display none' to prevent QEMU from creating a
+# graphical display window, which would serve no use in
+# this specific configuration, and '-serial mon:stdio' to
+# multiplex the guest's serial console and the QEMU monitor
+# to the host's stdio; use 'Ctrl+A h' to learn how to
+# switch between the two and more.
+
+
+# Machine options
+# =========================================================
+#
+# We use the q35 machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+
+[machine]
+ type = "q35"
+ accel = "kvm"
+
+[memory]
+ size = "1024"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.0"
+ port = "1"
+ chassis = "1"
+ multifunction = "on"
+
+[device "pcie.2"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.1"
+ port = "2"
+ chassis = "2"
+
+[device "pcie.3"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.2"
+ port = "3"
+ chassis = "3"
+
+[device "pcie.4"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.3"
+ port = "4"
+ chassis = "4"
+
+[device "pcie.5"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.4"
+ port = "5"
+ chassis = "5"
+
+[device "pcie.6"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.5"
+ port = "6"
+ chassis = "6"
+
+[device "pcie.7"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.6"
+ port = "7"
+ chassis = "7"
+
+[device "pcie.8"]
+ driver = "pcie-root-port"
+ bus = "pcie.0"
+ addr = "1c.7"
+ port = "8"
+ chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been successfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+ driver = "virtio-scsi-pci"
+ bus = "pcie.1"
+ addr = "00.0"
+
+[device "scsi-disk"]
+ driver = "scsi-hd"
+ bus = "scsi.0"
+ drive = "disk"
+ bootindex = "1"
+
+[drive "disk"]
+ file = "guest.qcow2" # CHANGE ME
+ format = "qcow2"
+ if = "none"
+
+[device "scsi-optical-disk"]
+ driver = "scsi-cd"
+ bus = "scsi.0"
+ drive = "optical-disk"
+ bootindex = "2"
+
+[drive "optical-disk"]
+ file = "install.iso" # CHANGE ME
+ format = "raw"
+ if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+ type = "user"
+
+[device "net"]
+ driver = "virtio-net-pci"
+ netdev = "hostnet"
+ bus = "pcie.2"
+ addr = "00.0"
diff --git a/docs/replay.txt b/docs/replay.txt
index 03e193193f..486c1e0e9d 100644
--- a/docs/replay.txt
+++ b/docs/replay.txt
@@ -225,3 +225,10 @@ recording the virtual machine this filter puts all packets coming from
the outer world into the log. In replay mode packets from the log are
injected into the network device. All interactions with network backend
in replay mode are disabled.
+
+Audio devices
+-------------
+
+Audio data is recorded and replayed automatically. The command line for recording
+and replaying must contain identical specifications of audio hardware, e.g.:
+ -soundhw ac97
diff --git a/exec.c b/exec.c
index 3adf2b1861..785d20f648 100644
--- a/exec.c
+++ b/exec.c
@@ -45,6 +45,12 @@
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace-root.h"
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
@@ -1518,6 +1524,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb)
return rb->page_size;
}
+/* Returns the largest size of page in use */
+size_t qemu_ram_pagesize_largest(void)
+{
+ RAMBlock *block;
+ size_t largest = 0;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ largest = MAX(largest, qemu_ram_pagesize(block));
+ }
+
+ return largest;
+}
+
static int memory_try_enable_merging(void *addr, size_t len)
{
if (!machine_mem_merge(current_machine)) {
@@ -3294,4 +3313,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
rcu_read_unlock();
return ret;
}
+
+/*
+ * Unmap pages of memory from start to start+length such that
+ * they a) read as 0, b) Trigger whatever fault mechanism
+ * the OS provides for postcopy.
+ * The pages must be unmapped by the end of the function.
+ * Returns: 0 on success, non-zero on failure
+ *
+ */
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+{
+ int ret = -1;
+
+ uint8_t *host_startaddr = rb->host + start;
+
+ if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned start address: %p",
+ host_startaddr);
+ goto err;
+ }
+
+ if ((start + length) <= rb->used_length) {
+ uint8_t *host_endaddr = host_startaddr + length;
+ if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned end address: %p",
+ host_endaddr);
+ goto err;
+ }
+
+ errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+ if (rb->page_size == qemu_host_page_size) {
+#if defined(CONFIG_MADVISE)
+ /* Note: We need the madvise MADV_DONTNEED behaviour of definitely
+ * freeing the page.
+ */
+ ret = madvise(host_startaddr, length, MADV_DONTNEED);
+#endif
+ } else {
+ /* Huge page case - unfortunately it can't do DONTNEED, but
+ * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
+ * huge page file.
+ */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ start, length);
+#endif
+ }
+ if (ret) {
+ ret = -errno;
+ error_report("ram_block_discard_range: Failed to discard range "
+ "%s:%" PRIx64 " +%zx (%d)",
+ rb->idstr, start, length, ret);
+ }
+ } else {
+ error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
+ "/%zx/" RAM_ADDR_FMT")",
+ rb->idstr, start, length, rb->used_length);
+ }
+
+err:
+ return ret;
+}
+
#endif
diff --git a/hmp.c b/hmp.c
index aadbcf507f..261843f7a2 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2045,13 +2045,17 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
const char* device = qdict_get_str(qdict, "device");
const char* command = qdict_get_str(qdict, "command");
Error *err = NULL;
+ int ret;
blk = blk_by_name(device);
if (!blk) {
BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
if (bs) {
- blk = local_blk = blk_new();
- blk_insert_bs(blk, bs);
+ blk = local_blk = blk_new(0, BLK_PERM_ALL);
+ ret = blk_insert_bs(blk, bs, &err);
+ if (ret < 0) {
+ goto fail;
+ }
} else {
goto fail;
}
@@ -2060,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
+ /*
+ * Notably absent: Proper permission management. This is sad, but it seems
+ * almost impossible to achieve without changing the semantics and thereby
+ * limiting the use cases of the qemu-io HMP command.
+ *
+ * In an ideal world we would unconditionally create a new BlockBackend for
+ * qemuio_command(), but we have commands like 'reopen' and want them to
+ * take effect on the exact BlockBackend whose name the user passed instead
+ * of just on a temporary copy of it.
+ *
+ * Another problem is that deleting the temporary BlockBackend involves
+ * draining all requests on it first, but some qemu-iotests cases want to
+ * issue multiple aio_read/write requests and expect them to complete in
+ * the background while the monitor has already returned.
+ *
+ * This is also what prevents us from saving the original permissions and
+ * restoring them later: We can't revoke permissions until all requests
+ * have completed, and we don't know when that is nor can we really let
+ * anything else run before we have revoked them to avoid race conditions.
+ *
+ * What happens now is that command() in qemu-io-cmds.c can extend the
+ * permissions if necessary for the qemu-io command. And they simply stay
+ * extended, possibly resulting in a read-only guest device keeping write
+ * permissions. Ugly, but it appears to be the lesser evil.
+ */
qemuio_command(blk, command);
aio_context_release(aio_context);
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index d957d1e30d..2b0f3e1bfb 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -49,7 +49,6 @@
#define ACPI_PCIHP_ADDR 0xae00
#define ACPI_PCIHP_SIZE 0x0014
-#define ACPI_PCIHP_LEGACY_SIZE 0x000f
#define PCI_UP_BASE 0x0000
#define PCI_DOWN_BASE 0x0004
#define PCI_EJ_BASE 0x0008
@@ -302,16 +301,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
s->root= root_bus;
s->legacy_piix = !bridges_enabled;
- if (s->legacy_piix) {
- unsigned *bus_bsel = g_malloc(sizeof *bus_bsel);
-
- s->io_len = ACPI_PCIHP_LEGACY_SIZE;
-
- *bus_bsel = ACPI_PCIHP_BSEL_DEFAULT;
- object_property_add_uint32_ptr(OBJECT(root_bus), ACPI_PCIHP_PROP_BSEL,
- bus_bsel, NULL);
- }
-
memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
"acpi-pci-hotplug", s->io_len);
memory_region_add_subregion(address_space_io, s->io_base, &s->io);
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 6d99fe407c..a553a7e110 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -440,6 +440,8 @@ static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque)
{
PIIX4PMState *s = opaque;
+ /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive
+ * and it's not hot-unpluggable */
qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort);
}
diff --git a/hw/block/block.c b/hw/block/block.c
index 8dc9d84a39..27878d0087 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -51,11 +51,33 @@ void blkconf_blocksizes(BlockConf *conf)
}
}
-void blkconf_apply_backend_options(BlockConf *conf)
+void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+ bool resizable, Error **errp)
{
BlockBackend *blk = conf->blk;
BlockdevOnError rerror, werror;
+ uint64_t perm, shared_perm;
bool wce;
+ int ret;
+
+ perm = BLK_PERM_CONSISTENT_READ;
+ if (!readonly) {
+ perm |= BLK_PERM_WRITE;
+ }
+
+ shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_GRAPH_MOD;
+ if (resizable) {
+ shared_perm |= BLK_PERM_RESIZE;
+ }
+ if (conf->share_rw) {
+ shared_perm |= BLK_PERM_WRITE;
+ }
+
+ ret = blk_set_perm(blk, perm, shared_perm, errp);
+ if (ret < 0) {
+ return;
+ }
switch (conf->wce) {
case ON_OFF_AUTO_ON: wce = true; break;
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 17d29e7bc5..a328693d15 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -186,6 +186,7 @@ typedef enum FDiskFlags {
struct FDrive {
FDCtrl *fdctrl;
BlockBackend *blk;
+ BlockConf *conf;
/* Drive status */
FloppyDriveType drive; /* CMOS drive type */
uint8_t perpendicular; /* 2.88 MB access mode */
@@ -469,9 +470,22 @@ static void fd_revalidate(FDrive *drv)
}
}
-static void fd_change_cb(void *opaque, bool load)
+static void fd_change_cb(void *opaque, bool load, Error **errp)
{
FDrive *drive = opaque;
+ Error *local_err = NULL;
+
+ if (!load) {
+ blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort);
+ } else {
+ blkconf_apply_backend_options(drive->conf,
+ blk_is_read_only(drive->blk), false,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
drive->media_changed = 1;
drive->media_validated = false;
@@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev)
FloppyDrive *dev = FLOPPY_DRIVE(qdev);
FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus);
FDrive *drive;
+ Error *local_err = NULL;
int ret;
if (dev->unit == -1) {
@@ -533,7 +548,7 @@ static int floppy_drive_init(DeviceState *qdev)
if (!dev->conf.blk) {
/* Anonymous BlockBackend for an empty drive */
- dev->conf.blk = blk_new();
+ dev->conf.blk = blk_new(0, BLK_PERM_ALL);
ret = blk_attach_dev(dev->conf.blk, qdev);
assert(ret == 0);
}
@@ -551,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev)
* blkconf_apply_backend_options(). */
dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO;
dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO;
- blkconf_apply_backend_options(&dev->conf);
+
+ blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk),
+ false, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
/* 'enospc' is the default for -drive, 'report' is what blk_new() gives us
* for empty drives. */
@@ -565,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev)
return -1;
}
+ drive->conf = &dev->conf;
drive->blk = dev->conf.blk;
drive->fdctrl = bus->fdc;
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 2d6eb46a04..190573cefa 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
{
Flash *s = M25P80(ss);
M25P80Class *mc = M25P80_GET_CLASS(s);
+ int ret;
s->pi = mc->pi;
@@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
s->dirty_page = -1;
if (s->blk) {
+ uint64_t perm = BLK_PERM_CONSISTENT_READ |
+ (blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE);
+ ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
+
DB_PRINT_L(0, "Binding to IF_MTD drive\n");
s->storage = blk_blockalign(s->blk, s->size);
diff --git a/hw/block/nand.c b/hw/block/nand.c
index c69e6755d9..0d33ac281f 100644
--- a/hw/block/nand.c
+++ b/hw/block/nand.c
@@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp)
{
int pagesize;
NANDFlashState *s = NAND(dev);
+ int ret;
+
s->buswidth = nand_flash_ids[s->chip_id].width >> 3;
s->size = nand_flash_ids[s->chip_id].size << 20;
@@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp)
error_setg(errp, "Can't use a read-only drive");
return;
}
+ ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+ BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
if (blk_getlength(s->blk) >=
(s->pages << s->page_shift) + (s->pages << s->oob_shift)) {
pagesize = 0;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ae91a18f17..ae303d44e5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev)
int i;
int64_t bs_size;
uint8_t *pci_conf;
+ Error *local_err = NULL;
if (!n->conf.blk) {
return -1;
@@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev)
return -1;
}
blkconf_blocksizes(&n->conf);
- blkconf_apply_backend_options(&n->conf);
+ blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
+ false, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
pci_conf = pci_dev->config;
pci_conf[PCI_INTERRUPT_PIN] = 1;
diff --git a/hw/block/onenand.c b/hw/block/onenand.c
index 8d8422739e..ddf5492426 100644
--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd)
OneNANDState *s = ONE_NAND(dev);
uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7));
void *ram;
+ Error *local_err = NULL;
s->base = (hwaddr)-1;
s->rdy = NULL;
@@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd)
error_report("Can't use a read-only drive");
return -1;
}
+ blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+ BLK_PERM_ALL, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
s->blk_cur = s->blk;
}
s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT),
diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 71b98a3eef..594d4cf6fe 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -758,6 +758,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
if (pfl->blk) {
+ uint64_t perm;
+ pfl->ro = blk_is_read_only(pfl->blk);
+ perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
+ ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
+ } else {
+ pfl->ro = 0;
+ }
+
+ if (pfl->blk) {
/* read the initial flash content */
ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);
@@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
}
}
- if (pfl->blk) {
- pfl->ro = blk_is_read_only(pfl->blk);
- } else {
- pfl->ro = 0;
- }
-
/* Default to devices being used at their maximum device width. This was
* assumed before the device_width support was added.
*/
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index ef71322759..e6c5c6c25d 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl));
pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem);
pfl->chip_len = chip_len;
+
+ if (pfl->blk) {
+ uint64_t perm;
+ pfl->ro = blk_is_read_only(pfl->blk);
+ perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
+ ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
+ } else {
+ pfl->ro = 0;
+ }
+
if (pfl->blk) {
/* read the initial flash content */
ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);
@@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
pfl->rom_mode = 1;
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
- if (pfl->blk) {
- pfl->ro = blk_is_read_only(pfl->blk);
- } else {
- pfl->ro = 0;
- }
-
pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl);
pfl->wcycle = 0;
pfl->cmd = 0;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 843bd2fa73..98c16a7a9a 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
}
blkconf_serial(&conf->conf, &conf->serial);
- blkconf_apply_backend_options(&conf->conf);
+ blkconf_apply_backend_options(&conf->conf,
+ blk_is_read_only(conf->conf.blk), true,
+ &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
s->original_wce = blk_enable_write_cache(conf->conf.blk);
blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err);
if (err) {
diff --git a/hw/core/bus.c b/hw/core/bus.c
index cf383fc1af..4651f24486 100644
--- a/hw/core/bus.c
+++ b/hw/core/bus.c
@@ -197,7 +197,7 @@ static void qbus_initfn(Object *obj)
TYPE_HOTPLUG_HANDLER,
(Object **)&bus->hotplug_handler,
object_property_allow_set_link,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
+ 0,
NULL);
object_property_add_bool(obj, "realized",
bus_get_realized, bus_set_realized, NULL);
diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c
index 3af82afe78..59ccb00550 100644
--- a/hw/core/ptimer.c
+++ b/hw/core/ptimer.c
@@ -12,6 +12,7 @@
#include "qemu/host-utils.h"
#include "sysemu/replay.h"
#include "sysemu/qtest.h"
+#include "block/aio.h"
#define DELTA_ADJUST 1
#define DELTA_NO_ADJUST -1
@@ -353,3 +354,10 @@ ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask)
s->policy_mask = policy_mask;
return s;
}
+
+void ptimer_free(ptimer_state *s)
+{
+ qemu_bh_delete(s->bh);
+ timer_free(s->timer);
+ g_free(s);
+}
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 94f4d8bde4..c34be1c1ba 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -73,14 +73,19 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr,
{
BlockBackend *blk;
bool blk_created = false;
+ int ret;
blk = blk_by_name(str);
if (!blk) {
BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
if (bs) {
- blk = blk_new();
- blk_insert_bs(blk, bs);
+ blk = blk_new(0, BLK_PERM_ALL);
blk_created = true;
+
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ goto fail;
+ }
}
}
if (!blk) {
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 923e626333..1e7fb33246 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -37,6 +37,7 @@
#include "hw/boards.h"
#include "hw/sysbus.h"
#include "qapi-event.h"
+#include "migration/migration.h"
int qdev_hotplug = 0;
static bool qdev_hot_added = false;
@@ -903,6 +904,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
Error *local_err = NULL;
bool unattached_parent = false;
static int unattached_count;
+ int ret;
if (dev->hotplugged && !dc->hotpluggable) {
error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj));
@@ -910,6 +912,11 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
}
if (value && !dev->realized) {
+ ret = check_migratable(obj, &local_err);
+ if (ret < 0) {
+ goto fail;
+ }
+
if (!obj->parent) {
gchar *name = g_strdup_printf("device[%d]", unattached_count++);
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 8018f0533b..2073108577 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -463,7 +463,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
*bus_bsel = (*bsel_alloc)++;
object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL,
- bus_bsel, NULL);
+ bus_bsel, &error_abort);
}
return bsel_alloc;
@@ -472,7 +472,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
static void acpi_set_pci_info(void)
{
PCIBus *bus = find_i440fx(); /* TODO: Q35 support */
- unsigned bsel_alloc = 0;
+ unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT;
if (bus) {
/* Scan all PCI buses. Set property to enable acpi based hotplug. */
diff --git a/hw/ide/core.c b/hw/ide/core.c
index cfa5de6ebf..db509b3e15 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s)
}
/* called when the inserted state of the media has changed */
-static void ide_cd_change_cb(void *opaque, bool load)
+static void ide_cd_change_cb(void *opaque, bool load, Error **errp)
{
IDEState *s = opaque;
uint64_t nb_sectors;
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index dbaa75cf59..4383cd111d 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -170,7 +170,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
return -1;
} else {
/* Anonymous BlockBackend for an empty drive */
- dev->conf.blk = blk_new();
+ dev->conf.blk = blk_new(0, BLK_PERM_ALL);
}
}
@@ -196,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
return -1;
}
}
- blkconf_apply_backend_options(&dev->conf);
+ blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD,
+ &err);
+ if (err) {
+ error_report_err(err);
+ return -1;
+ }
if (ide_init_drive(s, dev->conf.blk, kind,
dev->version, dev->serial, dev->model, dev->wwn,
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 095c16a300..ffc0747c7f 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -49,40 +49,41 @@ int xics_get_cpu_index_by_dt_id(int cpu_dt_id)
return -1;
}
-void xics_cpu_destroy(XICSState *xics, PowerPCCPU *cpu)
+void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
- ICPState *ss = &xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(xi, cs->cpu_index);
- assert(cs->cpu_index < xics->nr_servers);
- assert(cs == ss->cs);
+ assert(icp);
+ assert(cs == icp->cs);
- ss->output = NULL;
- ss->cs = NULL;
+ icp->output = NULL;
+ icp->cs = NULL;
}
-void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
+void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
- ICPState *ss = &xics->ss[cs->cpu_index];
- XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
+ ICPState *icp = xics_icp_get(xi, cs->cpu_index);
+ ICPStateClass *icpc;
- assert(cs->cpu_index < xics->nr_servers);
+ assert(icp);
- ss->cs = cs;
+ icp->cs = cs;
- if (info->cpu_setup) {
- info->cpu_setup(xics, cpu);
+ icpc = ICP_GET_CLASS(icp);
+ if (icpc->cpu_setup) {
+ icpc->cpu_setup(icp, cpu);
}
switch (PPC_INPUT(env)) {
case PPC_FLAGS_INPUT_POWER7:
- ss->output = env->irq_inputs[POWER7_INPUT_INT];
+ icp->output = env->irq_inputs[POWER7_INPUT_INT];
break;
case PPC_FLAGS_INPUT_970:
- ss->output = env->irq_inputs[PPC970_INPUT_INT];
+ icp->output = env->irq_inputs[PPC970_INPUT_INT];
break;
default:
@@ -92,185 +93,43 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
}
}
-static void xics_common_pic_print_info(InterruptStatsProvider *obj,
- Monitor *mon)
+void icp_pic_print_info(ICPState *icp, Monitor *mon)
{
- XICSState *xics = XICS_COMMON(obj);
- ICSState *ics;
- uint32_t i;
-
- for (i = 0; i < xics->nr_servers; i++) {
- ICPState *icp = &xics->ss[i];
-
- if (!icp->output) {
- continue;
- }
- monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
- i, icp->xirr, icp->xirr_owner,
- icp->pending_priority, icp->mfrr);
- }
-
- QLIST_FOREACH(ics, &xics->ics, list) {
- monitor_printf(mon, "ICS %4x..%4x %p\n",
- ics->offset, ics->offset + ics->nr_irqs - 1, ics);
-
- if (!ics->irqs) {
- continue;
- }
-
- for (i = 0; i < ics->nr_irqs; i++) {
- ICSIRQState *irq = ics->irqs + i;
-
- if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
- continue;
- }
- monitor_printf(mon, " %4x %s %02x %02x\n",
- ics->offset + i,
- (irq->flags & XICS_FLAGS_IRQ_LSI) ?
- "LSI" : "MSI",
- irq->priority, irq->status);
- }
- }
-}
-
-/*
- * XICS Common class - parent for emulated XICS and KVM-XICS
- */
-static void xics_common_reset(DeviceState *d)
-{
- XICSState *xics = XICS_COMMON(d);
- ICSState *ics;
- int i;
-
- for (i = 0; i < xics->nr_servers; i++) {
- device_reset(DEVICE(&xics->ss[i]));
- }
-
- QLIST_FOREACH(ics, &xics->ics, list) {
- device_reset(DEVICE(ics));
- }
-}
-
-static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
- XICSState *xics = XICS_COMMON(obj);
- int64_t value = xics->nr_irqs;
+ int cpu_index = icp->cs ? icp->cs->cpu_index : -1;
- visit_type_int(v, name, &value, errp);
-}
-
-static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
- XICSState *xics = XICS_COMMON(obj);
- XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
- Error *error = NULL;
- int64_t value;
-
- visit_type_int(v, name, &value, &error);
- if (error) {
- error_propagate(errp, error);
+ if (!icp->output) {
return;
}
- if (xics->nr_irqs) {
- error_setg(errp, "Number of interrupts is already set to %u",
- xics->nr_irqs);
- return;
- }
-
- assert(info->set_nr_irqs);
- info->set_nr_irqs(xics, value, errp);
-}
-
-void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers,
- const char *typename, Error **errp)
-{
- int i;
-
- xics->nr_servers = nr_servers;
-
- xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState));
- for (i = 0; i < xics->nr_servers; i++) {
- char name[32];
- ICPState *icp = &xics->ss[i];
-
- object_initialize(icp, sizeof(*icp), typename);
- snprintf(name, sizeof(name), "icp[%d]", i);
- object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp);
- icp->xics = xics;
- }
+ monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
+ cpu_index, icp->xirr, icp->xirr_owner,
+ icp->pending_priority, icp->mfrr);
}
-static void xics_prop_get_nr_servers(Object *obj, Visitor *v,
- const char *name, void *opaque,
- Error **errp)
+void ics_pic_print_info(ICSState *ics, Monitor *mon)
{
- XICSState *xics = XICS_COMMON(obj);
- int64_t value = xics->nr_servers;
-
- visit_type_int(v, name, &value, errp);
-}
+ uint32_t i;
-static void xics_prop_set_nr_servers(Object *obj, Visitor *v,
- const char *name, void *opaque,
- Error **errp)
-{
- XICSState *xics = XICS_COMMON(obj);
- XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics);
- Error *error = NULL;
- int64_t value;
+ monitor_printf(mon, "ICS %4x..%4x %p\n",
+ ics->offset, ics->offset + ics->nr_irqs - 1, ics);
- visit_type_int(v, name, &value, &error);
- if (error) {
- error_propagate(errp, error);
+ if (!ics->irqs) {
return;
}
- if (xics->nr_servers) {
- error_setg(errp, "Number of servers is already set to %u",
- xics->nr_servers);
- return;
- }
-
- assert(xsc->set_nr_servers);
- xsc->set_nr_servers(xics, value, errp);
-}
-
-static void xics_common_initfn(Object *obj)
-{
- XICSState *xics = XICS_COMMON(obj);
- QLIST_INIT(&xics->ics);
- object_property_add(obj, "nr_irqs", "int",
- xics_prop_get_nr_irqs, xics_prop_set_nr_irqs,
- NULL, NULL, NULL);
- object_property_add(obj, "nr_servers", "int",
- xics_prop_get_nr_servers, xics_prop_set_nr_servers,
- NULL, NULL, NULL);
-}
-
-static void xics_common_class_init(ObjectClass *oc, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(oc);
- InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc);
+ for (i = 0; i < ics->nr_irqs; i++) {
+ ICSIRQState *irq = ics->irqs + i;
- dc->reset = xics_common_reset;
- ic->print_info = xics_common_pic_print_info;
+ if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
+ continue;
+ }
+ monitor_printf(mon, " %4x %s %02x %02x\n",
+ ics->offset + i,
+ (irq->flags & XICS_FLAGS_IRQ_LSI) ?
+ "LSI" : "MSI",
+ irq->priority, irq->status);
+ }
}
-static const TypeInfo xics_common_info = {
- .name = TYPE_XICS_COMMON,
- .parent = TYPE_SYS_BUS_DEVICE,
- .instance_size = sizeof(XICSState),
- .class_size = sizeof(XICSStateClass),
- .instance_init = xics_common_initfn,
- .class_init = xics_common_class_init,
- .interfaces = (InterfaceInfo[]) {
- { TYPE_INTERRUPT_STATS_PROVIDER },
- { }
- },
-};
-
/*
* ICP: Presentation layer
*/
@@ -278,8 +137,8 @@ static const TypeInfo xics_common_info = {
#define XISR_MASK 0x00ffffff
#define CPPR_MASK 0xff000000
-#define XISR(ss) (((ss)->xirr) & XISR_MASK)
-#define CPPR(ss) (((ss)->xirr) >> 24)
+#define XISR(icp) (((icp)->xirr) & XISR_MASK)
+#define CPPR(icp) (((icp)->xirr) >> 24)
static void ics_reject(ICSState *ics, uint32_t nr)
{
@@ -290,7 +149,7 @@ static void ics_reject(ICSState *ics, uint32_t nr)
}
}
-static void ics_resend(ICSState *ics)
+void ics_resend(ICSState *ics)
{
ICSStateClass *k = ICS_BASE_GET_CLASS(ics);
@@ -308,151 +167,152 @@ static void ics_eoi(ICSState *ics, int nr)
}
}
-static void icp_check_ipi(ICPState *ss)
+static void icp_check_ipi(ICPState *icp)
{
- if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
+ if (XISR(icp) && (icp->pending_priority <= icp->mfrr)) {
return;
}
- trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr);
+ trace_xics_icp_check_ipi(icp->cs->cpu_index, icp->mfrr);
- if (XISR(ss) && ss->xirr_owner) {
- ics_reject(ss->xirr_owner, XISR(ss));
+ if (XISR(icp) && icp->xirr_owner) {
+ ics_reject(icp->xirr_owner, XISR(icp));
}
- ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
- ss->pending_priority = ss->mfrr;
- ss->xirr_owner = NULL;
- qemu_irq_raise(ss->output);
+ icp->xirr = (icp->xirr & ~XISR_MASK) | XICS_IPI;
+ icp->pending_priority = icp->mfrr;
+ icp->xirr_owner = NULL;
+ qemu_irq_raise(icp->output);
}
-static void icp_resend(ICPState *ss)
+void icp_resend(ICPState *icp)
{
- ICSState *ics;
+ XICSFabric *xi = icp->xics;
+ XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
- if (ss->mfrr < CPPR(ss)) {
- icp_check_ipi(ss);
- }
- QLIST_FOREACH(ics, &ss->xics->ics, list) {
- ics_resend(ics);
+ if (icp->mfrr < CPPR(icp)) {
+ icp_check_ipi(icp);
}
+
+ xic->ics_resend(xi);
}
-void icp_set_cppr(ICPState *ss, uint8_t cppr)
+void icp_set_cppr(ICPState *icp, uint8_t cppr)
{
uint8_t old_cppr;
uint32_t old_xisr;
- old_cppr = CPPR(ss);
- ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24);
+ old_cppr = CPPR(icp);
+ icp->xirr = (icp->xirr & ~CPPR_MASK) | (cppr << 24);
if (cppr < old_cppr) {
- if (XISR(ss) && (cppr <= ss->pending_priority)) {
- old_xisr = XISR(ss);
- ss->xirr &= ~XISR_MASK; /* Clear XISR */
- ss->pending_priority = 0xff;
- qemu_irq_lower(ss->output);
- if (ss->xirr_owner) {
- ics_reject(ss->xirr_owner, old_xisr);
- ss->xirr_owner = NULL;
+ if (XISR(icp) && (cppr <= icp->pending_priority)) {
+ old_xisr = XISR(icp);
+ icp->xirr &= ~XISR_MASK; /* Clear XISR */
+ icp->pending_priority = 0xff;
+ qemu_irq_lower(icp->output);
+ if (icp->xirr_owner) {
+ ics_reject(icp->xirr_owner, old_xisr);
+ icp->xirr_owner = NULL;
}
}
} else {
- if (!XISR(ss)) {
- icp_resend(ss);
+ if (!XISR(icp)) {
+ icp_resend(icp);
}
}
}
-void icp_set_mfrr(ICPState *ss, uint8_t mfrr)
+void icp_set_mfrr(ICPState *icp, uint8_t mfrr)
{
- ss->mfrr = mfrr;
- if (mfrr < CPPR(ss)) {
- icp_check_ipi(ss);
+ icp->mfrr = mfrr;
+ if (mfrr < CPPR(icp)) {
+ icp_check_ipi(icp);
}
}
-uint32_t icp_accept(ICPState *ss)
+uint32_t icp_accept(ICPState *icp)
{
- uint32_t xirr = ss->xirr;
+ uint32_t xirr = icp->xirr;
- qemu_irq_lower(ss->output);
- ss->xirr = ss->pending_priority << 24;
- ss->pending_priority = 0xff;
- ss->xirr_owner = NULL;
+ qemu_irq_lower(icp->output);
+ icp->xirr = icp->pending_priority << 24;
+ icp->pending_priority = 0xff;
+ icp->xirr_owner = NULL;
- trace_xics_icp_accept(xirr, ss->xirr);
+ trace_xics_icp_accept(xirr, icp->xirr);
return xirr;
}
-uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
+uint32_t icp_ipoll(ICPState *icp, uint32_t *mfrr)
{
if (mfrr) {
- *mfrr = ss->mfrr;
+ *mfrr = icp->mfrr;
}
- return ss->xirr;
+ return icp->xirr;
}
-void icp_eoi(ICPState *ss, uint32_t xirr)
+void icp_eoi(ICPState *icp, uint32_t xirr)
{
+ XICSFabric *xi = icp->xics;
+ XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
ICSState *ics;
uint32_t irq;
/* Send EOI -> ICS */
- ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
- trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr);
+ icp->xirr = (icp->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+ trace_xics_icp_eoi(icp->cs->cpu_index, xirr, icp->xirr);
irq = xirr & XISR_MASK;
- QLIST_FOREACH(ics, &ss->xics->ics, list) {
- if (ics_valid_irq(ics, irq)) {
- ics_eoi(ics, irq);
- }
+
+ ics = xic->ics_get(xi, irq);
+ if (ics) {
+ ics_eoi(ics, irq);
}
- if (!XISR(ss)) {
- icp_resend(ss);
+ if (!XISR(icp)) {
+ icp_resend(icp);
}
}
static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
{
- XICSState *xics = ics->xics;
- ICPState *ss = xics->ss + server;
+ ICPState *icp = xics_icp_get(ics->xics, server);
trace_xics_icp_irq(server, nr, priority);
- if ((priority >= CPPR(ss))
- || (XISR(ss) && (ss->pending_priority <= priority))) {
+ if ((priority >= CPPR(icp))
+ || (XISR(icp) && (icp->pending_priority <= priority))) {
ics_reject(ics, nr);
} else {
- if (XISR(ss) && ss->xirr_owner) {
- ics_reject(ss->xirr_owner, XISR(ss));
- ss->xirr_owner = NULL;
+ if (XISR(icp) && icp->xirr_owner) {
+ ics_reject(icp->xirr_owner, XISR(icp));
+ icp->xirr_owner = NULL;
}
- ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
- ss->xirr_owner = ics;
- ss->pending_priority = priority;
- trace_xics_icp_raise(ss->xirr, ss->pending_priority);
- qemu_irq_raise(ss->output);
+ icp->xirr = (icp->xirr & ~XISR_MASK) | (nr & XISR_MASK);
+ icp->xirr_owner = ics;
+ icp->pending_priority = priority;
+ trace_xics_icp_raise(icp->xirr, icp->pending_priority);
+ qemu_irq_raise(icp->output);
}
}
static void icp_dispatch_pre_save(void *opaque)
{
- ICPState *ss = opaque;
- ICPStateClass *info = ICP_GET_CLASS(ss);
+ ICPState *icp = opaque;
+ ICPStateClass *info = ICP_GET_CLASS(icp);
if (info->pre_save) {
- info->pre_save(ss);
+ info->pre_save(icp);
}
}
static int icp_dispatch_post_load(void *opaque, int version_id)
{
- ICPState *ss = opaque;
- ICPStateClass *info = ICP_GET_CLASS(ss);
+ ICPState *icp = opaque;
+ ICPStateClass *info = ICP_GET_CLASS(icp);
if (info->post_load) {
- return info->post_load(ss, version_id);
+ return info->post_load(icp, version_id);
}
return 0;
@@ -485,12 +345,30 @@ static void icp_reset(DeviceState *dev)
qemu_set_irq(icp->output, 0);
}
+static void icp_realize(DeviceState *dev, Error **errp)
+{
+ ICPState *icp = ICP(dev);
+ Object *obj;
+ Error *err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "xics", &err);
+ if (!obj) {
+ error_setg(errp, "%s: required link 'xics' not found: %s",
+ __func__, error_get_pretty(err));
+ return;
+ }
+
+ icp->xics = XICS_FABRIC(obj);
+}
+
+
static void icp_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->reset = icp_reset;
dc->vmsd = &vmstate_icp_server;
+ dc->realize = icp_realize;
}
static const TypeInfo icp_info = {
@@ -663,17 +541,6 @@ static void ics_simple_reset(DeviceState *dev)
}
}
-static int ics_simple_post_load(ICSState *ics, int version_id)
-{
- int i;
-
- for (i = 0; i < ics->xics->nr_servers; i++) {
- icp_resend(&ics->xics->ss[i]);
- }
-
- return 0;
-}
-
static void ics_simple_dispatch_pre_save(void *opaque)
{
ICSState *ics = opaque;
@@ -746,15 +613,20 @@ static void ics_simple_realize(DeviceState *dev, Error **errp)
ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs);
}
+static Property ics_simple_properties[] = {
+ DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void ics_simple_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
ICSStateClass *isc = ICS_BASE_CLASS(klass);
- dc->realize = ics_simple_realize;
+ isc->realize = ics_simple_realize;
+ dc->props = ics_simple_properties;
dc->vmsd = &vmstate_ics_simple;
dc->reset = ics_simple_reset;
- isc->post_load = ics_simple_post_load;
isc->reject = ics_simple_reject;
isc->resend = ics_simple_resend;
isc->eoi = ics_simple_eoi;
@@ -769,38 +641,69 @@ static const TypeInfo ics_simple_info = {
.instance_init = ics_simple_initfn,
};
+static void ics_base_realize(DeviceState *dev, Error **errp)
+{
+ ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev);
+ ICSState *ics = ICS_BASE(dev);
+ Object *obj;
+ Error *err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "xics", &err);
+ if (!obj) {
+ error_setg(errp, "%s: required link 'xics' not found: %s",
+ __func__, error_get_pretty(err));
+ return;
+ }
+ ics->xics = XICS_FABRIC(obj);
+
+
+ if (icsc->realize) {
+ icsc->realize(dev, errp);
+ }
+}
+
+static void ics_base_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = ics_base_realize;
+}
+
static const TypeInfo ics_base_info = {
.name = TYPE_ICS_BASE,
.parent = TYPE_DEVICE,
.abstract = true,
.instance_size = sizeof(ICSState),
+ .class_init = ics_base_class_init,
.class_size = sizeof(ICSStateClass),
};
+static const TypeInfo xics_fabric_info = {
+ .name = TYPE_XICS_FABRIC,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(XICSFabricClass),
+};
+
/*
* Exported functions
*/
-ICSState *xics_find_source(XICSState *xics, int irq)
+qemu_irq xics_get_qirq(XICSFabric *xi, int irq)
{
- ICSState *ics;
+ XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
+ ICSState *ics = xic->ics_get(xi, irq);
- QLIST_FOREACH(ics, &xics->ics, list) {
- if (ics_valid_irq(ics, irq)) {
- return ics;
- }
+ if (ics) {
+ return ics->qirqs[irq - ics->offset];
}
+
return NULL;
}
-qemu_irq xics_get_qirq(XICSState *xics, int irq)
+ICPState *xics_icp_get(XICSFabric *xi, int server)
{
- ICSState *ics = xics_find_source(xics, irq);
+ XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
- if (ics) {
- return ics->qirqs[irq - ics->offset];
- }
-
- return NULL;
+ return xic->icp_get(xi, server);
}
void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
@@ -813,10 +716,10 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
static void xics_register_types(void)
{
- type_register_static(&xics_common_info);
type_register_static(&ics_simple_info);
type_register_static(&ics_base_info);
type_register_static(&icp_info);
+ type_register_static(&xics_fabric_info);
}
type_init(xics_register_types)
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 17694eaa87..0a3daca3bb 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -40,16 +40,12 @@
#include <sys/ioctl.h>
-typedef struct KVMXICSState {
- XICSState parent_obj;
-
- int kernel_xics_fd;
-} KVMXICSState;
+static int kernel_xics_fd = -1;
/*
* ICP-KVM
*/
-static void icp_get_kvm_state(ICPState *ss)
+static void icp_get_kvm_state(ICPState *icp)
{
uint64_t state;
struct kvm_one_reg reg = {
@@ -59,25 +55,25 @@ static void icp_get_kvm_state(ICPState *ss)
int ret;
/* ICP for this CPU thread is not in use, exiting */
- if (!ss->cs) {
+ if (!icp->cs) {
return;
}
- ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, &reg);
+ ret = kvm_vcpu_ioctl(icp->cs, KVM_GET_ONE_REG, &reg);
if (ret != 0) {
error_report("Unable to retrieve KVM interrupt controller state"
- " for CPU %ld: %s", kvm_arch_vcpu_id(ss->cs), strerror(errno));
+ " for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno));
exit(1);
}
- ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
- ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
+ icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
+ icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
& KVM_REG_PPC_ICP_MFRR_MASK;
- ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
+ icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
& KVM_REG_PPC_ICP_PPRI_MASK;
}
-static int icp_set_kvm_state(ICPState *ss, int version_id)
+static int icp_set_kvm_state(ICPState *icp, int version_id)
{
uint64_t state;
struct kvm_one_reg reg = {
@@ -87,18 +83,18 @@ static int icp_set_kvm_state(ICPState *ss, int version_id)
int ret;
/* ICP for this CPU thread is not in use, exiting */
- if (!ss->cs) {
+ if (!icp->cs) {
return 0;
}
- state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
- | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
- | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
+ state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
+ | ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
+ | ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
- ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, &reg);
+ ret = kvm_vcpu_ioctl(icp->cs, KVM_SET_ONE_REG, &reg);
if (ret != 0) {
error_report("Unable to restore KVM interrupt controller state (0x%"
- PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(ss->cs),
+ PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(icp->cs),
strerror(errno));
return ret;
}
@@ -122,6 +118,34 @@ static void icp_kvm_reset(DeviceState *dev)
icp_set_kvm_state(icp, 1);
}
+static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ int ret;
+
+ if (kernel_xics_fd == -1) {
+ abort();
+ }
+
+ /*
+ * If we are reusing a parked vCPU fd corresponding to the CPU
+ * which was hot-removed earlier we don't have to re-enable
+ * KVM_CAP_IRQ_XICS capability again.
+ */
+ if (icp->cap_irq_xics_enabled) {
+ return;
+ }
+
+ ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
+ kvm_arch_vcpu_id(cs));
+ if (ret < 0) {
+ error_report("Unable to connect CPU%ld to kernel XICS: %s",
+ kvm_arch_vcpu_id(cs), strerror(errno));
+ exit(1);
+ }
+ icp->cap_irq_xics_enabled = true;
+}
+
static void icp_kvm_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -130,6 +154,7 @@ static void icp_kvm_class_init(ObjectClass *klass, void *data)
dc->reset = icp_kvm_reset;
icpc->pre_save = icp_get_kvm_state;
icpc->post_load = icp_set_kvm_state;
+ icpc->cpu_setup = icp_kvm_cpu_setup;
}
static const TypeInfo icp_kvm_info = {
@@ -145,7 +170,6 @@ static const TypeInfo icp_kvm_info = {
*/
static void ics_get_kvm_state(ICSState *ics)
{
- KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
uint64_t state;
struct kvm_device_attr attr = {
.flags = 0,
@@ -160,7 +184,7 @@ static void ics_get_kvm_state(ICSState *ics)
attr.attr = i + ics->offset;
- ret = ioctl(xicskvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
+ ret = ioctl(kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
if (ret != 0) {
error_report("Unable to retrieve KVM interrupt controller state"
" for IRQ %d: %s", i + ics->offset, strerror(errno));
@@ -204,7 +228,6 @@ static void ics_get_kvm_state(ICSState *ics)
static int ics_set_kvm_state(ICSState *ics, int version_id)
{
- KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
uint64_t state;
struct kvm_device_attr attr = {
.flags = 0,
@@ -238,7 +261,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
}
}
- ret = ioctl(xicskvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
+ ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
if (ret != 0) {
error_report("Unable to restore KVM interrupt controller state"
" for IRQs %d: %s", i + ics->offset, strerror(errno));
@@ -308,7 +331,7 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data)
DeviceClass *dc = DEVICE_CLASS(klass);
ICSStateClass *icsc = ICS_BASE_CLASS(klass);
- dc->realize = ics_kvm_realize;
+ icsc->realize = ics_kvm_realize;
dc->reset = ics_kvm_reset;
icsc->pre_save = ics_get_kvm_state;
icsc->post_load = ics_set_kvm_state;
@@ -324,57 +347,6 @@ static const TypeInfo ics_kvm_info = {
/*
* XICS-KVM
*/
-static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
-{
- CPUState *cs;
- ICPState *ss;
- KVMXICSState *xicskvm = XICS_SPAPR_KVM(xics);
- int ret;
-
- cs = CPU(cpu);
- ss = &xics->ss[cs->cpu_index];
-
- assert(cs->cpu_index < xics->nr_servers);
- if (xicskvm->kernel_xics_fd == -1) {
- abort();
- }
-
- /*
- * If we are reusing a parked vCPU fd corresponding to the CPU
- * which was hot-removed earlier we don't have to renable
- * KVM_CAP_IRQ_XICS capability again.
- */
- if (ss->cap_irq_xics_enabled) {
- return;
- }
-
- ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, xicskvm->kernel_xics_fd,
- kvm_arch_vcpu_id(cs));
- if (ret < 0) {
- error_report("Unable to connect CPU%ld to kernel XICS: %s",
- kvm_arch_vcpu_id(cs), strerror(errno));
- exit(1);
- }
- ss->cap_irq_xics_enabled = true;
-}
-
-static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
- Error **errp)
-{
- ICSState *ics = QLIST_FIRST(&xics->ics);
-
- /* This needs to be deprecated ... */
- xics->nr_irqs = nr_irqs;
- if (ics) {
- ics->nr_irqs = nr_irqs;
- }
-}
-
-static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers,
- Error **errp)
-{
- xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp);
-}
static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
@@ -385,13 +357,9 @@ static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
__func__);
}
-static void xics_kvm_realize(DeviceState *dev, Error **errp)
+int xics_kvm_init(sPAPRMachineState *spapr, Error **errp)
{
- KVMXICSState *xicskvm = XICS_SPAPR_KVM(dev);
- XICSState *xics = XICS_COMMON(dev);
- ICSState *ics;
- int i, rc;
- Error *error = NULL;
+ int rc;
struct kvm_create_device xics_create_device = {
.type = KVM_DEV_TYPE_XICS,
.flags = 0,
@@ -439,72 +407,24 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
goto fail;
}
- xicskvm->kernel_xics_fd = xics_create_device.fd;
-
- QLIST_FOREACH(ics, &xics->ics, list) {
- object_property_set_bool(OBJECT(ics), true, "realized", &error);
- if (error) {
- error_propagate(errp, error);
- goto fail;
- }
- }
-
- assert(xics->nr_servers);
- for (i = 0; i < xics->nr_servers; i++) {
- object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
- &error);
- if (error) {
- error_propagate(errp, error);
- goto fail;
- }
- }
+ kernel_xics_fd = xics_create_device.fd;
kvm_kernel_irqchip = true;
kvm_msi_via_irqfd_allowed = true;
kvm_gsi_direct_mapping = true;
- return;
+ return rc;
fail:
kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
+ return -1;
}
-static void xics_kvm_initfn(Object *obj)
-{
- XICSState *xics = XICS_COMMON(obj);
- ICSState *ics;
-
- ics = ICS_SIMPLE(object_new(TYPE_ICS_KVM));
- object_property_add_child(obj, "ics", OBJECT(ics), NULL);
- ics->xics = xics;
- QLIST_INSERT_HEAD(&xics->ics, ics, list);
-}
-
-static void xics_kvm_class_init(ObjectClass *oc, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(oc);
- XICSStateClass *xsc = XICS_COMMON_CLASS(oc);
-
- dc->realize = xics_kvm_realize;
- xsc->cpu_setup = xics_kvm_cpu_setup;
- xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
- xsc->set_nr_servers = xics_kvm_set_nr_servers;
-}
-
-static const TypeInfo xics_spapr_kvm_info = {
- .name = TYPE_XICS_SPAPR_KVM,
- .parent = TYPE_XICS_COMMON,
- .instance_size = sizeof(KVMXICSState),
- .class_init = xics_kvm_class_init,
- .instance_init = xics_kvm_initfn,
-};
-
static void xics_kvm_register_types(void)
{
- type_register_static(&xics_spapr_kvm_info);
type_register_static(&ics_kvm_info);
type_register_static(&icp_kvm_info);
}
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 2e3f1c5e95..84d24b2837 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -44,7 +44,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
target_ulong cppr = args[0];
icp_set_cppr(icp, cppr);
@@ -56,12 +56,13 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
{
target_ulong server = xics_get_cpu_index_by_dt_id(args[0]);
target_ulong mfrr = args[1];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), server);
- if (server >= spapr->xics->nr_servers) {
+ if (!icp) {
return H_PARAMETER;
}
- icp_set_mfrr(spapr->xics->ss + server, mfrr);
+ icp_set_mfrr(icp, mfrr);
return H_SUCCESS;
}
@@ -69,7 +70,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
uint32_t xirr = icp_accept(icp);
args[0] = xirr;
@@ -80,7 +81,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
uint32_t xirr = icp_accept(icp);
args[0] = xirr;
@@ -92,7 +93,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
target_ulong xirr = args[0];
icp_eoi(icp, xirr);
@@ -103,7 +104,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
- ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+ ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
uint32_t mfrr;
uint32_t xirr = icp_ipoll(icp, &mfrr);
@@ -118,7 +119,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+ ICSState *ics = spapr->ics;
uint32_t nr, srcno, server, priority;
if ((nargs != 3) || (nret != 1)) {
@@ -134,7 +135,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
server = xics_get_cpu_index_by_dt_id(rtas_ld(args, 1));
priority = rtas_ld(args, 2);
- if (!ics_valid_irq(ics, nr) || (server >= ics->xics->nr_servers)
+ if (!ics_valid_irq(ics, nr) || !xics_icp_get(XICS_FABRIC(spapr), server)
|| (priority > 0xff)) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
@@ -151,7 +152,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+ ICSState *ics = spapr->ics;
uint32_t nr, srcno;
if ((nargs != 1) || (nret != 3)) {
@@ -181,7 +182,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+ ICSState *ics = spapr->ics;
uint32_t nr, srcno;
if ((nargs != 1) || (nret != 1)) {
@@ -212,7 +213,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+ ICSState *ics = spapr->ics;
uint32_t nr, srcno;
if ((nargs != 1) || (nret != 1)) {
@@ -239,36 +240,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
-static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
- Error **errp)
+int xics_spapr_init(sPAPRMachineState *spapr, Error **errp)
{
- ICSState *ics = QLIST_FIRST(&xics->ics);
-
- /* This needs to be deprecated ... */
- xics->nr_irqs = nr_irqs;
- if (ics) {
- ics->nr_irqs = nr_irqs;
- }
-}
-
-static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers,
- Error **errp)
-{
- xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp);
-}
-
-static void xics_spapr_realize(DeviceState *dev, Error **errp)
-{
- XICSState *xics = XICS_SPAPR(dev);
- ICSState *ics;
- Error *error = NULL;
- int i;
-
- if (!xics->nr_servers) {
- error_setg(errp, "Number of servers needs to be greater 0");
- return;
- }
-
/* Registration of global state belongs into realize */
spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
@@ -281,55 +254,9 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
spapr_register_hypercall(H_XIRR_X, h_xirr_x);
spapr_register_hypercall(H_EOI, h_eoi);
spapr_register_hypercall(H_IPOLL, h_ipoll);
-
- QLIST_FOREACH(ics, &xics->ics, list) {
- object_property_set_bool(OBJECT(ics), true, "realized", &error);
- if (error) {
- error_propagate(errp, error);
- return;
- }
- }
-
- for (i = 0; i < xics->nr_servers; i++) {
- object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
- &error);
- if (error) {
- error_propagate(errp, error);
- return;
- }
- }
-}
-
-static void xics_spapr_initfn(Object *obj)
-{
- XICSState *xics = XICS_SPAPR(obj);
- ICSState *ics;
-
- ics = ICS_SIMPLE(object_new(TYPE_ICS_SIMPLE));
- object_property_add_child(obj, "ics", OBJECT(ics), NULL);
- ics->xics = xics;
- QLIST_INSERT_HEAD(&xics->ics, ics, list);
-}
-
-static void xics_spapr_class_init(ObjectClass *oc, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(oc);
- XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
-
- dc->realize = xics_spapr_realize;
- xsc->set_nr_irqs = xics_spapr_set_nr_irqs;
- xsc->set_nr_servers = xics_spapr_set_nr_servers;
+ return 0;
}
-static const TypeInfo xics_spapr_info = {
- .name = TYPE_XICS_SPAPR,
- .parent = TYPE_XICS_COMMON,
- .instance_size = sizeof(XICSState),
- .class_size = sizeof(XICSStateClass),
- .class_init = xics_spapr_class_init,
- .instance_init = xics_spapr_initfn,
-};
-
#define ICS_IRQ_FREE(ics, srcno) \
(!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
@@ -354,9 +281,8 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
return -1;
}
-int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
+int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp)
{
- ICSState *ics = QLIST_FIRST(&xics->ics);
int irq;
if (!ics) {
@@ -387,10 +313,9 @@ int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
* Allocate block of consecutive IRQs, and return the number of the first IRQ in
* the block. If align==true, aligns the first IRQ number to num.
*/
-int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align,
- Error **errp)
+int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
+ bool align, Error **errp)
{
- ICSState *ics = QLIST_FIRST(&xics->ics);
int i, first = -1;
if (!ics) {
@@ -440,20 +365,18 @@ static void ics_free(ICSState *ics, int srcno, int num)
}
}
-void xics_spapr_free(XICSState *xics, int irq, int num)
+void spapr_ics_free(ICSState *ics, int irq, int num)
{
- ICSState *ics = xics_find_source(xics, irq);
-
- if (ics) {
+ if (ics_valid_irq(ics, irq)) {
trace_xics_ics_free(0, irq, num);
ics_free(ics, irq - ics->offset, num);
}
}
-void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
+void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle)
{
uint32_t interrupt_server_ranges_prop[] = {
- 0, cpu_to_be32(xics->nr_servers),
+ 0, cpu_to_be32(nr_servers),
};
int node;
@@ -470,10 +393,3 @@ void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
_FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle));
_FDT(fdt_setprop_cell(fdt, node, "phandle", phandle));
}
-
-static void xics_spapr_register_types(void)
-{
- type_register_static(&xics_spapr_info);
-}
-
-type_init(xics_spapr_register_types)
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 65ba188555..aa5d2c1f5f 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp)
{
sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev);
+ int ret;
if (nvram->blk) {
nvram->size = blk_getlength(nvram->blk);
+
+ ret = blk_set_perm(nvram->blk,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+ BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
} else {
nvram->size = DEFAULT_NVRAM_SIZE;
}
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index a563555e7d..273f1e4602 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1530,6 +1530,34 @@ static const pci_class_desc pci_class_descriptions[] =
{ 0, NULL}
};
+static void pci_for_each_device_under_bus_reverse(PCIBus *bus,
+ void (*fn)(PCIBus *b,
+ PCIDevice *d,
+ void *opaque),
+ void *opaque)
+{
+ PCIDevice *d;
+ int devfn;
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ d = bus->devices[ARRAY_SIZE(bus->devices) - 1 - devfn];
+ if (d) {
+ fn(bus, d, opaque);
+ }
+ }
+}
+
+void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
+ void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
+ void *opaque)
+{
+ bus = pci_find_bus_nr(bus, bus_num);
+
+ if (bus) {
+ pci_for_each_device_under_bus_reverse(bus, fn, opaque);
+ }
+}
+
static void pci_for_each_device_under_bus(PCIBus *bus,
void (*fn)(PCIBus *b, PCIDevice *d,
void *opaque),
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 87d8366c44..81c6c1c27c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -63,6 +63,7 @@
#include "qemu/error-report.h"
#include "trace.h"
#include "hw/nmi.h"
+#include "hw/intc/intc.h"
#include "hw/compat.h"
#include "qemu/cutils.h"
@@ -95,37 +96,68 @@
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
-static XICSState *try_create_xics(const char *type, int nr_servers,
- int nr_irqs, Error **errp)
+static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics,
+ const char *type_icp, int nr_servers,
+ int nr_irqs, Error **errp)
{
- Error *err = NULL;
- DeviceState *dev;
+ XICSFabric *xi = XICS_FABRIC(spapr);
+ Error *err = NULL, *local_err = NULL;
+ ICSState *ics = NULL;
+ int i;
- dev = qdev_create(NULL, type);
- qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
- qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
- object_property_set_bool(OBJECT(dev), true, "realized", &err);
+ ics = ICS_SIMPLE(object_new(type_ics));
+ qdev_set_parent_bus(DEVICE(ics), sysbus_get_default());
+ object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL);
+ object_property_set_int(OBJECT(ics), nr_irqs, "nr-irqs", &err);
+ object_property_add_const_link(OBJECT(ics), "xics", OBJECT(xi), NULL);
+ object_property_set_bool(OBJECT(ics), true, "realized", &local_err);
+ error_propagate(&err, local_err);
if (err) {
- error_propagate(errp, err);
- object_unparent(OBJECT(dev));
- return NULL;
+ goto error;
}
- return XICS_COMMON(dev);
+
+ spapr->icps = g_malloc0(nr_servers * sizeof(ICPState));
+ spapr->nr_servers = nr_servers;
+
+ for (i = 0; i < nr_servers; i++) {
+ ICPState *icp = &spapr->icps[i];
+
+ object_initialize(icp, sizeof(*icp), type_icp);
+ qdev_set_parent_bus(DEVICE(icp), sysbus_get_default());
+ object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL);
+ object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL);
+ object_property_set_bool(OBJECT(icp), true, "realized", &err);
+ if (err) {
+ goto error;
+ }
+ object_unref(OBJECT(icp));
+ }
+
+ spapr->ics = ics;
+ return 0;
+
+error:
+ error_propagate(errp, err);
+ if (ics) {
+ object_unparent(OBJECT(ics));
+ }
+ return -1;
}
-static XICSState *xics_system_init(MachineState *machine,
- int nr_servers, int nr_irqs, Error **errp)
+static int xics_system_init(MachineState *machine,
+ int nr_servers, int nr_irqs, Error **errp)
{
- XICSState *xics = NULL;
+ int rc = -1;
if (kvm_enabled()) {
Error *err = NULL;
- if (machine_kernel_irqchip_allowed(machine)) {
- xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs,
- &err);
+ if (machine_kernel_irqchip_allowed(machine) &&
+ !xics_kvm_init(SPAPR_MACHINE(machine), errp)) {
+ rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_KVM,
+ TYPE_KVM_ICP, nr_servers, nr_irqs, &err);
}
- if (machine_kernel_irqchip_required(machine) && !xics) {
+ if (machine_kernel_irqchip_required(machine) && rc < 0) {
error_reportf_err(err,
"kernel_irqchip requested but unavailable: ");
} else {
@@ -133,11 +165,13 @@ static XICSState *xics_system_init(MachineState *machine,
}
}
- if (!xics) {
- xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, errp);
+ if (rc < 0) {
+ xics_spapr_init(SPAPR_MACHINE(machine), errp);
+ rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_SIMPLE,
+ TYPE_ICP, nr_servers, nr_irqs, errp);
}
- return xics;
+ return rc;
}
static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
@@ -924,7 +958,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
_FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
/* /interrupt controller */
- spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP);
+ spapr_dt_xics(spapr->nr_servers, fdt, PHANDLE_XICP);
ret = spapr_populate_memory(spapr, fdt);
if (ret < 0) {
@@ -1053,6 +1087,62 @@ static void close_htab_fd(sPAPRMachineState *spapr)
spapr->htab_fd = -1;
}
+static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+ return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1;
+}
+
+static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp,
+ hwaddr ptex, int n)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+ hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
+
+ if (!spapr->htab) {
+ /*
+ * HTAB is controlled by KVM. Fetch into temporary buffer
+ */
+ ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64);
+ kvmppc_read_hptes(hptes, ptex, n);
+ return hptes;
+ }
+
+ /*
+ * HTAB is controlled by QEMU. Just point to the internally
+ * accessible PTEG.
+ */
+ return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset);
+}
+
+static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
+ const ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+ if (!spapr->htab) {
+ g_free((void *)hptes);
+ }
+
+ /* Nothing to do for qemu managed HPT */
+}
+
+static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+ uint64_t pte0, uint64_t pte1)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+ hwaddr offset = ptex * HASH_PTE_SIZE_64;
+
+ if (!spapr->htab) {
+ kvmppc_write_hpte(ptex, pte0, pte1);
+ } else {
+ stq_p(spapr->htab + offset, pte0);
+ stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
+ }
+}
+
static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
{
int shift;
@@ -1252,6 +1342,13 @@ static int spapr_post_load(void *opaque, int version_id)
sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
int err = 0;
+ if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
+ int i;
+ for (i = 0; i < spapr->nr_servers; i++) {
+ icp_resend(&spapr->icps[i]);
+ }
+ }
+
/* In earlier versions, there was no separate qdev for the PAPR
* RTC, so the RTC offset was stored directly in sPAPREnvironment.
* So when migrating from those versions, poke the incoming offset
@@ -1902,9 +1999,8 @@ static void ppc_spapr_init(MachineState *machine)
load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
/* Set up Interrupt Controller before we create the VCPUs */
- spapr->xics = xics_system_init(machine,
- DIV_ROUND_UP(max_cpus * smt, smp_threads),
- XICS_IRQS_SPAPR, &error_fatal);
+ xics_system_init(machine, DIV_ROUND_UP(max_cpus * smt, smp_threads),
+ XICS_IRQS_SPAPR, &error_fatal);
/* Set up containers for ibm,client-set-architecture negotiated options */
spapr->ov5 = spapr_ovec_new();
@@ -2872,6 +2968,40 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
*mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
}
+static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
+
+ return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL;
+}
+
+static void spapr_ics_resend(XICSFabric *dev)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
+
+ ics_resend(spapr->ics);
+}
+
+static ICPState *spapr_icp_get(XICSFabric *xi, int server)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
+
+ return (server < spapr->nr_servers) ? &spapr->icps[server] : NULL;
+}
+
+static void spapr_pic_print_info(InterruptStatsProvider *obj,
+ Monitor *mon)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+ int i;
+
+ for (i = 0; i < spapr->nr_servers; i++) {
+ icp_pic_print_info(&spapr->icps[i], mon);
+ }
+
+ ics_pic_print_info(spapr->ics, mon);
+}
+
static void spapr_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -2880,6 +3010,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
NMIClass *nc = NMI_CLASS(oc);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+ XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
+ InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
mc->desc = "pSeries Logical Partition (PAPR compliant)";
@@ -2891,7 +3023,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
mc->init = ppc_spapr_init;
mc->reset = ppc_spapr_reset;
mc->block_default_type = IF_SCSI;
- mc->max_cpus = 255;
+ mc->max_cpus = 1024;
mc->no_parallel = 1;
mc->default_boot_order = "";
mc->default_ram_size = 512 * M_BYTE;
@@ -2913,6 +3045,14 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
nc->nmi_monitor_handler = spapr_nmi;
smc->phb_placement = spapr_phb_placement;
vhc->hypercall = emulate_spapr_hypercall;
+ vhc->hpt_mask = spapr_hpt_mask;
+ vhc->map_hptes = spapr_map_hptes;
+ vhc->unmap_hptes = spapr_unmap_hptes;
+ vhc->store_hpte = spapr_store_hpte;
+ xic->ics_get = spapr_ics_get;
+ xic->ics_resend = spapr_ics_resend;
+ xic->icp_get = spapr_icp_get;
+ ispc->print_info = spapr_pic_print_info;
}
static const TypeInfo spapr_machine_info = {
@@ -2929,6 +3069,8 @@ static const TypeInfo spapr_machine_info = {
{ TYPE_NMI },
{ TYPE_HOTPLUG_HANDLER },
{ TYPE_PPC_VIRTUAL_HYPERVISOR },
+ { TYPE_XICS_FABRIC },
+ { TYPE_INTERRUPT_STATS_PROVIDER },
{ }
},
};
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 55cd0456eb..90d682fe33 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -13,10 +13,12 @@
#include "hw/boards.h"
#include "qapi/error.h"
#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
#include "target/ppc/kvm_ppc.h"
#include "hw/ppc/ppc.h"
#include "target/ppc/mmu-hash64.h"
#include "sysemu/numa.h"
+#include "qemu/error-report.h"
static void spapr_cpu_reset(void *opaque)
{
@@ -34,15 +36,26 @@ static void spapr_cpu_reset(void *opaque)
env->spr[SPR_HIOR] = 0;
- ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift,
- &error_fatal);
+ /*
+ * This is a hack for the benefit of KVM PR - it abuses the SDR1
+ * slot in kvm_sregs to communicate the userspace address of the
+ * HPT
+ */
+ if (kvm_enabled()) {
+ env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab
+ | (spapr->htab_shift - 18);
+ if (kvmppc_put_books_sregs(cpu) < 0) {
+ error_report("Unable to update SDR1 in KVM");
+ exit(1);
+ }
+ }
}
static void spapr_cpu_destroy(PowerPCCPU *cpu)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
- xics_cpu_destroy(spapr->xics, cpu);
+ xics_cpu_destroy(XICS_FABRIC(spapr), cpu);
qemu_unregister_reset(spapr_cpu_reset, cpu);
}
@@ -57,8 +70,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
/* Enable PAPR mode in TCG or KVM */
- cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
- cpu_ppc_set_papr(cpu);
+ cpu_ppc_set_papr(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
if (cpu->max_compat) {
Error *local_err = NULL;
@@ -76,7 +88,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
cs->numa_node = i;
}
- xics_cpu_setup(spapr->xics, cpu);
+ xics_cpu_setup(XICS_FABRIC(spapr), cpu);
qemu_register_reset(spapr_cpu_reset, cpu);
spapr_cpu_reset(cpu);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index f85a9c32a7..24a5758e62 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -481,7 +481,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
- qemu_irq_pulse(xics_get_qirq(spapr->xics,
+ qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
rtas_event_log_to_irq(spapr,
RTAS_LOG_TYPE_EPOW)));
}
@@ -574,7 +574,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
- qemu_irq_pulse(xics_get_qirq(spapr->xics,
+ qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
rtas_event_log_to_irq(spapr,
RTAS_LOG_TYPE_HOTPLUG)));
}
@@ -695,7 +695,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
spapr_event_sources_get_source(spapr->event_sources, i);
g_assert(source->enabled);
- qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq));
+ qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), source->irq));
}
}
@@ -752,7 +752,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
spapr->event_sources = spapr_event_sources_new();
spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW,
- xics_spapr_alloc(spapr->xics, 0, false,
+ spapr_ics_alloc(spapr->ics, 0, false,
&error_fatal));
/* NOTE: if machine supports modern/dedicated hotplug event source,
@@ -765,7 +765,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
*/
if (spapr->use_hotplug_event_source) {
spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG,
- xics_spapr_alloc(spapr->xics, 0, false,
+ spapr_ics_alloc(spapr->ics, 0, false,
&error_fatal));
}
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 42d20e0b92..f05a90ed2c 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -47,12 +47,12 @@ static bool has_spr(PowerPCCPU *cpu, int spr)
return cpu->env.spr_cb[spr].name != NULL;
}
-static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index)
+static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
{
/*
- * hash value/pteg group index is normalized by htab_mask
+ * hash value/pteg group index is normalized by HPT mask
*/
- if (((pte_index & ~7ULL) / HPTES_PER_GROUP) & ~env->htab_mask) {
+ if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
return false;
}
return true;
@@ -77,15 +77,14 @@ static bool is_ram_address(sPAPRMachineState *spapr, hwaddr addr)
static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
- CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
- target_ulong pte_index = args[1];
+ target_ulong ptex = args[1];
target_ulong pteh = args[2];
target_ulong ptel = args[3];
unsigned apshift;
target_ulong raddr;
- target_ulong index;
- uint64_t token;
+ target_ulong slot;
+ const ppc_hash_pte64_t *hptes;
apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
if (!apshift) {
@@ -116,36 +115,36 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
pteh &= ~0x60ULL;
- if (!valid_pte_index(env, pte_index)) {
+ if (!valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
- index = 0;
+ slot = ptex & 7ULL;
+ ptex = ptex & ~7ULL;
+
if (likely((flags & H_EXACT) == 0)) {
- pte_index &= ~7ULL;
- token = ppc_hash64_start_access(cpu, pte_index);
- for (; index < 8; index++) {
- if (!(ppc_hash64_load_hpte0(cpu, token, index) & HPTE64_V_VALID)) {
+ hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+ for (slot = 0; slot < 8; slot++) {
+ if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
break;
}
}
- ppc_hash64_stop_access(cpu, token);
- if (index == 8) {
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+ if (slot == 8) {
return H_PTEG_FULL;
}
} else {
- token = ppc_hash64_start_access(cpu, pte_index);
- if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) {
- ppc_hash64_stop_access(cpu, token);
+ hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
+ if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
return H_PTEG_FULL;
}
- ppc_hash64_stop_access(cpu, token);
+ /* Unmap with the same index that was passed to the map call above */
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
}
- ppc_hash64_store_hpte(cpu, pte_index + index,
- pteh | HPTE64_V_HPTE_DIRTY, ptel);
+ ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
- args[0] = pte_index + index;
+ args[0] = ptex + slot;
return H_SUCCESS;
}
@@ -161,18 +160,17 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
target_ulong flags,
target_ulong *vp, target_ulong *rp)
{
- CPUPPCState *env = &cpu->env;
- uint64_t token;
+ const ppc_hash_pte64_t *hptes;
target_ulong v, r;
- if (!valid_pte_index(env, ptex)) {
+ if (!valid_ptex(cpu, ptex)) {
return REMOVE_PARM;
}
- token = ppc_hash64_start_access(cpu, ptex);
- v = ppc_hash64_load_hpte0(cpu, token, 0);
- r = ppc_hash64_load_hpte1(cpu, token, 0);
- ppc_hash64_stop_access(cpu, token);
+ hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+ v = ppc_hash64_hpte0(cpu, hptes, 0);
+ r = ppc_hash64_hpte1(cpu, hptes, 0);
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
if ((v & HPTE64_V_VALID) == 0 ||
((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
@@ -191,11 +189,11 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
- target_ulong pte_index = args[1];
+ target_ulong ptex = args[1];
target_ulong avpn = args[2];
RemoveResult ret;
- ret = remove_hpte(cpu, pte_index, avpn, flags,
+ ret = remove_hpte(cpu, ptex, avpn, flags,
&args[0], &args[1]);
switch (ret) {
@@ -291,19 +289,19 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
- target_ulong pte_index = args[1];
+ target_ulong ptex = args[1];
target_ulong avpn = args[2];
- uint64_t token;
+ const ppc_hash_pte64_t *hptes;
target_ulong v, r;
- if (!valid_pte_index(env, pte_index)) {
+ if (!valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
- token = ppc_hash64_start_access(cpu, pte_index);
- v = ppc_hash64_load_hpte0(cpu, token, 0);
- r = ppc_hash64_load_hpte1(cpu, token, 0);
- ppc_hash64_stop_access(cpu, token);
+ hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+ v = ppc_hash64_hpte0(cpu, hptes, 0);
+ r = ppc_hash64_hpte1(cpu, hptes, 0);
+ ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
if ((v & HPTE64_V_VALID) == 0 ||
((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
@@ -315,36 +313,35 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
r |= (flags << 55) & HPTE64_R_PP0;
r |= (flags << 48) & HPTE64_R_KEY_HI;
r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
- ppc_hash64_store_hpte(cpu, pte_index,
+ ppc_hash64_store_hpte(cpu, ptex,
(v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
- ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r);
+ ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
/* Flush the tlb */
check_tlb_flush(env, true);
/* Don't need a memory barrier, due to qemu's global lock */
- ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r);
+ ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
return H_SUCCESS;
}
static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
- CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
- target_ulong pte_index = args[1];
+ target_ulong ptex = args[1];
uint8_t *hpte;
int i, ridx, n_entries = 1;
- if (!valid_pte_index(env, pte_index)) {
+ if (!valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
if (flags & H_READ_4) {
/* Clear the two low order bits */
- pte_index &= ~(3ULL);
+ ptex &= ~(3ULL);
n_entries = 4;
}
- hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
+ hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64);
for (i = 0, ridx = 0; i < n_entries; i++) {
args[ridx++] = ldq_p(hpte);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index fd6fc1d953..2a3499eaf8 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -43,6 +43,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_ids.h"
#include "hw/ppc/spapr_drc.h"
#include "sysemu/device_tree.h"
#include "sysemu/kvm.h"
@@ -325,7 +326,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
return;
}
- xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
+ spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
if (msi_present(pdev)) {
spapr_msi_setmsg(pdev, 0, false, 0, 0);
}
@@ -363,7 +364,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
}
/* Allocate MSIs */
- irq = xics_spapr_alloc_block(spapr->xics, req_num, false,
+ irq = spapr_ics_alloc_block(spapr->ics, req_num, false,
ret_intr_type == RTAS_TYPE_MSI, &err);
if (err) {
error_reportf_err(err, "Can't allocate MSIs for device %x: ",
@@ -374,7 +375,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
/* Release previous MSIs */
if (msi) {
- xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
+ spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
g_hash_table_remove(phb->msi, &config_addr);
}
@@ -736,7 +737,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
trace_spapr_pci_msi_write(addr, data, irq);
- qemu_irq_pulse(xics_get_qirq(spapr->xics, irq));
+ qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), irq));
}
static const MemoryRegionOps spapr_msi_ops = {
@@ -946,6 +947,274 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
rp->assigned_len = assigned_idx * sizeof(ResourceFields);
}
+typedef struct PCIClass PCIClass;
+typedef struct PCISubClass PCISubClass;
+typedef struct PCIIFace PCIIFace;
+
+struct PCIIFace {
+ int iface;
+ const char *name;
+};
+
+struct PCISubClass {
+ int subclass;
+ const char *name;
+ const PCIIFace *iface;
+};
+
+struct PCIClass {
+ const char *name;
+ const PCISubClass *subc;
+};
+
+static const PCISubClass undef_subclass[] = {
+ { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mass_subclass[] = {
+ { PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
+ { PCI_CLASS_STORAGE_IDE, "ide", NULL },
+ { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
+ { PCI_CLASS_STORAGE_IPI, "ipi", NULL },
+ { PCI_CLASS_STORAGE_RAID, "raid", NULL },
+ { PCI_CLASS_STORAGE_ATA, "ata", NULL },
+ { PCI_CLASS_STORAGE_SATA, "sata", NULL },
+ { PCI_CLASS_STORAGE_SAS, "sas", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass net_subclass[] = {
+ { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
+ { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
+ { PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
+ { PCI_CLASS_NETWORK_ATM, "atm", NULL },
+ { PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
+ { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
+ { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass displ_subclass[] = {
+ { PCI_CLASS_DISPLAY_VGA, "vga", NULL },
+ { PCI_CLASS_DISPLAY_XGA, "xga", NULL },
+ { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass media_subclass[] = {
+ { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
+ { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
+ { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mem_subclass[] = {
+ { PCI_CLASS_MEMORY_RAM, "memory", NULL },
+ { PCI_CLASS_MEMORY_FLASH, "flash", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass bridg_subclass[] = {
+ { PCI_CLASS_BRIDGE_HOST, "host", NULL },
+ { PCI_CLASS_BRIDGE_ISA, "isa", NULL },
+ { PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
+ { PCI_CLASS_BRIDGE_MC, "mca", NULL },
+ { PCI_CLASS_BRIDGE_PCI, "pci", NULL },
+ { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
+ { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
+ { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
+ { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
+ { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL },
+ { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass comm_subclass[] = {
+ { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL },
+ { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL },
+ { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL },
+ { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL },
+ { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL },
+ { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL },
+ { 0xFF, NULL, NULL, },
+};
+
+static const PCIIFace pic_iface[] = {
+ { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" },
+ { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" },
+ { 0xFF, NULL },
+};
+
+static const PCISubClass sys_subclass[] = {
+ { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface },
+ { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL },
+ { PCI_CLASS_SYSTEM_TIMER, "timer", NULL },
+ { PCI_CLASS_SYSTEM_RTC, "rtc", NULL },
+ { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL },
+ { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass inp_subclass[] = {
+ { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL },
+ { PCI_CLASS_INPUT_PEN, "pen", NULL },
+ { PCI_CLASS_INPUT_MOUSE, "mouse", NULL },
+ { PCI_CLASS_INPUT_SCANNER, "scanner", NULL },
+ { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass dock_subclass[] = {
+ { PCI_CLASS_DOCKING_GENERIC, "dock", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass cpu_subclass[] = {
+ { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL },
+ { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL },
+ { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL },
+ { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCIIFace usb_iface[] = {
+ { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" },
+ { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", },
+ { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" },
+ { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" },
+ { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" },
+ { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" },
+ { 0xFF, NULL },
+};
+
+static const PCISubClass ser_subclass[] = {
+ { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL },
+ { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL },
+ { PCI_CLASS_SERIAL_SSA, "ssa", NULL },
+ { PCI_CLASS_SERIAL_USB, "usb", usb_iface },
+ { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL },
+ { PCI_CLASS_SERIAL_SMBUS, "smb", NULL },
+ { PCI_CLASS_SERIAL_IB, "infiniband", NULL },
+ { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL },
+ { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL },
+ { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass wrl_subclass[] = {
+ { PCI_CLASS_WIRELESS_IRDA, "irda", NULL },
+ { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL },
+ { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL },
+ { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL },
+ { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass sat_subclass[] = {
+ { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL },
+ { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL },
+ { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL },
+ { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass crypt_subclass[] = {
+ { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL },
+ { PCI_CLASS_CRYPT_ENTERTAINMENT,
+ "entertainment-encryption", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass spc_subclass[] = {
+ { PCI_CLASS_SP_DPIO, "dpio", NULL },
+ { PCI_CLASS_SP_PERF, "counter", NULL },
+ { PCI_CLASS_SP_SYNCH, "measurement", NULL },
+ { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCIClass pci_classes[] = {
+ { "legacy-device", undef_subclass },
+ { "mass-storage", mass_subclass },
+ { "network", net_subclass },
+ { "display", displ_subclass, },
+ { "multimedia-device", media_subclass },
+ { "memory-controller", mem_subclass },
+ { "unknown-bridge", bridg_subclass },
+ { "communication-controller", comm_subclass},
+ { "system-peripheral", sys_subclass },
+ { "input-controller", inp_subclass },
+ { "docking-station", dock_subclass },
+ { "cpu", cpu_subclass },
+ { "serial-bus", ser_subclass },
+ { "wireless-controller", wrl_subclass },
+ { "intelligent-io", NULL },
+ { "satellite-device", sat_subclass },
+ { "encryption", crypt_subclass },
+ { "data-processing-controller", spc_subclass },
+};
+/*
+ * Pick a device-tree node name for a PCI class code.
+ *
+ * @class indexes pci_classes[]; a value beyond the end of that table
+ * yields the generic name "pci".  Otherwise the class name is the
+ * default.  It is replaced by the subclass name when the class has a
+ * subclass table containing an entry for @subclass, and replaced again
+ * by the programming-interface name when that subclass entry has an
+ * iface table containing an entry for @iface.
+ *
+ * Subclass and iface tables end with a 0xFF entry; only the low byte of
+ * each table value is compared against @subclass / @iface.
+ */
+static const char *pci_find_device_name(uint8_t class, uint8_t subclass,
+ uint8_t iface)
+{
+ const PCIClass *pclass;
+ const PCISubClass *psubclass;
+ const PCIIFace *piface;
+ const char *name;
+
+ if (class >= ARRAY_SIZE(pci_classes)) {
+ return "pci";
+ }
+
+ pclass = pci_classes + class;
+ name = pclass->name;
+
+ if (pclass->subc == NULL) {
+ return name;
+ }
+
+ psubclass = pclass->subc;
+ while ((psubclass->subclass & 0xff) != 0xff) {
+ if ((psubclass->subclass & 0xff) == subclass) {
+ name = psubclass->name;
+ break;
+ }
+ psubclass++;
+ }
+
+ piface = psubclass->iface; /* NULL on the 0xFF terminator entry */
+ if (piface == NULL) {
+ return name;
+ }
+ while ((piface->iface & 0xff) != 0xff) {
+ if ((piface->iface & 0xff) == iface) {
+ name = piface->name;
+ break;
+ }
+ piface++;
+ }
+
+ return name;
+}
+/*
+ * Format the device-tree node name for @dev into @nodename, writing at
+ * most @len bytes via snprintf().
+ *
+ * The result is "<name>@<slot>" for function 0 and
+ * "<name>@<slot>,<func>" otherwise, with slot and function printed in
+ * hex.  <name> comes from pci_find_device_name(), fed with the three
+ * bytes read from config space at PCI_CLASS_PROG: bits 23..16 as class,
+ * bits 15..8 as subclass and bits 7..0 as programming interface.
+ */
+static void pci_get_node_name(char *nodename, int len, PCIDevice *dev)
+{
+ int slot = PCI_SLOT(dev->devfn);
+ int func = PCI_FUNC(dev->devfn);
+ uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
+ const char *name;
+
+ name = pci_find_device_name((ccode >> 16) & 0xff, (ccode >> 8) & 0xff,
+ ccode & 0xff);
+
+ if (func != 0) {
+ snprintf(nodename, len, "%s@%x,%x", name, slot, func);
+ } else {
+ snprintf(nodename, len, "%s@%x", name, slot);
+ }
+}
+
static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
PCIDevice *pdev);
@@ -957,6 +1226,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
int pci_status, err;
char *buf = NULL;
uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
+ uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
uint32_t max_msi, max_msix;
if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
@@ -971,8 +1241,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
_FDT(fdt_setprop_cell(fdt, offset, "revision-id",
pci_default_read_config(dev, PCI_REVISION_ID, 1)));
- _FDT(fdt_setprop_cell(fdt, offset, "class-code",
- pci_default_read_config(dev, PCI_CLASS_PROG, 3)));
+ _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode));
if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
_FDT(fdt_setprop_cell(fdt, offset, "interrupts",
pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
@@ -1013,11 +1282,10 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
_FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
}
- /* NOTE: this is normally generated by firmware via path/unit name,
- * but in our case we must set it manually since it does not get
- * processed by OF beforehand
- */
- _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
+ _FDT(fdt_setprop_string(fdt, offset, "name",
+ pci_find_device_name((ccode >> 16) & 0xff,
+ (ccode >> 8) & 0xff,
+ ccode & 0xff)));
buf = spapr_phb_get_loc_code(sphb, dev);
if (!buf) {
error_report("Failed setting the ibm,loc-code");
@@ -1061,15 +1329,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
void *fdt, int node_offset)
{
int offset, ret;
- int slot = PCI_SLOT(dev->devfn);
- int func = PCI_FUNC(dev->devfn);
char nodename[FDT_NAME_MAX];
- if (func != 0) {
- snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func);
- } else {
- snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot);
- }
+ pci_get_node_name(nodename, FDT_NAME_MAX, dev);
offset = fdt_add_subnode(fdt, node_offset, nodename);
ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb);
@@ -1485,7 +1747,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
uint32_t irq;
Error *local_err = NULL;
- irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err);
+ irq = spapr_ics_alloc_block(spapr->ics, 1, true, false, &local_err);
if (local_err) {
error_propagate(errp, local_err);
error_prepend(errp, "can't allocate LSIs: ");
@@ -1782,9 +2044,9 @@ static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
s_fdt.fdt = p->fdt;
s_fdt.node_off = offset;
s_fdt.sphb = p->sphb;
- pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
- spapr_populate_pci_devices_dt,
- &s_fdt);
+ pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus),
+ spapr_populate_pci_devices_dt,
+ &s_fdt);
}
static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
@@ -1953,9 +2215,9 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
s_fdt.fdt = fdt;
s_fdt.node_off = bus_off;
s_fdt.sphb = phb;
- pci_for_each_device(bus, pci_bus_num(bus),
- spapr_populate_pci_devices_dt,
- &s_fdt);
+ pci_for_each_device_reverse(bus, pci_bus_num(bus),
+ spapr_populate_pci_devices_dt,
+ &s_fdt);
ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
SPAPR_DR_CONNECTOR_TYPE_PCI);
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 8bfc5f971f..a0ee4fd265 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -454,7 +454,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
dev->qdev.id = id;
}
- dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false, &local_err);
+ dev->irq = spapr_ics_alloc(spapr->ics, dev->irq, false, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index bbfb5dc289..a53f058621 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque)
}
}
-static void scsi_cd_change_media_cb(void *opaque, bool load)
+static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
{
SCSIDiskState *s = opaque;
@@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
return;
}
}
- blkconf_apply_backend_options(&dev->conf);
+ blkconf_apply_backend_options(&dev->conf,
+ blk_is_read_only(s->qdev.conf.blk),
+ dev->type == TYPE_DISK, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
if (s->qdev.conf.discard_granularity == -1) {
s->qdev.conf.discard_granularity =
@@ -2380,7 +2386,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
if (!dev->conf.blk) {
- dev->conf.blk = blk_new();
+ dev->conf.blk = blk_new(0, BLK_PERM_ALL);
}
s->qdev.blocksize = 2048;
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 8e88e8311a..ba47bff4db 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd)
return sd->wp_switch;
}
-static void sd_cardchange(void *opaque, bool load)
+static void sd_cardchange(void *opaque, bool load, Error **errp)
{
SDState *sd = opaque;
DeviceState *dev = DEVICE(sd);
@@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj)
static void sd_realize(DeviceState *dev, Error **errp)
{
SDState *sd = SD_CARD(dev);
+ int ret;
if (sd->blk && blk_is_read_only(sd->blk)) {
error_setg(errp, "Cannot use read-only drive as SD card");
@@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp)
}
if (sd->blk) {
+ ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+ BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
blk_set_dev_ops(sd->blk, &sd_block_ops, sd);
}
}
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index efe4b8e1a6..24f1608b4b 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -8,7 +8,6 @@
#include "monitor/monitor.h"
#include "trace.h"
#include "qemu/cutils.h"
-#include "migration/migration.h"
static void usb_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent);
@@ -688,8 +687,6 @@ USBDevice *usbdevice_create(const char *cmdline)
const char *params;
int len;
USBDevice *dev;
- ObjectClass *klass;
- DeviceClass *dc;
params = strchr(cmdline,':');
if (params) {
@@ -724,22 +721,6 @@ USBDevice *usbdevice_create(const char *cmdline)
return NULL;
}
- klass = object_class_by_name(f->name);
- if (klass == NULL) {
- error_report("Device '%s' not found", f->name);
- return NULL;
- }
-
- dc = DEVICE_CLASS(klass);
-
- if (only_migratable) {
- if (dc->vmsd->unmigratable) {
- error_report("Device %s is not migratable, but --only-migratable "
- "was specified", f->name);
- return NULL;
- }
- }
-
if (f->usbdevice_init) {
dev = f->usbdevice_init(bus, params);
} else {
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index c607f7606d..8a61ec94c8 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -589,6 +589,13 @@ static const struct SCSIBusInfo usb_msd_scsi_info_bot = {
.load_request = usb_msd_load_request,
};
+static void usb_msd_unrealize_storage(USBDevice *dev, Error **errp)
+{
+ MSDState *s = USB_STORAGE_DEV(dev);
+
+ object_unref(OBJECT(&s->bus));
+}
+
static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
{
MSDState *s = USB_STORAGE_DEV(dev);
@@ -603,7 +610,11 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
blkconf_serial(&s->conf, &dev->serial);
blkconf_blocksizes(&s->conf);
- blkconf_apply_backend_options(&s->conf);
+ blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
/*
* Hack alert: this pretends to be a block device, but it's really
@@ -635,6 +646,13 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
s->scsi_dev = scsi_dev;
}
+static void usb_msd_unrealize_bot(USBDevice *dev, Error **errp)
+{
+ MSDState *s = USB_STORAGE_DEV(dev);
+
+ object_unref(OBJECT(&s->bus));
+}
+
static void usb_msd_realize_bot(USBDevice *dev, Error **errp)
{
MSDState *s = USB_STORAGE_DEV(dev);
@@ -755,6 +773,7 @@ static void usb_msd_class_initfn_storage(ObjectClass *klass, void *data)
USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
uc->realize = usb_msd_realize_storage;
+ uc->unrealize = usb_msd_unrealize_storage;
dc->props = msd_properties;
}
@@ -817,6 +836,7 @@ static void usb_msd_class_initfn_bot(ObjectClass *klass, void *data)
USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
uc->realize = usb_msd_realize_bot;
+ uc->unrealize = usb_msd_unrealize_bot;
uc->attached_settable = true;
}
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 3b26655889..fffc424396 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -896,6 +896,8 @@ static void usb_uas_unrealize(USBDevice *dev, Error **errp)
UASDevice *uas = USB_UAS(dev);
qemu_bh_delete(uas->status_bh);
+
+ object_unref(OBJECT(&uas->bus));
}
static void usb_uas_realize(USBDevice *dev, Error **errp)
diff --git a/include/block/block.h b/include/block/block.h
index bde5ebda18..c7c4a3ac3a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -82,6 +82,7 @@ typedef struct HDGeometry {
} HDGeometry;
#define BDRV_O_RDWR 0x0002
+#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
@@ -187,6 +188,42 @@ typedef enum BlockOpType {
BLOCK_OP_TYPE_MAX,
} BlockOpType;
+/* Block node permission constants */
+enum {
+ /**
+ * A user that has the "permission" of consistent reads is guaranteed that
+ * their view of the contents of the block device is complete and
+ * self-consistent, representing the contents of a disk at a specific
+ * point.
+ *
+ * For most block devices (including their backing files) this is true, but
+ * the property cannot be maintained in a few situations like for
+ * intermediate nodes of a commit block job.
+ */
+ BLK_PERM_CONSISTENT_READ = 0x01,
+
+ /** This permission is required to change the visible disk contents. */
+ BLK_PERM_WRITE = 0x02,
+
+ /**
+ * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
+ * required for writes to the block node when the caller promises that
+ * the visible disk content doesn't change.
+ */
+ BLK_PERM_WRITE_UNCHANGED = 0x04,
+
+ /** This permission is required to change the size of a block node. */
+ BLK_PERM_RESIZE = 0x08,
+
+ /**
+ * This permission is required to change the node that this BdrvChild
+ * points to.
+ */
+ BLK_PERM_GRAPH_MOD = 0x10,
+
+ BLK_PERM_ALL = 0x1f,
+};
+
/* disk I/O throttling */
void bdrv_init(void);
void bdrv_init_with_whitelist(void);
@@ -199,7 +236,8 @@ int bdrv_create(BlockDriver *drv, const char* filename,
QemuOpts *opts, Error **errp);
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
BlockDriverState *bdrv_new(void);
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+ Error **errp);
void bdrv_replace_in_backing_chain(BlockDriverState *old,
BlockDriverState *new);
@@ -210,7 +248,8 @@ BdrvChild *bdrv_open_child(const char *filename,
BlockDriverState* parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp);
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
BlockDriverState *bdrv_open(const char *filename, const char *reference,
@@ -484,7 +523,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
- const BdrvChildRole *child_role);
+ const BdrvChildRole *child_role,
+ Error **errp);
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1670941da9..a57c0bfb55 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -320,6 +320,59 @@ struct BlockDriver {
void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
Error **errp);
+ /**
+ * Informs the block driver that a permission change is intended. The
+ * driver checks whether the change is permissible and may take other
+ * preparations for the change (e.g. get file system locks). This operation
+ * is always followed by a call to either .bdrv_set_perm or
+ * .bdrv_abort_perm_update.
+ *
+ * Checks whether the requested set of cumulative permissions in @perm
+ * can be granted for accessing @bs and whether no other users are using
+ * permissions other than those given in @shared (both arguments take
+ * BLK_PERM_* bitmasks).
+ *
+ * If both conditions are met, 0 is returned. Otherwise, -errno is returned
+ * and errp is set to an error describing the conflict.
+ */
+ int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared, Error **errp);
+
+ /**
+ * Called to inform the driver that the cumulative set of used
+ * permissions for @bs has changed to @perm, and the set of sharable
+ * permissions to @shared. The driver can use this to propagate changes to
+ * its children (i.e. request permissions only if a parent actually needs
+ * them).
+ *
+ * This function is only invoked after bdrv_check_perm(), so block drivers
+ * may rely on preparations made in their .bdrv_check_perm implementation.
+ */
+ void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
+
+ /*
+ * Called to inform the driver that after a previous bdrv_check_perm()
+ * call, the permission update is not performed and any preparations made
+ * for it (e.g. taken file locks) need to be undone.
+ *
+ * This function can be called even for nodes that never saw a
+ * bdrv_check_perm() call. It is a no-op then.
+ */
+ void (*bdrv_abort_perm_update)(BlockDriverState *bs);
+
+ /**
+ * Returns in @nperm and @nshared the permissions that the driver for @bs
+ * needs on its child @c, based on the cumulative permissions requested by
+ * the parents in @parent_perm and @parent_shared.
+ *
+ * If @c is NULL, return the permissions for attaching a new child for the
+ * given @role.
+ */
+ void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t parent_perm, uint64_t parent_shared,
+ uint64_t *nperm, uint64_t *nshared);
+
QLIST_ENTRY(BlockDriver) list;
};
@@ -388,6 +441,10 @@ typedef struct BdrvAioNotifier {
} BdrvAioNotifier;
struct BdrvChildRole {
+ /* If true, bdrv_replace_in_backing_chain() doesn't change the node this
+ * BdrvChild points to. */
+ bool stay_at_node;
+
void (*inherit_options)(int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options);
@@ -399,6 +456,12 @@ struct BdrvChildRole {
* name), or NULL if the parent can't provide a better name. */
const char* (*get_name)(BdrvChild *child);
+ /* Returns a malloced string that describes the parent of the child for a
+ * human reader. This could be a node-name, BlockBackend name, qdev ID or
+ * QOM path of the device owning the BlockBackend, job type and ID etc. The
+ * caller is responsible for freeing the memory. */
+ char* (*get_parent_desc)(BdrvChild *child);
+
/*
* If this pair of functions is implemented, the parent doesn't issue new
* requests after returning from .drained_begin() until .drained_end() is
@@ -409,16 +472,32 @@ struct BdrvChildRole {
*/
void (*drained_begin)(BdrvChild *child);
void (*drained_end)(BdrvChild *child);
+
+ void (*attach)(BdrvChild *child);
+ void (*detach)(BdrvChild *child);
};
extern const BdrvChildRole child_file;
extern const BdrvChildRole child_format;
+extern const BdrvChildRole child_backing;
struct BdrvChild {
BlockDriverState *bs;
char *name;
const BdrvChildRole *role;
void *opaque;
+
+ /**
+ * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
+ */
+ uint64_t perm;
+
+ /**
+ * Permissions that can still be granted to other users of @bs while this
+ * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
+ */
+ uint64_t shared_perm;
+
QLIST_ENTRY(BdrvChild) next;
QLIST_ENTRY(BdrvChild) next_parent;
};
@@ -701,13 +780,16 @@ void stream_start(const char *job_id, BlockDriverState *bs,
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @on_error: The action to take upon error.
* @backing_file_str: String to use as the backing file in @top's overlay
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @top. NULL means
+ * that a node name should be autogenerated.
* @errp: Error object.
*
*/
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
- Error **errp);
+ const char *filter_node_name, Error **errp);
/**
* commit_active_start:
* @job_id: The id of the newly-created job, or %NULL to use the
@@ -718,6 +800,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
* See @BlockJobCreateFlags
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
* @cb: Completion function for the job.
* @opaque: Opaque pointer value passed to @cb.
* @errp: Error object.
@@ -727,8 +812,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
void commit_active_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, int creation_flags,
int64_t speed, BlockdevOnError on_error,
- BlockCompletionFunc *cb,
- void *opaque, Error **errp, bool auto_complete);
+ const char *filter_node_name,
+ BlockCompletionFunc *cb, void *opaque, Error **errp,
+ bool auto_complete);
/*
* mirror_start:
* @job_id: The id of the newly-created job, or %NULL to use the
@@ -745,6 +831,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
* @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target.
* @unmap: Whether to unmap target where source sectors only contain zeroes.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the mirror job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
* @errp: Error object.
*
* Start a mirroring operation on @bs. Clusters that are allocated
@@ -758,7 +847,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
- bool unmap, Error **errp);
+ bool unmap, const char *filter_node_name, Error **errp);
/*
* backup_job_create:
@@ -796,11 +885,36 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr);
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role,
- void *opaque);
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, Error **errp);
void bdrv_root_unref_child(BdrvChild *child);
+int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp);
+void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
+void bdrv_child_abort_perm_update(BdrvChild *c);
+int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp);
+
+/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
+ * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to
+ * all children */
+void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared);
+
+/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
+ * (non-raw) image formats: Like above for bs->backing, but for bs->file it
+ * requires WRITE | RESIZE for read-write images, always requires
+ * CONSISTENT_READ and doesn't share WRITE. */
+void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared);
+
const char *bdrv_get_parent_name(const BlockDriverState *bs);
-void blk_dev_change_media_cb(BlockBackend *blk, bool load);
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
bool blk_dev_has_removable_media(BlockBackend *blk);
bool blk_dev_has_tray(BlockBackend *blk);
void blk_dev_eject_request(BlockBackend *blk, bool force);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 1acb256223..9e906f7d7e 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -169,13 +169,25 @@ BlockJob *block_job_get(const char *id);
/**
* block_job_add_bdrv:
* @job: A block job
+ * @name: The name to assign to the new BdrvChild
* @bs: A BlockDriverState that is involved in @job
+ * @perm, @shared_perm: Permissions to request on the node
*
* Add @bs to the list of BlockDriverState that are involved in
* @job. This means that all operations will be blocked on @bs while
* @job exists.
*/
-void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs);
+int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
+ uint64_t perm, uint64_t shared_perm, Error **errp);
+
+/**
+ * block_job_remove_all_bdrv:
+ * @job: The block job
+ *
+ * Remove all BlockDriverStates from the list of nodes that are involved in the
+ * job. This removes the blockers added with block_job_add_bdrv().
+ */
+void block_job_remove_all_bdrv(BlockJob *job);
/**
* block_job_set_speed:
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 82238229c6..3f86cc5acc 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -119,6 +119,7 @@ struct BlockJobDriver {
* generated automatically.
* @job_type: The class object for the newly-created job.
* @bs: The block
+ * @perm, @shared_perm: Permissions to request for @bs
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @cb: Completion function for the job.
* @opaque: Opaque pointer value passed to @cb.
@@ -134,7 +135,8 @@ struct BlockJobDriver {
* called from a wrapper that is specific to the job type.
*/
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
- BlockDriverState *bs, int64_t speed, int flags,
+ BlockDriverState *bs, uint64_t perm,
+ uint64_t shared_perm, int64_t speed, int flags,
BlockCompletionFunc *cb, void *opaque, Error **errp);
/**
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index bd15853e51..8c305aa4fa 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -64,6 +64,7 @@ void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
void qemu_ram_unset_idstr(RAMBlock *block);
const char *qemu_ram_get_idstr(RAMBlock *rb);
size_t qemu_ram_pagesize(RAMBlock *block);
+size_t qemu_ram_pagesize_largest(void);
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
int len, int is_write);
@@ -105,6 +106,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
ram_addr_t offset, ram_addr_t length, void *opaque);
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
#endif
diff --git a/include/glib-compat.h b/include/glib-compat.h
index 0cd24ffbe9..863c8cf73d 100644
--- a/include/glib-compat.h
+++ b/include/glib-compat.h
@@ -328,4 +328,25 @@ static inline void g_source_set_name_by_id(guint tag, const char *name)
#define g_test_subprocess() (0)
#endif
+
+#if !GLIB_CHECK_VERSION(2, 34, 0)
+static inline void
+g_test_add_data_func_full(const char *path,
+ gpointer data,
+ gpointer fn,
+ gpointer data_free_func)
+{
+#if GLIB_CHECK_VERSION(2, 26, 0)
+ /* back-compat casts, remove this once we can require new-enough glib */
+ g_test_add_vtable(path, 0, data, NULL,
+ (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func);
+#else
+ /* back-compat casts, remove this once we can require new-enough glib */
+ g_test_add_vtable(path, 0, data, NULL,
+ (void (*)(void)) fn, (void (*)(void)) data_free_func);
+#endif
+}
+#endif
+
+
#endif
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index df9d207d81..f3f6e8ef02 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -26,6 +26,7 @@ typedef struct BlockConf {
/* geometry, not all devices use this */
uint32_t cyls, heads, secs;
OnOffAuto wce;
+ bool share_rw;
BlockdevOnError rerror;
BlockdevOnError werror;
} BlockConf;
@@ -53,7 +54,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \
DEFINE_PROP_UINT32("discard_granularity", _state, \
_conf.discard_granularity, -1), \
- DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, ON_OFF_AUTO_AUTO)
+ DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \
+ ON_OFF_AUTO_AUTO), \
+ DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false)
#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \
DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \
@@ -73,7 +76,8 @@ void blkconf_geometry(BlockConf *conf, int *trans,
unsigned cyls_max, unsigned heads_max, unsigned secs_max,
Error **errp);
void blkconf_blocksizes(BlockConf *conf);
-void blkconf_apply_backend_options(BlockConf *conf);
+void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+ bool resizable, Error **errp);
/* Hard disk geometry */
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 092294ed5a..dfa76143f3 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -106,7 +106,7 @@ static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
- return xics_get_qirq(spapr->xics, phb->lsi_table[pin].irq);
+ return xics_get_qirq(XICS_FABRIC(spapr), phb->lsi_table[pin].irq);
}
PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 6983f13745..9349acbfb2 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -429,6 +429,10 @@ int pci_bus_numa_node(PCIBus *bus);
void pci_for_each_device(PCIBus *bus, int bus_num,
void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
void *opaque);
+void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
+ void (*fn)(PCIBus *bus, PCIDevice *d,
+ void *opaque),
+ void *opaque);
void pci_for_each_bus_depth_first(PCIBus *bus,
void *(*begin)(PCIBus *bus, void *parent_state),
void (*end)(PCIBus *bus, void *state),
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index d77ca60a0e..d22ad8dd3b 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -13,41 +13,84 @@
/* Device classes and subclasses */
-#define PCI_BASE_CLASS_STORAGE 0x01
-#define PCI_BASE_CLASS_NETWORK 0x02
+#define PCI_CLASS_NOT_DEFINED 0x0000
+#define PCI_CLASS_NOT_DEFINED_VGA 0x0001
+#define PCI_BASE_CLASS_STORAGE 0x01
#define PCI_CLASS_STORAGE_SCSI 0x0100
#define PCI_CLASS_STORAGE_IDE 0x0101
+#define PCI_CLASS_STORAGE_FLOPPY 0x0102
+#define PCI_CLASS_STORAGE_IPI 0x0103
#define PCI_CLASS_STORAGE_RAID 0x0104
+#define PCI_CLASS_STORAGE_ATA 0x0105
#define PCI_CLASS_STORAGE_SATA 0x0106
+#define PCI_CLASS_STORAGE_SAS 0x0107
#define PCI_CLASS_STORAGE_EXPRESS 0x0108
#define PCI_CLASS_STORAGE_OTHER 0x0180
+#define PCI_BASE_CLASS_NETWORK 0x02
#define PCI_CLASS_NETWORK_ETHERNET 0x0200
+#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201
+#define PCI_CLASS_NETWORK_FDDI 0x0202
+#define PCI_CLASS_NETWORK_ATM 0x0203
+#define PCI_CLASS_NETWORK_ISDN 0x0204
+#define PCI_CLASS_NETWORK_WORLDFIP 0x0205
+#define PCI_CLASS_NETWORK_PICMG214 0x0206
#define PCI_CLASS_NETWORK_OTHER 0x0280
+#define PCI_BASE_CLASS_DISPLAY 0x03
#define PCI_CLASS_DISPLAY_VGA 0x0300
+#define PCI_CLASS_DISPLAY_XGA 0x0301
+#define PCI_CLASS_DISPLAY_3D 0x0302
#define PCI_CLASS_DISPLAY_OTHER 0x0380
+#define PCI_BASE_CLASS_MULTIMEDIA 0x04
+#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400
#define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401
+#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402
+#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480
+#define PCI_BASE_CLASS_MEMORY 0x05
#define PCI_CLASS_MEMORY_RAM 0x0500
+#define PCI_CLASS_MEMORY_FLASH 0x0501
+#define PCI_CLASS_MEMORY_OTHER 0x0580
-#define PCI_CLASS_SYSTEM_SDHCI 0x0805
-#define PCI_CLASS_SYSTEM_OTHER 0x0880
-
-#define PCI_CLASS_SERIAL_USB 0x0c03
-#define PCI_CLASS_SERIAL_SMBUS 0x0c05
-
+#define PCI_BASE_CLASS_BRIDGE 0x06
#define PCI_CLASS_BRIDGE_HOST 0x0600
#define PCI_CLASS_BRIDGE_ISA 0x0601
+#define PCI_CLASS_BRIDGE_EISA 0x0602
+#define PCI_CLASS_BRIDGE_MC 0x0603
#define PCI_CLASS_BRIDGE_PCI 0x0604
#define PCI_CLASS_BRIDGE_PCI_INF_SUB 0x01
+#define PCI_CLASS_BRIDGE_PCMCIA 0x0605
+#define PCI_CLASS_BRIDGE_NUBUS 0x0606
+#define PCI_CLASS_BRIDGE_CARDBUS 0x0607
+#define PCI_CLASS_BRIDGE_RACEWAY 0x0608
+#define PCI_CLASS_BRIDGE_PCI_SEMITP 0x0609
+#define PCI_CLASS_BRIDGE_IB_PCI 0x060a
#define PCI_CLASS_BRIDGE_OTHER 0x0680
+#define PCI_BASE_CLASS_COMMUNICATION 0x07
#define PCI_CLASS_COMMUNICATION_SERIAL 0x0700
+#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
+#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702
+#define PCI_CLASS_COMMUNICATION_MODEM 0x0703
+#define PCI_CLASS_COMMUNICATION_GPIB 0x0704
+#define PCI_CLASS_COMMUNICATION_SC 0x0705
#define PCI_CLASS_COMMUNICATION_OTHER 0x0780
+#define PCI_BASE_CLASS_SYSTEM 0x08
+#define PCI_CLASS_SYSTEM_PIC 0x0800
+#define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010
+#define PCI_CLASS_SYSTEM_PIC_IOXAPIC 0x080020
+#define PCI_CLASS_SYSTEM_DMA 0x0801
+#define PCI_CLASS_SYSTEM_TIMER 0x0802
+#define PCI_CLASS_SYSTEM_RTC 0x0803
+#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804
+#define PCI_CLASS_SYSTEM_SDHCI 0x0805
+#define PCI_CLASS_SYSTEM_OTHER 0x0880
+
+#define PCI_BASE_CLASS_INPUT 0x09
#define PCI_CLASS_INPUT_KEYBOARD 0x0900
#define PCI_CLASS_INPUT_PEN 0x0901
#define PCI_CLASS_INPUT_MOUSE 0x0902
@@ -55,8 +98,59 @@
#define PCI_CLASS_INPUT_GAMEPORT 0x0904
#define PCI_CLASS_INPUT_OTHER 0x0980
-#define PCI_CLASS_PROCESSOR_CO 0x0b40
+#define PCI_BASE_CLASS_DOCKING 0x0a
+#define PCI_CLASS_DOCKING_GENERIC 0x0a00
+#define PCI_CLASS_DOCKING_OTHER 0x0a80
+
+#define PCI_BASE_CLASS_PROCESSOR 0x0b
+#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02
#define PCI_CLASS_PROCESSOR_POWERPC 0x0b20
+#define PCI_CLASS_PROCESSOR_MIPS 0x0b30
+#define PCI_CLASS_PROCESSOR_CO 0x0b40
+
+#define PCI_BASE_CLASS_SERIAL 0x0c
+#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00
+#define PCI_CLASS_SERIAL_ACCESS 0x0c01
+#define PCI_CLASS_SERIAL_SSA 0x0c02
+#define PCI_CLASS_SERIAL_USB 0x0c03
+#define PCI_CLASS_SERIAL_USB_UHCI 0x0c0300
+#define PCI_CLASS_SERIAL_USB_OHCI 0x0c0310
+#define PCI_CLASS_SERIAL_USB_EHCI 0x0c0320
+#define PCI_CLASS_SERIAL_USB_XHCI 0x0c0330
+#define PCI_CLASS_SERIAL_USB_UNKNOWN 0x0c0380
+#define PCI_CLASS_SERIAL_USB_DEVICE 0x0c03fe
+#define PCI_CLASS_SERIAL_FIBER 0x0c04
+#define PCI_CLASS_SERIAL_SMBUS 0x0c05
+#define PCI_CLASS_SERIAL_IB 0x0c06
+#define PCI_CLASS_SERIAL_IPMI 0x0c07
+#define PCI_CLASS_SERIAL_SERCOS 0x0c08
+#define PCI_CLASS_SERIAL_CANBUS 0x0c09
+
+#define PCI_BASE_CLASS_WIRELESS 0x0d
+#define PCI_CLASS_WIRELESS_IRDA 0x0d00
+#define PCI_CLASS_WIRELESS_CIR 0x0d01
+#define PCI_CLASS_WIRELESS_RF_CONTROLLER 0x0d10
+#define PCI_CLASS_WIRELESS_BLUETOOTH 0x0d11
+#define PCI_CLASS_WIRELESS_BROADBAND 0x0d12
+#define PCI_CLASS_WIRELESS_OTHER 0x0d80
+
+#define PCI_BASE_CLASS_SATELLITE 0x0f
+#define PCI_CLASS_SATELLITE_TV 0x0f00
+#define PCI_CLASS_SATELLITE_AUDIO 0x0f01
+#define PCI_CLASS_SATELLITE_VOICE 0x0f03
+#define PCI_CLASS_SATELLITE_DATA 0x0f04
+
+#define PCI_BASE_CLASS_CRYPT 0x10
+#define PCI_CLASS_CRYPT_NETWORK 0x1000
+#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1001
+#define PCI_CLASS_CRYPT_OTHER 0x1080
+
+#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11
+#define PCI_CLASS_SP_DPIO 0x1100
+#define PCI_CLASS_SP_PERF 0x1101
+#define PCI_CLASS_SP_SYNCH 0x1110
+#define PCI_CLASS_SP_MANAGEMENT 0x1120
+#define PCI_CLASS_SP_OTHER 0x1180
#define PCI_CLASS_OTHERS 0xff
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index f9b17d860a..cfd271129d 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -58,7 +58,7 @@ struct sPAPRMachineState {
struct VIOsPAPRBus *vio_bus;
QLIST_HEAD(, sPAPRPHBState) phbs;
struct sPAPRNVRAM *nvram;
- XICSState *xics;
+ ICSState *ics;
DeviceState *rtc;
void *htab;
@@ -94,6 +94,9 @@ struct sPAPRMachineState {
/*< public >*/
char *kvm_type;
MemoryHotplugState hotplug_memory;
+
+ uint32_t nr_servers;
+ ICPState *icps;
};
#define H_SUCCESS 0
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index fc6f673ea0..2e9685a5d9 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -87,7 +87,7 @@ static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
- return xics_get_qirq(spapr->xics, dev->irq);
+ return xics_get_qirq(XICS_FABRIC(spapr), dev->irq);
}
static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 3f0c31610a..1945913bf1 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -30,29 +30,6 @@
#include "hw/sysbus.h"
-#define TYPE_XICS_COMMON "xics-common"
-#define XICS_COMMON(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_COMMON)
-
-/*
- * Retain xics as the type name to be compatible for migration. Rest all the
- * functions, class and variables are renamed as xics_spapr.
- */
-#define TYPE_XICS_SPAPR "xics"
-#define XICS_SPAPR(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_SPAPR)
-
-#define TYPE_XICS_SPAPR_KVM "xics-spapr-kvm"
-#define XICS_SPAPR_KVM(obj) \
- OBJECT_CHECK(KVMXICSState, (obj), TYPE_XICS_SPAPR_KVM)
-
-#define XICS_COMMON_CLASS(klass) \
- OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_COMMON)
-#define XICS_SPAPR_CLASS(klass) \
- OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_SPAPR)
-#define XICS_COMMON_GET_CLASS(obj) \
- OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_COMMON)
-#define XICS_SPAPR_GET_CLASS(obj) \
- OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_SPAPR)
-
#define XICS_IPI 0x2
#define XICS_BUID 0x1
#define XICS_IRQ_BASE (XICS_BUID << 12)
@@ -62,31 +39,12 @@
* (the kernel implementation supports more but we don't exploit
* that yet)
*/
-typedef struct XICSStateClass XICSStateClass;
-typedef struct XICSState XICSState;
typedef struct ICPStateClass ICPStateClass;
typedef struct ICPState ICPState;
typedef struct ICSStateClass ICSStateClass;
typedef struct ICSState ICSState;
typedef struct ICSIRQState ICSIRQState;
-
-struct XICSStateClass {
- DeviceClass parent_class;
-
- void (*cpu_setup)(XICSState *icp, PowerPCCPU *cpu);
- void (*set_nr_irqs)(XICSState *icp, uint32_t nr_irqs, Error **errp);
- void (*set_nr_servers)(XICSState *icp, uint32_t nr_servers, Error **errp);
-};
-
-struct XICSState {
- /*< private >*/
- SysBusDevice parent_obj;
- /*< public >*/
- uint32_t nr_servers;
- uint32_t nr_irqs;
- ICPState *ss;
- QLIST_HEAD(, ICSState) ics;
-};
+typedef struct XICSFabric XICSFabric;
#define TYPE_ICP "icp"
#define ICP(obj) OBJECT_CHECK(ICPState, (obj), TYPE_ICP)
@@ -104,6 +62,7 @@ struct ICPStateClass {
void (*pre_save)(ICPState *s);
int (*post_load)(ICPState *s, int version_id);
+ void (*cpu_setup)(ICPState *icp, PowerPCCPU *cpu);
};
struct ICPState {
@@ -118,7 +77,7 @@ struct ICPState {
qemu_irq output;
bool cap_irq_xics_enabled;
- XICSState *xics;
+ XICSFabric *xics;
};
#define TYPE_ICS_BASE "ics-base"
@@ -139,6 +98,7 @@ struct ICPState {
struct ICSStateClass {
DeviceClass parent_class;
+ void (*realize)(DeviceState *dev, Error **errp);
void (*pre_save)(ICSState *s);
int (*post_load)(ICSState *s, int version_id);
void (*reject)(ICSState *s, uint32_t irq);
@@ -154,8 +114,7 @@ struct ICSState {
uint32_t offset;
qemu_irq *qirqs;
ICSIRQState *irqs;
- XICSState *xics;
- QLIST_ENTRY(ICSState) list;
+ XICSFabric *xics;
};
static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
@@ -180,19 +139,37 @@ struct ICSIRQState {
uint8_t flags;
};
+typedef struct XICSFabric {
+ Object parent;
+} XICSFabric;
+
+#define TYPE_XICS_FABRIC "xics-fabric"
+#define XICS_FABRIC(obj) \
+ OBJECT_CHECK(XICSFabric, (obj), TYPE_XICS_FABRIC)
+#define XICS_FABRIC_CLASS(klass) \
+ OBJECT_CLASS_CHECK(XICSFabricClass, (klass), TYPE_XICS_FABRIC)
+#define XICS_FABRIC_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(XICSFabricClass, (obj), TYPE_XICS_FABRIC)
+
+typedef struct XICSFabricClass {
+ InterfaceClass parent;
+ ICSState *(*ics_get)(XICSFabric *xi, int irq);
+ void (*ics_resend)(XICSFabric *xi);
+ ICPState *(*icp_get)(XICSFabric *xi, int server);
+} XICSFabricClass;
+
#define XICS_IRQS_SPAPR 1024
-qemu_irq xics_get_qirq(XICSState *icp, int irq);
-int xics_spapr_alloc(XICSState *icp, int irq_hint, bool lsi, Error **errp);
-int xics_spapr_alloc_block(XICSState *icp, int num, bool lsi, bool align,
+int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp);
+int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, bool align,
Error **errp);
-void xics_spapr_free(XICSState *icp, int irq, int num);
-void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle);
+void spapr_ics_free(ICSState *ics, int irq, int num);
+void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle);
-void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
-void xics_cpu_destroy(XICSState *icp, PowerPCCPU *cpu);
-void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers,
- const char *typename, Error **errp);
+qemu_irq xics_get_qirq(XICSFabric *xi, int irq);
+ICPState *xics_icp_get(XICSFabric *xi, int server);
+void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu);
+void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu);
/* Internal XICS interfaces */
int xics_get_cpu_index_by_dt_id(int cpu_dt_id);
@@ -207,7 +184,15 @@ void ics_simple_write_xive(ICSState *ics, int nr, int server,
uint8_t priority, uint8_t saved_priority);
void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
+void icp_pic_print_info(ICPState *icp, Monitor *mon);
+void ics_pic_print_info(ICSState *ics, Monitor *mon);
+
+void ics_resend(ICSState *ics);
+void icp_resend(ICPState *ss);
+
+typedef struct sPAPRMachineState sPAPRMachineState;
-ICSState *xics_find_source(XICSState *icp, int irq);
+int xics_kvm_init(sPAPRMachineState *spapr, Error **errp);
+int xics_spapr_init(sPAPRMachineState *spapr, Error **errp);
#endif /* XICS_H */
diff --git a/include/hw/ptimer.h b/include/hw/ptimer.h
index 48cccbdb51..eafc3f0a86 100644
--- a/include/hw/ptimer.h
+++ b/include/hw/ptimer.h
@@ -60,6 +60,7 @@ typedef struct ptimer_state ptimer_state;
typedef void (*ptimer_cb)(void *opaque);
ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask);
+void ptimer_free(ptimer_state *s);
void ptimer_set_period(ptimer_state *s, int64_t period);
void ptimer_set_freq(ptimer_state *s, uint32_t freq);
uint64_t ptimer_get_limit(ptimer_state *s);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1735d66512..5720c884f4 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -22,6 +22,7 @@
#include "qapi-types.h"
#include "exec/cpu-common.h"
#include "qemu/coroutine_int.h"
+#include "qom/object.h"
#define QEMU_VM_FILE_MAGIC 0x5145564d
#define QEMU_VM_FILE_VERSION_COMPAT 0x00000002
@@ -92,6 +93,7 @@ struct MigrationIncomingState {
*/
QemuEvent main_thread_load_event;
+ size_t largest_page_size;
bool have_fault_thread;
QemuThread fault_thread;
QemuSemaphore fault_thread_sem;
@@ -107,6 +109,7 @@ struct MigrationIncomingState {
QEMUFile *to_src_file;
QemuMutex rp_mutex; /* We send replies from multiple threads */
void *postcopy_tmp_page;
+ void *postcopy_tmp_zero_page;
QEMUBH *bh;
@@ -313,6 +316,8 @@ int migrate_add_blocker(Error *reason, Error **errp);
*/
void migrate_del_blocker(Error *reason);
+int check_migratable(Object *obj, Error **err);
+
bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
@@ -375,6 +380,7 @@ void global_state_store_running(void);
void flush_page_queue(MigrationState *ms);
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
ram_addr_t start, ram_addr_t len);
+uint64_t ram_pagesize_summary(void);
PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index b6a7491f2d..8e036b95a2 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -35,13 +35,6 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages);
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis);
/*
- * Discard the contents of 'length' bytes from 'start'
- * We can assume that if we've been called postcopy_ram_hosttest returned true
- */
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
- size_t length);
-
-/*
* Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
* however leaving it until after precopy means that most of the precopy
* data is still THPd
@@ -81,13 +74,15 @@ void postcopy_discard_send_finish(MigrationState *ms,
* to use other postcopy_ routines to allocate.
* returns 0 on success
*/
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from);
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+ size_t pagesize);
/*
* Place a zero page at (host) atomically
* returns 0 on success
*/
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host);
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+ size_t pagesize);
/*
* Allocate a page of memory that can be mapped at a later point in time
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 63e7b02e05..f2dbf8410a 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -253,6 +253,10 @@ extern const VMStateInfo vmstate_info_uint16;
extern const VMStateInfo vmstate_info_uint32;
extern const VMStateInfo vmstate_info_uint64;
+/** Put this in the stream when migrating a null pointer. */
+#define VMS_NULLPTR_MARKER (0x30U) /* '0' */
+extern const VMStateInfo vmstate_info_nullptr;
+
extern const VMStateInfo vmstate_info_float64;
extern const VMStateInfo vmstate_info_cpudouble;
diff --git a/include/qemu-io.h b/include/qemu-io.h
index 4d402b9b01..196fde0f3a 100644
--- a/include/qemu-io.h
+++ b/include/qemu-io.h
@@ -36,6 +36,7 @@ typedef struct cmdinfo {
const char *args;
const char *oneline;
helpfunc_t help;
+ uint64_t perm;
} cmdinfo_t;
extern bool qemuio_misalign;
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9abed51ae8..26e628584c 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -610,7 +610,10 @@ void timer_deinit(QEMUTimer *ts);
*
* Free a timer (it must not be on the active list)
*/
-void timer_free(QEMUTimer *ts);
+static inline void timer_free(QEMUTimer *ts)
+{
+ g_free(ts);
+}
/**
* timer_del:
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index f365a51acf..096c17fce0 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -34,7 +34,7 @@ typedef struct BlockDevOps {
* changes. Sure would be useful if it did.
* Device models with removable media must implement this callback.
*/
- void (*change_media_cb)(void *opaque, bool load);
+ void (*change_media_cb)(void *opaque, bool load, Error **errp);
/*
* Runs when an eject request is issued from the monitor, the tray
* is closed, and the medium is locked.
@@ -84,7 +84,7 @@ typedef struct BlockBackendPublic {
QLIST_ENTRY(BlockBackendPublic) round_robin;
} BlockBackendPublic;
-BlockBackend *blk_new(void);
+BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm);
BlockBackend *blk_new_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp);
int blk_get_refcnt(BlockBackend *blk);
@@ -102,9 +102,12 @@ BlockBackend *blk_by_public(BlockBackendPublic *public);
BlockDriverState *blk_bs(BlockBackend *blk);
void blk_remove_bs(BlockBackend *blk);
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs);
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
bool bdrv_has_blk(BlockDriverState *bs);
bool bdrv_is_root_node(BlockDriverState *bs);
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp);
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
void blk_iostatus_enable(BlockBackend *blk);
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 7aad20b07f..f1c0712795 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -152,6 +152,13 @@ void replay_unregister_net(ReplayNetState *rns);
void replay_net_packet_event(ReplayNetState *rns, unsigned flags,
const struct iovec *iov, int iovcnt);
+/* Audio */
+
+/*! Saves/restores number of played samples of audio out operation. */
+void replay_audio_out(int *played);
+/*! Saves/restores recorded samples of audio in operation. */
+void replay_audio_in(int *recorded, void *samples, int *wpos, int size);
+
/* VM state operations */
/*! Called at the start of execution.
diff --git a/migration/block.c b/migration/block.c
index ebc10e628d..1941bc2402 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -379,7 +379,7 @@ static void unset_dirty_tracking(void)
}
}
-static void init_blk_migration(QEMUFile *f)
+static int init_blk_migration(QEMUFile *f)
{
BlockDriverState *bs;
BlkMigDevState *bmds;
@@ -390,6 +390,8 @@ static void init_blk_migration(QEMUFile *f)
BlkMigDevState *bmds;
BlockDriverState *bs;
} *bmds_bs;
+ Error *local_err = NULL;
+ int ret;
block_mig_state.submitted = 0;
block_mig_state.read_done = 0;
@@ -411,11 +413,12 @@ static void init_blk_migration(QEMUFile *f)
sectors = bdrv_nb_sectors(bs);
if (sectors <= 0) {
+ ret = sectors;
goto out;
}
bmds = g_new0(BlkMigDevState, 1);
- bmds->blk = blk_new();
+ bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
bmds->blk_name = g_strdup(bdrv_get_device_name(bs));
bmds->bulk_completed = 0;
bmds->total_sectors = sectors;
@@ -445,7 +448,11 @@ static void init_blk_migration(QEMUFile *f)
BlockDriverState *bs = bmds_bs[i].bs;
if (bmds) {
- blk_insert_bs(bmds->blk, bs);
+ ret = blk_insert_bs(bmds->blk, bs, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ goto out;
+ }
alloc_aio_bitmap(bmds);
error_setg(&bmds->blocker, "block device is in use by migration");
@@ -453,8 +460,10 @@ static void init_blk_migration(QEMUFile *f)
}
}
+ ret = 0;
out:
g_free(bmds_bs);
+ return ret;
}
/* Called with no lock taken. */
@@ -705,7 +714,11 @@ static int block_save_setup(QEMUFile *f, void *opaque)
block_mig_state.submitted, block_mig_state.transferred);
qemu_mutex_lock_iothread();
- init_blk_migration(f);
+ ret = init_blk_migration(f);
+ if (ret < 0) {
+ qemu_mutex_unlock_iothread();
+ return ret;
+ }
/* start track dirty blocks */
ret = set_dirty_tracking();
diff --git a/migration/migration.c b/migration/migration.c
index c6ae69d371..3dab6845b1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -49,6 +49,10 @@
* for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300
+/* Maximum migrate downtime set to 2000 seconds */
+#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
+#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
+
/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
@@ -383,6 +387,7 @@ static void process_incoming_migration_co(void *opaque)
int ret;
mis->from_src_file = f;
+ mis->largest_page_size = qemu_ram_pagesize_largest();
postcopy_state_set(POSTCOPY_INCOMING_NONE);
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
MIGRATION_STATUS_ACTIVE);
@@ -843,10 +848,11 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
return;
}
if (params->has_downtime_limit &&
- (params->downtime_limit < 0 || params->downtime_limit > 2000000)) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
- "downtime_limit",
- "an integer in the range of 0 to 2000000 milliseconds");
+ (params->downtime_limit < 0 ||
+ params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
+ error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
+ "the range of 0 to %d milliseconds",
+ MAX_MIGRATE_DOWNTIME);
return;
}
if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
@@ -1145,6 +1151,21 @@ void migrate_del_blocker(Error *reason)
migration_blockers = g_slist_remove(migration_blockers, reason);
}
+int check_migratable(Object *obj, Error **err)
+{
+    DeviceClass *klass = DEVICE_GET_CLASS(obj);
+
+    /* Reject only when --only-migratable is on and the vmsd says no */
+    if (!only_migratable || !klass->vmsd || !klass->vmsd->unmigratable) {
+        return 0;
+    }
+
+    error_setg(err, "Device %s is not migratable, but "
+               "--only-migratable was specified",
+               object_get_typename(obj));
+    return -1;
+}
+
void qmp_migrate_incoming(const char *uri, Error **errp)
{
Error *local_err = NULL;
@@ -1289,6 +1310,13 @@ void qmp_migrate_set_speed(int64_t value, Error **errp)
void qmp_migrate_set_downtime(double value, Error **errp)
{
+ if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
+ error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
+ "the range of 0 to %d seconds",
+ MAX_MIGRATE_DOWNTIME_SECONDS);
+ return;
+ }
+
value *= 1000; /* Convert to milliseconds */
value = MAX(0, MIN(INT64_MAX, value));
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a40dddbaf6..effbeb64fb 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -81,25 +81,18 @@ static bool ufd_version_check(int ufd)
return false;
}
- return true;
-}
-
-/*
- * Check for things that postcopy won't support; returns 0 if the block
- * is fine.
- */
-static int check_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
-{
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
-
- if (qemu_ram_pagesize(rb) > getpagesize()) {
- error_report("Postcopy doesn't support large page sizes yet (%s)",
- block_name);
- return -E2BIG;
+ if (getpagesize() != ram_pagesize_summary()) {
+ bool have_hp = false;
+ /* We've got a huge page */
+#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
+ have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
+#endif
+ if (!have_hp) {
+ error_report("Userfault on this host does not support huge pages");
+ return false;
+ }
}
-
- return 0;
+ return true;
}
/*
@@ -122,12 +115,6 @@ bool postcopy_ram_supported_by_host(void)
goto out;
}
- /* Check for anything about the RAMBlocks we don't support */
- if (qemu_ram_foreach_block(check_range, NULL)) {
- /* check_range will have printed its own error */
- goto out;
- }
-
ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
if (ufd == -1) {
error_report("%s: userfaultfd not available: %s", __func__,
@@ -200,27 +187,6 @@ out:
return ret;
}
-/**
- * postcopy_ram_discard_range: Discard a range of memory.
- * We can assume that if we've been called postcopy_ram_hosttest returned true.
- *
- * @mis: Current incoming migration state.
- * @start, @length: range of memory to discard.
- *
- * returns: 0 on success.
- */
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
- size_t length)
-{
- trace_postcopy_ram_discard_range(start, length);
- if (madvise(start, length, MADV_DONTNEED)) {
- error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
/*
* Setup an area of RAM so that it *can* be used for postcopy later; this
* must be done right at the start prior to pre-copy.
@@ -239,7 +205,7 @@ static int init_range(const char *block_name, void *host_addr,
* - we're going to get the copy from the source anyway.
* (Precopy will just overwrite this data, so doesn't need the discard)
*/
- if (postcopy_ram_discard_range(mis, host_addr, length)) {
+ if (ram_discard_range(mis, block_name, 0, length)) {
return -1;
}
@@ -342,9 +308,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
if (mis->postcopy_tmp_page) {
- munmap(mis->postcopy_tmp_page, getpagesize());
+ munmap(mis->postcopy_tmp_page, mis->largest_page_size);
mis->postcopy_tmp_page = NULL;
}
+ if (mis->postcopy_tmp_zero_page) {
+ munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
+ mis->postcopy_tmp_zero_page = NULL;
+ }
trace_postcopy_ram_incoming_cleanup_exit();
return 0;
}
@@ -408,6 +378,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
error_report("%s userfault register: %s", __func__, strerror(errno));
return -1;
}
+ if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
+ error_report("%s userfault: Region doesn't support COPY", __func__);
+ return -1;
+ }
return 0;
}
@@ -420,7 +394,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
MigrationIncomingState *mis = opaque;
struct uffd_msg msg;
int ret;
- size_t hostpagesize = getpagesize();
RAMBlock *rb = NULL;
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
@@ -487,7 +460,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
break;
}
- rb_offset &= ~(hostpagesize - 1);
+ rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
qemu_ram_get_idstr(rb),
rb_offset);
@@ -499,11 +472,11 @@ static void *postcopy_ram_fault_thread(void *opaque)
if (rb != last_rb) {
last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
- rb_offset, hostpagesize);
+ rb_offset, qemu_ram_pagesize(rb));
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL,
- rb_offset, hostpagesize);
+ rb_offset, qemu_ram_pagesize(rb));
}
}
trace_postcopy_ram_fault_thread_exit();
@@ -564,13 +537,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
* Place a host page (from) at (host) atomically
* returns 0 on success
*/
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+ size_t pagesize)
{
struct uffdio_copy copy_struct;
copy_struct.dst = (uint64_t)(uintptr_t)host;
copy_struct.src = (uint64_t)(uintptr_t)from;
- copy_struct.len = getpagesize();
+ copy_struct.len = pagesize;
copy_struct.mode = 0;
/* copy also acks to the kernel waking the stalled thread up
@@ -580,8 +554,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
*/
if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
int e = errno;
- error_report("%s: %s copy host: %p from: %p",
- __func__, strerror(e), host, from);
+ error_report("%s: %s copy host: %p from: %p (size: %zd)",
+ __func__, strerror(e), host, from, pagesize);
return -e;
}
@@ -594,23 +568,44 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
* Place a zero page at (host) atomically
* returns 0 on success
*/
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+ size_t pagesize)
{
- struct uffdio_zeropage zero_struct;
+ trace_postcopy_place_page_zero(host);
- zero_struct.range.start = (uint64_t)(uintptr_t)host;
- zero_struct.range.len = getpagesize();
- zero_struct.mode = 0;
+ if (pagesize == getpagesize()) {
+ struct uffdio_zeropage zero_struct;
+ zero_struct.range.start = (uint64_t)(uintptr_t)host;
+ zero_struct.range.len = getpagesize();
+ zero_struct.mode = 0;
- if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
- int e = errno;
- error_report("%s: %s zero host: %p",
- __func__, strerror(e), host);
+ if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+ int e = errno;
+ error_report("%s: %s zero host: %p",
+ __func__, strerror(e), host);
- return -e;
+ return -e;
+ }
+ } else {
+ /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
+ if (!mis->postcopy_tmp_zero_page) {
+ mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+ int e = errno;
+ mis->postcopy_tmp_zero_page = NULL;
+ error_report("%s: %s mapping large zero page",
+ __func__, strerror(e));
+ return -e;
+ }
+ memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+ }
+ return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
+ pagesize);
}
- trace_postcopy_place_page_zero(host);
return 0;
}
@@ -625,7 +620,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
if (!mis->postcopy_tmp_page) {
- mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
+ mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
PROT_READ | PROT_WRITE, MAP_PRIVATE |
MAP_ANONYMOUS, -1, 0);
if (mis->postcopy_tmp_page == MAP_FAILED) {
@@ -658,13 +653,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
return -1;
}
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
- size_t length)
-{
- assert(0);
- return -1;
-}
-
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
assert(0);
@@ -677,13 +665,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
return -1;
}
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+ size_t pagesize)
{
assert(0);
return -1;
}
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+ size_t pagesize)
{
assert(0);
return -1;
diff --git a/migration/ram.c b/migration/ram.c
index f289fcddd5..719425b9b8 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -600,6 +600,23 @@ static void migration_bitmap_sync_init(void)
iterations_prev = 0;
}
+/* Summarise the page sizes in use by OR-ing together the page_size
+ * of every RAMBlock on ram_list. When every block shares one page
+ * size the result is simply that size; with a mix of sizes (e.g.
+ * some huge pages) each distinct size contributes its bits.
+ */
+uint64_t ram_pagesize_summary(void)
+{
+    uint64_t size_bits = 0;
+    RAMBlock *rb;
+
+    QLIST_FOREACH_RCU(rb, &ram_list.blocks, next) {
+        size_bits = size_bits | rb->page_size;
+    }
+
+    return size_bits;
+}
+
static void migration_bitmap_sync(void)
{
RAMBlock *block;
@@ -1285,6 +1302,8 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
* offset to point into the middle of a host page
* in which case the remainder of the hostpage is sent.
* Only dirty target pages are sent.
+ * Note that the host page size may be a huge page for this
+ * block.
*
* Returns: Number of pages written.
*
@@ -1303,6 +1322,8 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
ram_addr_t dirty_ram_abs)
{
int tmppages, pages = 0;
+ size_t pagesize = qemu_ram_pagesize(pss->block);
+
do {
tmppages = ram_save_target_page(ms, f, pss, last_stage,
bytes_transferred, dirty_ram_abs);
@@ -1313,7 +1334,7 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
pages += tmppages;
pss->offset += TARGET_PAGE_SIZE;
dirty_ram_abs += TARGET_PAGE_SIZE;
- } while (pss->offset & (qemu_host_page_size - 1));
+ } while (pss->offset & (pagesize - 1));
/* The offset we leave with is the last one we looked at */
pss->offset -= TARGET_PAGE_SIZE;
@@ -1655,12 +1676,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
{
unsigned long *bitmap;
unsigned long *unsentmap;
- unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
+ unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
unsigned long first = block->offset >> TARGET_PAGE_BITS;
unsigned long len = block->used_length >> TARGET_PAGE_BITS;
unsigned long last = first + (len - 1);
unsigned long run_start;
+ if (block->page_size == TARGET_PAGE_SIZE) {
+ /* Easy case - TPS==HPS for a non-huge page RAMBlock */
+ return;
+ }
+
bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
@@ -1764,7 +1790,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
* Utility for the outgoing postcopy code.
*
* Discard any partially sent host-page size chunks, mark any partially
- * dirty host-page size chunks as all dirty.
+ * dirty host-page size chunks as all dirty. In this case the host-page
+ * is the host-page for the particular RAMBlock, i.e. it might be a huge page
*
* Returns: 0 on success
*/
@@ -1772,11 +1799,6 @@ static int postcopy_chunk_hostpages(MigrationState *ms)
{
struct RAMBlock *block;
- if (qemu_host_page_size == TARGET_PAGE_SIZE) {
- /* Easy case - TPS==HPS - nothing to be done */
- return 0;
- }
-
/* Easiest way to make sure we don't resume in the middle of a host-page */
last_seen_block = NULL;
last_sent_block = NULL;
@@ -1832,7 +1854,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
return -EINVAL;
}
- /* Deal with TPS != HPS */
+ /* Deal with TPS != HPS and huge pages */
ret = postcopy_chunk_hostpages(ms);
if (ret) {
rcu_read_unlock();
@@ -1872,6 +1894,8 @@ int ram_discard_range(MigrationIncomingState *mis,
{
int ret = -1;
+ trace_ram_discard_range(block_name, start, length);
+
rcu_read_lock();
RAMBlock *rb = qemu_ram_block_by_name(block_name);
@@ -1881,27 +1905,7 @@ int ram_discard_range(MigrationIncomingState *mis,
goto err;
}
- uint8_t *host_startaddr = rb->host + start;
-
- if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
- error_report("ram_discard_range: Unaligned start address: %p",
- host_startaddr);
- goto err;
- }
-
- if ((start + length) <= rb->used_length) {
- uint8_t *host_endaddr = host_startaddr + length;
- if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
- error_report("ram_discard_range: Unaligned end address: %p",
- host_endaddr);
- goto err;
- }
- ret = postcopy_ram_discard_range(mis, host_startaddr, length);
- } else {
- error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
- "/%zx/" RAM_ADDR_FMT")",
- block_name, start, length, rb->used_length);
- }
+ ret = ram_block_discard_range(rb, start, length);
err:
rcu_read_unlock();
@@ -2010,6 +2014,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
qemu_put_byte(f, strlen(block->idstr));
qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
qemu_put_be64(f, block->used_length);
+ if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
+ qemu_put_be64(f, block->page_size);
+ }
}
rcu_read_unlock();
@@ -2387,7 +2394,7 @@ static int ram_load_postcopy(QEMUFile *f)
{
int flags = 0, ret = 0;
bool place_needed = false;
- bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
+ bool matching_page_sizes = false;
MigrationIncomingState *mis = migration_incoming_get_current();
/* Temporary page that is later 'placed' */
void *postcopy_host_page = postcopy_get_tmp_page(mis);
@@ -2399,6 +2406,7 @@ static int ram_load_postcopy(QEMUFile *f)
void *host = NULL;
void *page_buffer = NULL;
void *place_source = NULL;
+ RAMBlock *block = NULL;
uint8_t ch;
addr = qemu_get_be64(f);
@@ -2408,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f)
trace_ram_load_postcopy_loop((uint64_t)addr, flags);
place_needed = false;
if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
- RAMBlock *block = ram_block_from_stream(f, flags);
+ block = ram_block_from_stream(f, flags);
host = host_from_ram_block_offset(block, addr);
if (!host) {
@@ -2416,8 +2424,11 @@ static int ram_load_postcopy(QEMUFile *f)
ret = -EINVAL;
break;
}
+ matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
/*
- * Postcopy requires that we place whole host pages atomically.
+ * Postcopy requires that we place whole host pages atomically;
+ * these may be huge pages for RAMBlocks that are backed by
+ * hugetlbfs.
* To make it atomic, the data is read into a temporary page
* that's moved into place later.
* The migration protocol uses, possibly smaller, target-pages
@@ -2425,9 +2436,9 @@ static int ram_load_postcopy(QEMUFile *f)
* of a host page in order.
*/
page_buffer = postcopy_host_page +
- ((uintptr_t)host & ~qemu_host_page_mask);
+ ((uintptr_t)host & (block->page_size - 1));
/* If all TP are zero then we can optimise the place */
- if (!((uintptr_t)host & ~qemu_host_page_mask)) {
+ if (!((uintptr_t)host & (block->page_size - 1))) {
all_zero = true;
} else {
/* not the 1st TP within the HP */
@@ -2445,7 +2456,7 @@ static int ram_load_postcopy(QEMUFile *f)
* page
*/
place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
- ~qemu_host_page_mask) == 0;
+ (block->page_size - 1)) == 0;
place_source = postcopy_host_page;
}
last_host = host;
@@ -2483,14 +2494,14 @@ static int ram_load_postcopy(QEMUFile *f)
if (place_needed) {
/* This gets called at the last target page in the host page */
+ void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
+
if (all_zero) {
- ret = postcopy_place_page_zero(mis,
- host + TARGET_PAGE_SIZE -
- qemu_host_page_size);
+ ret = postcopy_place_page_zero(mis, place_dest,
+ block->page_size);
} else {
- ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
- qemu_host_page_size,
- place_source);
+ ret = postcopy_place_page(mis, place_dest,
+ place_source, block->page_size);
}
}
if (!ret) {
@@ -2511,6 +2522,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
* be atomic
*/
bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
+ /* ADVISE is earlier, it shows the source has the postcopy capability on */
+ bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
seq_iter++;
@@ -2575,6 +2588,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
error_report_err(local_err);
}
}
+ /* For postcopy we need to check hugepage sizes match */
+ if (postcopy_advised &&
+ block->page_size != qemu_host_page_size) {
+ uint64_t remote_page_size = qemu_get_be64(f);
+ if (remote_page_size != block->page_size) {
+ error_report("Mismatched RAM page size %s "
+ "(local) %zd != %" PRId64,
+ id, block->page_size,
+ remote_page_size);
+ ret = -EINVAL;
+ }
+ }
ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
block->idstr);
} else {
diff --git a/migration/savevm.c b/migration/savevm.c
index 5ecd264134..3b19a4a274 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -688,6 +688,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
return -1;
}
+ g_free(id);
se->compat = g_new0(CompatEntry, 1);
pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
@@ -869,7 +870,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
uint64_t tmp[2];
- tmp[0] = cpu_to_be64(getpagesize());
+ tmp[0] = cpu_to_be64(ram_pagesize_summary());
tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
trace_qemu_savevm_send_postcopy_advise();
@@ -1276,6 +1277,11 @@ done:
status = MIGRATION_STATUS_COMPLETED;
}
migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
+
+ /* f is supplied by the caller, so it must not stay in the global migration
+ * state after this function has finished */
+ ms->to_dst_file = NULL;
+
return ret;
}
@@ -1346,7 +1352,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
{
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
- uint64_t remote_hps, remote_tps;
+ uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
trace_loadvm_postcopy_handle_advise();
if (ps != POSTCOPY_INCOMING_NONE) {
@@ -1359,17 +1365,27 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
return -1;
}
- remote_hps = qemu_get_be64(mis->from_src_file);
- if (remote_hps != getpagesize()) {
+ remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
+ local_pagesize_summary = ram_pagesize_summary();
+
+ if (remote_pagesize_summary != local_pagesize_summary) {
/*
- * Some combinations of mismatch are probably possible but it gets
- * a bit more complicated. In particular we need to place whole
- * host pages on the dest at once, and we need to ensure that we
- * handle dirtying to make sure we never end up sending part of
- * a hostpage on it's own.
+ * This detects two potential causes of mismatch:
+ * a) A mismatch in host page sizes
+ * Some combinations of mismatch are probably possible but it gets
+ * a bit more complicated. In particular we need to place whole
+ * host pages on the dest at once, and we need to ensure that we
+ * handle dirtying to make sure we never end up sending part of
+ * a hostpage on its own.
+ * b) The use of different huge page sizes on source/destination
+ * a more fine grain test is performed during RAM block migration
+ * but this test here causes a nice early clear failure, and
+ * also fails when passed to an older qemu that doesn't
+ * do huge pages.
*/
- error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
- (int)remote_hps, getpagesize());
+ error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
+ " d=%" PRIx64 ")",
+ remote_pagesize_summary, local_pagesize_summary);
return -1;
}
diff --git a/migration/trace-events b/migration/trace-events
index fa660e35b1..7372ce2a51 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -68,6 +68,7 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t
migration_bitmap_sync_start(void) ""
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
migration_throttle(void) ""
+ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx"
ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
ram_postcopy_send_discard_bitmap(void) ""
ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx"
@@ -176,7 +177,6 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) ""
# migration/postcopy-ram.c
postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx"
-postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
diff --git a/migration/vmstate.c b/migration/vmstate.c
index b4d8ae982a..78b3cd48e7 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -52,29 +52,15 @@ static int vmstate_size(void *opaque, VMStateField *field)
return size;
}
-static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
+static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque)
{
- void *base_addr = opaque + field->offset;
-
- if (field->flags & VMS_POINTER) {
- if (alloc && (field->flags & VMS_ALLOC)) {
- gsize size = 0;
- if (field->flags & VMS_VBUFFER) {
- size = vmstate_size(opaque, field);
- } else {
- int n_elems = vmstate_n_elems(opaque, field);
- if (n_elems) {
- size = n_elems * field->size;
- }
- }
- if (size) {
- *(void **)base_addr = g_malloc(size);
- }
+ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) {
+ gsize size = vmstate_size(opaque, field);
+ size *= vmstate_n_elems(opaque, field);
+ if (size) {
+ *(void **)ptr = g_malloc(size);
}
- base_addr = *(void **)base_addr;
}
-
- return base_addr;
}
int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
@@ -116,21 +102,30 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
field->field_exists(opaque, version_id)) ||
(!field->field_exists &&
field->version_id <= version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field, true);
+ void *first_elem = opaque + field->offset;
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);
+ vmstate_handle_alloc(first_elem, field, opaque);
+ if (field->flags & VMS_POINTER) {
+ first_elem = *(void **)first_elem;
+ assert(first_elem || !n_elems);
+ }
for (i = 0; i < n_elems; i++) {
- void *addr = base_addr + size * i;
+ void *curr_elem = first_elem + size * i;
if (field->flags & VMS_ARRAY_OF_POINTER) {
- addr = *(void **)addr;
+ curr_elem = *(void **)curr_elem;
}
- if (field->flags & VMS_STRUCT) {
- ret = vmstate_load_state(f, field->vmsd, addr,
+ if (!curr_elem) {
+ /* if null pointer check placeholder and do not follow */
+ assert(field->flags & VMS_ARRAY_OF_POINTER);
+ ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL);
+ } else if (field->flags & VMS_STRUCT) {
+ ret = vmstate_load_state(f, field->vmsd, curr_elem,
field->vmsd->version_id);
} else {
- ret = field->info->get(f, addr, size, field);
+ ret = field->info->get(f, curr_elem, size, field);
}
if (ret >= 0) {
ret = qemu_file_get_error(f);
@@ -321,26 +316,34 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
while (field->name) {
if (!field->field_exists ||
field->field_exists(opaque, vmsd->version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field, false);
+ void *first_elem = opaque + field->offset;
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);
int64_t old_offset, written_bytes;
QJSON *vmdesc_loop = vmdesc;
trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems);
+ if (field->flags & VMS_POINTER) {
+ first_elem = *(void **)first_elem;
+ assert(first_elem || !n_elems);
+ }
for (i = 0; i < n_elems; i++) {
- void *addr = base_addr + size * i;
+ void *curr_elem = first_elem + size * i;
vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems);
old_offset = qemu_ftell_fast(f);
-
if (field->flags & VMS_ARRAY_OF_POINTER) {
- addr = *(void **)addr;
+ assert(curr_elem);
+ curr_elem = *(void **)curr_elem;
}
- if (field->flags & VMS_STRUCT) {
- vmstate_save_state(f, field->vmsd, addr, vmdesc_loop);
+ if (!curr_elem) {
+ /* if null pointer write placeholder and do not follow */
+ assert(field->flags & VMS_ARRAY_OF_POINTER);
+ vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL);
+ } else if (field->flags & VMS_STRUCT) {
+ vmstate_save_state(f, field->vmsd, curr_elem, vmdesc_loop);
} else {
- field->info->put(f, addr, size, field, vmdesc_loop);
+ field->info->put(f, curr_elem, size, field, vmdesc_loop);
}
written_bytes = qemu_ftell_fast(f) - old_offset;
@@ -752,6 +755,34 @@ const VMStateInfo vmstate_info_uint64 = {
.put = put_uint64,
};
+/* Read one byte from @f; succeed only if it is VMS_NULLPTR_MARKER. */
+static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField *field)
+{
+    if (qemu_get_byte(f) != VMS_NULLPTR_MARKER) {
+        error_report("vmstate: get_nullptr expected VMS_NULLPTR_MARKER");
+        return -EINVAL;
+    }
+    return 0;
+}
+
+/* Write VMS_NULLPTR_MARKER to @f; @pv must be NULL, otherwise -EINVAL. */
+static int put_nullptr(QEMUFile *f, void *pv, size_t size,
+                       VMStateField *field, QJSON *vmdesc)
+{
+    if (pv != NULL) {
+        error_report("vmstate: put_nullptr must be called with pv == NULL");
+        return -EINVAL;
+    }
+    qemu_put_byte(f, VMS_NULLPTR_MARKER);
+    return 0;
+}
+
+const VMStateInfo vmstate_info_nullptr = {
+    .name = "nullptr", /* one marker byte on the wire, not a uint64 */
+    .get = get_nullptr,
+    .put = put_nullptr,
+};
+
/* 64 bit unsigned int. See that the received value is the same than the one
in the field */
diff --git a/monitor.c b/monitor.c
index f8f4a07cfb..b68944d93c 100644
--- a/monitor.c
+++ b/monitor.c
@@ -984,8 +984,10 @@ static void qmp_unregister_commands_hack(void)
#ifndef TARGET_ARM
qmp_unregister_command("query-gic-capabilities");
#endif
-#if !defined(TARGET_S390X)
+#if !defined(TARGET_S390X) && !defined(TARGET_I386)
qmp_unregister_command("query-cpu-model-expansion");
+#endif
+#if !defined(TARGET_S390X)
qmp_unregister_command("query-cpu-model-baseline");
qmp_unregister_command("query-cpu-model-comparison");
#endif
diff --git a/nbd/server.c b/nbd/server.c
index ac92fa0727..924a1fe2db 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -891,9 +891,21 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
{
BlockBackend *blk;
NBDExport *exp = g_malloc0(sizeof(NBDExport));
+ uint64_t perm;
+ int ret;
- blk = blk_new();
- blk_insert_bs(blk, bs);
+ /* Don't allow resize while the NBD server is running, otherwise we don't
+ * care what happens with the node. */
+ perm = BLK_PERM_CONSISTENT_READ;
+ if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
+ perm |= BLK_PERM_WRITE;
+ }
+ blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ goto fail;
+ }
blk_set_enable_write_cache(blk, !writethrough);
exp->refcount = 1;
diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin
index 229b5af986..18666c9f2f 100644
--- a/pc-bios/bios-256k.bin
+++ b/pc-bios/bios-256k.bin
Binary files differ
diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin
index 9a9b0f0106..a394411fe5 100644
--- a/pc-bios/bios.bin
+++ b/pc-bios/bios.bin
Binary files differ
diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index 95f1167261..4869c9dcf3 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
Binary files differ
diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index 675968ea62..aada55e094 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
Binary files differ
diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index d4b95326fe..cf466f6a4c 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
Binary files differ
diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin
index 9dadce2345..e6c42bd3c3 100644
--- a/pc-bios/vgabios-cirrus.bin
+++ b/pc-bios/vgabios-cirrus.bin
Binary files differ
diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin
index a89725c81c..915eba7c81 100644
--- a/pc-bios/vgabios-qxl.bin
+++ b/pc-bios/vgabios-qxl.bin
Binary files differ
diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin
index ea041412a2..40eca8c6d1 100644
--- a/pc-bios/vgabios-stdvga.bin
+++ b/pc-bios/vgabios-stdvga.bin
Binary files differ
diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin
index 71e22fc868..8b3abfa003 100644
--- a/pc-bios/vgabios-virtio.bin
+++ b/pc-bios/vgabios-virtio.bin
Binary files differ
diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin
index ad239cbfe8..6a90b945bd 100644
--- a/pc-bios/vgabios-vmware.bin
+++ b/pc-bios/vgabios-vmware.bin
Binary files differ
diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin
index 9947c2c26f..d3ed89d94b 100644
--- a/pc-bios/vgabios.bin
+++ b/pc-bios/vgabios.bin
Binary files differ
diff --git a/qapi-schema.json b/qapi-schema.json
index 84692da9e9..fb39d1dc11 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4274,6 +4274,15 @@
# migration-safe, but allows tooling to get an insight and work with
# model details.
#
+# Note: When a non-migration-safe CPU model is expanded in static mode, some
+# features enabled by the CPU model may be omitted, because they can't be
+# implemented by a static CPU model definition (e.g. cache info passthrough and
+# PMU passthrough in x86). If you need an accurate representation of the
+# features enabled by a non-migration-safe CPU model, use @full. If you need a
+# static representation that will keep ABI compatibility even when changing QEMU
+# version or machine-type, use @static (but keep in mind that some features may
+# be omitted).
+#
# Since: 2.8.0
##
{ 'enum': 'CpuModelExpansionType',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index cf24c04242..bc0ccd615c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1304,6 +1304,11 @@
#
# @speed: #optional the maximum speed, in bytes per second
#
+# @filter-node-name: #optional the node name that should be assigned to the
+# filter driver that the commit job inserts into the graph
+# above @top. If this option is not given, a node name is
+# autogenerated. (Since: 2.9)
+#
# Returns: Nothing on success
# If commit or stream is already active on this device, DeviceInUse
# If @device does not exist, DeviceNotFound
@@ -1323,7 +1328,8 @@
##
{ 'command': 'block-commit',
'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str',
- '*backing-file': 'str', '*speed': 'int' } }
+ '*backing-file': 'str', '*speed': 'int',
+ '*filter-node-name': 'str' } }
##
# @drive-backup:
@@ -1671,6 +1677,11 @@
# default 'report' (no limitations, since this applies to
# a different block device than @device).
#
+# @filter-node-name: #optional the node name that should be assigned to the
+# filter driver that the mirror job inserts into the graph
+# above @device. If this option is not given, a node name is
+# autogenerated. (Since: 2.9)
+#
# Returns: nothing on success.
#
# Since: 2.6
@@ -1690,7 +1701,8 @@
'sync': 'MirrorSyncMode',
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
- '*on-target-error': 'BlockdevOnError' } }
+ '*on-target-error': 'BlockdevOnError',
+ '*filter-node-name': 'str' } }
##
# @block_set_io_throttle:
@@ -2111,6 +2123,7 @@
# @replication: Since 2.8
# @ssh: Since 2.8
# @iscsi: Since 2.9
+# @rbd: Since 2.9
#
# Since: 2.0
##
@@ -2119,7 +2132,7 @@
'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed',
- 'quorum', 'raw', 'replication', 'ssh', 'vdi', 'vhdx', 'vmdk',
+ 'quorum', 'raw', 'rbd', 'replication', 'ssh', 'vdi', 'vhdx', 'vmdk',
'vpc', 'vvfat' ] }
##
@@ -2665,6 +2678,63 @@
'*header-digest': 'IscsiHeaderDigest',
'*timeout': 'int' } }
+
+##
+# @RbdAuthSupport:
+#
+# An enumeration of RBD auth support
+#
+# Since: 2.9
+##
+{ 'enum': 'RbdAuthSupport',
+ 'data': [ 'cephx', 'none' ] }
+
+
+##
+# @RbdAuthMethod:
+#
+# A struct wrapping a single rados auth_supported type (@RbdAuthSupport)
+#
+# Since: 2.9
+##
+{ 'struct': 'RbdAuthMethod',
+ 'data': { 'auth': 'RbdAuthSupport' } }
+
+##
+# @BlockdevOptionsRbd:
+#
+# @pool: Ceph pool name.
+#
+# @image: Image name in the Ceph pool.
+#
+# @conf: #optional path to Ceph configuration file. Values
+# in the configuration file will be overridden by
+# options specified via QAPI.
+#
+# @snapshot: #optional Ceph snapshot name.
+#
+# @user: #optional Ceph id name.
+#
+# @server: #optional Monitor host address and port. This maps
+# to the "mon_host" Ceph option.
+#
+# @auth-supported: #optional List of supported authentication methods.
+#
+# @password-secret: #optional The ID of a QCryptoSecret object providing
+# the password for the login.
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsRbd',
+ 'data': { 'pool': 'str',
+ 'image': 'str',
+ '*conf': 'str',
+ '*snapshot': 'str',
+ '*user': 'str',
+ '*server': ['InetSocketAddress'],
+ '*auth-supported': ['RbdAuthMethod'],
+ '*password-secret': 'str' } }
+
##
# @ReplicationMode:
#
@@ -2863,7 +2933,7 @@
'qed': 'BlockdevOptionsGenericCOWFormat',
'quorum': 'BlockdevOptionsQuorum',
'raw': 'BlockdevOptionsRaw',
-# TODO rbd: Wait for structured options
+ 'rbd': 'BlockdevOptionsRbd',
'replication':'BlockdevOptionsReplication',
# TODO sheepdog: Wait for structured options
'ssh': 'BlockdevOptionsSsh',
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 549f45f066..5f2fcdfc45 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -29,7 +29,6 @@
#include "qemu/error-report.h"
#include "qemu/help_option.h"
#include "sysemu/block-backend.h"
-#include "migration/migration.h"
/*
* Aliases were a bad idea from the start. Let's keep them
@@ -579,14 +578,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
return NULL;
}
- if (only_migratable) {
- if (dc->vmsd->unmigratable) {
- error_setg(errp, "Device %s is not migratable, but "
- "--only-migratable was specified", driver);
- return NULL;
- }
- }
-
/* find bus */
path = qemu_opt_get(opts, "bus");
if (path != NULL) {
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index f054599a91..9c9702cc62 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -40,9 +40,9 @@ STEXI
ETEXI
DEF("convert", img_convert,
- "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename")
+ "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename")
STEXI
-@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename}
ETEXI
DEF("dd", img_dd,
diff --git a/qemu-img.c b/qemu-img.c
index df3aefd35a..98b836b030 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -156,6 +156,11 @@ static void QEMU_NORETURN help(void)
" kinds of errors, with a higher risk of choosing the wrong fix or\n"
" hiding corruption that has already occurred.\n"
"\n"
+ "Parameters to convert subcommand:\n"
+ " '-m' specifies how many coroutines work in parallel during the convert\n"
+ " process (defaults to 8)\n"
+ " '-W' allow to write to the target out of order rather than sequential\n"
+ "\n"
"Parameters to snapshot subcommand:\n"
" 'snapshot' is the name of the snapshot to create, apply or delete\n"
" '-a' applies a snapshot (revert disk to saved state)\n"
@@ -809,6 +814,8 @@ static void run_block_job(BlockJob *job, Error **errp)
{
AioContext *aio_context = blk_get_aio_context(job->blk);
+ /* FIXME In error cases, the job simply goes away and we access a dangling
+ * pointer below. */
aio_context_acquire(aio_context);
do {
aio_poll(aio_context, true);
@@ -830,6 +837,7 @@ static int img_commit(int argc, char **argv)
const char *filename, *fmt, *cache, *base;
BlockBackend *blk;
BlockDriverState *bs, *base_bs;
+ BlockJob *job;
bool progress = false, quiet = false, drop = false;
bool writethrough;
Error *local_err = NULL;
@@ -950,8 +958,8 @@ static int img_commit(int argc, char **argv)
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0,
- BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi,
- &local_err, false);
+ BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
+ &cbi, &local_err, false);
aio_context_release(aio_context);
if (local_err) {
goto done;
@@ -965,7 +973,8 @@ static int img_commit(int argc, char **argv)
bdrv_ref(bs);
}
- run_block_job(bs->job, &local_err);
+ job = block_job_get("commit");
+ run_block_job(job, &local_err);
if (local_err) {
goto unref_backing;
}
@@ -1462,48 +1471,61 @@ enum ImgConvertBlockStatus {
BLK_BACKING_FILE,
};
+#define MAX_COROUTINES 16
+
typedef struct ImgConvertState {
BlockBackend **src;
int64_t *src_sectors;
- int src_cur, src_num;
- int64_t src_cur_offset;
+ int src_num;
int64_t total_sectors;
int64_t allocated_sectors;
+ int64_t allocated_done;
+ int64_t sector_num;
+ int64_t wr_offs;
enum ImgConvertBlockStatus status;
int64_t sector_next_status;
BlockBackend *target;
bool has_zero_init;
bool compressed;
bool target_has_backing;
+ bool wr_in_order;
int min_sparse;
size_t cluster_sectors;
size_t buf_sectors;
+ int num_coroutines;
+ int running_coroutines;
+ Coroutine *co[MAX_COROUTINES];
+ int64_t wait_sector_num[MAX_COROUTINES];
+ CoMutex lock;
+ int ret;
} ImgConvertState;
-static void convert_select_part(ImgConvertState *s, int64_t sector_num)
+static void convert_select_part(ImgConvertState *s, int64_t sector_num,
+ int *src_cur, int64_t *src_cur_offset)
{
- assert(sector_num >= s->src_cur_offset);
- while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
- s->src_cur_offset += s->src_sectors[s->src_cur];
- s->src_cur++;
- assert(s->src_cur < s->src_num);
+ *src_cur = 0;
+ *src_cur_offset = 0;
+ while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
+ *src_cur_offset += s->src_sectors[*src_cur];
+ (*src_cur)++;
+ assert(*src_cur < s->src_num);
}
}
static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
- int64_t ret;
- int n;
+ int64_t ret, src_cur_offset;
+ int n, src_cur;
- convert_select_part(s, sector_num);
+ convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
assert(s->total_sectors > sector_num);
n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
if (s->sector_next_status <= sector_num) {
BlockDriverState *file;
- ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
- sector_num - s->src_cur_offset,
+ ret = bdrv_get_block_status(blk_bs(s->src[src_cur]),
+ sector_num - src_cur_offset,
n, &n, &file);
if (ret < 0) {
return ret;
@@ -1519,8 +1541,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
/* Check block status of the backing file chain to avoid
* needlessly reading zeroes and limiting the iteration to the
* buffer size */
- ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL,
- sector_num - s->src_cur_offset,
+ ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL,
+ sector_num - src_cur_offset,
n, &n, &file);
if (ret < 0) {
return ret;
@@ -1558,28 +1580,34 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
return n;
}
-static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
- uint8_t *buf)
+static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
+ int nb_sectors, uint8_t *buf)
{
- int n;
- int ret;
+ int n, ret;
+ QEMUIOVector qiov;
+ struct iovec iov;
assert(nb_sectors <= s->buf_sectors);
while (nb_sectors > 0) {
BlockBackend *blk;
- int64_t bs_sectors;
+ int src_cur;
+ int64_t bs_sectors, src_cur_offset;
/* In the case of compression with multiple source files, we can get a
* nb_sectors that spreads into the next part. So we must be able to
* read across multiple BDSes for one convert_co_read() call. */
- convert_select_part(s, sector_num);
- blk = s->src[s->src_cur];
- bs_sectors = s->src_sectors[s->src_cur];
-
- n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
- ret = blk_pread(blk,
- (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS,
- buf, n << BDRV_SECTOR_BITS);
+ convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
+ blk = s->src[src_cur];
+ bs_sectors = s->src_sectors[src_cur];
+
+ n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
+ iov.iov_base = buf;
+ iov.iov_len = n << BDRV_SECTOR_BITS;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_preadv(
+ blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, &qiov, 0);
if (ret < 0) {
return ret;
}
@@ -1592,15 +1620,18 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
return 0;
}
-static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
- const uint8_t *buf)
+
+static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
+ int nb_sectors, uint8_t *buf,
+ enum ImgConvertBlockStatus status)
{
int ret;
+ QEMUIOVector qiov;
+ struct iovec iov;
while (nb_sectors > 0) {
int n = nb_sectors;
-
- switch (s->status) {
+ switch (status) {
case BLK_BACKING_FILE:
/* If we have a backing file, leave clusters unallocated that are
* unallocated in the source image, so that the backing file is
@@ -1621,9 +1652,13 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
break;
}
- ret = blk_pwrite_compressed(s->target,
- sector_num << BDRV_SECTOR_BITS,
- buf, n << BDRV_SECTOR_BITS);
+ iov.iov_base = buf;
+ iov.iov_len = n << BDRV_SECTOR_BITS;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, &qiov,
+ BDRV_REQ_WRITE_COMPRESSED);
if (ret < 0) {
return ret;
}
@@ -1636,8 +1671,12 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
if (!s->min_sparse ||
is_allocated_sectors_min(buf, n, &n, s->min_sparse))
{
- ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
- buf, n << BDRV_SECTOR_BITS, 0);
+ iov.iov_base = buf;
+ iov.iov_len = n << BDRV_SECTOR_BITS;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, &qiov, 0);
if (ret < 0) {
return ret;
}
@@ -1649,8 +1688,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
if (s->has_zero_init) {
break;
}
- ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
- n << BDRV_SECTOR_BITS, 0);
+ ret = blk_co_pwrite_zeroes(s->target,
+ sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, 0);
if (ret < 0) {
return ret;
}
@@ -1665,12 +1705,122 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
return 0;
}
-static int convert_do_copy(ImgConvertState *s)
+static void coroutine_fn convert_co_do_copy(void *opaque)
{
+ ImgConvertState *s = opaque;
uint8_t *buf = NULL;
- int64_t sector_num, allocated_done;
- int ret;
- int n;
+ int ret, i;
+ int index = -1;
+
+ for (i = 0; i < s->num_coroutines; i++) {
+ if (s->co[i] == qemu_coroutine_self()) {
+ index = i;
+ break;
+ }
+ }
+ assert(index >= 0);
+
+ s->running_coroutines++;
+ buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
+
+ while (1) {
+ int n;
+ int64_t sector_num;
+ enum ImgConvertBlockStatus status;
+
+ qemu_co_mutex_lock(&s->lock);
+ if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
+ qemu_co_mutex_unlock(&s->lock);
+ goto out;
+ }
+ n = convert_iteration_sectors(s, s->sector_num);
+ if (n < 0) {
+ qemu_co_mutex_unlock(&s->lock);
+ s->ret = n;
+ goto out;
+ }
+ /* save current sector and allocation status to local variables */
+ sector_num = s->sector_num;
+ status = s->status;
+ if (!s->min_sparse && s->status == BLK_ZERO) {
+ n = MIN(n, s->buf_sectors);
+ }
+ /* increment global sector counter so that other coroutines can
+ * already continue reading beyond this request */
+ s->sector_num += n;
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
+ s->allocated_done += n;
+ qemu_progress_print(100.0 * s->allocated_done /
+ s->allocated_sectors, 0);
+ }
+
+ if (status == BLK_DATA) {
+ ret = convert_co_read(s, sector_num, n, buf);
+ if (ret < 0) {
+ error_report("error while reading sector %" PRId64
+ ": %s", sector_num, strerror(-ret));
+ s->ret = ret;
+ goto out;
+ }
+ } else if (!s->min_sparse && status == BLK_ZERO) {
+ status = BLK_DATA;
+ memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
+ }
+
+ if (s->wr_in_order) {
+ /* keep writes in order */
+ while (s->wr_offs != sector_num) {
+ if (s->ret != -EINPROGRESS) {
+ goto out;
+ }
+ s->wait_sector_num[index] = sector_num;
+ qemu_coroutine_yield();
+ }
+ s->wait_sector_num[index] = -1;
+ }
+
+ ret = convert_co_write(s, sector_num, n, buf, status);
+ if (ret < 0) {
+ error_report("error while writing sector %" PRId64
+ ": %s", sector_num, strerror(-ret));
+ s->ret = ret;
+ goto out;
+ }
+
+ if (s->wr_in_order) {
+ /* reenter the coroutine that might have waited
+ * for this write to complete */
+ s->wr_offs = sector_num + n;
+ for (i = 0; i < s->num_coroutines; i++) {
+ if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
+ /*
+ * A -> B -> A cannot occur because A has
+ * s->wait_sector_num[i] == -1 during A -> B. Therefore
+ * B will never enter A during this time window.
+ */
+ qemu_coroutine_enter(s->co[i]);
+ break;
+ }
+ }
+ }
+ }
+
+out:
+ qemu_vfree(buf);
+ s->co[index] = NULL;
+ s->running_coroutines--;
+ if (!s->running_coroutines && s->ret == -EINPROGRESS) {
+ /* the convert job finished successfully */
+ s->ret = 0;
+ }
+}
+
+static int convert_do_copy(ImgConvertState *s)
+{
+ int ret, i, n;
+ int64_t sector_num = 0;
/* Check whether we have zero initialisation or can get it efficiently */
s->has_zero_init = s->min_sparse && !s->target_has_backing
@@ -1691,21 +1841,15 @@ static int convert_do_copy(ImgConvertState *s)
if (s->compressed) {
if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
error_report("invalid cluster size");
- ret = -EINVAL;
- goto fail;
+ return -EINVAL;
}
s->buf_sectors = s->cluster_sectors;
}
- buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
- /* Calculate allocated sectors for progress */
- s->allocated_sectors = 0;
- sector_num = 0;
while (sector_num < s->total_sectors) {
n = convert_iteration_sectors(s, sector_num);
if (n < 0) {
- ret = n;
- goto fail;
+ return n;
}
if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
{
@@ -1715,61 +1859,29 @@ static int convert_do_copy(ImgConvertState *s)
}
/* Do the copy */
- s->src_cur = 0;
- s->src_cur_offset = 0;
s->sector_next_status = 0;
+ s->ret = -EINPROGRESS;
- sector_num = 0;
- allocated_done = 0;
-
- while (sector_num < s->total_sectors) {
- n = convert_iteration_sectors(s, sector_num);
- if (n < 0) {
- ret = n;
- goto fail;
- }
- if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
- {
- allocated_done += n;
- qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
- 0);
- }
-
- if (s->status == BLK_DATA) {
- ret = convert_read(s, sector_num, n, buf);
- if (ret < 0) {
- error_report("error while reading sector %" PRId64
- ": %s", sector_num, strerror(-ret));
- goto fail;
- }
- } else if (!s->min_sparse && s->status == BLK_ZERO) {
- n = MIN(n, s->buf_sectors);
- memset(buf, 0, n * BDRV_SECTOR_SIZE);
- s->status = BLK_DATA;
- }
-
- ret = convert_write(s, sector_num, n, buf);
- if (ret < 0) {
- error_report("error while writing sector %" PRId64
- ": %s", sector_num, strerror(-ret));
- goto fail;
- }
+ qemu_co_mutex_init(&s->lock);
+ for (i = 0; i < s->num_coroutines; i++) {
+ s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
+ s->wait_sector_num[i] = -1;
+ qemu_coroutine_enter(s->co[i]);
+ }
- sector_num += n;
+ while (s->ret == -EINPROGRESS) {
+ main_loop_wait(false);
}
- if (s->compressed) {
+ if (s->compressed && !s->ret) {
/* signal EOF to align */
ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
if (ret < 0) {
- goto fail;
+ return ret;
}
}
- ret = 0;
-fail:
- qemu_vfree(buf);
- return ret;
+ return s->ret;
}
static int img_convert(int argc, char **argv)
@@ -1797,6 +1909,8 @@ static int img_convert(int argc, char **argv)
QemuOpts *sn_opts = NULL;
ImgConvertState state;
bool image_opts = false;
+ bool wr_in_order = true;
+ long num_coroutines = 8;
fmt = NULL;
out_fmt = "raw";
@@ -1812,7 +1926,7 @@ static int img_convert(int argc, char **argv)
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
- c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn",
+ c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W",
long_options, NULL);
if (c == -1) {
break;
@@ -1904,6 +2018,18 @@ static int img_convert(int argc, char **argv)
case 'n':
skip_create = 1;
break;
+ case 'm':
+ if (qemu_strtol(optarg, NULL, 0, &num_coroutines) ||
+ num_coroutines < 1 || num_coroutines > MAX_COROUTINES) {
+ error_report("Invalid number of coroutines. Allowed number of"
+ " coroutines is between 1 and %d", MAX_COROUTINES);
+ ret = -1;
+ goto fail_getopt;
+ }
+ break;
+ case 'W':
+ wr_in_order = false;
+ break;
case OPTION_OBJECT:
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
@@ -1923,6 +2049,12 @@ static int img_convert(int argc, char **argv)
goto fail_getopt;
}
+ if (!wr_in_order && compress) {
+ error_report("Out of order write and compress are mutually exclusive");
+ ret = -1;
+ goto fail_getopt;
+ }
+
/* Initialize before goto out */
if (quiet) {
progress = 0;
@@ -2163,6 +2295,8 @@ static int img_convert(int argc, char **argv)
.min_sparse = min_sparse,
.cluster_sectors = cluster_sectors,
.buf_sectors = bufsectors,
+ .wr_in_order = wr_in_order,
+ .num_coroutines = num_coroutines,
};
ret = convert_do_copy(&state);
@@ -3289,7 +3423,7 @@ static int img_resize(int argc, char **argv)
qemu_opts_del(param);
blk = img_open(image_opts, filename, fmt,
- BDRV_O_RDWR, false, quiet);
+ BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet);
if (!blk) {
ret = -1;
goto out;
diff --git a/qemu-img.texi b/qemu-img.texi
index 174aae38b7..c81db3e81c 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -137,6 +137,12 @@ Parameters to convert subcommand:
@item -n
Skip the creation of the target volume
+@item -m
+Number of parallel coroutines for the convert process
+@item -W
+Allow out-of-order writes to the destination. This option improves performance,
+but is only recommended for preallocated devices like host devices or other
+raw block devices.
@end table
Parameters to dd subcommand:
@@ -296,7 +302,7 @@ Error on reading data
@end table
-@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-m @var{num_coroutines}] [-W] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
Convert the disk image @var{filename} or a snapshot @var{snapshot_param}(@var{snapshot_id_or_name} is deprecated)
to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c}
@@ -326,6 +332,14 @@ skipped. This is useful for formats such as @code{rbd} if the target
volume has already been created with site specific options that cannot
be supplied through qemu-img.
+Out of order writes can be enabled with @code{-W} to improve performance.
+This is only recommended for preallocated devices like host devices or other
+raw block devices. Out of order write does not work in combination with
+creating compressed images.
+
+@var{num_coroutines} specifies how many coroutines work in parallel during
+the convert process (defaults to 8).
+
@item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output}
Dd copies from @var{input} file to @var{output} file converting it from
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 7ac1576d4c..2c48f9ce1a 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -83,6 +83,29 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc,
}
return 0;
}
+
+ /* Request additional permissions if necessary for this command. The caller
+ * is responsible for restoring the original permissions afterwards if this
+ * is what it wants. */
+ if (ct->perm && blk_is_available(blk)) {
+ uint64_t orig_perm, orig_shared_perm;
+ blk_get_perm(blk, &orig_perm, &orig_shared_perm);
+
+ if (ct->perm & ~orig_perm) {
+ uint64_t new_perm;
+ Error *local_err = NULL;
+ int ret;
+
+ new_perm = orig_perm | ct->perm;
+
+ ret = blk_set_perm(blk, new_perm, orig_shared_perm, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ return 0;
+ }
+ }
+ }
+
optind = 0;
return ct->cfunc(blk, argc, argv);
}
@@ -918,6 +941,7 @@ static const cmdinfo_t write_cmd = {
.name = "write",
.altname = "w",
.cfunc = write_f,
+ .perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
.args = "[-bcCfquz] [-P pattern] off len",
@@ -1093,6 +1117,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv);
static const cmdinfo_t writev_cmd = {
.name = "writev",
.cfunc = writev_f,
+ .perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
.args = "[-Cfq] [-P pattern] off len [len..]",
@@ -1392,6 +1417,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv);
static const cmdinfo_t aio_write_cmd = {
.name = "aio_write",
.cfunc = aio_write_f,
+ .perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
.args = "[-Cfiquz] [-P pattern] off len [len..]",
@@ -1556,6 +1582,7 @@ static const cmdinfo_t truncate_cmd = {
.name = "truncate",
.altname = "t",
.cfunc = truncate_f,
+ .perm = BLK_PERM_WRITE | BLK_PERM_RESIZE,
.argmin = 1,
.argmax = 1,
.args = "off",
@@ -1653,6 +1680,7 @@ static const cmdinfo_t discard_cmd = {
.name = "discard",
.altname = "d",
.cfunc = discard_f,
+ .perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
.args = "[-Cq] off len",
diff --git a/qtest.c b/qtest.c
index a6858272eb..5aa6636ca8 100644
--- a/qtest.c
+++ b/qtest.c
@@ -240,6 +240,7 @@ static void GCC_FMT_ATTR(2, 3) qtest_sendf(CharBackend *chr,
va_start(ap, fmt);
buffer = g_strdup_vprintf(fmt, ap);
qtest_send(chr, buffer);
+ g_free(buffer);
va_end(ap);
}
diff --git a/replay/Makefile.objs b/replay/Makefile.objs
index b2afd4030a..cee6539a23 100644
--- a/replay/Makefile.objs
+++ b/replay/Makefile.objs
@@ -6,3 +6,4 @@ common-obj-y += replay-input.o
common-obj-y += replay-char.o
common-obj-y += replay-snapshot.o
common-obj-y += replay-net.o
+common-obj-y += replay-audio.o
\ No newline at end of file
diff --git a/replay/replay-audio.c b/replay/replay-audio.c
new file mode 100644
index 0000000000..3d837434d4
--- /dev/null
+++ b/replay/replay-audio.c
@@ -0,0 +1,79 @@
+/*
+ * replay-audio.c
+ *
+ * Copyright (c) 2010-2017 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "sysemu/replay.h"
+#include "replay-internal.h"
+#include "sysemu/sysemu.h"
+#include "audio/audio.h"
+
+void replay_audio_out(int *played)
+{
+ if (replay_mode == REPLAY_MODE_RECORD) {
+ replay_save_instructions();
+ replay_mutex_lock();
+ replay_put_event(EVENT_AUDIO_OUT);
+ replay_put_dword(*played);
+ replay_mutex_unlock();
+ } else if (replay_mode == REPLAY_MODE_PLAY) {
+ replay_account_executed_instructions();
+ replay_mutex_lock();
+ if (replay_next_event_is(EVENT_AUDIO_OUT)) {
+ *played = replay_get_dword();
+ replay_finish_event();
+ replay_mutex_unlock();
+ } else {
+ replay_mutex_unlock();
+ error_report("Missing audio out event in the replay log");
+ abort();
+ }
+ }
+}
+
+void replay_audio_in(int *recorded, void *samples, int *wpos, int size)
+{
+ int pos;
+ uint64_t left, right;
+ if (replay_mode == REPLAY_MODE_RECORD) {
+ replay_save_instructions();
+ replay_mutex_lock();
+ replay_put_event(EVENT_AUDIO_IN);
+ replay_put_dword(*recorded);
+ replay_put_dword(*wpos);
+ for (pos = (*wpos - *recorded + size) % size ; pos != *wpos
+ ; pos = (pos + 1) % size) {
+ audio_sample_to_uint64(samples, pos, &left, &right);
+ replay_put_qword(left);
+ replay_put_qword(right);
+ }
+ replay_mutex_unlock();
+ } else if (replay_mode == REPLAY_MODE_PLAY) {
+ replay_account_executed_instructions();
+ replay_mutex_lock();
+ if (replay_next_event_is(EVENT_AUDIO_IN)) {
+ *recorded = replay_get_dword();
+ *wpos = replay_get_dword();
+ for (pos = (*wpos - *recorded + size) % size ; pos != *wpos
+ ; pos = (pos + 1) % size) {
+ left = replay_get_qword();
+ right = replay_get_qword();
+ audio_sample_from_uint64(samples, pos, left, right);
+ }
+ replay_finish_event();
+ replay_mutex_unlock();
+ } else {
+ replay_mutex_unlock();
+ error_report("Missing audio in event in the replay log");
+ abort();
+ }
+ }
+}
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index c26d0795f2..ed66ed803c 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -29,6 +29,10 @@ enum ReplayEvents {
/* for character device read all event */
EVENT_CHAR_READ_ALL,
EVENT_CHAR_READ_ALL_ERROR,
+ /* for audio out event */
+ EVENT_AUDIO_OUT,
+ /* for audio in event */
+ EVENT_AUDIO_IN,
/* for clock read/writes */
/* some of greater codes are reserved for clocks */
EVENT_CLOCK,
diff --git a/roms/openbios b/roms/openbios
-Subproject ef8a14e8afb47635c9c5f7524a52c3251827e29
+Subproject 0cd97cc904e71fbb461112f6756934ec6af890b
diff --git a/roms/seabios b/roms/seabios
-Subproject 8891697e3f7d84355420573efd98e94f1473676
+Subproject 5f4c7b13cdf9c450eb55645f4362ea58fa61b79
diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
index 14a27e7f6a..bcef7ee28e 100755
--- a/scripts/vmstate-static-checker.py
+++ b/scripts/vmstate-static-checker.py
@@ -85,6 +85,11 @@ def check_fields_match(name, s_field, d_field):
'xio3130-express-upstream-port': ['br.dev', 'parent_obj.parent_obj',
'br.dev.exp.aer_log',
'parent_obj.parent_obj.exp.aer_log'],
+ 'spapr_pci': ['dma_liobn[0]', 'mig_liobn',
+ 'mem_win_addr', 'mig_mem_win_addr',
+ 'mem_win_size', 'mig_mem_win_size',
+ 'io_win_addr', 'mig_io_win_addr',
+ 'io_win_size', 'mig_io_win_size'],
}
if not name in changed_names:
diff --git a/stubs/vmstate.c b/stubs/vmstate.c
index bbe158fe3b..6d52f29bb2 100644
--- a/stubs/vmstate.c
+++ b/stubs/vmstate.c
@@ -1,6 +1,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "migration/vmstate.h"
+#include "migration/migration.h"
const VMStateDescription vmstate_dummy = {};
@@ -19,3 +20,8 @@ void vmstate_unregister(DeviceState *dev,
void *opaque)
{
}
+
+/* Stub: performs no check, never touches @obj or @err, and returns 0. */
+int check_migratable(Object *obj, Error **err)
+{
+ return 0;
+}
diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h
index 8cd607e9a2..c2205e6077 100644
--- a/target/i386/cpu-qom.h
+++ b/target/i386/cpu-qom.h
@@ -48,7 +48,9 @@ typedef struct X86CPUDefinition X86CPUDefinition;
* X86CPUClass:
* @cpu_def: CPU model definition
* @kvm_required: Whether CPU model requires KVM to be enabled.
+ * @ordering: Ordering on the "-cpu help" CPU model list.
* @migration_safe: See CpuDefinitionInfo::migration_safe
+ * @static_model: See CpuDefinitionInfo::static
* @parent_realize: The parent class' realize handler.
* @parent_reset: The parent class' reset handler.
*
@@ -59,11 +61,15 @@ typedef struct X86CPUClass {
CPUClass parent_class;
/*< public >*/
- /* Should be eventually replaced by subclass-specific property defaults. */
+ /* CPU definition, automatically loaded by instance_init if not NULL.
+ * Should be eventually replaced by subclass-specific property defaults.
+ */
X86CPUDefinition *cpu_def;
bool kvm_required;
+ int ordering;
bool migration_safe;
+ bool static_model;
/* Optional description of CPU model.
* If unavailable, cpu_def->model_id is used */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b6f157dca3..89421c893b 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -29,10 +29,16 @@
#include "qemu/option.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qfloat.h"
#include "qapi-types.h"
#include "qapi-visit.h"
#include "qapi/visitor.h"
+#include "qom/qom-qobject.h"
#include "sysemu/arch_init.h"
#if defined(CONFIG_KVM)
@@ -1503,15 +1509,15 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value)
static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
bool migratable_only);
-#ifdef CONFIG_KVM
-
static bool lmce_supported(void)
{
- uint64_t mce_cap;
+ uint64_t mce_cap = 0;
+#ifdef CONFIG_KVM
if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) {
return false;
}
+#endif
return !!(mce_cap & MCG_LMCE_P);
}
@@ -1531,51 +1537,28 @@ static int cpu_x86_fill_model_id(char *str)
return 0;
}
-static X86CPUDefinition host_cpudef;
-
-static Property host_x86_cpu_properties[] = {
+static Property max_x86_cpu_properties[] = {
DEFINE_PROP_BOOL("migratable", X86CPU, migratable, true),
DEFINE_PROP_BOOL("host-cache-info", X86CPU, cache_info_passthrough, false),
DEFINE_PROP_END_OF_LIST()
};
-/* class_init for the "host" CPU model
- *
- * This function may be called before KVM is initialized.
- */
-static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
+static void max_x86_cpu_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
X86CPUClass *xcc = X86_CPU_CLASS(oc);
- uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
-
- xcc->kvm_required = true;
-
- host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
- x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx);
-
- host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
- host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
- host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12);
- host_cpudef.stepping = eax & 0x0F;
- cpu_x86_fill_model_id(host_cpudef.model_id);
+ xcc->ordering = 9;
- xcc->cpu_def = &host_cpudef;
xcc->model_description =
- "KVM processor with all supported host features "
- "(only available in KVM mode)";
-
- /* level, xlevel, xlevel2, and the feature words are initialized on
- * instance_init, because they require KVM to be initialized.
- */
+ "Enables all features supported by the accelerator in the current host";
- dc->props = host_x86_cpu_properties;
- /* Reason: host_x86_cpu_initfn() dies when !kvm_enabled() */
- dc->cannot_destroy_with_object_finalize_yet = true;
+ dc->props = max_x86_cpu_properties;
}
-static void host_x86_cpu_initfn(Object *obj)
+static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp);
+
+static void max_x86_cpu_initfn(Object *obj)
{
X86CPU *cpu = X86_CPU(obj);
CPUX86State *env = &cpu->env;
@@ -1584,10 +1567,24 @@ static void host_x86_cpu_initfn(Object *obj)
/* We can't fill the features array here because we don't know yet if
* "migratable" is true or false.
*/
- cpu->host_features = true;
+ cpu->max_features = true;
- /* If KVM is disabled, x86_cpu_realizefn() will report an error later */
if (kvm_enabled()) {
+ X86CPUDefinition host_cpudef = { };
+ uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+ host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
+ x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx);
+
+ host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
+ host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
+ host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12);
+ host_cpudef.stepping = eax & 0x0F;
+
+ cpu_x86_fill_model_id(host_cpudef.model_id);
+
+ x86_cpu_load_def(cpu, &host_cpudef, &error_abort);
+
env->cpuid_min_level =
kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX);
env->cpuid_min_xlevel =
@@ -1598,15 +1595,44 @@ static void host_x86_cpu_initfn(Object *obj)
if (lmce_supported()) {
object_property_set_bool(OBJECT(cpu), true, "lmce", &error_abort);
}
+ } else {
+ object_property_set_str(OBJECT(cpu), CPUID_VENDOR_AMD,
+ "vendor", &error_abort);
+ object_property_set_int(OBJECT(cpu), 6, "family", &error_abort);
+ object_property_set_int(OBJECT(cpu), 6, "model", &error_abort);
+ object_property_set_int(OBJECT(cpu), 3, "stepping", &error_abort);
+ object_property_set_str(OBJECT(cpu),
+ "QEMU TCG CPU version " QEMU_HW_VERSION,
+ "model-id", &error_abort);
}
object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort);
}
+/*
+ * Type description of the "max" CPU model: a direct child of TYPE_X86_CPU
+ * wired to max_x86_cpu_class_init() and max_x86_cpu_initfn().
+ */
+static const TypeInfo max_x86_cpu_type_info = {
+    .parent = TYPE_X86_CPU,
+    .name = X86_CPU_TYPE_NAME("max"),
+    .class_init = max_x86_cpu_class_init,
+    .instance_init = max_x86_cpu_initfn,
+};
+
+#ifdef CONFIG_KVM
+
+/*
+ * class_init for the "host" CPU model: flags the class as requiring KVM,
+ * gives it ordering 8 and sets its human-readable description.
+ */
+static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
+{
+    X86CPUClass *host_class = X86_CPU_CLASS(oc);
+
+    host_class->model_description =
+        "KVM processor with all supported host features "
+        "(only available in KVM mode)";
+    host_class->ordering = 8;
+    host_class->kvm_required = true;
+}
+
static const TypeInfo host_x86_cpu_type_info = {
.name = X86_CPU_TYPE_NAME("host"),
- .parent = TYPE_X86_CPU,
- .instance_init = host_x86_cpu_initfn,
+ .parent = X86_CPU_TYPE_NAME("max"),
.class_init = host_x86_cpu_class_init,
};
@@ -2060,7 +2086,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features,
}
}
-static void x86_cpu_load_features(X86CPU *cpu, Error **errp);
+static void x86_cpu_expand_features(X86CPU *cpu, Error **errp);
static int x86_cpu_filter_features(X86CPU *cpu);
/* Check for missing features that may prevent the CPU class from
@@ -2083,9 +2109,9 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc,
xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc))));
- x86_cpu_load_features(xc, &err);
+ x86_cpu_expand_features(xc, &err);
if (err) {
- /* Errors at x86_cpu_load_features should never happen,
+ /* Errors at x86_cpu_expand_features should never happen,
* but in case it does, just report the model as not
* runnable at all using the "type" property.
*/
@@ -2128,7 +2154,7 @@ static void listflags(FILE *f, fprintf_function print, const char **featureset)
}
}
-/* Sort alphabetically by type name, listing kvm_required models last. */
+/* Sort alphabetically by type name, respecting X86CPUClass::ordering. */
static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b)
{
ObjectClass *class_a = (ObjectClass *)a;
@@ -2137,9 +2163,8 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b)
X86CPUClass *cc_b = X86_CPU_CLASS(class_b);
const char *name_a, *name_b;
- if (cc_a->kvm_required != cc_b->kvm_required) {
- /* kvm_required items go last */
- return cc_a->kvm_required ? 1 : -1;
+ if (cc_a->ordering != cc_b->ordering) {
+ return cc_a->ordering - cc_b->ordering;
} else {
name_a = object_class_get_name(class_a);
name_b = object_class_get_name(class_b);
@@ -2161,7 +2186,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data)
CPUListState *s = user_data;
char *name = x86_cpu_class_get_model_name(cc);
const char *desc = cc->model_description;
- if (!desc) {
+ if (!desc && cc->cpu_def) {
desc = cc->cpu_def->model_id;
}
@@ -2210,6 +2235,7 @@ static void x86_cpu_definition_entry(gpointer data, gpointer user_data)
info->q_typename = g_strdup(object_class_get_name(oc));
info->migration_safe = cc->migration_safe;
info->has_migration_safe = true;
+ info->q_static = cc->static_model;
entry = g_malloc0(sizeof(*entry));
entry->value = info;
@@ -2247,31 +2273,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
return r;
}
-/*
- * Filters CPU feature words based on host availability of each feature.
- *
- * Returns: 0 if all flags are supported by the host, non-zero otherwise.
- */
-static int x86_cpu_filter_features(X86CPU *cpu)
-{
- CPUX86State *env = &cpu->env;
- FeatureWord w;
- int rv = 0;
-
- for (w = 0; w < FEATURE_WORDS; w++) {
- uint32_t host_feat =
- x86_cpu_get_supported_feature_word(w, false);
- uint32_t requested_features = env->features[w];
- env->features[w] &= host_feat;
- cpu->filtered_features[w] = requested_features & ~env->features[w];
- if (cpu->filtered_features[w]) {
- rv = 1;
- }
- }
-
- return rv;
-}
-
static void x86_cpu_report_filtered_features(X86CPU *cpu)
{
FeatureWord w;
@@ -2293,7 +2294,7 @@ static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props)
}
}
-/* Load data from X86CPUDefinition
+/* Load data from X86CPUDefinition into a X86CPU object
*/
static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
{
@@ -2302,6 +2303,11 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
char host_vendor[CPUID_VENDOR_SZ + 1];
FeatureWord w;
+ /*NOTE: any property set by this function should be returned by
+ * x86_cpu_static_props(), so static expansion of
+ * query-cpu-model-expansion is always complete.
+ */
+
/* CPU models only set _minimum_ values for level/xlevel: */
object_property_set_int(OBJECT(cpu), def->level, "min-level", errp);
object_property_set_int(OBJECT(cpu), def->xlevel, "min-xlevel", errp);
@@ -2346,6 +2352,212 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
}
+/*
+ * Return the dictionary whose keys name every property that may appear in
+ * the static expansion of a CPU model.  Every property set by
+ * x86_cpu_load_def() must be present here.
+ *
+ * The dictionary is built on the first call and kept in a function-local
+ * static; later calls return that same object.  All values are qnull():
+ * only the keys carry information.
+ */
+static QDict *x86_cpu_static_props(void)
+{
+    static const char *fixed_props[] = {
+        "min-level",
+        "min-xlevel",
+        "family",
+        "model",
+        "stepping",
+        "model-id",
+        "vendor",
+        "lmce",
+        NULL,
+    };
+    static QDict *cached;
+    const char **name;
+    FeatureWord w;
+
+    if (!cached) {
+        cached = qdict_new();
+
+        for (name = fixed_props; *name; name++) {
+            qdict_put_obj(cached, *name, qnull());
+        }
+
+        /* One key per named bit of every feature word. */
+        for (w = 0; w < FEATURE_WORDS; w++) {
+            FeatureWordInfo *info = &feature_word_info[w];
+            int bit;
+
+            for (bit = 0; bit < 32; bit++) {
+                if (info->feat_names[bit]) {
+                    qdict_put_obj(cached, info->feat_names[bit], qnull());
+                }
+            }
+        }
+    }
+
+    return cached;
+}
+
+/*
+ * Store the current value of QOM property @prop of @cpu under the key
+ * @prop in @props.  Reading the property uses &error_abort.
+ */
+static void x86_cpu_expand_prop(X86CPU *cpu, QDict *props, const char *prop)
+{
+    Object *obj = OBJECT(cpu);
+
+    qdict_put_obj(props, prop,
+                  object_property_get_qobject(obj, prop, &error_abort));
+}
+
+/*
+ * Fill @props with the value @cpu currently has for each key listed by
+ * x86_cpu_static_props(), so that the dictionary can recreate exactly the
+ * same CPU model.
+ */
+static void x86_cpu_to_dict(X86CPU *cpu, QDict *props)
+{
+    QDict *static_keys = x86_cpu_static_props();
+    const QDictEntry *ent = qdict_first(static_keys);
+
+    while (ent) {
+        x86_cpu_expand_prop(cpu, props, qdict_entry_key(ent));
+        ent = qdict_next(static_keys, ent);
+    }
+}
+
+/*
+ * Fill @props with the value of every QOM property of @cpu that has both
+ * a getter and a setter, so that the dictionary can recreate exactly the
+ * same CPU model.
+ *
+ * "hotplugged" is left out on purpose: it is configurable on the command
+ * line, yet it is set differently on CPUs created using "-cpu ... -smp ..."
+ * and on CPUs created on the fly by x86_cpu_from_model() for querying.
+ */
+static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props)
+{
+    ObjectPropertyIterator it;
+    ObjectProperty *p;
+
+    object_property_iter_init(&it, OBJECT(cpu));
+    while ((p = object_property_iter_next(&it)) != NULL) {
+        /* Read-only and write-only properties are skipped as well. */
+        if (p->get && p->set && strcmp(p->name, "hotplugged") != 0) {
+            x86_cpu_expand_prop(cpu, props, p->name);
+        }
+    }
+}
+
+/*
+ * Set each key/value pair of @props as a property on @obj, in dictionary
+ * iteration order.  Stops at the first property that fails to be set and
+ * hands that error to @errp.
+ */
+static void object_apply_props(Object *obj, QDict *props, Error **errp)
+{
+    const QDictEntry *ent = qdict_first(props);
+    Error *local_err = NULL;
+
+    while (ent) {
+        object_property_set_qobject(obj, qdict_entry_value(ent),
+                                    qdict_entry_key(ent), &local_err);
+        if (local_err) {
+            break;
+        }
+        ent = qdict_next(props, ent);
+    }
+
+    error_propagate(errp, local_err);
+}
+
+/*
+ * Create an X86CPU object for CPU model @model, apply the optional property
+ * dictionary @props to it and expand its features.
+ *
+ * Returns the new object, or NULL with the error stored in @errp when the
+ * model is unknown, a property cannot be set, or feature expansion fails.
+ * On failure the partially set up object is released.
+ */
+static X86CPU *x86_cpu_from_model(const char *model, QDict *props, Error **errp)
+{
+    X86CPUClass *xcc = X86_CPU_CLASS(cpu_class_by_name(TYPE_X86_CPU, model));
+    Error *local_err = NULL;
+    X86CPU *cpu = NULL;
+
+    if (xcc == NULL) {
+        error_setg(&local_err, "CPU model '%s' not found", model);
+    } else {
+        cpu = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc))));
+        if (props) {
+            object_apply_props(OBJECT(cpu), props, &local_err);
+        }
+        /* Only expand features when the properties applied cleanly. */
+        if (!local_err) {
+            x86_cpu_expand_features(cpu, &local_err);
+        }
+    }
+
+    if (local_err) {
+        error_propagate(errp, local_err);
+        object_unref(OBJECT(cpu));
+        cpu = NULL;
+    }
+    return cpu;
+}
+
+/*
+ * Expand the CPU model described by @model (name plus optional property
+ * dictionary) according to @type.
+ *
+ * - CPU_MODEL_EXPANSION_TYPE_STATIC: the result is expressed on top of the
+ *   "base" model and carries the properties listed by
+ *   x86_cpu_static_props().
+ * - CPU_MODEL_EXPANSION_TYPE_FULL: the result keeps the original model
+ *   name and carries every property that is both readable and writable
+ *   (see x86_cpu_to_dict_full()), with the static properties added on top.
+ *
+ * Returns a newly allocated CpuModelExpansionInfo, or NULL with @errp set
+ * when the model cannot be instantiated or @type is not supported.
+ */
+CpuModelExpansionInfo *
+arch_query_cpu_model_expansion(CpuModelExpansionType type,
+                               CpuModelInfo *model,
+                               Error **errp)
+{
+    X86CPU *xc = NULL;
+    Error *err = NULL;
+    CpuModelExpansionInfo *ret = g_new0(CpuModelExpansionInfo, 1);
+    QDict *props = NULL;
+    const char *base_name;
+
+    xc = x86_cpu_from_model(model->name,
+                            model->has_props ?
+                                qobject_to_qdict(model->props) :
+                                NULL, &err);
+    if (err) {
+        goto out;
+    }
+
+    props = qdict_new();
+
+    switch (type) {
+    case CPU_MODEL_EXPANSION_TYPE_STATIC:
+        /* Static expansion will be based on "base" only */
+        base_name = "base";
+        x86_cpu_to_dict(xc, props);
+        break;
+    case CPU_MODEL_EXPANSION_TYPE_FULL:
+        /* As we don't return every single property, full expansion needs
+         * to keep the original model name+props, and add extra
+         * properties on top of that.
+         */
+        base_name = model->name;
+        x86_cpu_to_dict_full(xc, props);
+        x86_cpu_to_dict(xc, props);
+        break;
+    default:
+        error_setg(&err, "Unsupported expansion type");
+        /* @props has not been handed over to @ret yet: release it here
+         * instead of leaking it on this error path.
+         */
+        QDECREF(props);
+        goto out;
+    }
+
+    /* From here on @props is owned by the returned structure. */
+    ret->model = g_new0(CpuModelInfo, 1);
+    ret->model->name = g_strdup(base_name);
+    ret->model->props = QOBJECT(props);
+    ret->model->has_props = true;
+
+out:
+    object_unref(OBJECT(xc));
+    if (err) {
+        error_propagate(errp, err);
+        qapi_free_CpuModelExpansionInfo(ret);
+        ret = NULL;
+    }
+    return ret;
+}
+
X86CPU *cpu_x86_init(const char *cpu_model)
{
return X86_CPU(cpu_generic_init(TYPE_X86_CPU, cpu_model));
@@ -3095,20 +3307,59 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
}
-/* Load CPUID data based on configured features */
-static void x86_cpu_load_features(X86CPU *cpu, Error **errp)
+/***** Steps involved in loading and filtering CPUID data
+ *
+ * When initializing and realizing a CPU object, the steps
+ * involved in setting up CPUID data are:
+ *
+ * 1) Loading CPU model definition (X86CPUDefinition). This is
+ * implemented by x86_cpu_load_def() and should be completely
+ * transparent, as it is done automatically by instance_init.
+ * No code should need to look at X86CPUDefinition structs
+ * outside instance_init.
+ *
+ * 2) CPU expansion. This is done by realize before CPUID
+ * filtering, and will make sure host/accelerator data is
+ * loaded for CPU models that depend on host capabilities
+ * (e.g. "host"). Done by x86_cpu_expand_features().
+ *
+ * 3) CPUID filtering. This initializes extra data related to
+ * CPUID, and checks if the host supports all capabilities
+ * required by the CPU. Runnability of a CPU model is
+ * determined at this step. Done by x86_cpu_filter_features().
+ *
+ * Some operations don't require all steps to be performed.
+ * More precisely:
+ *
+ * - CPU instance creation (instance_init) will run only CPU
+ * model loading. CPU expansion can't run at instance_init-time
+ * because host/accelerator data may not be available yet.
+ * - CPU realization will perform both CPU model expansion and CPUID
+ * filtering, and return an error in case one of them fails.
+ * - query-cpu-definitions needs to run all 3 steps. It needs
+ * to run CPUID filtering, as the 'unavailable-features'
+ * field is set based on the filtering results.
+ * - The query-cpu-model-expansion QMP command only needs to run
+ * CPU model loading and CPU expansion. It should not filter
+ * any CPUID data based on host capabilities.
+ */
+
+/* Expand CPU configuration data, based on configured features
+ * and host/accelerator capabilities when appropriate.
+ */
+static void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
{
CPUX86State *env = &cpu->env;
FeatureWord w;
GList *l;
Error *local_err = NULL;
- /*TODO: cpu->host_features incorrectly overwrites features
+ /*TODO: cpu->max_features incorrectly overwrites features
* set using "feat=on|off". Once we fix this, we can convert
* plus_features & minus_features to global properties
* inside x86_cpu_parse_featurestr() too.
*/
- if (cpu->host_features) {
+ if (cpu->max_features) {
for (w = 0; w < FEATURE_WORDS; w++) {
env->features[w] =
x86_cpu_get_supported_feature_word(w, cpu->migratable);
@@ -3173,6 +3424,32 @@ out:
}
}
+/*
+ * Restrict each CPU feature word to what
+ * x86_cpu_get_supported_feature_word(w, false) reports as available, and
+ * remember the requested-but-removed bits in cpu->filtered_features[].
+ *
+ * Returns: 0 if no requested flag had to be removed, non-zero otherwise.
+ */
+static int x86_cpu_filter_features(X86CPU *cpu)
+{
+    CPUX86State *env = &cpu->env;
+    int any_filtered = 0;
+    FeatureWord w;
+
+    for (w = 0; w < FEATURE_WORDS; w++) {
+        uint32_t supported = x86_cpu_get_supported_feature_word(w, false);
+        uint32_t wanted = env->features[w];
+        uint32_t dropped = wanted & ~supported;
+
+        env->features[w] = wanted & supported;
+        cpu->filtered_features[w] = dropped;
+        any_filtered |= (dropped != 0);
+    }
+
+    return any_filtered;
+}
+
#define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \
(env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \
(env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3)
@@ -3200,7 +3477,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
return;
}
- x86_cpu_load_features(cpu, &local_err);
+ x86_cpu_expand_features(cpu, &local_err);
if (local_err) {
goto out;
}
@@ -3619,7 +3896,9 @@ static void x86_cpu_initfn(Object *obj)
object_property_add_alias(obj, "sse4_1", obj, "sse4.1", &error_abort);
object_property_add_alias(obj, "sse4_2", obj, "sse4.2", &error_abort);
- x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort);
+ if (xcc->cpu_def) {
+ x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort);
+ }
}
static int64_t x86_cpu_get_arch_id(CPUState *cs)
@@ -3774,6 +4053,24 @@ static const TypeInfo x86_cpu_type_info = {
.class_init = x86_cpu_common_class_init,
};
+
+/*
+ * class_init for the "base" CPU model, used by query-cpu-model-expansion.
+ * Marks the class as a static, migration-safe model with ordering 8 and
+ * sets its description.
+ */
+static void x86_cpu_base_class_init(ObjectClass *oc, void *data)
+{
+    X86CPUClass *base_class = X86_CPU_CLASS(oc);
+
+    base_class->model_description =
+        "base CPU model type with no features enabled";
+    base_class->ordering = 8;
+    base_class->migration_safe = true;
+    base_class->static_model = true;
+}
+
+/*
+ * Type description of the "base" CPU model: a direct child of TYPE_X86_CPU
+ * whose class is set up by x86_cpu_base_class_init().
+ */
+static const TypeInfo x86_base_cpu_type_info = {
+    .parent = TYPE_X86_CPU,
+    .name = X86_CPU_TYPE_NAME("base"),
+    .class_init = x86_cpu_base_class_init,
+};
+
static void x86_cpu_register_types(void)
{
int i;
@@ -3782,6 +4079,8 @@ static void x86_cpu_register_types(void)
for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
x86_register_cpudef_type(&builtin_x86_defs[i]);
}
+ type_register_static(&max_x86_cpu_type_info);
+ type_register_static(&x86_base_cpu_type_info);
#ifdef CONFIG_KVM
type_register_static(&host_x86_cpu_type_info);
#endif
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 573f2aa988..12a39d590f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1211,7 +1211,7 @@ struct X86CPU {
bool enforce_cpuid;
bool expose_kvm;
bool migratable;
- bool host_features;
+ bool max_features; /* Enable all supported features automatically */
uint32_t apic_id;
/* Enables publishing of TSC increment and Local APIC bus frequencies to
diff --git a/target/ppc/Makefile.objs b/target/ppc/Makefile.objs
index a8c7a30cde..0057b319c0 100644
--- a/target/ppc/Makefile.objs
+++ b/target/ppc/Makefile.objs
@@ -1,8 +1,9 @@
obj-y += cpu-models.o
+obj-y += cpu.o
obj-y += translate.o
ifeq ($(CONFIG_SOFTMMU),y)
-obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o
-obj-$(TARGET_PPC64) += mmu-hash64.o arch_dump.o compat.o
+obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o arch_dump.o
+obj-$(TARGET_PPC64) += mmu-hash64.o compat.o
endif
obj-$(CONFIG_KVM) += kvm.o
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c
index 40282a1f50..28d9cc7d79 100644
--- a/target/ppc/arch_dump.c
+++ b/target/ppc/arch_dump.c
@@ -1,5 +1,5 @@
/*
- * writing ELF notes for ppc64 arch
+ * writing ELF notes for ppc{64,} arch
*
*
* Copyright IBM, Corp. 2013
@@ -19,36 +19,48 @@
#include "sysemu/dump.h"
#include "sysemu/kvm.h"
-struct PPC64UserRegStruct {
- uint64_t gpr[32];
- uint64_t nip;
- uint64_t msr;
- uint64_t orig_gpr3;
- uint64_t ctr;
- uint64_t link;
- uint64_t xer;
- uint64_t ccr;
- uint64_t softe;
- uint64_t trap;
- uint64_t dar;
- uint64_t dsisr;
- uint64_t result;
+#ifdef TARGET_PPC64
+#define ELFCLASS ELFCLASS64
+#define cpu_to_dump_reg cpu_to_dump64
+typedef uint64_t reg_t;
+typedef Elf64_Nhdr Elf_Nhdr;
+#else
+#define ELFCLASS ELFCLASS32
+#define cpu_to_dump_reg cpu_to_dump32
+typedef uint32_t reg_t;
+typedef Elf32_Nhdr Elf_Nhdr;
+#endif /* TARGET_PPC64 */
+
+struct PPCUserRegStruct {
+ reg_t gpr[32];
+ reg_t nip;
+ reg_t msr;
+ reg_t orig_gpr3;
+ reg_t ctr;
+ reg_t link;
+ reg_t xer;
+ reg_t ccr;
+ reg_t softe;
+ reg_t trap;
+ reg_t dar;
+ reg_t dsisr;
+ reg_t result;
} QEMU_PACKED;
-struct PPC64ElfPrstatus {
+struct PPCElfPrstatus {
char pad1[112];
- struct PPC64UserRegStruct pr_reg;
- uint64_t pad2[4];
+ struct PPCUserRegStruct pr_reg;
+ reg_t pad2[4];
} QEMU_PACKED;
-struct PPC64ElfFpregset {
+struct PPCElfFpregset {
uint64_t fpr[32];
- uint64_t fpscr;
+ reg_t fpscr;
} QEMU_PACKED;
-struct PPC64ElfVmxregset {
+struct PPCElfVmxregset {
ppc_avr_t avr[32];
ppc_avr_t vscr;
union {
@@ -57,26 +69,26 @@ struct PPC64ElfVmxregset {
} vrsave;
} QEMU_PACKED;
-struct PPC64ElfVsxregset {
+struct PPCElfVsxregset {
uint64_t vsr[32];
} QEMU_PACKED;
-struct PPC64ElfSperegset {
+struct PPCElfSperegset {
uint32_t evr[32];
uint64_t spe_acc;
uint32_t spe_fscr;
} QEMU_PACKED;
typedef struct noteStruct {
- Elf64_Nhdr hdr;
+ Elf_Nhdr hdr;
char name[5];
char pad3[3];
union {
- struct PPC64ElfPrstatus prstatus;
- struct PPC64ElfFpregset fpregset;
- struct PPC64ElfVmxregset vmxregset;
- struct PPC64ElfVsxregset vsxregset;
- struct PPC64ElfSperegset speregset;
+ struct PPCElfPrstatus prstatus;
+ struct PPCElfFpregset fpregset;
+ struct PPCElfVmxregset vmxregset;
+ struct PPCElfVsxregset vsxregset;
+ struct PPCElfSperegset speregset;
} contents;
} QEMU_PACKED Note;
@@ -85,12 +97,12 @@ typedef struct NoteFuncArg {
DumpState *state;
} NoteFuncArg;
-static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
{
int i;
- uint64_t cr;
- struct PPC64ElfPrstatus *prstatus;
- struct PPC64UserRegStruct *reg;
+ reg_t cr;
+ struct PPCElfPrstatus *prstatus;
+ struct PPCUserRegStruct *reg;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -101,25 +113,25 @@ static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
reg = &prstatus->pr_reg;
for (i = 0; i < 32; i++) {
- reg->gpr[i] = cpu_to_dump64(s, cpu->env.gpr[i]);
+ reg->gpr[i] = cpu_to_dump_reg(s, cpu->env.gpr[i]);
}
- reg->nip = cpu_to_dump64(s, cpu->env.nip);
- reg->msr = cpu_to_dump64(s, cpu->env.msr);
- reg->ctr = cpu_to_dump64(s, cpu->env.ctr);
- reg->link = cpu_to_dump64(s, cpu->env.lr);
- reg->xer = cpu_to_dump64(s, cpu_read_xer(&cpu->env));
+ reg->nip = cpu_to_dump_reg(s, cpu->env.nip);
+ reg->msr = cpu_to_dump_reg(s, cpu->env.msr);
+ reg->ctr = cpu_to_dump_reg(s, cpu->env.ctr);
+ reg->link = cpu_to_dump_reg(s, cpu->env.lr);
+ reg->xer = cpu_to_dump_reg(s, cpu_read_xer(&cpu->env));
cr = 0;
for (i = 0; i < 8; i++) {
cr |= (cpu->env.crf[i] & 15) << (4 * (7 - i));
}
- reg->ccr = cpu_to_dump64(s, cr);
+ reg->ccr = cpu_to_dump_reg(s, cr);
}
-static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
{
int i;
- struct PPC64ElfFpregset *fpregset;
+ struct PPCElfFpregset *fpregset;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -131,13 +143,13 @@ static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
for (i = 0; i < 32; i++) {
fpregset->fpr[i] = cpu_to_dump64(s, cpu->env.fpr[i]);
}
- fpregset->fpscr = cpu_to_dump64(s, cpu->env.fpscr);
+ fpregset->fpscr = cpu_to_dump_reg(s, cpu->env.fpscr);
}
-static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
{
int i;
- struct PPC64ElfVmxregset *vmxregset;
+ struct PPCElfVmxregset *vmxregset;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -164,10 +176,11 @@ static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
}
vmxregset->vscr.u32[3] = cpu_to_dump32(s, cpu->env.vscr);
}
-static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+
+static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
{
int i;
- struct PPC64ElfVsxregset *vsxregset;
+ struct PPCElfVsxregset *vsxregset;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -179,9 +192,10 @@ static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
vsxregset->vsr[i] = cpu_to_dump64(s, cpu->env.vsr[i]);
}
}
-static void ppc64_write_elf64_speregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+
+static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu)
{
- struct PPC64ElfSperegset *speregset;
+ struct PPCElfSperegset *speregset;
Note *note = &arg->note;
DumpState *s = arg->state;
@@ -197,11 +211,11 @@ static const struct NoteFuncDescStruct {
int contents_size;
void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu);
} note_func[] = {
- {sizeof(((Note *)0)->contents.prstatus), ppc64_write_elf64_prstatus},
- {sizeof(((Note *)0)->contents.fpregset), ppc64_write_elf64_fpregset},
- {sizeof(((Note *)0)->contents.vmxregset), ppc64_write_elf64_vmxregset},
- {sizeof(((Note *)0)->contents.vsxregset), ppc64_write_elf64_vsxregset},
- {sizeof(((Note *)0)->contents.speregset), ppc64_write_elf64_speregset},
+ {sizeof(((Note *)0)->contents.prstatus), ppc_write_elf_prstatus},
+ {sizeof(((Note *)0)->contents.fpregset), ppc_write_elf_fpregset},
+ {sizeof(((Note *)0)->contents.vmxregset), ppc_write_elf_vmxregset},
+ {sizeof(((Note *)0)->contents.vsxregset), ppc_write_elf_vsxregset},
+ {sizeof(((Note *)0)->contents.speregset), ppc_write_elf_speregset},
{ 0, NULL}
};
@@ -213,8 +227,9 @@ int cpu_get_dump_info(ArchDumpInfo *info,
PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
- info->d_machine = EM_PPC64;
- info->d_class = ELFCLASS64;
+ info->d_machine = PPC_ELF_MACHINE;
+ info->d_class = ELFCLASS;
+
if ((*pcc->interrupts_big_endian)(cpu)) {
info->d_endian = ELFDATA2MSB;
} else {
@@ -236,25 +251,19 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
int note_head_size;
const NoteFuncDesc *nf;
- if (class != ELFCLASS64) {
- return -1;
- }
- assert(machine == EM_PPC64);
-
- note_head_size = sizeof(Elf64_Nhdr);
-
+ note_head_size = sizeof(Elf_Nhdr);
for (nf = note_func; nf->note_contents_func; nf++) {
elf_note_size = elf_note_size + note_head_size + name_size +
- nf->contents_size;
+ nf->contents_size;
}
return (elf_note_size) * nr_cpus;
}
-static int ppc64_write_all_elf64_notes(const char *note_name,
- WriteCoreDumpFunction f,
- PowerPCCPU *cpu, int id,
- void *opaque)
+static int ppc_write_all_elf_notes(const char *note_name,
+ WriteCoreDumpFunction f,
+ PowerPCCPU *cpu, int id,
+ void *opaque)
{
NoteFuncArg arg = { .state = opaque };
int ret = -1;
@@ -282,5 +291,12 @@ int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
int cpuid, void *opaque)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
- return ppc64_write_all_elf64_notes("CORE", f, cpu, cpuid, opaque);
+ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque);
+}
+
+int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
+ int cpuid, void *opaque)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque);
}
diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
new file mode 100644
index 0000000000..28011668e7
--- /dev/null
+++ b/target/ppc/cpu.c
@@ -0,0 +1,47 @@
+/*
+ * PowerPC CPU routines for qemu.
+ *
+ * Copyright (c) 2017 Nikunj A Dadhania, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "cpu-models.h"
+
+/*
+ * Assemble the XER value from env->xer and the separately stored flags.
+ *
+ * SO, OV and CA are always merged in at their XER_* bit positions; OV32
+ * and CA32 are merged in only when is_isa300(env) holds.
+ */
+target_ulong cpu_read_xer(CPUPPCState *env)
+{
+    target_ulong value = env->xer |
+                         (env->so << XER_SO) |
+                         (env->ov << XER_OV) |
+                         (env->ca << XER_CA);
+
+    if (is_isa300(env)) {
+        value |= (env->ov32 << XER_OV32) | (env->ca32 << XER_CA32);
+    }
+
+    return value;
+}
+
+/*
+ * Split @xer into the separately stored flag fields (so, ov, ca, ov32,
+ * ca32) and store the remaining bits in env->xer.
+ *
+ * All five flags are written unconditionally; cpu_read_xer() decides,
+ * through is_isa300(), whether ov32/ca32 are reported back.
+ */
+void cpu_write_xer(CPUPPCState *env, target_ulong xer)
+{
+    /*
+     * Build the mask with target_ulong width.  With an unsigned long
+     * constant, the complemented mask would be zero-extended on hosts where
+     * unsigned long is narrower than target_ulong, and the AND below would
+     * then clear the upper bits of @xer as well.
+     */
+    const target_ulong flag_bits = ((target_ulong)1 << XER_SO) |
+                                   ((target_ulong)1 << XER_OV) |
+                                   ((target_ulong)1 << XER_CA) |
+                                   ((target_ulong)1 << XER_OV32) |
+                                   ((target_ulong)1 << XER_CA32);
+
+    env->so = (xer >> XER_SO) & 1;
+    env->ov = (xer >> XER_OV) & 1;
+    env->ca = (xer >> XER_CA) & 1;
+    env->ov32 = (xer >> XER_OV32) & 1;
+    env->ca32 = (xer >> XER_CA32) & 1;
+    env->xer = xer & ~flag_bits;
+}
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 425e79d52d..d33c17e646 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -223,11 +223,12 @@ enum {
typedef struct opc_handler_t opc_handler_t;
/*****************************************************************************/
-/* Types used to describe some PowerPC registers */
+/* Types used to describe some PowerPC registers etc. */
typedef struct DisasContext DisasContext;
typedef struct ppc_spr_t ppc_spr_t;
typedef union ppc_avr_t ppc_avr_t;
typedef union ppc_tlb_t ppc_tlb_t;
+typedef struct ppc_hash_pte64 ppc_hash_pte64_t;
/* SPR access micro-ops generations callbacks */
struct ppc_spr_t {
@@ -305,14 +306,6 @@ union ppc_tlb_t {
#define TLB_MAS 3
#endif
-#define SDR_32_HTABORG 0xFFFF0000UL
-#define SDR_32_HTABMASK 0x000001FFUL
-
-#if defined(TARGET_PPC64)
-#define SDR_64_HTABORG 0xFFFFFFFFFFFC0000ULL
-#define SDR_64_HTABSIZE 0x000000000000001FULL
-#endif /* defined(TARGET_PPC64 */
-
typedef struct ppc_slb_t ppc_slb_t;
struct ppc_slb_t {
uint64_t esid;
@@ -965,6 +958,8 @@ struct CPUPPCState {
target_ulong so;
target_ulong ov;
target_ulong ca;
+ target_ulong ov32;
+ target_ulong ca32;
/* Reservation address */
target_ulong reserve_addr;
/* Reservation value */
@@ -1005,12 +1000,7 @@ struct CPUPPCState {
/* tcg TLB needs flush (deferred slb inval instruction typically) */
#endif
/* segment registers */
- hwaddr htab_base;
- /* mask used to normalize hash value to PTEG index */
- hwaddr htab_mask;
target_ulong sr[32];
- /* externally stored hash table */
- uint8_t *external_htab;
/* BATs */
uint32_t nb_BATs;
target_ulong DBAT[2][8];
@@ -1218,6 +1208,14 @@ struct PPCVirtualHypervisor {
struct PPCVirtualHypervisorClass {
InterfaceClass parent;
void (*hypercall)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu);
+ hwaddr (*hpt_mask)(PPCVirtualHypervisor *vhyp);
+ const ppc_hash_pte64_t *(*map_hptes)(PPCVirtualHypervisor *vhyp,
+ hwaddr ptex, int n);
+ void (*unmap_hptes)(PPCVirtualHypervisor *vhyp,
+ const ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n);
+ void (*store_hpte)(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+ uint64_t pte0, uint64_t pte1);
};
#define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor"
@@ -1243,6 +1241,8 @@ int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg);
int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
int cpuid, void *opaque);
+int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
+ int cpuid, void *opaque);
#ifndef CONFIG_USER_ONLY
void ppc_cpu_do_system_reset(CPUState *cs);
extern const struct VMStateDescription vmstate_ppc_cpu;
@@ -1300,8 +1300,7 @@ void store_booke_tcr (CPUPPCState *env, target_ulong val);
void store_booke_tsr (CPUPPCState *env, target_ulong val);
void ppc_tlb_invalidate_all (CPUPPCState *env);
void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr);
-void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp);
-void cpu_ppc_set_papr(PowerPCCPU *cpu);
+void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp);
#endif
#endif
@@ -1372,11 +1371,15 @@ int ppc_compat_max_threads(PowerPCCPU *cpu);
#define XER_SO 31
#define XER_OV 30
#define XER_CA 29
+#define XER_OV32 19
+#define XER_CA32 18
#define XER_CMP 8
#define XER_BC 0
#define xer_so (env->so)
#define xer_ov (env->ov)
#define xer_ca (env->ca)
+#define xer_ov32 (env->ov32) /* dedicated OV32 field, not an alias of OV */
+#define xer_ca32 (env->ca32) /* dedicated CA32 field, not an alias of CA */
#define xer_cmp ((env->xer >> XER_CMP) & 0xFF)
#define xer_bc ((env->xer >> XER_BC) & 0x7F)
@@ -2343,18 +2346,9 @@ enum {
/*****************************************************************************/
-static inline target_ulong cpu_read_xer(CPUPPCState *env)
-{
- return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | (env->ca << XER_CA);
-}
-
-static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
-{
- env->so = (xer >> XER_SO) & 1;
- env->ov = (xer >> XER_OV) & 1;
- env->ca = (xer >> XER_CA) & 1;
- env->xer = xer & ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA));
-}
+#define is_isa300(ctx) (!!(ctx->insns_flags2 & PPC2_ISA300))
+target_ulong cpu_read_xer(CPUPPCState *env);
+void cpu_write_xer(CPUPPCState *env, target_ulong xer);
static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
target_ulong *cs_base, uint32_t *flags)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index dd0a8929b3..da4e1a62c9 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -28,6 +28,15 @@
/*****************************************************************************/
/* Fixed point operations helpers */
+/*
+ * Shared overflow bookkeeping for the divide-extended helpers below.
+ *
+ * A non-zero ov sets both env->ov and env->so; otherwise only env->ov is
+ * cleared and env->so is left as it was.
+ */
+static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
+{
+ if (unlikely(ov)) {
+ env->so = env->ov = 1;
+ } else {
+ env->ov = 0;
+ }
+}
+
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
uint32_t oe)
{
@@ -49,11 +58,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
}
if (oe) {
- if (unlikely(overflow)) {
- env->so = env->ov = 1;
- } else {
- env->ov = 0;
- }
+ helper_update_ov_legacy(env, overflow);
}
return (target_ulong)rt;
@@ -81,11 +86,7 @@ target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
}
if (oe) {
- if (unlikely(overflow)) {
- env->so = env->ov = 1;
- } else {
- env->ov = 0;
- }
+ helper_update_ov_legacy(env, overflow);
}
return (target_ulong)rt;
@@ -105,11 +106,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
}
if (oe) {
- if (unlikely(overflow)) {
- env->so = env->ov = 1;
- } else {
- env->ov = 0;
- }
+ helper_update_ov_legacy(env, overflow);
}
return rt;
@@ -127,12 +124,7 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
}
if (oe) {
-
- if (unlikely(overflow)) {
- env->so = env->ov = 1;
- } else {
- env->ov = 0;
- }
+ helper_update_ov_legacy(env, overflow);
}
return rt;
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 52bbea514a..acc40ece65 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1251,7 +1251,7 @@ static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
return ret;
}
- if (!env->external_htab) {
+ if (!cpu->vhyp) {
ppc_store_sdr1(env, sregs.u.s.sdr1);
}
@@ -2596,89 +2596,85 @@ void kvm_arch_init_irq_routing(KVMState *s)
{
}
-struct kvm_get_htab_buf {
- struct kvm_get_htab_header header;
- /*
- * We require one extra byte for read
- */
- target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
-};
-
-uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
+/*
+ * Read n consecutive HPTEs, starting at index ptex, from the KVM HTAB fd
+ * into hptes[].
+ *
+ * The data read from the fd is parsed as a sequence of
+ * kvm_get_htab_header records, each followed by n_valid HPTEs; the
+ * n_invalid entries announced by a header carry no data and are
+ * zero-filled in hptes[].  Both counts are clamped so that no more than
+ * n entries are ever written.  Failures are reported through hw_error().
+ */
+void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
- int htab_fd;
- struct kvm_get_htab_fd ghf;
- struct kvm_get_htab_buf *hpte_buf;
+ struct kvm_get_htab_fd ghf = {
+ .flags = 0,
+ .start_index = ptex,
+ };
+ int fd, rc;
+ int i;
- ghf.flags = 0;
- ghf.start_index = pte_index;
- htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
- if (htab_fd < 0) {
- goto error_out;
+ fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
+ if (fd < 0) {
+ hw_error("kvmppc_read_hptes: Unable to open HPT fd");
}
- hpte_buf = g_malloc0(sizeof(*hpte_buf));
- /*
- * Read the hpte group
- */
- if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
- goto out_close;
- }
+ i = 0;
+ while (i < n) {
+ struct kvm_get_htab_header *hdr;
+ int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
+ char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
- close(htab_fd);
- return (uint64_t)(uintptr_t) hpte_buf->hpte;
+ /*
+ * NOTE(review): a 0-byte read makes no progress in this loop;
+ * confirm the kernel never returns EOF for in-range indexes.
+ */
+ rc = read(fd, buf, sizeof(buf));
+ if (rc < 0) {
+ hw_error("kvmppc_read_hptes: Unable to read HPTEs");
+ }
-out_close:
- g_free(hpte_buf);
- close(htab_fd);
-error_out:
- return 0;
-}
+ hdr = (struct kvm_get_htab_header *)buf;
+ while ((i < n) && ((char *)hdr < (buf + rc))) {
+ int invalid = hdr->n_invalid, valid = hdr->n_valid;
-void kvmppc_hash64_free_pteg(uint64_t token)
-{
- struct kvm_get_htab_buf *htab_buf;
+ if (hdr->index != (ptex + i)) {
+ hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
+ " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
+ }
+
+ /* Never copy more entries than the caller's array has left */
+ if ((n - i) < valid) {
+ valid = n - i;
+ }
+ memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
+ i += valid;
- htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
- hpte);
- g_free(htab_buf);
- return;
+ if ((n - i) < invalid) {
+ invalid = n - i;
+ }
+ memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
+ /* Advance by the clamped count so i never runs past n */
+ i += invalid;
+
+ /* The next header follows the n_valid HPTEs present in the buffer */
+ hdr = (struct kvm_get_htab_header *)
+ ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
+ }
+ }
+
+ close(fd);
}
-void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
- target_ulong pte0, target_ulong pte1)
+/*
+ * Write a single HPTE (pte0/pte1) at index ptex through the KVM HTAB fd.
+ *
+ * The record written is one kvm_get_htab_header announcing a single valid
+ * entry, followed by the two doublewords converted with cpu_to_be64().
+ * The fd is requested with KVM_GET_HTAB_WRITE because it is written to,
+ * not read.  Failures are reported through hw_error().
+ */
+void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
- int htab_fd;
+ int fd, rc;
struct kvm_get_htab_fd ghf;
- struct kvm_get_htab_buf hpte_buf;
+ struct {
+ struct kvm_get_htab_header hdr;
+ uint64_t pte0;
+ uint64_t pte1;
+ } buf;
- ghf.flags = 0;
+ ghf.flags = KVM_GET_HTAB_WRITE;
ghf.start_index = 0; /* Ignored */
- htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
- if (htab_fd < 0) {
- goto error_out;
- }
-
- hpte_buf.header.n_valid = 1;
- hpte_buf.header.n_invalid = 0;
- hpte_buf.header.index = pte_index;
- hpte_buf.hpte[0] = pte0;
- hpte_buf.hpte[1] = pte1;
- /*
- * Write the hpte entry.
- * CAUTION: write() has the warn_unused_result attribute. Hence we
- * need to check the return value, even though we do nothing.
- */
- if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
- goto out_close;
+ fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
+ if (fd < 0) {
+ hw_error("kvmppc_write_hpte: Unable to open HPT fd");
}
-out_close:
- close(htab_fd);
- return;
+ buf.hdr.n_valid = 1;
+ buf.hdr.n_invalid = 0;
+ buf.hdr.index = ptex;
+ buf.pte0 = cpu_to_be64(pte0);
+ buf.pte1 = cpu_to_be64(pte1);
-error_out:
- return;
+ /* A short write is treated the same as a failed one */
+ rc = write(fd, &buf, sizeof(buf));
+ if (rc != sizeof(buf)) {
+ hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
+ }
+ close(fd);
}
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 8da2ee418a..8e9f42d0c6 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -49,11 +49,8 @@ int kvmppc_get_htab_fd(bool write);
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
uint16_t n_valid, uint16_t n_invalid);
-uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index);
-void kvmppc_hash64_free_pteg(uint64_t token);
-
-void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
- target_ulong pte0, target_ulong pte1);
+void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n);
+void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1);
bool kvmppc_has_cap_fixup_hcalls(void);
bool kvmppc_has_cap_htm(void);
int kvmppc_enable_hwrng(void);
@@ -234,20 +231,13 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
abort();
}
-static inline uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu,
- target_ulong pte_index)
-{
- abort();
-}
-
-static inline void kvmppc_hash64_free_pteg(uint64_t token)
+static inline void kvmppc_read_hptes(ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n)
{
abort();
}
-static inline void kvmppc_hash64_write_pte(CPUPPCState *env,
- target_ulong pte_index,
- target_ulong pte0, target_ulong pte1)
+static inline void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
abort();
}
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index df9f7a4e05..6cb3a48db1 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -76,7 +76,7 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id)
qemu_get_betls(f, &env->pb[i]);
for (i = 0; i < 1024; i++)
qemu_get_betls(f, &env->spr[i]);
- if (!env->external_htab) {
+ if (!cpu->vhyp) {
ppc_store_sdr1(env, sdr1);
}
qemu_get_be32s(f, &env->vscr);
@@ -228,8 +228,7 @@ static int cpu_post_load(void *opaque, int version_id)
env->IBAT[1][i+4] = env->spr[SPR_IBAT4U + 2*i + 1];
}
- if (!env->external_htab) {
- /* Restore htab_base and htab_mask variables */
+ if (!cpu->vhyp) {
ppc_store_sdr1(env, env->spr[SPR_SDR1]);
}
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index ab432bafaf..fa573dd7d2 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -82,11 +82,9 @@ void helper_store_sdr1(CPUPPCState *env, target_ulong val)
{
PowerPCCPU *cpu = ppc_env_get_cpu(env);
- if (!env->external_htab) {
- if (env->spr[SPR_SDR1] != val) {
- ppc_store_sdr1(env, val);
- tlb_flush(CPU(cpu));
- }
+ if (env->spr[SPR_SDR1] != val) {
+ ppc_store_sdr1(env, val);
+ tlb_flush(CPU(cpu));
}
}
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index 29bace622a..03ae3c1279 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -304,9 +304,9 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong mask = ppc_hash32_hpt_mask(cpu);
- return (hash * HASH_PTEG_SIZE_32) & env->htab_mask;
+ return (hash * HASH_PTEG_SIZE_32) & mask;
}
static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
@@ -339,7 +339,6 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
target_ulong sr, target_ulong eaddr,
ppc_hash_pte32_t *pte)
{
- CPUPPCState *env = &cpu->env;
hwaddr pteg_off, pte_offset;
hwaddr hash;
uint32_t vsid, pgidx, ptem;
@@ -353,21 +352,22 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx
" htab_mask " TARGET_FMT_plx
" hash " TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, hash);
+ ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash);
/* Primary PTEG lookup */
qemu_log_mask(CPU_LOG_MMU, "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=%" PRIx32 " ptem=%" PRIx32
" hash=" TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, vsid, ptem, hash);
+ ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu),
+ vsid, ptem, hash);
pteg_off = get_pteg_offset32(cpu, hash);
pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 0, ptem, pte);
if (pte_offset == -1) {
/* Secondary PTEG lookup */
qemu_log_mask(CPU_LOG_MMU, "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=%" PRIx32 " api=%" PRIx32
- " hash=" TARGET_FMT_plx "\n", env->htab_base,
- env->htab_mask, vsid, ptem, ~hash);
+ " hash=" TARGET_FMT_plx "\n", ppc_hash32_hpt_base(cpu),
+ ppc_hash32_hpt_mask(cpu), vsid, ptem, ~hash);
pteg_off = get_pteg_offset32(cpu, ~hash);
pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 1, ptem, pte);
}
diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
index 5b9fb08d1a..898021f0d8 100644
--- a/target/ppc/mmu-hash32.h
+++ b/target/ppc/mmu-hash32.h
@@ -44,6 +44,8 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
/*
* Hash page table definitions
*/
+#define SDR_32_HTABORG 0xFFFF0000UL
+#define SDR_32_HTABMASK 0x000001FFUL
#define HPTES_PER_GROUP 8
#define HASH_PTE_SIZE_32 8
@@ -65,42 +67,46 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
#define HPTE32_R_WIMG 0x00000078
#define HPTE32_R_PP 0x00000003
+/* Base address of the 32-bit hash page table: the HTABORG bits of SDR1. */
+static inline hwaddr ppc_hash32_hpt_base(PowerPCCPU *cpu)
+{
+ return cpu->env.spr[SPR_SDR1] & SDR_32_HTABORG;
+}
+
+/*
+ * Mask applied to PTEG byte offsets in the 32-bit hash page table: the
+ * HTABMASK bits of SDR1 shifted up by 16, with the low 16 bits all set.
+ */
+static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
+{
+ return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF;
+}
+
static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu,
hwaddr pte_offset)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong base = ppc_hash32_hpt_base(cpu);
- assert(!env->external_htab); /* Not supported on 32-bit for now */
- return ldl_phys(CPU(cpu)->as, env->htab_base + pte_offset);
+ return ldl_phys(CPU(cpu)->as, base + pte_offset);
}
static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu,
hwaddr pte_offset)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong base = ppc_hash32_hpt_base(cpu);
- assert(!env->external_htab); /* Not supported on 32-bit for now */
- return ldl_phys(CPU(cpu)->as,
- env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2);
+ return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
}
static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu,
hwaddr pte_offset, target_ulong pte0)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong base = ppc_hash32_hpt_base(cpu);
- assert(!env->external_htab); /* Not supported on 32-bit for now */
- stl_phys(CPU(cpu)->as, env->htab_base + pte_offset, pte0);
+ stl_phys(CPU(cpu)->as, base + pte_offset, pte0);
}
static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu,
hwaddr pte_offset, target_ulong pte1)
{
- CPUPPCState *env = &cpu->env;
+ target_ulong base = ppc_hash32_hpt_base(cpu);
- assert(!env->external_htab); /* Not supported on 32-bit for now */
- stl_phys(CPU(cpu)->as,
- env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
+ stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
}
typedef struct {
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index 76669ed82c..d44f2bb432 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -27,6 +27,7 @@
#include "kvm_ppc.h"
#include "mmu-hash64.h"
#include "exec/log.h"
+#include "hw/hw.h"
//#define DEBUG_SLB
@@ -37,12 +38,6 @@
#endif
/*
- * Used to indicate that a CPU has its hash page table (HPT) managed
- * within the host kernel
- */
-#define MMU_HASH64_KVM_MANAGED_HPT ((void *)-1)
-
-/*
* SLB handling
*/
@@ -294,55 +289,6 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb)
return rt;
}
-/*
- * 64-bit hash table MMU handling
- */
-void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value,
- Error **errp)
-{
- CPUPPCState *env = &cpu->env;
- target_ulong htabsize = value & SDR_64_HTABSIZE;
-
- env->spr[SPR_SDR1] = value;
- if (htabsize > 28) {
- error_setg(errp,
- "Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
- htabsize);
- htabsize = 28;
- }
- env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1;
- env->htab_base = value & SDR_64_HTABORG;
-}
-
-void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift,
- Error **errp)
-{
- CPUPPCState *env = &cpu->env;
- Error *local_err = NULL;
-
- if (hpt) {
- env->external_htab = hpt;
- } else {
- env->external_htab = MMU_HASH64_KVM_MANAGED_HPT;
- }
- ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18),
- &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
-
- /* Not strictly necessary, but makes it clearer that an external
- * htab is in use when debugging */
- env->htab_base = -1;
-
- if (kvm_enabled()) {
- if (kvmppc_put_books_sregs(cpu) < 0) {
- error_setg(errp, "Unable to update SDR1 in KVM");
- }
- }
-}
-
static int ppc_hash64_pte_prot(PowerPCCPU *cpu,
ppc_slb_t *slb, ppc_hash_pte64_t pte)
{
@@ -431,34 +377,43 @@ static int ppc_hash64_amr_prot(PowerPCCPU *cpu, ppc_hash_pte64_t pte)
return prot;
}
-uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index)
+/*
+ * Obtain a read-only view of n consecutive HPTEs starting at index ptex.
+ *
+ * With a virtual hypervisor attached the request is delegated to its
+ * map_hptes hook.  Otherwise the entries are mapped from the CPU's
+ * address space at the SDR1-derived table base; NULL is returned when
+ * that base is zero, and a partial mapping is reported via hw_error().
+ * The result is handed back with ppc_hash64_unmap_hptes().
+ */
+const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
+ hwaddr ptex, int n)
{
- uint64_t token = 0;
- hwaddr pte_offset;
+ hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
+ hwaddr base = ppc_hash64_hpt_base(cpu);
+ hwaddr plen = n * HASH_PTE_SIZE_64;
+ const ppc_hash_pte64_t *hptes;
+
+ if (cpu->vhyp) {
+ PPCVirtualHypervisorClass *vhc =
+ PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+ return vhc->map_hptes(cpu->vhyp, ptex, n);
+ }
- pte_offset = pte_index * HASH_PTE_SIZE_64;
- if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) {
- /*
- * HTAB is controlled by KVM. Fetch the PTEG into a new buffer.
- */
- token = kvmppc_hash64_read_pteg(cpu, pte_index);
- } else if (cpu->env.external_htab) {
- /*
- * HTAB is controlled by QEMU. Just point to the internally
- * accessible PTEG.
- */
- token = (uint64_t)(uintptr_t) cpu->env.external_htab + pte_offset;
- } else if (cpu->env.htab_base) {
- token = cpu->env.htab_base + pte_offset;
+ if (!base) {
+ return NULL;
}
- return token;
+
+ /* plen is in/out: on return it holds the length actually mapped */
+ hptes = address_space_map(CPU(cpu)->as, base + pte_offset, &plen, false);
+ if (plen < (n * HASH_PTE_SIZE_64)) {
+ hw_error("%s: Unable to map all requested HPTEs\n", __func__);
+ }
+ return hptes;
}
-void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token)
+/*
+ * Release a view of n HPTEs previously obtained for index ptex.
+ *
+ * With a virtual hypervisor attached this is delegated to its
+ * unmap_hptes hook; otherwise the address-space mapping covering the n
+ * entries is unmapped as a read-only access.
+ */
+void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n)
{
- if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) {
- kvmppc_hash64_free_pteg(token);
+ if (cpu->vhyp) {
+ PPCVirtualHypervisorClass *vhc =
+ PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+ vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n);
+ return;
}
+
+ address_space_unmap(CPU(cpu)->as, (void *)hptes, n * HASH_PTE_SIZE_64,
+ false, n * HASH_PTE_SIZE_64);
}
static unsigned hpte_page_shift(const struct ppc_one_seg_page_size *sps,
@@ -503,20 +458,19 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
target_ulong ptem,
ppc_hash_pte64_t *pte, unsigned *pshift)
{
- CPUPPCState *env = &cpu->env;
int i;
- uint64_t token;
+ const ppc_hash_pte64_t *pteg;
target_ulong pte0, pte1;
- target_ulong pte_index;
+ target_ulong ptex;
- pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP;
- token = ppc_hash64_start_access(cpu, pte_index);
- if (!token) {
+ ptex = (hash & ppc_hash64_hpt_mask(cpu)) * HPTES_PER_GROUP;
+ pteg = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+ if (!pteg) {
return -1;
}
for (i = 0; i < HPTES_PER_GROUP; i++) {
- pte0 = ppc_hash64_load_hpte0(cpu, token, i);
- pte1 = ppc_hash64_load_hpte1(cpu, token, i);
+ pte0 = ppc_hash64_hpte0(cpu, pteg, i);
+ pte1 = ppc_hash64_hpte1(cpu, pteg, i);
/* This compares V, B, H (secondary) and the AVPN */
if (HPTE64_V_COMPARE(pte0, ptem)) {
@@ -536,11 +490,11 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
*/
pte->pte0 = pte0;
pte->pte1 = pte1;
- ppc_hash64_stop_access(cpu, token);
- return (pte_index + i) * HASH_PTE_SIZE_64;
+ ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP);
+ return ptex + i;
}
}
- ppc_hash64_stop_access(cpu, token);
+ ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP);
/*
* We didn't find a valid entry.
*/
@@ -552,8 +506,7 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu,
ppc_hash_pte64_t *pte, unsigned *pshift)
{
CPUPPCState *env = &cpu->env;
- hwaddr pte_offset;
- hwaddr hash;
+ hwaddr hash, ptex;
uint64_t vsid, epnmask, epn, ptem;
const struct ppc_one_seg_page_size *sps = slb->sps;
@@ -588,29 +541,30 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu,
qemu_log_mask(CPU_LOG_MMU,
"htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx
" hash " TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, hash);
+ ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu), hash);
/* Primary PTEG lookup */
qemu_log_mask(CPU_LOG_MMU,
"0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
" hash=" TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, vsid, ptem, hash);
- pte_offset = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift);
+ ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu),
+ vsid, ptem, hash);
+ ptex = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift);
- if (pte_offset == -1) {
+ if (ptex == -1) {
/* Secondary PTEG lookup */
ptem |= HPTE64_V_SECONDARY;
qemu_log_mask(CPU_LOG_MMU,
"1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=" TARGET_FMT_lx " api=" TARGET_FMT_lx
- " hash=" TARGET_FMT_plx "\n", env->htab_base,
- env->htab_mask, vsid, ptem, ~hash);
+ " hash=" TARGET_FMT_plx "\n", ppc_hash64_hpt_base(cpu),
+ ppc_hash64_hpt_mask(cpu), vsid, ptem, ~hash);
- pte_offset = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift);
+ ptex = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift);
}
- return pte_offset;
+ return ptex;
}
unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
@@ -708,7 +662,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
CPUPPCState *env = &cpu->env;
ppc_slb_t *slb;
unsigned apshift;
- hwaddr pte_offset;
+ hwaddr ptex;
ppc_hash_pte64_t pte;
int pp_prot, amr_prot, prot;
uint64_t new_pte1, dsisr;
@@ -792,8 +746,8 @@ skip_slb_search:
}
/* 4. Locate the PTE in the hash table */
- pte_offset = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift);
- if (pte_offset == -1) {
+ ptex = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift);
+ if (ptex == -1) {
dsisr = 0x40000000;
if (rwx == 2) {
ppc_hash64_set_isi(cs, env, dsisr);
@@ -806,7 +760,7 @@ skip_slb_search:
return 1;
}
qemu_log_mask(CPU_LOG_MMU,
- "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
+ "found PTE at index %08" HWADDR_PRIx "\n", ptex);
/* 5. Check access permissions */
@@ -849,8 +803,7 @@ skip_slb_search:
}
if (new_pte1 != pte.pte1) {
- ppc_hash64_store_hpte(cpu, pte_offset / HASH_PTE_SIZE_64,
- pte.pte0, new_pte1);
+ ppc_hash64_store_hpte(cpu, ptex, pte.pte0, new_pte1);
}
/* 7. Determine the real address from the PTE */
@@ -867,7 +820,7 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
{
CPUPPCState *env = &cpu->env;
ppc_slb_t *slb;
- hwaddr pte_offset, raddr;
+ hwaddr ptex, raddr;
ppc_hash_pte64_t pte;
unsigned apshift;
@@ -900,8 +853,8 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
}
}
- pte_offset = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift);
- if (pte_offset == -1) {
+ ptex = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift);
+ if (ptex == -1) {
return -1;
}
@@ -909,30 +862,24 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
& TARGET_PAGE_MASK;
}
-void ppc_hash64_store_hpte(PowerPCCPU *cpu,
- target_ulong pte_index,
- target_ulong pte0, target_ulong pte1)
+/*
+ * Store both doublewords of the HPTE at index ptex.
+ *
+ * With a virtual hypervisor attached the store is delegated to its
+ * store_hpte hook; otherwise pte0 and pte1 are written with stq_phys()
+ * at the SDR1-derived table base plus ptex * HASH_PTE_SIZE_64.
+ */
+void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+ uint64_t pte0, uint64_t pte1)
{
- CPUPPCState *env = &cpu->env;
+ hwaddr base = ppc_hash64_hpt_base(cpu);
+ hwaddr offset = ptex * HASH_PTE_SIZE_64;
- if (env->external_htab == MMU_HASH64_KVM_MANAGED_HPT) {
- kvmppc_hash64_write_pte(env, pte_index, pte0, pte1);
+ if (cpu->vhyp) {
+ PPCVirtualHypervisorClass *vhc =
+ PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+ vhc->store_hpte(cpu->vhyp, ptex, pte0, pte1);
return;
}
- pte_index *= HASH_PTE_SIZE_64;
- if (env->external_htab) {
- stq_p(env->external_htab + pte_index, pte0);
- stq_p(env->external_htab + pte_index + HASH_PTE_SIZE_64 / 2, pte1);
- } else {
- stq_phys(CPU(cpu)->as, env->htab_base + pte_index, pte0);
- stq_phys(CPU(cpu)->as,
- env->htab_base + pte_index + HASH_PTE_SIZE_64 / 2, pte1);
- }
+ /* pte1 sits in the second half of the 16-byte entry */
+ stq_phys(CPU(cpu)->as, base + offset, pte0);
+ stq_phys(CPU(cpu)->as, base + offset + HASH_PTE_SIZE_64 / 2, pte1);
}
-void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
- target_ulong pte_index,
+void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong ptex,
target_ulong pte0, target_ulong pte1)
{
/*
diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h
index 7a0b7fca41..54f1e37655 100644
--- a/target/ppc/mmu-hash64.h
+++ b/target/ppc/mmu-hash64.h
@@ -10,8 +10,8 @@ int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot,
hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr);
int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
int mmu_idx);
-void ppc_hash64_store_hpte(PowerPCCPU *cpu, target_ulong index,
- target_ulong pte0, target_ulong pte1);
+void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+ uint64_t pte0, uint64_t pte1);
void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
target_ulong pte_index,
target_ulong pte0, target_ulong pte1);
@@ -56,6 +56,9 @@ void ppc_hash64_update_rmls(CPUPPCState *env);
* Hash page table definitions
*/
+#define SDR_64_HTABORG 0x0FFFFFFFFFFC0000ULL
+#define SDR_64_HTABSIZE 0x000000000000001FULL
+
#define HPTES_PER_GROUP 8
#define HASH_PTE_SIZE_64 16
#define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP)
@@ -91,45 +94,41 @@ void ppc_hash64_update_rmls(CPUPPCState *env);
#define HPTE64_V_1TB_SEG 0x4000000000000000ULL
#define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL
-void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value,
- Error **errp);
-void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift,
- Error **errp);
-
-uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index);
-void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token);
-
-static inline target_ulong ppc_hash64_load_hpte0(PowerPCCPU *cpu,
- uint64_t token, int index)
+static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
{
- CPUPPCState *env = &cpu->env;
- uint64_t addr;
-
- addr = token + (index * HASH_PTE_SIZE_64);
- if (env->external_htab) {
- return ldq_p((const void *)(uintptr_t)addr);
- } else {
- return ldq_phys(CPU(cpu)->as, addr);
- }
+ return cpu->env.spr[SPR_SDR1] & SDR_64_HTABORG;
}
-static inline target_ulong ppc_hash64_load_hpte1(PowerPCCPU *cpu,
- uint64_t token, int index)
+static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
{
- CPUPPCState *env = &cpu->env;
- uint64_t addr;
-
- addr = token + (index * HASH_PTE_SIZE_64) + HASH_PTE_SIZE_64/2;
- if (env->external_htab) {
- return ldq_p((const void *)(uintptr_t)addr);
- } else {
- return ldq_phys(CPU(cpu)->as, addr);
+ if (cpu->vhyp) {
+ PPCVirtualHypervisorClass *vhc =
+ PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+ return vhc->hpt_mask(cpu->vhyp);
}
+ return (1ULL << ((cpu->env.spr[SPR_SDR1] & SDR_64_HTABSIZE) + 18 - 7)) - 1;
}
-typedef struct {
+struct ppc_hash_pte64 {
uint64_t pte0, pte1;
-} ppc_hash_pte64_t;
+};
+
+const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
+ hwaddr ptex, int n);
+void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
+ hwaddr ptex, int n);
+
+/*
+ * Load the first doubleword (pte0) of entry i from a mapped HPTE array
+ * using ldq_p().  The cpu argument is not used by the body.
+ */
+static inline uint64_t ppc_hash64_hpte0(PowerPCCPU *cpu,
+ const ppc_hash_pte64_t *hptes, int i)
+{
+ return ldq_p(&(hptes[i].pte0));
+}
+
+/*
+ * Load the second doubleword (pte1) of entry i from a mapped HPTE array
+ * using ldq_p().  The cpu argument is not used by the body.
+ */
+static inline uint64_t ppc_hash64_hpte1(PowerPCCPU *cpu,
+ const ppc_hash_pte64_t *hptes, int i)
+{
+ return ldq_p(&(hptes[i].pte1));
+}
#endif /* CONFIG_USER_ONLY */
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index eb2d482ef7..a1af3d6bf2 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -28,6 +28,7 @@
#include "exec/cpu_ldst.h"
#include "exec/log.h"
#include "helper_regs.h"
+#include "qemu/error-report.h"
//#define DEBUG_MMU
//#define DEBUG_BATS
@@ -466,6 +467,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
target_ulong eaddr, int rw, int type)
{
+ PowerPCCPU *cpu = ppc_env_get_cpu(env);
hwaddr hash;
target_ulong vsid;
int ds, pr, target_page_bits;
@@ -503,7 +505,7 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx
" htab_mask " TARGET_FMT_plx
" hash " TARGET_FMT_plx "\n",
- env->htab_base, env->htab_mask, hash);
+ ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash);
ctx->hash[0] = hash;
ctx->hash[1] = ~hash;
@@ -518,9 +520,11 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
uint32_t a0, a1, a2, a3;
qemu_log("Page table: " TARGET_FMT_plx " len " TARGET_FMT_plx
- "\n", env->htab_base, env->htab_mask + 0x80);
- for (curaddr = env->htab_base;
- curaddr < (env->htab_base + env->htab_mask + 0x80);
+ "\n", ppc_hash32_hpt_base(cpu),
+ ppc_hash32_hpt_mask(cpu) + 0x80);
+ for (curaddr = ppc_hash32_hpt_base(cpu);
+ curaddr < (ppc_hash32_hpt_base(cpu)
+ + ppc_hash32_hpt_mask(cpu) + 0x80);
curaddr += 16) {
a0 = ldl_phys(cs->as, curaddr);
a1 = ldl_phys(cs->as, curaddr + 4);
@@ -1205,12 +1209,13 @@ static void mmu6xx_dump_BATs(FILE *f, fprintf_function cpu_fprintf,
static void mmu6xx_dump_mmu(FILE *f, fprintf_function cpu_fprintf,
CPUPPCState *env)
{
+ PowerPCCPU *cpu = ppc_env_get_cpu(env);
ppc6xx_tlb_t *tlb;
target_ulong sr;
int type, way, entry, i;
- cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", env->htab_base);
- cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", env->htab_mask);
+ cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_base(cpu));
+ cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_mask(cpu));
cpu_fprintf(f, "\nSegment registers:\n");
for (i = 0; i < 32; i++) {
@@ -1592,9 +1597,9 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem;
tlb_miss:
env->error_code |= ctx.key << 19;
- env->spr[SPR_HASH1] = env->htab_base +
+ env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) +
get_pteg_offset32(cpu, ctx.hash[0]);
- env->spr[SPR_HASH2] = env->htab_base +
+ env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) +
get_pteg_offset32(cpu, ctx.hash[1]);
break;
case POWERPC_MMU_SOFT_74xx:
@@ -1997,26 +2002,28 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
/* Special registers manipulation */
void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
{
+ PowerPCCPU *cpu = ppc_env_get_cpu(env);
qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value);
- assert(!env->external_htab);
- env->spr[SPR_SDR1] = value;
+ assert(!cpu->vhyp);
#if defined(TARGET_PPC64)
if (env->mmu_model & POWERPC_MMU_64) {
- PowerPCCPU *cpu = ppc_env_get_cpu(env);
- Error *local_err = NULL;
+ target_ulong sdr_mask = SDR_64_HTABORG | SDR_64_HTABSIZE;
+ target_ulong htabsize = value & SDR_64_HTABSIZE;
- ppc_hash64_set_sdr1(cpu, value, &local_err);
- if (local_err) {
- error_report_err(local_err);
- error_free(local_err);
+ if (value & ~sdr_mask) {
+ error_report("Invalid bits 0x"TARGET_FMT_lx" set in SDR1",
+ value & ~sdr_mask);
+ value &= sdr_mask;
+ }
+ if (htabsize > 28) {
+ error_report("Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
+ htabsize);
+ return;
}
- } else
-#endif /* defined(TARGET_PPC64) */
- {
- /* FIXME: Should check for valid HTABMASK values */
- env->htab_mask = ((value & SDR_32_HTABMASK) << 16) | 0xFFFF;
- env->htab_base = value & SDR_32_HTABORG;
}
+#endif /* defined(TARGET_PPC64) */
+ /* FIXME: Should check for valid HTABMASK values in 32-bit case */
+ env->spr[SPR_SDR1] = value;
}
/* Segment registers load and store */
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 3ba2616b8a..6e6868b7a0 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -71,7 +71,7 @@ static TCGv cpu_lr;
#if defined(TARGET_PPC64)
static TCGv cpu_cfar;
#endif
-static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca;
+static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
static TCGv cpu_reserve;
static TCGv cpu_fpscr;
static TCGv_i32 cpu_access_type;
@@ -173,6 +173,10 @@ void ppc_translate_init(void)
offsetof(CPUPPCState, ov), "OV");
cpu_ca = tcg_global_mem_new(cpu_env,
offsetof(CPUPPCState, ca), "CA");
+ cpu_ov32 = tcg_global_mem_new(cpu_env,
+ offsetof(CPUPPCState, ov32), "OV32");
+ cpu_ca32 = tcg_global_mem_new(cpu_env,
+ offsetof(CPUPPCState, ca32), "CA32");
cpu_reserve = tcg_global_mem_new(cpu_env,
offsetof(CPUPPCState, reserve_addr),
@@ -806,12 +810,40 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
}
tcg_temp_free(t0);
if (NARROW_MODE(ctx)) {
- tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
+ tcg_gen_extract_tl(cpu_ov, cpu_ov, 31, 1);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+ }
+ } else {
+ if (is_isa300(ctx)) {
+ tcg_gen_extract_tl(cpu_ov32, cpu_ov, 31, 1);
+ }
+ tcg_gen_extract_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1, 1); /* MSB */
}
- tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
}
+static inline void gen_op_arith_compute_ca32(DisasContext *ctx,
+ TCGv res, TCGv arg0, TCGv arg1,
+ int sub)
+{
+ TCGv t0;
+
+ if (!is_isa300(ctx)) {
+ return;
+ }
+
+ t0 = tcg_temp_new();
+ if (sub) {
+ tcg_gen_eqv_tl(t0, arg0, arg1);
+ } else {
+ tcg_gen_xor_tl(t0, arg0, arg1);
+ }
+ tcg_gen_xor_tl(t0, t0, res);
+ tcg_gen_extract_tl(cpu_ca32, t0, 32, 1);
+ tcg_temp_free(t0);
+}
+
/* Common add function */
static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
TCGv arg2, bool add_ca, bool compute_ca,
@@ -838,6 +870,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
tcg_temp_free(t1);
tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */
tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ca32, cpu_ca);
+ }
} else {
TCGv zero = tcg_const_tl(0);
if (add_ca) {
@@ -846,6 +881,7 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
} else {
tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, arg2, zero);
}
+ gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 0);
tcg_temp_free(zero);
}
} else {
@@ -985,6 +1021,9 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
}
if (compute_ov) {
tcg_gen_extu_i32_tl(cpu_ov, t2);
+ if (is_isa300(ctx)) {
+ tcg_gen_extu_i32_tl(cpu_ov32, t2);
+ }
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
}
tcg_temp_free_i32(t0);
@@ -1056,6 +1095,9 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
}
if (compute_ov) {
tcg_gen_mov_tl(cpu_ov, t2);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ov32, t2);
+ }
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
}
tcg_temp_free_i64(t0);
@@ -1074,10 +1116,10 @@ static void glue(gen_, name)(DisasContext *ctx)
cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \
sign, compute_ov); \
}
-/* divwu divwu. divwuo divwuo. */
+/* divdu divdu. divduo divduo. */
GEN_INT_ARITH_DIVD(divdu, 0x0E, 0, 0);
GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1);
-/* divw divw. divwo divwo. */
+/* divd divd. divdo divdo. */
GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0);
GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1);
@@ -1249,6 +1291,9 @@ static void gen_mullwo(DisasContext *ctx)
tcg_gen_sari_i32(t0, t0, 31);
tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
tcg_gen_extu_i32_tl(cpu_ov, t0);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+ }
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
tcg_temp_free_i32(t0);
@@ -1310,6 +1355,9 @@ static void gen_mulldo(DisasContext *ctx)
tcg_gen_sari_i64(t0, t0, 63);
tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+ }
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
tcg_temp_free_i64(t0);
@@ -1353,17 +1401,22 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
tcg_temp_free(t1);
tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */
tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ca32, cpu_ca);
+ }
} else if (add_ca) {
TCGv zero, inv1 = tcg_temp_new();
tcg_gen_not_tl(inv1, arg1);
zero = tcg_const_tl(0);
tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero);
tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero);
+ gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, 0);
tcg_temp_free(zero);
tcg_temp_free(inv1);
} else {
tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1);
tcg_gen_sub_tl(t0, arg2, arg1);
+ gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 1);
}
} else if (add_ca) {
/* Since we're ignoring carry-out, we can simplify the
@@ -1442,7 +1495,10 @@ static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov)
static void gen_neg(DisasContext *ctx)
{
- gen_op_arith_neg(ctx, 0);
+ tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
+ if (unlikely(Rc(ctx->opcode))) {
+ gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
+ }
}
static void gen_nego(DisasContext *ctx)
@@ -3703,7 +3759,7 @@ static void gen_tdi(DisasContext *ctx)
/*** Processor control ***/
-static void gen_read_xer(TCGv dst)
+static void gen_read_xer(DisasContext *ctx, TCGv dst)
{
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
@@ -3715,6 +3771,12 @@ static void gen_read_xer(TCGv dst)
tcg_gen_or_tl(t0, t0, t1);
tcg_gen_or_tl(dst, dst, t2);
tcg_gen_or_tl(dst, dst, t0);
+ if (is_isa300(ctx)) {
+ tcg_gen_shli_tl(t0, cpu_ov32, XER_OV32);
+ tcg_gen_or_tl(dst, dst, t0);
+ tcg_gen_shli_tl(t0, cpu_ca32, XER_CA32);
+ tcg_gen_or_tl(dst, dst, t0);
+ }
tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
@@ -3722,14 +3784,16 @@ static void gen_read_xer(TCGv dst)
static void gen_write_xer(TCGv src)
{
+ /* Write all flags unconditionally; the isa300 check is done on read-back */
tcg_gen_andi_tl(cpu_xer, src,
- ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA)));
- tcg_gen_shri_tl(cpu_so, src, XER_SO);
- tcg_gen_shri_tl(cpu_ov, src, XER_OV);
- tcg_gen_shri_tl(cpu_ca, src, XER_CA);
- tcg_gen_andi_tl(cpu_so, cpu_so, 1);
- tcg_gen_andi_tl(cpu_ov, cpu_ov, 1);
- tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+ ~((1u << XER_SO) |
+ (1u << XER_OV) | (1u << XER_OV32) |
+ (1u << XER_CA) | (1u << XER_CA32)));
+ tcg_gen_extract_tl(cpu_ov32, src, XER_OV32, 1);
+ tcg_gen_extract_tl(cpu_ca32, src, XER_CA32, 1);
+ tcg_gen_extract_tl(cpu_so, src, XER_SO, 1);
+ tcg_gen_extract_tl(cpu_ov, src, XER_OV, 1);
+ tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1);
}
/* mcrxr */
@@ -3755,6 +3819,28 @@ static void gen_mcrxr(DisasContext *ctx)
tcg_gen_movi_tl(cpu_ca, 0);
}
+#ifdef TARGET_PPC64
+/* mcrxrx */
+static void gen_mcrxrx(DisasContext *ctx)
+{
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
+
+ /* copy OV and OV32 */
+ tcg_gen_shli_tl(t0, cpu_ov, 1);
+ tcg_gen_or_tl(t0, t0, cpu_ov32);
+ tcg_gen_shli_tl(t0, t0, 2);
+ /* copy CA and CA32 */
+ tcg_gen_shli_tl(t1, cpu_ca, 1);
+ tcg_gen_or_tl(t1, t1, cpu_ca32);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_trunc_tl_i32(dst, t0);
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+}
+#endif
+
/* mfcr mfocrf */
static void gen_mfcr(DisasContext *ctx)
{
@@ -6424,6 +6510,7 @@ GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x00000801, PPC_MISC),
#if defined(TARGET_PPC64)
GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B),
GEN_HANDLER_E(setb, 0x1F, 0x00, 0x04, 0x0003F801, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(mcrxrx, 0x1F, 0x00, 0x12, 0x007FF801, PPC_NONE, PPC2_ISA300),
#endif
GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC),
GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000000, PPC_MISC),
diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
index be35cbd3a2..37f74be984 100644
--- a/target/ppc/translate_init.c
+++ b/target/ppc/translate_init.c
@@ -107,7 +107,7 @@ static void spr_access_nop(DisasContext *ctx, int sprn, int gprn)
/* XER */
static void spr_read_xer (DisasContext *ctx, int gprn, int sprn)
{
- gen_read_xer(cpu_gpr[gprn]);
+ gen_read_xer(ctx, cpu_gpr[gprn]);
}
static void spr_write_xer (DisasContext *ctx, int sprn, int gprn)
@@ -740,10 +740,22 @@ static void gen_spr_ne_601 (CPUPPCState *env)
&spr_read_decr, &spr_write_decr,
0x00000000);
/* Memory management */
- spr_register(env, SPR_SDR1, "SDR1",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_sdr1,
- 0x00000000);
+#ifndef CONFIG_USER_ONLY
+ if (env->has_hv_mode) {
+ /* SDR1 is a hypervisor resource on CPUs which have a
+ * hypervisor mode */
+ spr_register_hv(env, SPR_SDR1, "SDR1",
+ SPR_NOACCESS, SPR_NOACCESS,
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_sdr1,
+ 0x00000000);
+ } else {
+ spr_register(env, SPR_SDR1, "SDR1",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_sdr1,
+ 0x00000000);
+ }
+#endif
}
/* BATs 0-3 */
@@ -8835,18 +8847,14 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
}
#if !defined(CONFIG_USER_ONLY)
-
-void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
-{
- cpu->vhyp = vhyp;
-}
-
-void cpu_ppc_set_papr(PowerPCCPU *cpu)
+void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
{
CPUPPCState *env = &cpu->env;
ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR];
ppc_spr_t *amor = &env->spr_cb[SPR_AMOR];
+ cpu->vhyp = vhyp;
+
/* PAPR always has exception vectors in RAM not ROM. To ensure this,
* MSR[IP] should never be set.
*
@@ -10489,11 +10497,12 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
#else
cc->get_phys_page_debug = ppc_cpu_get_phys_page_debug;
cc->vmsd = &vmstate_ppc_cpu;
-#if defined(TARGET_PPC64)
- cc->write_elf64_note = ppc64_cpu_write_elf64_note;
-#endif
#endif
cc->cpu_exec_enter = ppc_cpu_exec_enter;
+#if defined(CONFIG_SOFTMMU)
+ cc->write_elf64_note = ppc64_cpu_write_elf64_note;
+ cc->write_elf32_note = ppc32_cpu_write_elf32_note;
+#endif
cc->gdb_num_core_regs = 71;
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 655060cd9a..aa6734d54e 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -2448,8 +2448,31 @@ static void gen_ldstub_asi(DisasContext *dc, TCGv dst, TCGv addr, int insn)
gen_ldstub(dc, dst, addr, da.mem_idx);
break;
default:
- /* ??? Should be DAE_invalid_asi. */
- gen_exception(dc, TT_DATA_ACCESS);
+ /* ??? In theory, this should raise DAE_invalid_asi.
+ But the SS-20 roms do ldstuba [%l0] #ASI_M_CTL, %o1. */
+ if (parallel_cpus) {
+ gen_helper_exit_atomic(cpu_env);
+ } else {
+ TCGv_i32 r_asi = tcg_const_i32(da.asi);
+ TCGv_i32 r_mop = tcg_const_i32(MO_UB);
+ TCGv_i64 s64, t64;
+
+ save_state(dc);
+ t64 = tcg_temp_new_i64();
+ gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_mop);
+
+ s64 = tcg_const_i64(0xff);
+ gen_helper_st_asi(cpu_env, addr, s64, r_asi, r_mop);
+ tcg_temp_free_i64(s64);
+ tcg_temp_free_i32(r_mop);
+ tcg_temp_free_i32(r_asi);
+
+ tcg_gen_trunc_i64_tl(dst, t64);
+ tcg_temp_free_i64(t64);
+
+ /* End the TB. */
+ dc->npc = DYNAMIC_PC;
+ }
break;
}
}
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 6d227a5a6a..290de6dae6 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -866,7 +866,7 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
}
}
-static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
+static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
TCGArg b, bool b_const, TCGLabel *l)
{
intptr_t offset;
@@ -937,7 +937,7 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
}
}
-static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
+static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
TCGReg rh, TCGReg al, TCGReg ah,
tcg_target_long bl, tcg_target_long bh,
bool const_bl, bool const_bh, bool sub)
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 423a6f53c2..88dbf97853 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -72,7 +72,7 @@ static void free_test_data(test_data *data)
g_free(temp->asl_file);
}
- g_array_free(data->tables, false);
+ g_array_free(data->tables, true);
}
static void test_acpi_rsdp_address(test_data *data)
diff --git a/tests/e1000-test.c b/tests/e1000-test.c
index 59cab68a60..0c5fcdcc44 100644
--- a/tests/e1000-test.c
+++ b/tests/e1000-test.c
@@ -44,6 +44,7 @@ int main(int argc, char **argv)
path = g_strdup_printf("e1000/%s", models[i]);
qtest_add_data_func(path, models[i], test_device);
+ g_free(path);
}
return g_test_run();
diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c
index 8c42ca919f..c612dc64ec 100644
--- a/tests/e1000e-test.c
+++ b/tests/e1000e-test.c
@@ -99,7 +99,10 @@ static QPCIBus *test_bus;
static void e1000e_pci_foreach_callback(QPCIDevice *dev, int devfn, void *data)
{
- *(QPCIDevice **) data = dev;
+ QPCIDevice **res = data;
+
+ g_assert_null(*res);
+ *res = dev;
}
static QPCIDevice *e1000e_device_find(QPCIBus *bus)
@@ -403,6 +406,7 @@ static void data_test_clear(e1000e_device *d)
e1000e_device_clear(test_bus, d);
close(test_sockets[0]);
pc_alloc_uninit(test_alloc);
+ g_free(d->pci_dev);
qpci_free_pc(test_bus);
qtest_end();
}
diff --git a/tests/eepro100-test.c b/tests/eepro100-test.c
index ed23258b0f..bdc8a67d57 100644
--- a/tests/eepro100-test.c
+++ b/tests/eepro100-test.c
@@ -54,6 +54,7 @@ int main(int argc, char **argv)
path = g_strdup_printf("eepro100/%s", models[i]);
qtest_add_data_func(path, models[i], test_device);
+ g_free(path);
}
return g_test_run();
diff --git a/tests/endianness-test.c b/tests/endianness-test.c
index cf8d41b7b4..ed0bf52019 100644
--- a/tests/endianness-test.c
+++ b/tests/endianness-test.c
@@ -295,14 +295,17 @@ int main(int argc, char **argv)
path = g_strdup_printf("endianness/%s",
test_cases[i].machine);
qtest_add_data_func(path, &test_cases[i], test_endianness);
+ g_free(path);
path = g_strdup_printf("endianness/split/%s",
test_cases[i].machine);
qtest_add_data_func(path, &test_cases[i], test_endianness_split);
+ g_free(path);
path = g_strdup_printf("endianness/combine/%s",
test_cases[i].machine);
qtest_add_data_func(path, &test_cases[i], test_endianness_combine);
+ g_free(path);
}
return g_test_run();
diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c
index 6176e81ab2..24870b38f4 100644
--- a/tests/hd-geo-test.c
+++ b/tests/hd-geo-test.c
@@ -19,6 +19,8 @@
#include "qemu-common.h"
#include "libqtest.h"
+#define ARGV_SIZE 256
+
static char *create_test_img(int secs)
{
char *template = strdup("/tmp/qtest.XXXXXX");
@@ -66,7 +68,7 @@ static const CHST hd_chst[backend_last][mbr_last] = {
},
};
-static const char *img_file_name[backend_last];
+static char *img_file_name[backend_last];
static const CHST *cur_ide[4];
@@ -234,28 +236,36 @@ static int setup_ide(int argc, char *argv[], int argv_sz,
*/
static void test_ide_none(void)
{
- char *argv[256];
-
- setup_common(argv, ARRAY_SIZE(argv));
- qtest_start(g_strjoinv(" ", argv));
+ char **argv = g_new0(char *, ARGV_SIZE);
+ char *args;
+
+ setup_common(argv, ARGV_SIZE);
+ args = g_strjoinv(" ", argv);
+ qtest_start(args);
+ g_strfreev(argv);
+ g_free(args);
test_cmos();
qtest_end();
}
static void test_ide_mbr(bool use_device, MBRcontents mbr)
{
- char *argv[256];
+ char **argv = g_new0(char *, ARGV_SIZE);
+ char *args;
int argc;
Backend i;
const char *dev;
- argc = setup_common(argv, ARRAY_SIZE(argv));
+ argc = setup_common(argv, ARGV_SIZE);
for (i = 0; i < backend_last; i++) {
cur_ide[i] = &hd_chst[i][mbr];
dev = use_device ? (is_hd(cur_ide[i]) ? "ide-hd" : "ide-cd") : NULL;
- argc = setup_ide(argc, argv, ARRAY_SIZE(argv), i, dev, i, mbr, "");
+ argc = setup_ide(argc, argv, ARGV_SIZE, i, dev, i, mbr, "");
}
- qtest_start(g_strjoinv(" ", argv));
+ args = g_strjoinv(" ", argv);
+ qtest_start(args);
+ g_strfreev(argv);
+ g_free(args);
test_cmos();
qtest_end();
}
@@ -310,12 +320,13 @@ static void test_ide_device_mbr_chs(void)
static void test_ide_drive_user(const char *dev, bool trans)
{
- char *argv[256], *opts;
+ char **argv = g_new0(char *, ARGV_SIZE);
+ char *args, *opts;
int argc;
int secs = img_secs[backend_small];
const CHST expected_chst = { secs / (4 * 32) , 4, 32, trans };
- argc = setup_common(argv, ARRAY_SIZE(argv));
+ argc = setup_common(argv, ARGV_SIZE);
opts = g_strdup_printf("%s,%s%scyls=%d,heads=%d,secs=%d",
dev ?: "",
trans && dev ? "bios-chs-" : "",
@@ -323,11 +334,14 @@ static void test_ide_drive_user(const char *dev, bool trans)
expected_chst.cyls, expected_chst.heads,
expected_chst.secs);
cur_ide[0] = &expected_chst;
- argc = setup_ide(argc, argv, ARRAY_SIZE(argv),
+ argc = setup_ide(argc, argv, ARGV_SIZE,
0, dev ? opts : NULL, backend_small, mbr_chs,
dev ? "" : opts);
g_free(opts);
- qtest_start(g_strjoinv(" ", argv));
+ args = g_strjoinv(" ", argv);
+ qtest_start(args);
+ g_strfreev(argv);
+ g_free(args);
test_cmos();
qtest_end();
}
@@ -369,18 +383,22 @@ static void test_ide_device_user_chst(void)
*/
static void test_ide_drive_cd_0(void)
{
- char *argv[256];
+ char **argv = g_new0(char *, ARGV_SIZE);
+ char *args;
int argc, ide_idx;
Backend i;
- argc = setup_common(argv, ARRAY_SIZE(argv));
+ argc = setup_common(argv, ARGV_SIZE);
for (i = 0; i <= backend_empty; i++) {
ide_idx = backend_empty - i;
cur_ide[ide_idx] = &hd_chst[i][mbr_blank];
- argc = setup_ide(argc, argv, ARRAY_SIZE(argv),
+ argc = setup_ide(argc, argv, ARGV_SIZE,
ide_idx, NULL, i, mbr_blank, "");
}
- qtest_start(g_strjoinv(" ", argv));
+ args = g_strjoinv(" ", argv);
+ qtest_start(args);
+ g_strfreev(argv);
+ g_free(args);
test_cmos();
qtest_end();
}
@@ -418,6 +436,7 @@ int main(int argc, char **argv)
for (i = 0; i < backend_last; i++) {
if (img_file_name[i]) {
unlink(img_file_name[i]);
+ free(img_file_name[i]);
}
}
diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c
index da2d5a53f0..e9d05c87d1 100644
--- a/tests/i440fx-test.c
+++ b/tests/i440fx-test.c
@@ -134,6 +134,8 @@ static void test_i440fx_defaults(gconstpointer opaque)
/* 3.2.26 */
g_assert_cmpint(qpci_config_readb(dev, 0x93), ==, 0x00); /* TRC */
+ g_free(dev);
+ qpci_free_pc(bus);
qtest_end();
}
@@ -270,6 +272,9 @@ static void test_i440fx_pam(gconstpointer opaque)
/* Verify the area is not our new mask */
g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x82));
}
+
+ g_free(dev);
+ qpci_free_pc(bus);
qtest_end();
}
diff --git a/tests/ide-test.c b/tests/ide-test.c
index b57c2b1676..139ebc0ec6 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -339,6 +339,7 @@ static void test_bmdma_simple_rw(void)
g_assert(memcmp(buf, cmpbuf, len) == 0);
+ free_pci_device(dev);
g_free(buf);
g_free(cmpbuf);
}
@@ -369,6 +370,7 @@ static void test_bmdma_short_prdt(void)
prdt, ARRAY_SIZE(prdt), NULL);
g_assert_cmphex(status, ==, 0);
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+ free_pci_device(dev);
}
static void test_bmdma_one_sector_short_prdt(void)
@@ -398,6 +400,7 @@ static void test_bmdma_one_sector_short_prdt(void)
prdt, ARRAY_SIZE(prdt), NULL);
g_assert_cmphex(status, ==, 0);
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+ free_pci_device(dev);
}
static void test_bmdma_long_prdt(void)
@@ -426,6 +429,7 @@ static void test_bmdma_long_prdt(void)
prdt, ARRAY_SIZE(prdt), NULL);
g_assert_cmphex(status, ==, BM_STS_INTR);
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+ free_pci_device(dev);
}
static void test_bmdma_no_busmaster(void)
@@ -449,6 +453,7 @@ static void test_bmdma_no_busmaster(void)
* in practice. At least we want to be aware of any changes. */
g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR);
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+ free_pci_device(dev);
}
static void test_bmdma_setup(void)
@@ -525,6 +530,7 @@ static void test_identify(void)
assert_bit_set(buf[85], 0x20);
ide_test_quit();
+ free_pci_device(dev);
}
/*
@@ -563,6 +569,7 @@ static void make_dirty(uint8_t device)
assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
g_free(buf);
+ free_pci_device(dev);
}
static void test_flush(void)
@@ -609,6 +616,7 @@ static void test_flush(void)
assert_bit_clear(data, BSY | DF | ERR | DRQ);
ide_test_quit();
+ free_pci_device(dev);
}
static void test_retry_flush(const char *machine)
@@ -659,6 +667,7 @@ static void test_retry_flush(const char *machine)
assert_bit_clear(data, BSY | DF | ERR | DRQ);
ide_test_quit();
+ free_pci_device(dev);
}
static void test_flush_nodev(void)
@@ -676,6 +685,7 @@ static void test_flush_nodev(void)
/* Just testing that qemu doesn't crash... */
+ free_pci_device(dev);
ide_test_quit();
}
@@ -742,6 +752,7 @@ static uint8_t ide_wait_clear(uint8_t flag)
while (true) {
data = qpci_io_readb(dev, ide_bar, reg_status);
if (!(data & flag)) {
+ free_pci_device(dev);
return data;
}
if (difftime(time(NULL), st) > 5.0) {
@@ -851,6 +862,7 @@ static void cdrom_pio_impl(int nblocks)
g_free(pattern);
g_free(rx);
test_bmdma_teardown();
+ free_pci_device(dev);
}
static void test_cdrom_pio(void)
diff --git a/tests/ipmi-bt-test.c b/tests/ipmi-bt-test.c
index e84dd6889b..7e21a9bbcb 100644
--- a/tests/ipmi-bt-test.c
+++ b/tests/ipmi-bt-test.c
@@ -420,6 +420,7 @@ int main(int argc, char **argv)
" -device ipmi-bmc-extern,chardev=ipmi0,id=bmc0"
" -device isa-ipmi-bt,bmc=bmc0", emu_port);
qtest_start(cmdline);
+ g_free(cmdline);
qtest_irq_intercept_in(global_qtest, "ioapic");
qtest_add_func("/ipmi/extern/connect", test_connect);
qtest_add_func("/ipmi/extern/bt_base", test_bt_base);
diff --git a/tests/ipmi-kcs-test.c b/tests/ipmi-kcs-test.c
index 9cf0b34a33..178ffc1797 100644
--- a/tests/ipmi-kcs-test.c
+++ b/tests/ipmi-kcs-test.c
@@ -279,6 +279,7 @@ int main(int argc, char **argv)
cmdline = g_strdup_printf("-device ipmi-bmc-sim,id=bmc0"
" -device isa-ipmi-kcs,bmc=bmc0");
qtest_start(cmdline);
+ g_free(cmdline);
qtest_irq_intercept_in(global_qtest, "ioapic");
qtest_add_func("/ipmi/local/kcs_base", test_kcs_base);
qtest_add_func("/ipmi/local/kcs_abort", test_kcs_abort);
diff --git a/tests/libqos/usb.c b/tests/libqos/usb.c
index 72d7a961fe..0cdfaecda7 100644
--- a/tests/libqos/usb.c
+++ b/tests/libqos/usb.c
@@ -24,6 +24,11 @@ void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc, uint32_t devfn, int bar)
hc->bar = qpci_iomap(hc->dev, bar, NULL);
}
+void uhci_deinit(struct qhc *hc)
+{
+ g_free(hc->dev);
+}
+
void uhci_port_test(struct qhc *hc, int port, uint16_t expect)
{
uint16_t value = qpci_io_readw(hc->dev, hc->bar, 0x10 + 2 * port);
@@ -64,4 +69,5 @@ void usb_test_hotplug(const char *hcd_id, const int port,
g_assert(response);
g_assert(qdict_haskey(response, "event"));
g_assert(!strcmp(qdict_get_str(response, "event"), "DEVICE_DELETED"));
+ QDECREF(response);
}
diff --git a/tests/libqos/usb.h b/tests/libqos/usb.h
index 423dcfd82f..297cfc564d 100644
--- a/tests/libqos/usb.h
+++ b/tests/libqos/usb.h
@@ -11,6 +11,7 @@ struct qhc {
void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc,
uint32_t devfn, int bar);
void uhci_port_test(struct qhc *hc, int port, uint16_t expect);
+void uhci_deinit(struct qhc *hc);
void usb_test_hotplug(const char *bus_name, const int port,
void (*port_check)(void));
diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
index d4bf841f23..7ac15c04e1 100644
--- a/tests/libqos/virtio-pci.c
+++ b/tests/libqos/virtio-pci.c
@@ -24,9 +24,17 @@
typedef struct QVirtioPCIForeachData {
void (*func)(QVirtioDevice *d, void *data);
uint16_t device_type;
+ bool has_slot;
+ int slot;
void *user_data;
} QVirtioPCIForeachData;
+void qvirtio_pci_device_free(QVirtioPCIDevice *dev)
+{
+ g_free(dev->pdev);
+ g_free(dev);
+}
+
static QVirtioPCIDevice *qpcidevice_to_qvirtiodevice(QPCIDevice *pdev)
{
QVirtioPCIDevice *vpcidev;
@@ -49,16 +57,18 @@ static void qvirtio_pci_foreach_callback(
QVirtioPCIForeachData *d = data;
QVirtioPCIDevice *vpcidev = qpcidevice_to_qvirtiodevice(dev);
- if (vpcidev->vdev.device_type == d->device_type) {
+ if (vpcidev->vdev.device_type == d->device_type &&
+ (!d->has_slot || vpcidev->pdev->devfn == d->slot << 3)) {
d->func(&vpcidev->vdev, d->user_data);
} else {
- g_free(vpcidev);
+ qvirtio_pci_device_free(vpcidev);
}
}
static void qvirtio_pci_assign_device(QVirtioDevice *d, void *data)
{
QVirtioPCIDevice **vpcidev = data;
+ assert(!*vpcidev);
*vpcidev = (QVirtioPCIDevice *)d;
}
@@ -284,21 +294,39 @@ const QVirtioBus qvirtio_pci = {
.virtqueue_kick = qvirtio_pci_virtqueue_kick,
};
-void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
+static void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
+ bool has_slot, int slot,
void (*func)(QVirtioDevice *d, void *data), void *data)
{
QVirtioPCIForeachData d = { .func = func,
.device_type = device_type,
+ .has_slot = has_slot,
+ .slot = slot,
.user_data = data };
qpci_device_foreach(bus, PCI_VENDOR_ID_REDHAT_QUMRANET, -1,
- qvirtio_pci_foreach_callback, &d);
+ qvirtio_pci_foreach_callback, &d);
}
QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type)
{
QVirtioPCIDevice *dev = NULL;
- qvirtio_pci_foreach(bus, device_type, qvirtio_pci_assign_device, &dev);
+
+ qvirtio_pci_foreach(bus, device_type, false, 0,
+ qvirtio_pci_assign_device, &dev);
+
+ dev->vdev.bus = &qvirtio_pci;
+
+ return dev;
+}
+
+QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus,
+ uint16_t device_type, int slot)
+{
+ QVirtioPCIDevice *dev = NULL;
+
+ qvirtio_pci_foreach(bus, device_type, true, slot,
+ qvirtio_pci_assign_device, &dev);
dev->vdev.bus = &qvirtio_pci;
diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h
index 38c54c63ea..6ef19094cb 100644
--- a/tests/libqos/virtio-pci.h
+++ b/tests/libqos/virtio-pci.h
@@ -31,9 +31,11 @@ typedef struct QVirtQueuePCI {
extern const QVirtioBus qvirtio_pci;
-void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
- void (*func)(QVirtioDevice *d, void *data), void *data);
QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type);
+QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus,
+ uint16_t device_type, int slot);
+void qvirtio_pci_device_free(QVirtioPCIDevice *dev);
+
void qvirtio_pci_device_enable(QVirtioPCIDevice *d);
void qvirtio_pci_device_disable(QVirtioPCIDevice *d);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index e54354de8a..3a0e0d63a7 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -805,17 +805,7 @@ void qtest_add_data_func_full(const char *str, void *data,
GDestroyNotify data_free_func)
{
gchar *path = g_strdup_printf("/%s/%s", qtest_get_arch(), str);
-#if GLIB_CHECK_VERSION(2, 34, 0)
g_test_add_data_func_full(path, data, fn, data_free_func);
-#elif GLIB_CHECK_VERSION(2, 26, 0)
- /* back-compat casts, remove this once we can require new-enough glib */
- g_test_add_vtable(path, 0, data, NULL,
- (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func);
-#else
- /* back-compat casts, remove this once we can require new-enough glib */
- g_test_add_vtable(path, 0, data, NULL,
- (void (*)(void)) fn, (void (*)(void)) data_free_func);
-#endif
g_free(path);
}
diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c
index dafe8beba4..de35a18903 100644
--- a/tests/postcopy-test.c
+++ b/tests/postcopy-test.c
@@ -482,7 +482,7 @@ static void test_migrate(void)
usleep(10 * 1000);
} while (dest_byte_a == dest_byte_b);
- qmp("{ 'execute' : 'stop'}");
+ qmp_discard_response("{ 'execute' : 'stop'}");
/* With it stopped, check nothing changes */
qtest_memread(to, start_address, &dest_byte_c, 1);
sleep(1);
diff --git a/tests/ptimer-test-stubs.c b/tests/ptimer-test-stubs.c
index 21d4ebb0fe..8a1b0a336c 100644
--- a/tests/ptimer-test-stubs.c
+++ b/tests/ptimer-test-stubs.c
@@ -108,6 +108,11 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
return bh;
}
+void qemu_bh_delete(QEMUBH *bh)
+{
+ g_free(bh);
+}
+
void replay_bh_schedule_event(QEMUBH *bh)
{
bh->cb(bh->opaque);
diff --git a/tests/ptimer-test.c b/tests/ptimer-test.c
index b36a476483..5d1a2a8188 100644
--- a/tests/ptimer-test.c
+++ b/tests/ptimer-test.c
@@ -73,6 +73,7 @@ static void check_set_count(gconstpointer arg)
ptimer_set_count(ptimer, 1000);
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 1000);
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_set_limit(gconstpointer arg)
@@ -92,6 +93,7 @@ static void check_set_limit(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 2000);
g_assert_cmpuint(ptimer_get_limit(ptimer), ==, 2000);
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_oneshot(gconstpointer arg)
@@ -194,6 +196,7 @@ static void check_oneshot(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_periodic(gconstpointer arg)
@@ -360,6 +363,7 @@ static void check_periodic(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==,
(no_round_down ? 8 : 7) + (wrap_policy ? 1 : 0));
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_on_the_fly_mode_change(gconstpointer arg)
@@ -406,6 +410,7 @@ static void check_on_the_fly_mode_change(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
g_assert_true(triggered);
+ ptimer_free(ptimer);
}
static void check_on_the_fly_period_change(gconstpointer arg)
@@ -438,6 +443,7 @@ static void check_on_the_fly_period_change(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
g_assert_true(triggered);
+ ptimer_free(ptimer);
}
static void check_on_the_fly_freq_change(gconstpointer arg)
@@ -470,6 +476,7 @@ static void check_on_the_fly_freq_change(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
g_assert_true(triggered);
+ ptimer_free(ptimer);
}
static void check_run_with_period_0(gconstpointer arg)
@@ -487,6 +494,7 @@ static void check_run_with_period_0(gconstpointer arg)
g_assert_cmpuint(ptimer_get_count(ptimer), ==, 99);
g_assert_false(triggered);
+ ptimer_free(ptimer);
}
static void check_run_with_delta_0(gconstpointer arg)
@@ -591,6 +599,7 @@ static void check_run_with_delta_0(gconstpointer arg)
g_assert_true(triggered);
ptimer_stop(ptimer);
+ ptimer_free(ptimer);
}
static void check_periodic_with_load_0(gconstpointer arg)
@@ -649,6 +658,7 @@ static void check_periodic_with_load_0(gconstpointer arg)
}
ptimer_stop(ptimer);
+ ptimer_free(ptimer);
}
static void check_oneshot_with_load_0(gconstpointer arg)
@@ -682,14 +692,14 @@ static void check_oneshot_with_load_0(gconstpointer arg)
} else {
g_assert_false(triggered);
}
+
+ ptimer_free(ptimer);
}
static void add_ptimer_tests(uint8_t policy)
{
- uint8_t *ppolicy = g_malloc(1);
- char *policy_name = g_malloc0(256);
-
- *ppolicy = policy;
+ char policy_name[256] = "";
+ char *tmp;
if (policy == PTIMER_POLICY_DEFAULT) {
g_sprintf(policy_name, "default");
@@ -715,49 +725,67 @@ static void add_ptimer_tests(uint8_t policy)
g_strlcat(policy_name, "no_counter_rounddown,", 256);
}
- g_test_add_data_func(
- g_strdup_printf("/ptimer/set_count policy=%s", policy_name),
- ppolicy, check_set_count);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/set_limit policy=%s", policy_name),
- ppolicy, check_set_limit);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/oneshot policy=%s", policy_name),
- ppolicy, check_oneshot);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/periodic policy=%s", policy_name),
- ppolicy, check_periodic);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s", policy_name),
- ppolicy, check_on_the_fly_mode_change);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s", policy_name),
- ppolicy, check_on_the_fly_period_change);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s", policy_name),
- ppolicy, check_on_the_fly_freq_change);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/run_with_period_0 policy=%s", policy_name),
- ppolicy, check_run_with_period_0);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/run_with_delta_0 policy=%s", policy_name),
- ppolicy, check_run_with_delta_0);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s", policy_name),
- ppolicy, check_periodic_with_load_0);
-
- g_test_add_data_func(
- g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s", policy_name),
- ppolicy, check_oneshot_with_load_0);
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/set_count policy=%s", policy_name),
+ g_memdup(&policy, 1), check_set_count, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/set_limit policy=%s", policy_name),
+ g_memdup(&policy, 1), check_set_limit, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/oneshot policy=%s", policy_name),
+ g_memdup(&policy, 1), check_oneshot, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/periodic policy=%s", policy_name),
+ g_memdup(&policy, 1), check_periodic, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_on_the_fly_mode_change, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_on_the_fly_period_change, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_on_the_fly_freq_change, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/run_with_period_0 policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_run_with_period_0, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/run_with_delta_0 policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_run_with_delta_0, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_periodic_with_load_0, g_free);
+ g_free(tmp);
+
+ g_test_add_data_func_full(
+ tmp = g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s",
+ policy_name),
+ g_memdup(&policy, 1), check_oneshot_with_load_0, g_free);
+ g_free(tmp);
}
static void add_all_ptimer_policies_comb_tests(void)
diff --git a/tests/pvpanic-test.c b/tests/pvpanic-test.c
index 3bfa678667..71ebb5c02c 100644
--- a/tests/pvpanic-test.c
+++ b/tests/pvpanic-test.c
@@ -27,6 +27,7 @@ static void test_panic(void)
data = qdict_get_qdict(response, "data");
g_assert(qdict_haskey(data, "action"));
g_assert_cmpstr(qdict_get_str(data, "action"), ==, "pause");
+ QDECREF(response);
}
int main(int argc, char **argv)
diff --git a/tests/q35-test.c b/tests/q35-test.c
index 763fe3d6ae..cc58f3ecf4 100644
--- a/tests/q35-test.c
+++ b/tests/q35-test.c
@@ -71,6 +71,9 @@ static void test_smram_lock(void)
g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == false);
smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, true);
g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == true);
+
+ g_free(pcidev);
+ qpci_free_pc(pcibus);
}
int main(int argc, char **argv)
diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out
index 4673b67f37..34e66db691 100644
--- a/tests/qemu-iotests/049.out
+++ b/tests/qemu-iotests/049.out
@@ -95,14 +95,14 @@ qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024
qemu-img: Image size must be less than 8 EiB!
qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a non-negative number below 2^64
+qemu-img: Value '-1024' is out of range for parameter 'size'
qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k
qemu-img: Image size must be less than 8 EiB!
qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a non-negative number below 2^64
+qemu-img: Value '-1k' is out of range for parameter 'size'
qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte
@@ -110,15 +110,19 @@ qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes
qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2
-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16
+qemu-img: Parameter 'size' expects a non-negative number below 2^64
+Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta-
+and exabytes, respectively.
+qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar
qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for
qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a size
-You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes.
+qemu-img: Parameter 'size' expects a non-negative number below 2^64
+Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta-
+and exabytes, respectively.
qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
== Check correct interpretation of suffixes for cluster size ==
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index e206ad6c29..c6f4eef215 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -179,7 +179,7 @@ qququiquit
Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on
QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: Can't use a read-only drive
+(qemu) QEMU_PROG: Block node is read-only
QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed.
Testing: -drive file=TEST_DIR/t.qcow2,if=scsi,readonly=on
@@ -201,12 +201,12 @@ QEMU X.Y.Z monitor - type 'help' for more information
Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-drive,drive=disk
QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: -device ide-drive,drive=disk: Can't use a read-only drive
+(qemu) QEMU_PROG: -device ide-drive,drive=disk: Block node is read-only
QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed.
Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-hd,drive=disk
QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: -device ide-hd,drive=disk: Can't use a read-only drive
+(qemu) QEMU_PROG: -device ide-hd,drive=disk: Block node is read-only
QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed.
Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk
diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index 1d3fd04b65..aafcd249f6 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -48,7 +48,8 @@ class TestSingleDrive(iotests.QMPTestCase):
def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len))
- self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.add_drive(blockdev_target_img, interface="none")
if iotests.qemu_default_machine == 'pc':
self.vm.add_drive(None, 'media=cdrom', 'ide')
self.vm.launch()
@@ -164,7 +165,8 @@ class TestSetSpeed(iotests.QMPTestCase):
def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len))
- self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.add_drive(blockdev_target_img, interface="none")
self.vm.launch()
def tearDown(self):
@@ -247,7 +249,8 @@ class TestSingleTransaction(iotests.QMPTestCase):
def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len))
- self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
+ self.vm = iotests.VM().add_drive(test_img)
+ self.vm.add_drive(blockdev_target_img, interface="none")
if iotests.qemu_default_machine == 'pc':
self.vm.add_drive(None, 'media=cdrom', 'ide')
self.vm.launch()
@@ -460,7 +463,7 @@ class TestDriveCompression(iotests.QMPTestCase):
qemu_img('create', '-f', fmt, blockdev_target_img,
str(TestDriveCompression.image_len), *args)
- self.vm.add_drive(blockdev_target_img, format=fmt)
+ self.vm.add_drive(blockdev_target_img, format=fmt, interface="none")
self.vm.launch()
diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out
index 08e4bb7218..182acb42cf 100644
--- a/tests/qemu-iotests/085.out
+++ b/tests/qemu-iotests/085.out
@@ -74,7 +74,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/
=== Invalid command - snapshot node used as backing hd ===
-{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'virtio0'"}}
+{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}}
=== Invalid command - snapshot node has a backing image ===
diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141
index 3ba79f027a..6d8f0a1a84 100755
--- a/tests/qemu-iotests/141
+++ b/tests/qemu-iotests/141
@@ -67,7 +67,7 @@ test_blockjob()
_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'x-blockdev-del',
'arguments': {'node-name': 'drv0'}}" \
- 'error'
+ 'error' | _filter_generated_node_ids
_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'block-job-cancel',
diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out
index 195ca1a604..82e763b68d 100644
--- a/tests/qemu-iotests/141.out
+++ b/tests/qemu-iotests/141.out
@@ -20,7 +20,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
{"return": {}}
-{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}}
+{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
{"return": {}}
@@ -30,7 +30,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
{"return": {}}
-{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}}
+{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
{"return": {}}
diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out
index 6b7edaf28f..54b53293d7 100644
--- a/tests/qemu-iotests/172.out
+++ b/tests/qemu-iotests/172.out
@@ -28,6 +28,7 @@ Testing:
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
@@ -57,6 +58,7 @@ Testing: -fda TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2
@@ -83,6 +85,7 @@ Testing: -fdb TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -93,6 +96,7 @@ Testing: -fdb TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
@@ -119,6 +123,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -129,6 +134,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -158,6 +164,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
@@ -184,6 +191,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -194,6 +202,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
@@ -220,6 +229,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -230,6 +240,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -259,6 +270,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
@@ -285,6 +297,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1
@@ -311,6 +324,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -321,6 +335,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -350,6 +365,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
@@ -376,6 +392,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1
@@ -402,6 +419,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -412,6 +430,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -441,6 +460,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -451,6 +471,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
@@ -477,6 +498,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -487,6 +509,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
@@ -513,6 +536,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
@@ -539,6 +563,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
@@ -568,6 +593,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -578,6 +604,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
@@ -604,6 +631,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -614,6 +642,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
@@ -640,6 +669,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 1 (0x1)
@@ -650,6 +680,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
@@ -676,6 +707,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 1 (0x1)
@@ -686,6 +718,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
@@ -723,6 +756,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -733,6 +767,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
@@ -759,6 +794,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -769,6 +805,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
@@ -802,6 +839,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -812,6 +850,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1
@@ -838,6 +877,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 0 (0x0)
@@ -848,6 +888,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1
@@ -874,6 +915,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 1 (0x1)
@@ -884,6 +926,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0
@@ -910,6 +953,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
dev: floppy, id ""
unit = 1 (0x1)
@@ -920,6 +964,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0
@@ -964,6 +1009,7 @@ Testing: -device floppy
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
Testing: -device floppy,drive-type=120
@@ -990,6 +1036,7 @@ Testing: -device floppy,drive-type=120
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "120"
Testing: -device floppy,drive-type=144
@@ -1016,6 +1063,7 @@ Testing: -device floppy,drive-type=144
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -device floppy,drive-type=288
@@ -1042,6 +1090,7 @@ Testing: -device floppy,drive-type=288
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
@@ -1071,6 +1120,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "120"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=288
@@ -1097,6 +1147,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "288"
@@ -1126,6 +1177,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=512
@@ -1152,6 +1204,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica
opt_io_size = 0 (0x0)
discard_granularity = 4294967295 (0xffffffff)
write-cache = "auto"
+ share-rw = false
drive-type = "144"
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096
diff --git a/tests/tco-test.c b/tests/tco-test.c
index ef02ec5903..c4c264eb3d 100644
--- a/tests/tco-test.c
+++ b/tests/tco-test.c
@@ -42,11 +42,18 @@ typedef struct {
bool noreboot;
QPCIDevice *dev;
QPCIBar tco_io_bar;
+ QPCIBus *bus;
} TestData;
+static void test_end(TestData *d)
+{
+ g_free(d->dev);
+ qpci_free_pc(d->bus);
+ qtest_end();
+}
+
static void test_init(TestData *d)
{
- QPCIBus *bus;
QTestState *qs;
char *s;
@@ -57,8 +64,8 @@ static void test_init(TestData *d)
qtest_irq_intercept_in(qs, "ioapic");
g_free(s);
- bus = qpci_init_pc(NULL);
- d->dev = qpci_device_find(bus, QPCI_DEVFN(0x1f, 0x00));
+ d->bus = qpci_init_pc(NULL);
+ d->dev = qpci_device_find(d->bus, QPCI_DEVFN(0x1f, 0x00));
g_assert(d->dev != NULL);
qpci_device_enable(d->dev);
@@ -148,7 +155,7 @@ static void test_tco_defaults(void)
SW_IRQ_GEN_DEFAULT);
g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO_TMR), ==,
TCO_TMR_DEFAULT);
- qtest_end();
+ test_end(&d);
}
static void test_tco_timeout(void)
@@ -192,7 +199,7 @@ static void test_tco_timeout(void)
g_assert(ret == 1);
stop_tco(&d);
- qtest_end();
+ test_end(&d);
}
static void test_tco_max_timeout(void)
@@ -225,7 +232,7 @@ static void test_tco_max_timeout(void)
g_assert(ret == 1);
stop_tco(&d);
- qtest_end();
+ test_end(&d);
}
static QDict *get_watchdog_action(void)
@@ -262,7 +269,7 @@ static void test_tco_second_timeout_pause(void)
QDECREF(ad);
stop_tco(&td);
- qtest_end();
+ test_end(&td);
}
static void test_tco_second_timeout_reset(void)
@@ -287,7 +294,7 @@ static void test_tco_second_timeout_reset(void)
QDECREF(ad);
stop_tco(&td);
- qtest_end();
+ test_end(&td);
}
static void test_tco_second_timeout_shutdown(void)
@@ -312,7 +319,7 @@ static void test_tco_second_timeout_shutdown(void)
QDECREF(ad);
stop_tco(&td);
- qtest_end();
+ test_end(&td);
}
static void test_tco_second_timeout_none(void)
@@ -337,7 +344,7 @@ static void test_tco_second_timeout_none(void)
QDECREF(ad);
stop_tco(&td);
- qtest_end();
+ test_end(&td);
}
static void test_tco_ticks_counter(void)
@@ -365,7 +372,7 @@ static void test_tco_ticks_counter(void)
} while (!(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS) & TCO_TIMEOUT));
stop_tco(&d);
- qtest_end();
+ test_end(&d);
}
static void test_tco1_control_bits(void)
@@ -383,7 +390,7 @@ static void test_tco1_control_bits(void)
qpci_io_writew(d.dev, d.tco_io_bar, TCO1_CNT, val);
g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_CNT), ==,
TCO_LOCK);
- qtest_end();
+ test_end(&d);
}
static void test_tco1_status_bits(void)
@@ -412,7 +419,7 @@ static void test_tco1_status_bits(void)
g_assert(ret == 1);
qpci_io_writew(d.dev, d.tco_io_bar, TCO1_STS, val);
g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS), ==, 0);
- qtest_end();
+ test_end(&d);
}
static void test_tco2_status_bits(void)
@@ -439,7 +446,7 @@ static void test_tco2_status_bits(void)
g_assert(ret == 1);
qpci_io_writew(d.dev, d.tco_io_bar, TCO2_STS, val);
g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO2_STS), ==, 0);
- qtest_end();
+ test_end(&d);
}
int main(int argc, char **argv)
diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
index f6dfd08746..4ccbda14af 100644
--- a/tests/test-blockjob-txn.c
+++ b/tests/test-blockjob-txn.c
@@ -101,9 +101,9 @@ static BlockJob *test_block_job_start(unsigned int iterations,
g_assert_nonnull(bs);
snprintf(job_id, sizeof(job_id), "job%u", counter++);
- s = block_job_create(job_id, &test_block_job_driver, bs, 0,
- BLOCK_JOB_DEFAULT, test_block_job_cb,
- data, &error_abort);
+ s = block_job_create(job_id, &test_block_job_driver, bs,
+ 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT,
+ test_block_job_cb, data, &error_abort);
s->iterations = iterations;
s->use_timer = use_timer;
s->rc = rc;
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index 068c9e419b..740e740398 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -30,8 +30,9 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id,
BlockJob *job;
Error *errp = NULL;
- job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0,
- BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp);
+ job = block_job_create(id, &test_block_job_driver, blk_bs(blk),
+ 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, block_job_cb,
+ NULL, &errp);
if (should_succeed) {
g_assert_null(errp);
g_assert_nonnull(job);
@@ -53,13 +54,14 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id,
* BlockDriverState inserted. */
static BlockBackend *create_blk(const char *name)
{
- BlockBackend *blk = blk_new();
+ /* No I/O is performed on this device */
+ BlockBackend *blk = blk_new(0, BLK_PERM_ALL);
BlockDriverState *bs;
bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort);
g_assert_nonnull(bs);
- blk_insert_bs(blk, bs);
+ blk_insert_bs(blk, bs, &error_abort);
bdrv_unref(bs);
if (name) {
diff --git a/tests/test-filter-mirror.c b/tests/test-filter-mirror.c
index ffaaffabd0..9f84402493 100644
--- a/tests/test-filter-mirror.c
+++ b/tests/test-filter-mirror.c
@@ -57,7 +57,7 @@ static void test_mirror(void)
};
/* send a qmp command to guarantee that 'connected' is setting to true. */
- qmp("{ 'execute' : 'query-status'}");
+ qmp_discard_response("{ 'execute' : 'query-status'}");
ret = iov_send(send_sock[0], iov, 2, 0, sizeof(size) + sizeof(send_buf));
g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size));
close(send_sock[0]);
diff --git a/tests/test-filter-redirector.c b/tests/test-filter-redirector.c
index c63b68f03a..0c4b8d52ef 100644
--- a/tests/test-filter-redirector.c
+++ b/tests/test-filter-redirector.c
@@ -99,7 +99,7 @@ static void test_redirector_tx(void)
g_assert_cmpint(recv_sock, !=, -1);
/* send a qmp command to guarantee that 'connected' is setting to true. */
- qmp("{ 'execute' : 'query-status'}");
+ qmp_discard_response("{ 'execute' : 'query-status'}");
struct iovec iov[] = {
{
@@ -184,7 +184,7 @@ static void test_redirector_rx(void)
send_sock = unix_connect(sock_path1, NULL);
g_assert_cmpint(send_sock, !=, -1);
/* send a qmp command to guarantee that 'connected' is setting to true. */
- qmp("{ 'execute' : 'query-status'}");
+ qmp_discard_response("{ 'execute' : 'query-status'}");
ret = iov_send(send_sock, iov, 2, 0, sizeof(size) + sizeof(send_buf));
g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size));
diff --git a/tests/test-io-channel-command.c b/tests/test-io-channel-command.c
index 1d1f461bed..46ce1ff01c 100644
--- a/tests/test-io-channel-command.c
+++ b/tests/test-io-channel-command.c
@@ -29,8 +29,8 @@ static void test_io_channel_command_fifo(bool async)
#define TEST_FIFO "tests/test-io-channel-command.fifo"
QIOChannel *src, *dst;
QIOChannelTest *test;
- char *srcfifo = g_strdup_printf("PIPE:%s,wronly", TEST_FIFO);
- char *dstfifo = g_strdup_printf("PIPE:%s,rdonly", TEST_FIFO);
+ const char *srcfifo = "PIPE:" TEST_FIFO ",wronly";
+ const char *dstfifo = "PIPE:" TEST_FIFO ",rdonly";
const char *srcargv[] = {
"/bin/socat", "-", srcfifo, NULL,
};
@@ -59,8 +59,6 @@ static void test_io_channel_command_fifo(bool async)
object_unref(OBJECT(src));
object_unref(OBJECT(dst));
- g_free(srcfifo);
- g_free(dstfifo);
unlink(TEST_FIFO);
}
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 363b59a38f..bd7c501b2e 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -593,9 +593,10 @@ static void test_groups(void)
BlockBackend *blk1, *blk2, *blk3;
BlockBackendPublic *blkp1, *blkp2, *blkp3;
- blk1 = blk_new();
- blk2 = blk_new();
- blk3 = blk_new();
+ /* No actual I/O is performed on these devices */
+ blk1 = blk_new(0, BLK_PERM_ALL);
+ blk2 = blk_new(0, BLK_PERM_ALL);
+ blk3 = blk_new(0, BLK_PERM_ALL);
blkp1 = blk_get_public(blk1);
blkp2 = blk_get_public(blk2);
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 39f338a4c4..f694a89782 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -476,6 +476,8 @@ const VMStateDescription vmsd_tst = {
}
};
+/* test array migration */
+
#define AR_SIZE 4
typedef struct {
@@ -492,20 +494,22 @@ const VMStateDescription vmsd_arps = {
VMSTATE_END_OF_LIST()
}
};
+
+static uint8_t wire_arr_ptr_no0[] = {
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x03,
+ QEMU_VM_EOF
+};
+
static void test_arr_ptr_str_no0_save(void)
{
TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} };
TestArrayOfPtrToStuct sample = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} };
- uint8_t wire_sample[] = {
- 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x00, 0x00, 0x02,
- 0x00, 0x00, 0x00, 0x03,
- QEMU_VM_EOF
- };
save_vmstate(&vmsd_arps, &sample);
- compare_vmstate(wire_sample, sizeof(wire_sample));
+ compare_vmstate(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0));
}
static void test_arr_ptr_str_no0_load(void)
@@ -514,21 +518,98 @@ static void test_arr_ptr_str_no0_load(void)
TestStructTriv ar[AR_SIZE] = {};
TestArrayOfPtrToStuct obj = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} };
int idx;
- uint8_t wire_sample[] = {
- 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x00, 0x00, 0x02,
- 0x00, 0x00, 0x00, 0x03,
- QEMU_VM_EOF
- };
- save_buffer(wire_sample, sizeof(wire_sample));
+ save_buffer(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0));
+ SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1,
+ wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)));
+ for (idx = 0; idx < AR_SIZE; ++idx) {
+ /* compare the target array ar with the ground truth array ar_gt */
+ g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i);
+ }
+}
+
+static uint8_t wire_arr_ptr_0[] = {
+ 0x00, 0x00, 0x00, 0x00,
+ VMS_NULLPTR_MARKER,
+ 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x03,
+ QEMU_VM_EOF
+};
+
+static void test_arr_ptr_str_0_save(void)
+{
+ TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} };
+ TestArrayOfPtrToStuct sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+
+ save_vmstate(&vmsd_arps, &sample);
+ compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+}
+
+static void test_arr_ptr_str_0_load(void)
+{
+ TestStructTriv ar_gt[AR_SIZE] = {{.i = 0}, {.i = 0}, {.i = 2}, {.i = 3} };
+ TestStructTriv ar[AR_SIZE] = {};
+ TestArrayOfPtrToStuct obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+ int idx;
+
+ save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1,
- wire_sample, sizeof(wire_sample)));
+ wire_arr_ptr_0, sizeof(wire_arr_ptr_0)));
for (idx = 0; idx < AR_SIZE; ++idx) {
/* compare the target array ar with the ground truth array ar_gt */
g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i);
}
+ for (idx = 0; idx < AR_SIZE; ++idx) {
+ if (idx == 1) {
+ g_assert_cmpint((uintptr_t)(obj.ar[idx]), ==, 0);
+ } else {
+ g_assert_cmpint((uintptr_t)(obj.ar[idx]), !=, 0);
+ }
+ }
+}
+
+typedef struct TestArrayOfPtrToInt {
+ int32_t *ar[AR_SIZE];
+} TestArrayOfPtrToInt;
+
+const VMStateDescription vmsd_arpp = {
+ .name = "test/arps",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_ARRAY_OF_POINTER(ar, TestArrayOfPtrToInt,
+ AR_SIZE, 0, vmstate_info_int32, int32_t*),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void test_arr_ptr_prim_0_save(void)
+{
+ int32_t ar[AR_SIZE] = {0 , 1, 2, 3};
+ TestArrayOfPtrToInt sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+
+ save_vmstate(&vmsd_arpp, &sample);
+ compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+}
+
+static void test_arr_ptr_prim_0_load(void)
+{
+ int32_t ar_gt[AR_SIZE] = {0, 1, 2, 3};
+ int32_t ar[AR_SIZE] = {3 , 42, 1, 0};
+ TestArrayOfPtrToInt obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+ int idx;
+
+ save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+ SUCCESS(load_vmstate_one(&vmsd_arpp, &obj, 1,
+ wire_arr_ptr_0, sizeof(wire_arr_ptr_0)));
+ for (idx = 0; idx < AR_SIZE; ++idx) {
+ /* compare the target array ar with the ground truth array ar_gt */
+ if (idx == 1) {
+ g_assert_cmpint(42, ==, ar[idx]);
+ } else {
+ g_assert_cmpint(ar_gt[idx], ==, ar[idx]);
+ }
+ }
}
/* test QTAILQ migration */
@@ -781,6 +862,13 @@ int main(int argc, char **argv)
test_arr_ptr_str_no0_save);
g_test_add_func("/vmstate/array/ptr/str/no0/load",
test_arr_ptr_str_no0_load);
+ g_test_add_func("/vmstate/array/ptr/str/0/save", test_arr_ptr_str_0_save);
+ g_test_add_func("/vmstate/array/ptr/str/0/load",
+ test_arr_ptr_str_0_load);
+ g_test_add_func("/vmstate/array/ptr/prim/0/save",
+ test_arr_ptr_prim_0_save);
+ g_test_add_func("/vmstate/array/ptr/prim/0/load",
+ test_arr_ptr_prim_0_load);
g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q);
g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q);
g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
diff --git a/tests/usb-hcd-ehci-test.c b/tests/usb-hcd-ehci-test.c
index 57af8a034e..944eb1c088 100644
--- a/tests/usb-hcd-ehci-test.c
+++ b/tests/usb-hcd-ehci-test.c
@@ -50,11 +50,8 @@ static void ehci_port_test(struct qhc *hc, int port, uint32_t expect)
/* tests */
-static void pci_init(void)
+static void test_init(void)
{
- if (pcibus) {
- return;
- }
pcibus = qpci_init_pc(NULL);
g_assert(pcibus != NULL);
@@ -64,6 +61,15 @@ static void pci_init(void)
qusb_pci_init_one(pcibus, &ehci1, QPCI_DEVFN(0x1d, 7), 0);
}
+static void test_deinit(void)
+{
+ uhci_deinit(&uhci1);
+ uhci_deinit(&uhci2);
+ uhci_deinit(&uhci3);
+ uhci_deinit(&ehci1);
+ qpci_free_pc(pcibus);
+}
+
static void pci_uhci_port_1(void)
{
g_assert(pcibus != NULL);
@@ -142,7 +148,7 @@ int main(int argc, char **argv)
int ret;
g_test_init(&argc, &argv, NULL);
- qtest_add_func("/ehci/pci/init", pci_init);
+
qtest_add_func("/ehci/pci/uhci-port-1", pci_uhci_port_1);
qtest_add_func("/ehci/pci/ehci-port-1", pci_ehci_port_1);
qtest_add_func("/ehci/pci/ehci-config", pci_ehci_config);
@@ -161,7 +167,10 @@ int main(int argc, char **argv)
"-drive if=none,id=usbcdrom,media=cdrom "
"-device usb-tablet,bus=ich9-ehci-1.0,port=1,usb_version=1 "
"-device usb-storage,bus=ich9-ehci-1.0,port=2,drive=usbcdrom ");
+
+ test_init();
ret = g_test_run();
+ test_deinit();
qtest_end();
diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c
index e956b9ccb7..f25bae5e6c 100644
--- a/tests/usb-hcd-uhci-test.c
+++ b/tests/usb-hcd-uhci-test.c
@@ -28,6 +28,7 @@ static void test_port(int port)
g_assert(port > 0);
qusb_pci_init_one(qs->pcibus, &uhci, QPCI_DEVFN(0x1d, 0), 4);
uhci_port_test(&uhci, port - 1, UHCI_PORT_CCS);
+ uhci_deinit(&uhci);
}
static void test_port_1(void)
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 2c45c7b29f..a61896c32d 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -139,6 +139,7 @@ enum {
};
typedef struct TestServer {
+ QPCIBus *bus;
gchar *socket_path;
gchar *mig_path;
gchar *chr_name;
@@ -160,14 +161,13 @@ static const char *root;
static void init_virtio_dev(TestServer *s)
{
- QPCIBus *bus;
QVirtioPCIDevice *dev;
uint32_t features;
- bus = qpci_init_pc(NULL);
- g_assert_nonnull(bus);
+ s->bus = qpci_init_pc(NULL);
+ g_assert_nonnull(s->bus);
- dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET);
+ dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET);
g_assert_nonnull(dev);
qvirtio_pci_device_enable(dev);
@@ -180,6 +180,7 @@ static void init_virtio_dev(TestServer *s)
qvirtio_set_features(&dev->vdev, features);
qvirtio_set_driver_ok(&dev->vdev);
+ qvirtio_pci_device_free(dev);
}
static void wait_for_fds(TestServer *s)
@@ -507,6 +508,8 @@ static gboolean _test_server_free(TestServer *server)
g_free(server->mig_path);
g_free(server->chr_name);
+ qpci_free_pc(server->bus);
+
g_free(server);
return FALSE;
diff --git a/tests/virtio-9p-test.c b/tests/virtio-9p-test.c
index 9556291567..43a1ad813f 100644
--- a/tests/virtio-9p-test.c
+++ b/tests/virtio-9p-test.c
@@ -80,7 +80,7 @@ static void qvirtio_9p_pci_stop(QVirtIO9P *v9p)
{
qvirtqueue_cleanup(v9p->dev->bus, v9p->vq, v9p->qs->alloc);
qvirtio_pci_device_disable(container_of(v9p->dev, QVirtioPCIDevice, vdev));
- g_free(v9p->dev);
+ qvirtio_pci_device_free((QVirtioPCIDevice *)v9p->dev);
qvirtio_9p_stop(v9p);
}
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 0e32e416dd..1eee95df49 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -108,7 +108,7 @@ static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot)
{
QVirtioPCIDevice *dev;
- dev = qvirtio_pci_device_find(bus, VIRTIO_ID_BLOCK);
+ dev = qvirtio_pci_device_find_slot(bus, VIRTIO_ID_BLOCK, slot);
g_assert(dev != NULL);
g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_BLOCK);
g_assert_cmphex(dev->pdev->devfn, ==, ((slot << 3) | PCI_FN));
@@ -296,7 +296,7 @@ static void pci_basic(void)
/* End test */
qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -389,7 +389,7 @@ static void pci_indirect(void)
/* End test */
qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -409,15 +409,16 @@ static void pci_config(void)
qvirtio_set_driver_ok(&dev->vdev);
- qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
- " 'size': %d } }", n_size);
+ qmp_discard_response("{ 'execute': 'block_resize', "
+ " 'arguments': { 'device': 'drive0', "
+ " 'size': %d } }", n_size);
qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US);
capacity = qvirtio_config_readq(&dev->vdev, 0);
g_assert_cmpint(capacity, ==, n_size / 512);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -458,8 +459,9 @@ static void pci_msix(void)
qvirtio_set_driver_ok(&dev->vdev);
- qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
- " 'size': %d } }", n_size);
+ qmp_discard_response("{ 'execute': 'block_resize', "
+ " 'arguments': { 'device': 'drive0', "
+ " 'size': %d } }", n_size);
qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US);
@@ -524,7 +526,7 @@ static void pci_msix(void)
qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
qpci_msix_disable(dev->pdev);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -640,7 +642,7 @@ static void pci_idx(void)
qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
qpci_msix_disable(dev->pdev);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
qtest_shutdown(qs);
}
@@ -659,7 +661,7 @@ static void pci_hotplug(void)
dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT_HP);
g_assert(dev);
qvirtio_pci_device_disable(dev);
- g_free(dev);
+ qvirtio_pci_device_free(dev);
/* unplug secondary disk */
if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
@@ -691,8 +693,9 @@ static void mmio_basic(void)
test_basic(&dev->vdev, alloc, vq);
- qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
- " 'size': %d } }", n_size);
+ qmp_discard_response("{ 'execute': 'block_resize', "
+ " 'arguments': { 'device': 'drive0', "
+ " 'size': %d } }", n_size);
qvirtio_wait_queue_isr(&dev->vdev, vq, QVIRTIO_BLK_TIMEOUT_US);
diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c
index 69220ef07b..0eabd56fd9 100644
--- a/tests/virtio-scsi-test.c
+++ b/tests/virtio-scsi-test.c
@@ -63,7 +63,7 @@ static void qvirtio_scsi_pci_free(QVirtIOSCSI *vs)
qvirtqueue_cleanup(vs->dev->bus, vs->vq[i], vs->qs->alloc);
}
qvirtio_pci_device_disable(container_of(vs->dev, QVirtioPCIDevice, vdev));
- g_free(vs->dev);
+ qvirtio_pci_device_free((QVirtioPCIDevice *)vs->dev);
qvirtio_scsi_stop(vs->qs);
g_free(vs);
}
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 419f2528b8..5ce1b5c246 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -179,7 +179,7 @@ void parse_option_size(const char *name, const char *value,
err = qemu_strtosz(value, NULL, &size);
if (err == -ERANGE) {
- error_setg(errp, "Value '%s' is too large for parameter '%s'",
+ error_setg(errp, "Value '%s' is out of range for parameter '%s'",
value, name);
return;
}
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index ff620ecff7..6cf70b96f6 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -355,11 +355,6 @@ void timer_deinit(QEMUTimer *ts)
ts->timer_list = NULL;
}
-void timer_free(QEMUTimer *ts)
-{
- g_free(ts);
-}
-
static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
{
QEMUTimer **pt, *t;