summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore4
-rw-r--r--MAINTAINERS6
-rw-r--r--Makefile6
-rw-r--r--Makefile.objs7
-rw-r--r--Makefile.target15
-rw-r--r--block-migration.c9
-rw-r--r--block.c140
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/archipelago.c1069
-rw-r--r--block/bochs.c6
-rw-r--r--block/cloop.c23
-rw-r--r--block/curl.c8
-rw-r--r--block/dmg.c19
-rw-r--r--block/iscsi.c5
-rw-r--r--block/mirror.c7
-rw-r--r--block/nfs.c6
-rw-r--r--block/parallels.c6
-rw-r--r--block/qapi.c42
-rw-r--r--block/qcow.c33
-rw-r--r--block/qcow2-cache.c13
-rw-r--r--block/qcow2-cluster.c47
-rw-r--r--block/qcow2-refcount.c54
-rw-r--r--block/qcow2-snapshot.c23
-rw-r--r--block/qcow2.c45
-rw-r--r--block/qed-check.c7
-rw-r--r--block/qed.c6
-rw-r--r--block/raw-posix.c6
-rw-r--r--block/rbd.c7
-rw-r--r--block/vdi.c112
-rw-r--r--block/vhdx-endian.c11
-rw-r--r--block/vhdx-log.c55
-rw-r--r--block/vhdx.c98
-rw-r--r--block/vhdx.h1
-rw-r--r--block/vmdk.c239
-rw-r--r--block/vpc.c112
-rw-r--r--block/win32-aio.c6
-rwxr-xr-xconfigure52
-rw-r--r--cpu-exec.c6
-rw-r--r--docs/multiple-iothreads.txt134
-rw-r--r--docs/specs/qcow2.txt12
-rw-r--r--docs/tracing.txt40
-rw-r--r--hw/audio/intel-hda.c4
-rw-r--r--hw/block/xen_disk.c1
-rw-r--r--hw/i386/acpi-build.c2
-rw-r--r--hw/i386/acpi-dsdt.dsl4
-rw-r--r--hw/i386/acpi-dsdt.hex.generated8
-rw-r--r--hw/i386/pc.c30
-rw-r--r--hw/i386/pc_piix.c23
-rw-r--r--hw/i386/pc_q35.c20
-rw-r--r--hw/i386/q35-acpi-dsdt.dsl4
-rw-r--r--hw/i386/ssdt-mem.dsl16
-rw-r--r--hw/i386/ssdt-misc.dsl2
-rw-r--r--hw/mem/pc-dimm.c6
-rw-r--r--hw/misc/ivshmem.c4
-rw-r--r--hw/net/e1000.c60
-rw-r--r--hw/pci/msi.c2
-rw-r--r--hw/pci/msix.c2
-rw-r--r--hw/virtio/virtio-rng.c6
-rw-r--r--include/block/block.h2
-rw-r--r--include/block/coroutine.h11
-rw-r--r--include/exec/helper-gen.h2
-rw-r--r--include/exec/helper-proto.h1
-rw-r--r--include/exec/helper-tcg.h1
-rw-r--r--include/hw/acpi/pc-hotplug.h2
-rw-r--r--include/hw/i386/pc.h9
-rw-r--r--include/qemu/osdep.h1
-rw-r--r--include/trace-tcg.h7
-rw-r--r--include/trace.h1
-rw-r--r--numa.c4
-rw-r--r--qapi/block-core.json40
-rw-r--r--qdev-monitor.c40
-rw-r--r--qemu-coroutine.c26
-rw-r--r--qemu-img.c98
-rw-r--r--qmp.c1
-rwxr-xr-xscripts/simpletrace.py24
-rw-r--r--scripts/tracetool/__init__.py105
-rw-r--r--scripts/tracetool/format/events_h.py5
-rw-r--r--scripts/tracetool/format/simpletrace_stap.py71
-rw-r--r--scripts/tracetool/format/stap.py11
-rw-r--r--scripts/tracetool/format/tcg_h.py57
-rw-r--r--scripts/tracetool/format/tcg_helper_c.py50
-rw-r--r--scripts/tracetool/format/tcg_helper_h.py50
-rw-r--r--scripts/tracetool/format/tcg_helper_wrapper_h.py70
-rw-r--r--scripts/tracetool/format/ust_events_h.py15
-rw-r--r--scripts/tracetool/transform.py166
-rw-r--r--target-alpha/translate.c3
-rw-r--r--target-arm/translate-a64.c2
-rw-r--r--target-arm/translate.c3
-rw-r--r--target-cris/translate.c3
-rw-r--r--target-i386/translate.c3
-rw-r--r--target-lm32/translate.c3
-rw-r--r--target-m68k/translate.c3
-rw-r--r--target-microblaze/translate.c3
-rw-r--r--target-mips/translate.c3
-rw-r--r--target-openrisc/translate.c3
-rw-r--r--target-ppc/translate.c3
-rw-r--r--target-s390x/translate.c2
-rw-r--r--target-sh4/translate.c3
-rw-r--r--target-sparc/translate.c3
-rw-r--r--target-unicore32/translate.c3
-rw-r--r--target-xtensa/translate.c3
-rwxr-xr-xtests/qemu-iotests/0594
-rw-r--r--tests/qemu-iotests/059.out202
-rwxr-xr-xtests/qemu-iotests/0609
-rw-r--r--tests/qemu-iotests/060.out8
-rwxr-xr-xtests/qemu-iotests/08416
-rw-r--r--tests/qemu-iotests/084.out14
-rw-r--r--tests/qemu-iotests/common6
-rw-r--r--tests/qemu-iotests/common.rc9
-rw-r--r--tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2bin414 -> 4764 bytes
-rw-r--r--tests/test-coroutine.c24
-rw-r--r--thread-pool.c27
-rw-r--r--trace-events14
-rw-r--r--trace/Makefile.objs53
-rw-r--r--translate-all.c3
-rw-r--r--util/oslib-posix.c16
-rw-r--r--util/oslib-win32.c9
117 files changed, 3428 insertions, 595 deletions
diff --git a/.gitignore b/.gitignore
index 2286d0a109..e32a58417a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,10 @@
/trace/generated-tracers.dtrace
/trace/generated-events.h
/trace/generated-events.c
+/trace/generated-helpers-wrappers.h
+/trace/generated-helpers.h
+/trace/generated-helpers.c
+/trace/generated-tcg-tracers.h
/trace/generated-ust-provider.h
/trace/generated-ust.c
/libcacard/trace/generated-tracers.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 906f252477..59940f97ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1000,3 +1000,9 @@ SSH
M: Richard W.M. Jones <rjones@redhat.com>
S: Supported
F: block/ssh.c
+
+ARCHIPELAGO
+M: Chrysostomos Nanakos <cnanakos@grnet.gr>
+M: Chrysostomos Nanakos <chris@include.gr>
+S: Maintained
+F: block/archipelago.c
diff --git a/Makefile b/Makefile
index c5751b879a..b33aaacde6 100644
--- a/Makefile
+++ b/Makefile
@@ -57,6 +57,12 @@ GENERATED_HEADERS += trace/generated-tracers-dtrace.h
endif
GENERATED_SOURCES += trace/generated-tracers.c
+GENERATED_HEADERS += trace/generated-tcg-tracers.h
+
+GENERATED_HEADERS += trace/generated-helpers-wrappers.h
+GENERATED_HEADERS += trace/generated-helpers.h
+GENERATED_SOURCES += trace/generated-helpers.c
+
ifeq ($(findstring ust,$(TRACE_BACKENDS)),ust)
GENERATED_HEADERS += trace/generated-ust-provider.h
GENERATED_SOURCES += trace/generated-ust.c
diff --git a/Makefile.objs b/Makefile.objs
index efd9768e86..97db978d16 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -1,7 +1,7 @@
#######################################################################
# Common libraries for tools and emulators
stub-obj-y = stubs/
-util-obj-y = util/ qobject/ qapi/ trace/ qapi-types.o qapi-visit.o qapi-event.o
+util-obj-y = util/ qobject/ qapi/ qapi-types.o qapi-visit.o qapi-event.o
#######################################################################
# block-obj-y is code used by both qemu system emulation and qemu-img
@@ -101,6 +101,11 @@ version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
######################################################################
+# tracing
+util-obj-y += trace/
+target-obj-y += trace/
+
+######################################################################
# guest agent
# FIXME: a few definitions from qapi-types.o/qapi-visit.o are needed
diff --git a/Makefile.target b/Makefile.target
index 137d0b0517..1e8d7abcb3 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -38,7 +38,7 @@ config-target.h: config-target.h-timestamp
config-target.h-timestamp: config-target.mak
ifdef CONFIG_TRACE_SYSTEMTAP
-stap: $(QEMU_PROG).stp-installed $(QEMU_PROG).stp
+stap: $(QEMU_PROG).stp-installed $(QEMU_PROG).stp $(QEMU_PROG)-simpletrace.stp
ifdef CONFIG_USER_ONLY
TARGET_TYPE=user
@@ -64,6 +64,13 @@ $(QEMU_PROG).stp: $(SRC_PATH)/trace-events
--target-type=$(TARGET_TYPE) \
< $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG).stp")
+$(QEMU_PROG)-simpletrace.stp: $(SRC_PATH)/trace-events
+ $(call quiet-command,$(TRACETOOL) \
+ --format=simpletrace-stap \
+ --backends=$(TRACE_BACKENDS) \
+ --probe-prefix=qemu.$(TARGET_TYPE).$(TARGET_NAME) \
+ < $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG)-simpletrace.stp")
+
else
stap:
endif
@@ -152,15 +159,20 @@ endif # CONFIG_SOFTMMU
dummy := $(call unnest-vars,,obj-y)
all-obj-y := $(obj-y)
+target-obj-y :=
block-obj-y :=
common-obj-y :=
include $(SRC_PATH)/Makefile.objs
+dummy := $(call unnest-vars,,target-obj-y)
+target-obj-y-save := $(target-obj-y)
dummy := $(call unnest-vars,.., \
block-obj-y \
block-obj-m \
common-obj-y \
common-obj-m)
+target-obj-y := $(target-obj-y-save)
all-obj-y += $(common-obj-y)
+all-obj-y += $(target-obj-y)
all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
# build either PROG or PROGW
@@ -191,6 +203,7 @@ endif
ifdef CONFIG_TRACE_SYSTEMTAP
$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset"
$(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp"
+ $(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp"
endif
GENERATED_HEADERS += config-target.h
diff --git a/block-migration.c b/block-migration.c
index 73cdd07e8c..ba3ed36f77 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -186,7 +186,7 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
- if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
+ if (sector < bdrv_nb_sectors(bmds->bs)) {
return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
(1UL << (chunk % (sizeof(unsigned long) * 8))));
} else {
@@ -223,8 +223,7 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds)
BlockDriverState *bs = bmds->bs;
int64_t bitmap_size;
- bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
- BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+ bitmap_size = bdrv_nb_sectors(bs) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
bmds->aio_bitmap = g_malloc0(bitmap_size);
@@ -350,7 +349,7 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
int64_t sectors;
if (!bdrv_is_read_only(bs)) {
- sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+ sectors = bdrv_nb_sectors(bs);
if (sectors <= 0) {
return;
}
@@ -799,7 +798,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
if (bs != bs_prev) {
bs_prev = bs;
- total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+ total_sectors = bdrv_nb_sectors(bs);
if (total_sectors <= 0) {
error_report("Error getting length of block device %s",
device_name);
diff --git a/block.c b/block.c
index 8cf519ba3a..6fa0201074 100644
--- a/block.c
+++ b/block.c
@@ -57,6 +57,8 @@ struct BdrvDirtyBitmap {
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
+
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
@@ -701,6 +703,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename,
/**
* Set the current 'total_sectors' value
+ * Return 0 on success, -errno on error.
*/
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
@@ -1313,7 +1316,6 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
error_setg_errno(errp, -total_size, "Could not get image size");
goto out;
}
- total_size &= BDRV_SECTOR_MASK;
/* Create the temporary image */
ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
@@ -2107,6 +2109,9 @@ int bdrv_attach_dev(BlockDriverState *bs, void *dev)
}
bs->dev = dev;
bdrv_iostatus_reset(bs);
+
+ /* We're expecting I/O from the device so bump up coroutine pool size */
+ qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
return 0;
}
@@ -2126,6 +2131,7 @@ void bdrv_detach_dev(BlockDriverState *bs, void *dev)
bs->dev_ops = NULL;
bs->dev_opaque = NULL;
bs->guest_block_size = 512;
+ qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
}
/* TODO change to return DeviceState * when all users are qdevified */
@@ -2203,6 +2209,9 @@ bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
*/
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
+ if (bs->drv == NULL) {
+ return -ENOMEDIUM;
+ }
if (bs->drv->bdrv_check == NULL) {
return -ENOTSUP;
}
@@ -2269,7 +2278,14 @@ int bdrv_commit(BlockDriverState *bs)
}
total_sectors = length >> BDRV_SECTOR_BITS;
- buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+
+ /* qemu_try_blockalign() for bs will choose an alignment that works for
+ * bs->backing_hd as well, so no need to compare the alignment manually. */
+ buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+ if (buf == NULL) {
+ ret = -ENOMEM;
+ goto ro_cleanup;
+ }
for (sector = 0; sector < total_sectors; sector += n) {
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
@@ -2307,7 +2323,7 @@ int bdrv_commit(BlockDriverState *bs)
ret = 0;
ro_cleanup:
- g_free(buf);
+ qemu_vfree(buf);
if (ro) {
/* ignoring error return here */
@@ -2827,18 +2843,16 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
*/
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
- int64_t target_size;
- int64_t ret, nb_sectors, sector_num = 0;
+ int64_t target_sectors, ret, nb_sectors, sector_num = 0;
int n;
- target_size = bdrv_getlength(bs);
- if (target_size < 0) {
- return target_size;
+ target_sectors = bdrv_nb_sectors(bs);
+ if (target_sectors < 0) {
+ return target_sectors;
}
- target_size /= BDRV_SECTOR_SIZE;
for (;;) {
- nb_sectors = target_size - sector_num;
+ nb_sectors = target_sectors - sector_num;
if (nb_sectors <= 0) {
return 0;
}
@@ -2968,7 +2982,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
cluster_sector_num, cluster_nb_sectors);
iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
- iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
+ iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+ if (bounce_buffer == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
@@ -3056,15 +3075,14 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
} else {
/* Read zeros after EOF of growable BDSes */
- int64_t len, total_sectors, max_nb_sectors;
+ int64_t total_sectors, max_nb_sectors;
- len = bdrv_getlength(bs);
- if (len < 0) {
- ret = len;
+ total_sectors = bdrv_nb_sectors(bs);
+ if (total_sectors < 0) {
+ ret = total_sectors;
goto out;
}
- total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
align >> BDRV_SECTOR_BITS);
if (max_nb_sectors > 0) {
@@ -3253,7 +3271,11 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
/* Fall back to bounce buffer if write zeroes is unsupported */
iov.iov_len = num * BDRV_SECTOR_SIZE;
if (iov.iov_base == NULL) {
- iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
+ iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
+ if (iov.iov_base == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
}
qemu_iovec_init_external(&qiov, &iov, 1);
@@ -3273,6 +3295,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
nb_sectors -= num;
}
+fail:
qemu_vfree(iov.iov_base);
return ret;
}
@@ -3536,11 +3559,12 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
}
/**
- * Length of a file in bytes. Return < 0 if error or unknown.
+ * Return number of sectors on success, -errno on error.
*/
-int64_t bdrv_getlength(BlockDriverState *bs)
+int64_t bdrv_nb_sectors(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+
if (!drv)
return -ENOMEDIUM;
@@ -3550,19 +3574,26 @@ int64_t bdrv_getlength(BlockDriverState *bs)
return ret;
}
}
- return bs->total_sectors * BDRV_SECTOR_SIZE;
+ return bs->total_sectors;
+}
+
+/**
+ * Return length in bytes on success, -errno on error.
+ * The length is always a multiple of BDRV_SECTOR_SIZE.
+ */
+int64_t bdrv_getlength(BlockDriverState *bs)
+{
+ int64_t ret = bdrv_nb_sectors(bs);
+
+ return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
}
/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
- int64_t length;
- length = bdrv_getlength(bs);
- if (length < 0)
- length = 0;
- else
- length = length >> BDRV_SECTOR_BITS;
- *nb_sectors_ptr = length;
+ int64_t nb_sectors = bdrv_nb_sectors(bs);
+
+ *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
@@ -3945,21 +3976,21 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
- int64_t length;
+ int64_t total_sectors;
int64_t n;
int64_t ret, ret2;
- length = bdrv_getlength(bs);
- if (length < 0) {
- return length;
+ total_sectors = bdrv_nb_sectors(bs);
+ if (total_sectors < 0) {
+ return total_sectors;
}
- if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
+ if (sector_num >= total_sectors) {
*pnum = 0;
return 0;
}
- n = bs->total_sectors - sector_num;
+ n = total_sectors - sector_num;
if (n < nb_sectors) {
nb_sectors = n;
}
@@ -3994,8 +4025,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
ret |= BDRV_BLOCK_ZERO;
} else if (bs->backing_hd) {
BlockDriverState *bs2 = bs->backing_hd;
- int64_t length2 = bdrv_getlength(bs2);
- if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
+ int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
+ if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
ret |= BDRV_BLOCK_ZERO;
}
}
@@ -4607,8 +4638,9 @@ static void bdrv_aio_bh_cb(void *opaque)
{
BlockDriverAIOCBSync *acb = opaque;
- if (!acb->is_write)
+ if (!acb->is_write && acb->ret >= 0) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+ }
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, acb->ret);
qemu_bh_delete(acb->bh);
@@ -4630,10 +4662,12 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
acb->is_write = is_write;
acb->qiov = qiov;
- acb->bounce = qemu_blockalign(bs, qiov->size);
+ acb->bounce = qemu_try_blockalign(bs, qiov->size);
acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
- if (is_write) {
+ if (acb->bounce == NULL) {
+ acb->ret = -ENOMEM;
+ } else if (is_write) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
} else {
@@ -5247,6 +5281,19 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size)
return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
+{
+ size_t align = bdrv_opt_mem_align(bs);
+
+ /* Ensure that NULL is never returned on success */
+ assert(align > 0);
+ if (size == 0) {
+ size = align;
+ }
+
+ return qemu_try_memalign(align, size);
+}
+
/*
* Check if all memory in this vector is sector aligned.
*/
@@ -5277,13 +5324,12 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
granularity >>= BDRV_SECTOR_BITS;
assert(granularity);
- bitmap_size = bdrv_getlength(bs);
+ bitmap_size = bdrv_nb_sectors(bs);
if (bitmap_size < 0) {
error_setg_errno(errp, -bitmap_size, "could not get length of device");
errno = -bitmap_size;
return NULL;
}
- bitmap_size >>= BDRV_SECTOR_BITS;
bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
@@ -5371,6 +5417,9 @@ void bdrv_ref(BlockDriverState *bs)
* deleted. */
void bdrv_unref(BlockDriverState *bs)
{
+ if (!bs) {
+ return;
+ }
assert(bs->refcnt > 0);
if (--bs->refcnt == 0) {
bdrv_delete(bs);
@@ -5591,7 +5640,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
if (size == -1) {
if (backing_file) {
BlockDriverState *bs;
- uint64_t size;
+ int64_t size;
int back_flags;
/* backing files always opened read-only */
@@ -5609,8 +5658,13 @@ void bdrv_img_create(const char *filename, const char *fmt,
local_err = NULL;
goto out;
}
- bdrv_get_geometry(bs, &size);
- size *= 512;
+ size = bdrv_getlength(bs);
+ if (size < 0) {
+ error_setg_errno(errp, -size, "Could not get size of '%s'",
+ backing_file);
+ bdrv_unref(bs);
+ goto out;
+ }
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
diff --git a/block/Makefile.objs b/block/Makefile.objs
index fd88c03ece..858d2b387b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -17,6 +17,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
endif
@@ -35,5 +36,6 @@ gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
ssh.o-cflags := $(LIBSSH2_CFLAGS)
ssh.o-libs := $(LIBSSH2_LIBS)
+archipelago.o-libs := $(ARCHIPELAGO_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio
diff --git a/block/archipelago.c b/block/archipelago.c
new file mode 100644
index 0000000000..6629d03a1d
--- /dev/null
+++ b/block/archipelago.c
@@ -0,0 +1,1069 @@
+/*
+ * QEMU Block driver for Archipelago
+ *
+ * Copyright (C) 2014 Chrysostomos Nanakos <cnanakos@grnet.gr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/*
+ * VM Image on Archipelago volume is specified like this:
+ *
+ * file.driver=archipelago,file.volume=<volumename>
+ * [,file.mport=<mapperd_port>[,file.vport=<vlmcd_port>]
+ * [,file.segment=<segment_name>]]
+ *
+ * or
+ *
+ * file=archipelago:<volumename>[/mport=<mapperd_port>[:vport=<vlmcd_port>][:
+ * segment=<segment_name>]]
+ *
+ * 'archipelago' is the protocol.
+ *
+ * 'mport' is the port number on which mapperd is listening. This is optional
+ * and if not specified, QEMU will make Archipelago to use the default port.
+ *
+ * 'vport' is the port number on which vlmcd is listening. This is optional
+ * and if not specified, QEMU will make Archipelago to use the default port.
+ *
+ * 'segment' is the name of the shared memory segment Archipelago stack
+ * is using. This is optional and if not specified, QEMU will make Archipelago
+ * to use the default value, 'archipelago'.
+ *
+ * Examples:
+ *
+ * file.driver=archipelago,file.volume=my_vm_volume
+ * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123
+ * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
+ * file.vport=1234
+ * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
+ * file.vport=1234,file.segment=my_segment
+ *
+ * or
+ *
+ * file=archipelago:my_vm_volume
+ * file=archipelago:my_vm_volume/mport=123
+ * file=archipelago:my_vm_volume/mport=123:vport=1234
+ * file=archipelago:my_vm_volume/mport=123:vport=1234:segment=my_segment
+ *
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/error-report.h"
+#include "qemu/thread.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qjson.h"
+
+#include <inttypes.h>
+#include <xseg/xseg.h>
+#include <xseg/protocol.h>
+
+#define ARCHIP_FD_READ 0
+#define ARCHIP_FD_WRITE 1
+#define MAX_REQUEST_SIZE 524288
+
+#define ARCHIPELAGO_OPT_VOLUME "volume"
+#define ARCHIPELAGO_OPT_SEGMENT "segment"
+#define ARCHIPELAGO_OPT_MPORT "mport"
+#define ARCHIPELAGO_OPT_VPORT "vport"
+#define ARCHIPELAGO_DFL_MPORT 1001
+#define ARCHIPELAGO_DFL_VPORT 501
+
+#define archipelagolog(fmt, ...) \
+ do { \
+ fprintf(stderr, "archipelago\t%-24s: " fmt, __func__, ##__VA_ARGS__); \
+ } while (0)
+
+typedef enum {
+ ARCHIP_OP_READ,
+ ARCHIP_OP_WRITE,
+ ARCHIP_OP_FLUSH,
+ ARCHIP_OP_VOLINFO,
+} ARCHIPCmd;
+
+typedef struct ArchipelagoAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+ struct BDRVArchipelagoState *s;
+ QEMUIOVector *qiov;
+ ARCHIPCmd cmd;
+ bool cancelled;
+ int status;
+ int64_t size;
+ int64_t ret;
+} ArchipelagoAIOCB;
+
+typedef struct BDRVArchipelagoState {
+ ArchipelagoAIOCB *event_acb;
+ char *volname;
+ char *segment_name;
+ uint64_t size;
+ /* Archipelago specific */
+ struct xseg *xseg;
+ struct xseg_port *port;
+ xport srcport;
+ xport sport;
+ xport mportno;
+ xport vportno;
+ QemuMutex archip_mutex;
+ QemuCond archip_cond;
+ bool is_signaled;
+ /* Request handler specific */
+ QemuThread request_th;
+ QemuCond request_cond;
+ QemuMutex request_mutex;
+ bool th_is_signaled;
+ bool stopping;
+} BDRVArchipelagoState;
+
+typedef struct ArchipelagoSegmentedRequest {
+ size_t count;
+ size_t total;
+ int ref;
+ int failed;
+} ArchipelagoSegmentedRequest;
+
+typedef struct AIORequestData {
+ const char *volname;
+ off_t offset;
+ size_t size;
+ uint64_t bufidx;
+ int ret;
+ int op;
+ ArchipelagoAIOCB *aio_cb;
+ ArchipelagoSegmentedRequest *segreq;
+} AIORequestData;
+
+static void qemu_archipelago_complete_aio(void *opaque);
+
+static void init_local_signal(struct xseg *xseg, xport sport, xport srcport)
+{
+ if (xseg && (sport != srcport)) {
+ xseg_init_local_signal(xseg, srcport);
+ sport = srcport;
+ }
+}
+
+static void archipelago_finish_aiocb(AIORequestData *reqdata)
+{
+ if (reqdata->aio_cb->ret != reqdata->segreq->total) {
+ reqdata->aio_cb->ret = -EIO;
+ } else if (reqdata->aio_cb->ret == reqdata->segreq->total) {
+ reqdata->aio_cb->ret = 0;
+ }
+ reqdata->aio_cb->bh = aio_bh_new(
+ bdrv_get_aio_context(reqdata->aio_cb->common.bs),
+ qemu_archipelago_complete_aio, reqdata
+ );
+ qemu_bh_schedule(reqdata->aio_cb->bh);
+}
+
+static int wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port,
+ struct xseg_request *expected_req)
+{
+ struct xseg_request *req;
+ xseg_prepare_wait(xseg, srcport);
+ void *psd = xseg_get_signal_desc(xseg, port);
+ while (1) {
+ req = xseg_receive(xseg, srcport, X_NONBLOCK);
+ if (req) {
+ if (req != expected_req) {
+ archipelagolog("Unknown received request\n");
+ xseg_put_request(xseg, req, srcport);
+ } else if (!(req->state & XS_SERVED)) {
+ return -1;
+ } else {
+ break;
+ }
+ }
+ xseg_wait_signal(xseg, psd, 100000UL);
+ }
+ xseg_cancel_wait(xseg, srcport);
+ return 0;
+}
+
+static void xseg_request_handler(void *state)
+{
+ BDRVArchipelagoState *s = (BDRVArchipelagoState *) state;
+ void *psd = xseg_get_signal_desc(s->xseg, s->port);
+ qemu_mutex_lock(&s->request_mutex);
+
+ while (!s->stopping) {
+ struct xseg_request *req;
+ void *data;
+ xseg_prepare_wait(s->xseg, s->srcport);
+ req = xseg_receive(s->xseg, s->srcport, X_NONBLOCK);
+ if (req) {
+ AIORequestData *reqdata;
+ ArchipelagoSegmentedRequest *segreq;
+ xseg_get_req_data(s->xseg, req, (void **)&reqdata);
+
+ switch (reqdata->op) {
+ case ARCHIP_OP_READ:
+ data = xseg_get_data(s->xseg, req);
+ segreq = reqdata->segreq;
+ segreq->count += req->serviced;
+
+ qemu_iovec_from_buf(reqdata->aio_cb->qiov, reqdata->bufidx,
+ data,
+ req->serviced);
+
+ xseg_put_request(s->xseg, req, s->srcport);
+
+ if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) {
+ if (!segreq->failed) {
+ reqdata->aio_cb->ret = segreq->count;
+ archipelago_finish_aiocb(reqdata);
+ g_free(segreq);
+ } else {
+ g_free(segreq);
+ g_free(reqdata);
+ }
+ } else {
+ g_free(reqdata);
+ }
+ break;
+ case ARCHIP_OP_WRITE:
+ case ARCHIP_OP_FLUSH:
+ segreq = reqdata->segreq;
+ segreq->count += req->serviced;
+ xseg_put_request(s->xseg, req, s->srcport);
+
+ if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) {
+ if (!segreq->failed) {
+ reqdata->aio_cb->ret = segreq->count;
+ archipelago_finish_aiocb(reqdata);
+ g_free(segreq);
+ } else {
+ g_free(segreq);
+ g_free(reqdata);
+ }
+ } else {
+ g_free(reqdata);
+ }
+ break;
+ case ARCHIP_OP_VOLINFO:
+ s->is_signaled = true;
+ qemu_cond_signal(&s->archip_cond);
+ break;
+ }
+ } else {
+ xseg_wait_signal(s->xseg, psd, 100000UL);
+ }
+ xseg_cancel_wait(s->xseg, s->srcport);
+ }
+
+ s->th_is_signaled = true;
+ qemu_cond_signal(&s->request_cond);
+ qemu_mutex_unlock(&s->request_mutex);
+ qemu_thread_exit(NULL);
+}
+
+static int qemu_archipelago_xseg_init(BDRVArchipelagoState *s)
+{
+ if (xseg_initialize()) {
+ archipelagolog("Cannot initialize XSEG\n");
+ goto err_exit;
+ }
+
+ s->xseg = xseg_join("posix", s->segment_name,
+ "posixfd", NULL);
+ if (!s->xseg) {
+ archipelagolog("Cannot join XSEG shared memory segment\n");
+ goto err_exit;
+ }
+ s->port = xseg_bind_dynport(s->xseg);
+ s->srcport = s->port->portno;
+ init_local_signal(s->xseg, s->sport, s->srcport);
+ return 0;
+
+err_exit:
+ return -1;
+}
+
+static int qemu_archipelago_init(BDRVArchipelagoState *s)
+{
+ int ret;
+
+ ret = qemu_archipelago_xseg_init(s);
+ if (ret < 0) {
+ error_report("Cannot initialize XSEG. Aborting...\n");
+ goto err_exit;
+ }
+
+ qemu_cond_init(&s->archip_cond);
+ qemu_mutex_init(&s->archip_mutex);
+ qemu_cond_init(&s->request_cond);
+ qemu_mutex_init(&s->request_mutex);
+ s->th_is_signaled = false;
+ qemu_thread_create(&s->request_th, "xseg_io_th",
+ (void *) xseg_request_handler,
+ (void *) s, QEMU_THREAD_JOINABLE);
+
+err_exit:
+ return ret;
+}
+
+static void qemu_archipelago_complete_aio(void *opaque)
+{
+ AIORequestData *reqdata = (AIORequestData *) opaque;
+ ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
+
+ qemu_bh_delete(aio_cb->bh);
+ aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
+ aio_cb->status = 0;
+
+ if (!aio_cb->cancelled) {
+ qemu_aio_release(aio_cb);
+ }
+ g_free(reqdata);
+}
+
+static void xseg_find_port(char *pstr, const char *needle, xport *aport)
+{
+ const char *a;
+ char *endptr = NULL;
+ unsigned long port;
+ if (strstart(pstr, needle, &a)) {
+ if (strlen(a) > 0) {
+ port = strtoul(a, &endptr, 10);
+ if (strlen(endptr)) {
+ *aport = -2;
+ return;
+ }
+ *aport = (xport) port;
+ }
+ }
+}
+
+static void xseg_find_segment(char *pstr, const char *needle,
+ char **segment_name)
+{
+ const char *a;
+ if (strstart(pstr, needle, &a)) {
+ if (strlen(a) > 0) {
+ *segment_name = g_strdup(a);
+ }
+ }
+}
+
+static void parse_filename_opts(const char *filename, Error **errp,
+ char **volume, char **segment_name,
+ xport *mport, xport *vport)
+{
+ const char *start;
+ char *tokens[4], *ds;
+ int idx;
+ xport lmport = NoPort, lvport = NoPort;
+
+ strstart(filename, "archipelago:", &start);
+
+ ds = g_strdup(start);
+ tokens[0] = strtok(ds, "/");
+ tokens[1] = strtok(NULL, ":");
+ tokens[2] = strtok(NULL, ":");
+ tokens[3] = strtok(NULL, "\0");
+
+ if (!strlen(tokens[0])) {
+ error_setg(errp, "volume name must be specified first");
+ g_free(ds);
+ return;
+ }
+
+ for (idx = 1; idx < 4; idx++) {
+ if (tokens[idx] != NULL) {
+ if (strstart(tokens[idx], "mport=", NULL)) {
+ xseg_find_port(tokens[idx], "mport=", &lmport);
+ }
+ if (strstart(tokens[idx], "vport=", NULL)) {
+ xseg_find_port(tokens[idx], "vport=", &lvport);
+ }
+ if (strstart(tokens[idx], "segment=", NULL)) {
+ xseg_find_segment(tokens[idx], "segment=", segment_name);
+ }
+ }
+ }
+
+ if ((lmport == -2) || (lvport == -2)) {
+ error_setg(errp, "mport and/or vport must be set");
+ g_free(ds);
+ return;
+ }
+ *volume = g_strdup(tokens[0]);
+ *mport = lmport;
+ *vport = lvport;
+ g_free(ds);
+}
+
+static void archipelago_parse_filename(const char *filename, QDict *options,
+ Error **errp)
+{
+ const char *start;
+ char *volume = NULL, *segment_name = NULL;
+ xport mport = NoPort, vport = NoPort;
+
+ if (qdict_haskey(options, ARCHIPELAGO_OPT_VOLUME)
+ || qdict_haskey(options, ARCHIPELAGO_OPT_SEGMENT)
+ || qdict_haskey(options, ARCHIPELAGO_OPT_MPORT)
+ || qdict_haskey(options, ARCHIPELAGO_OPT_VPORT)) {
+ error_setg(errp, "volume/mport/vport/segment and a file name may not"
+ " be specified at the same time");
+ return;
+ }
+
+ if (!strstart(filename, "archipelago:", &start)) {
+ error_setg(errp, "File name must start with 'archipelago:'");
+ return;
+ }
+
+ if (!strlen(start) || strstart(start, "/", NULL)) {
+ error_setg(errp, "volume name must be specified");
+ return;
+ }
+
+ parse_filename_opts(filename, errp, &volume, &segment_name, &mport, &vport);
+
+ if (volume) {
+ qdict_put(options, ARCHIPELAGO_OPT_VOLUME, qstring_from_str(volume));
+ g_free(volume);
+ }
+ if (segment_name) {
+ qdict_put(options, ARCHIPELAGO_OPT_SEGMENT,
+ qstring_from_str(segment_name));
+ g_free(segment_name);
+ }
+ if (mport != NoPort) {
+ qdict_put(options, ARCHIPELAGO_OPT_MPORT, qint_from_int(mport));
+ }
+ if (vport != NoPort) {
+ qdict_put(options, ARCHIPELAGO_OPT_VPORT, qint_from_int(vport));
+ }
+}
+
+static QemuOptsList archipelago_runtime_opts = {
+ .name = "archipelago",
+ .head = QTAILQ_HEAD_INITIALIZER(archipelago_runtime_opts.head),
+ .desc = {
+ {
+ .name = ARCHIPELAGO_OPT_VOLUME,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of the volume image",
+ },
+ {
+ .name = ARCHIPELAGO_OPT_SEGMENT,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of the Archipelago shared memory segment",
+ },
+ {
+ .name = ARCHIPELAGO_OPT_MPORT,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Archipelago mapperd port number"
+ },
+ {
+ .name = ARCHIPELAGO_OPT_VPORT,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Archipelago vlmcd port number"
+
+ },
+ { /* end of list */ }
+ },
+};
+
+static int qemu_archipelago_open(BlockDriverState *bs,
+ QDict *options,
+ int bdrv_flags,
+ Error **errp)
+{
+ int ret = 0;
+ const char *volume, *segment_name;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+ BDRVArchipelagoState *s = bs->opaque;
+
+ opts = qemu_opts_create(&archipelago_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ s->mportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_MPORT,
+ ARCHIPELAGO_DFL_MPORT);
+ s->vportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_VPORT,
+ ARCHIPELAGO_DFL_VPORT);
+
+ segment_name = qemu_opt_get(opts, ARCHIPELAGO_OPT_SEGMENT);
+ if (segment_name == NULL) {
+ s->segment_name = g_strdup("archipelago");
+ } else {
+ s->segment_name = g_strdup(segment_name);
+ }
+
+ volume = qemu_opt_get(opts, ARCHIPELAGO_OPT_VOLUME);
+ if (volume == NULL) {
+ error_setg(errp, "archipelago block driver requires the 'volume'"
+ " option");
+ ret = -EINVAL;
+ goto err_exit;
+ }
+ s->volname = g_strdup(volume);
+
+ /* Initialize XSEG, join shared memory segment */
+ ret = qemu_archipelago_init(s);
+ if (ret < 0) {
+ error_setg(errp, "cannot initialize XSEG and join shared "
+ "memory segment");
+ goto err_exit;
+ }
+
+ qemu_opts_del(opts);
+ return 0;
+
+err_exit:
+ g_free(s->volname);
+ g_free(s->segment_name);
+ qemu_opts_del(opts);
+ return ret;
+}
+
+static void qemu_archipelago_close(BlockDriverState *bs)
+{
+ int r, targetlen;
+ char *target;
+ struct xseg_request *req;
+ BDRVArchipelagoState *s = bs->opaque;
+
+ s->stopping = true;
+
+ qemu_mutex_lock(&s->request_mutex);
+ while (!s->th_is_signaled) {
+ qemu_cond_wait(&s->request_cond,
+ &s->request_mutex);
+ }
+ qemu_mutex_unlock(&s->request_mutex);
+ qemu_thread_join(&s->request_th);
+ qemu_cond_destroy(&s->request_cond);
+ qemu_mutex_destroy(&s->request_mutex);
+
+ qemu_cond_destroy(&s->archip_cond);
+ qemu_mutex_destroy(&s->archip_mutex);
+
+ targetlen = strlen(s->volname);
+ req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
+ if (!req) {
+ archipelagolog("Cannot get XSEG request\n");
+ goto err_exit;
+ }
+ r = xseg_prep_request(s->xseg, req, targetlen, 0);
+ if (r < 0) {
+ xseg_put_request(s->xseg, req, s->srcport);
+ archipelagolog("Cannot prepare XSEG close request\n");
+ goto err_exit;
+ }
+
+ target = xseg_get_target(s->xseg, req);
+ memcpy(target, s->volname, targetlen);
+ req->size = req->datalen;
+ req->offset = 0;
+ req->op = X_CLOSE;
+
+ xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+ if (p == NoPort) {
+ xseg_put_request(s->xseg, req, s->srcport);
+ archipelagolog("Cannot submit XSEG close request\n");
+ goto err_exit;
+ }
+
+ xseg_signal(s->xseg, p);
+ wait_reply(s->xseg, s->srcport, s->port, req);
+
+ xseg_put_request(s->xseg, req, s->srcport);
+
+err_exit:
+ g_free(s->volname);
+ g_free(s->segment_name);
+ xseg_quit_local_signal(s->xseg, s->srcport);
+ xseg_leave_dynport(s->xseg, s->port);
+ xseg_leave(s->xseg);
+}
+
+static int qemu_archipelago_create_volume(Error **errp, const char *volname,
+ char *segment_name,
+ uint64_t size, xport mportno,
+ xport vportno)
+{
+ int ret, targetlen;
+ struct xseg *xseg = NULL;
+ struct xseg_request *req;
+ struct xseg_request_clone *xclone;
+ struct xseg_port *port;
+ xport srcport = NoPort, sport = NoPort;
+ char *target;
+
+ /* Try default values if none has been set */
+ if (mportno == (xport) -1) {
+ mportno = ARCHIPELAGO_DFL_MPORT;
+ }
+
+ if (vportno == (xport) -1) {
+ vportno = ARCHIPELAGO_DFL_VPORT;
+ }
+
+ if (xseg_initialize()) {
+ error_setg(errp, "Cannot initialize XSEG");
+ return -1;
+ }
+
+ xseg = xseg_join("posix", segment_name,
+ "posixfd", NULL);
+
+ if (!xseg) {
+ error_setg(errp, "Cannot join XSEG shared memory segment");
+ return -1;
+ }
+
+ port = xseg_bind_dynport(xseg);
+ srcport = port->portno;
+ init_local_signal(xseg, sport, srcport);
+
+ req = xseg_get_request(xseg, srcport, mportno, X_ALLOC);
+ if (!req) {
+ error_setg(errp, "Cannot get XSEG request");
+ return -1;
+ }
+
+ targetlen = strlen(volname);
+ ret = xseg_prep_request(xseg, req, targetlen,
+ sizeof(struct xseg_request_clone));
+ if (ret < 0) {
+ error_setg(errp, "Cannot prepare XSEG request");
+ goto err_exit;
+ }
+
+ target = xseg_get_target(xseg, req);
+ if (!target) {
+ error_setg(errp, "Cannot get XSEG target.\n");
+ goto err_exit;
+ }
+ memcpy(target, volname, targetlen);
+ xclone = (struct xseg_request_clone *) xseg_get_data(xseg, req);
+ memset(xclone->target, 0 , XSEG_MAX_TARGETLEN);
+ xclone->targetlen = 0;
+ xclone->size = size;
+ req->offset = 0;
+ req->size = req->datalen;
+ req->op = X_CLONE;
+
+ xport p = xseg_submit(xseg, req, srcport, X_ALLOC);
+ if (p == NoPort) {
+ error_setg(errp, "Could not submit XSEG request");
+ goto err_exit;
+ }
+ xseg_signal(xseg, p);
+
+ ret = wait_reply(xseg, srcport, port, req);
+ if (ret < 0) {
+ error_setg(errp, "wait_reply() error.");
+ }
+
+ xseg_put_request(xseg, req, srcport);
+ xseg_quit_local_signal(xseg, srcport);
+ xseg_leave_dynport(xseg, port);
+ xseg_leave(xseg);
+ return ret;
+
+err_exit:
+ xseg_put_request(xseg, req, srcport);
+ xseg_quit_local_signal(xseg, srcport);
+ xseg_leave_dynport(xseg, port);
+ xseg_leave(xseg);
+ return -1;
+}
+
+static int qemu_archipelago_create(const char *filename,
+ QemuOpts *options,
+ Error **errp)
+{
+ int ret = 0;
+ uint64_t total_size = 0;
+ char *volname = NULL, *segment_name = NULL;
+ const char *start;
+ xport mport = NoPort, vport = NoPort;
+
+ if (!strstart(filename, "archipelago:", &start)) {
+ error_setg(errp, "File name must start with 'archipelago:'");
+ return -1;
+ }
+
+ if (!strlen(start) || strstart(start, "/", NULL)) {
+ error_setg(errp, "volume name must be specified");
+ return -1;
+ }
+
+ parse_filename_opts(filename, errp, &volname, &segment_name, &mport,
+ &vport);
+ total_size = qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0);
+
+ if (segment_name == NULL) {
+ segment_name = g_strdup("archipelago");
+ }
+
+ /* Create an Archipelago volume */
+ ret = qemu_archipelago_create_volume(errp, volname, segment_name,
+ total_size, mport,
+ vport);
+
+ g_free(volname);
+ g_free(segment_name);
+ return ret;
+}
+
+static void qemu_archipelago_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+ ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) blockacb;
+ aio_cb->cancelled = true;
+ while (aio_cb->status == -EINPROGRESS) {
+ aio_poll(bdrv_get_aio_context(aio_cb->common.bs), true);
+ }
+ qemu_aio_release(aio_cb);
+}
+
+static const AIOCBInfo archipelago_aiocb_info = {
+ .aiocb_size = sizeof(ArchipelagoAIOCB),
+ .cancel = qemu_archipelago_aio_cancel,
+};
+
+static int archipelago_submit_request(BDRVArchipelagoState *s,
+ uint64_t bufidx,
+ size_t count,
+ off_t offset,
+ ArchipelagoAIOCB *aio_cb,
+ ArchipelagoSegmentedRequest *segreq,
+ int op)
+{
+ int ret, targetlen;
+ char *target;
+ void *data = NULL;
+ struct xseg_request *req;
+ AIORequestData *reqdata = g_malloc(sizeof(AIORequestData));
+
+ targetlen = strlen(s->volname);
+ req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
+ if (!req) {
+ archipelagolog("Cannot get XSEG request\n");
+ goto err_exit2;
+ }
+ ret = xseg_prep_request(s->xseg, req, targetlen, count);
+ if (ret < 0) {
+ archipelagolog("Cannot prepare XSEG request\n");
+ goto err_exit;
+ }
+ target = xseg_get_target(s->xseg, req);
+ if (!target) {
+ archipelagolog("Cannot get XSEG target\n");
+ goto err_exit;
+ }
+ memcpy(target, s->volname, targetlen);
+ req->size = count;
+ req->offset = offset;
+
+ switch (op) {
+ case ARCHIP_OP_READ:
+ req->op = X_READ;
+ break;
+ case ARCHIP_OP_WRITE:
+ req->op = X_WRITE;
+ break;
+ case ARCHIP_OP_FLUSH:
+ req->op = X_FLUSH;
+ break;
+ }
+ reqdata->volname = s->volname;
+ reqdata->offset = offset;
+ reqdata->size = count;
+ reqdata->bufidx = bufidx;
+ reqdata->aio_cb = aio_cb;
+ reqdata->segreq = segreq;
+ reqdata->op = op;
+
+ xseg_set_req_data(s->xseg, req, reqdata);
+ if (op == ARCHIP_OP_WRITE) {
+ data = xseg_get_data(s->xseg, req);
+ if (!data) {
+ archipelagolog("Cannot get XSEG data\n");
+ goto err_exit;
+ }
+ qemu_iovec_to_buf(aio_cb->qiov, bufidx, data, count);
+ }
+
+ xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+ if (p == NoPort) {
+ archipelagolog("Could not submit XSEG request\n");
+ goto err_exit;
+ }
+ xseg_signal(s->xseg, p);
+ return 0;
+
+err_exit:
+ g_free(reqdata);
+ xseg_put_request(s->xseg, req, s->srcport);
+ return -EIO;
+err_exit2:
+ g_free(reqdata);
+ return -EIO;
+}
+
+static int archipelago_aio_segmented_rw(BDRVArchipelagoState *s,
+ size_t count,
+ off_t offset,
+ ArchipelagoAIOCB *aio_cb,
+ int op)
+{
+ int i, ret, segments_nr, last_segment_size;
+ ArchipelagoSegmentedRequest *segreq;
+
+ segreq = g_malloc(sizeof(ArchipelagoSegmentedRequest));
+
+ if (op == ARCHIP_OP_FLUSH) {
+ segments_nr = 1;
+ segreq->ref = segments_nr;
+ segreq->total = count;
+ segreq->count = 0;
+ segreq->failed = 0;
+ ret = archipelago_submit_request(s, 0, count, offset, aio_cb,
+ segreq, ARCHIP_OP_FLUSH);
+ if (ret < 0) {
+ goto err_exit;
+ }
+ return 0;
+ }
+
+ segments_nr = (int)(count / MAX_REQUEST_SIZE) + \
+ ((count % MAX_REQUEST_SIZE) ? 1 : 0);
+ last_segment_size = (int)(count % MAX_REQUEST_SIZE);
+
+ segreq->ref = segments_nr;
+ segreq->total = count;
+ segreq->count = 0;
+ segreq->failed = 0;
+
+ for (i = 0; i < segments_nr - 1; i++) {
+ ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
+ MAX_REQUEST_SIZE,
+ offset + i * MAX_REQUEST_SIZE,
+ aio_cb, segreq, op);
+
+ if (ret < 0) {
+ goto err_exit;
+ }
+ }
+
+ if ((segments_nr > 1) && last_segment_size) {
+ ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
+ last_segment_size,
+ offset + i * MAX_REQUEST_SIZE,
+ aio_cb, segreq, op);
+ } else if ((segments_nr > 1) && !last_segment_size) {
+ ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
+ MAX_REQUEST_SIZE,
+ offset + i * MAX_REQUEST_SIZE,
+ aio_cb, segreq, op);
+ } else if (segments_nr == 1) {
+ ret = archipelago_submit_request(s, 0, count, offset, aio_cb,
+ segreq, op);
+ }
+
+ if (ret < 0) {
+ goto err_exit;
+ }
+
+ return 0;
+
+err_exit:
+ __sync_add_and_fetch(&segreq->failed, 1);
+ if (segments_nr == 1) {
+ if (__sync_add_and_fetch(&segreq->ref, -1) == 0) {
+ g_free(segreq);
+ }
+ } else {
+ if ((__sync_add_and_fetch(&segreq->ref, -segments_nr + i)) == 0) {
+ g_free(segreq);
+ }
+ }
+
+ return ret;
+}
+
+static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ int op)
+{
+ ArchipelagoAIOCB *aio_cb;
+ BDRVArchipelagoState *s = bs->opaque;
+ int64_t size, off;
+ int ret;
+
+ aio_cb = qemu_aio_get(&archipelago_aiocb_info, bs, cb, opaque);
+ aio_cb->cmd = op;
+ aio_cb->qiov = qiov;
+
+ aio_cb->ret = 0;
+ aio_cb->s = s;
+ aio_cb->cancelled = false;
+ aio_cb->status = -EINPROGRESS;
+
+ off = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+ aio_cb->size = size;
+
+ ret = archipelago_aio_segmented_rw(s, size, off,
+ aio_cb, op);
+ if (ret < 0) {
+ goto err_exit;
+ }
+ return &aio_cb->common;
+
+err_exit:
+ error_report("qemu_archipelago_aio_rw(): I/O Error\n");
+ qemu_aio_release(aio_cb);
+ return NULL;
+}
+
+static BlockDriverAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+ opaque, ARCHIP_OP_READ);
+}
+
+static BlockDriverAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+ opaque, ARCHIP_OP_WRITE);
+}
+
+static int64_t archipelago_volume_info(BDRVArchipelagoState *s)
+{
+ uint64_t size;
+ int ret, targetlen;
+ struct xseg_request *req;
+ struct xseg_reply_info *xinfo;
+ AIORequestData *reqdata = g_malloc(sizeof(AIORequestData));
+
+ const char *volname = s->volname;
+ targetlen = strlen(volname);
+ req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
+ if (!req) {
+ archipelagolog("Cannot get XSEG request\n");
+ goto err_exit2;
+ }
+ ret = xseg_prep_request(s->xseg, req, targetlen,
+ sizeof(struct xseg_reply_info));
+ if (ret < 0) {
+ archipelagolog("Cannot prepare XSEG request\n");
+ goto err_exit;
+ }
+ char *target = xseg_get_target(s->xseg, req);
+ if (!target) {
+ archipelagolog("Cannot get XSEG target\n");
+ goto err_exit;
+ }
+ memcpy(target, volname, targetlen);
+ req->size = req->datalen;
+ req->offset = 0;
+ req->op = X_INFO;
+
+ reqdata->op = ARCHIP_OP_VOLINFO;
+ reqdata->volname = volname;
+ xseg_set_req_data(s->xseg, req, reqdata);
+
+ xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+ if (p == NoPort) {
+ archipelagolog("Cannot submit XSEG request\n");
+ goto err_exit;
+ }
+ xseg_signal(s->xseg, p);
+ qemu_mutex_lock(&s->archip_mutex);
+ while (!s->is_signaled) {
+ qemu_cond_wait(&s->archip_cond, &s->archip_mutex);
+ }
+ s->is_signaled = false;
+ qemu_mutex_unlock(&s->archip_mutex);
+
+ xinfo = (struct xseg_reply_info *) xseg_get_data(s->xseg, req);
+ size = xinfo->size;
+ xseg_put_request(s->xseg, req, s->srcport);
+ g_free(reqdata);
+ s->size = size;
+ return size;
+
+err_exit:
+ xseg_put_request(s->xseg, req, s->srcport);
+err_exit2:
+ g_free(reqdata);
+ return -EIO;
+}
+
+static int64_t qemu_archipelago_getlength(BlockDriverState *bs)
+{
+ int64_t ret;
+ BDRVArchipelagoState *s = bs->opaque;
+
+ ret = archipelago_volume_info(s);
+ return ret;
+}
+
+static QemuOptsList qemu_archipelago_create_opts = {
+ .name = "archipelago-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_archipelago_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { /* end of list */ }
+ }
+};
+
+static BlockDriverAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_archipelago_aio_rw(bs, 0, NULL, 0, cb, opaque,
+ ARCHIP_OP_FLUSH);
+}
+
+static BlockDriver bdrv_archipelago = {
+ .format_name = "archipelago",
+ .protocol_name = "archipelago",
+ .instance_size = sizeof(BDRVArchipelagoState),
+ .bdrv_parse_filename = archipelago_parse_filename,
+ .bdrv_file_open = qemu_archipelago_open,
+ .bdrv_close = qemu_archipelago_close,
+ .bdrv_create = qemu_archipelago_create,
+ .bdrv_getlength = qemu_archipelago_getlength,
+ .bdrv_aio_readv = qemu_archipelago_aio_readv,
+ .bdrv_aio_writev = qemu_archipelago_aio_writev,
+ .bdrv_aio_flush = qemu_archipelago_aio_flush,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .create_opts = &qemu_archipelago_create_opts,
+};
+
+static void bdrv_archipelago_init(void)
+{
+ bdrv_register(&bdrv_archipelago);
+}
+
+block_init(bdrv_archipelago_init);
diff --git a/block/bochs.c b/block/bochs.c
index eba23df335..6674b27438 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -131,7 +131,11 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
return -EFBIG;
}
- s->catalog_bitmap = g_malloc(s->catalog_size * 4);
+ s->catalog_bitmap = g_try_malloc(s->catalog_size * 4);
+ if (s->catalog_size && s->catalog_bitmap == NULL) {
+ error_setg(errp, "Could not allocate memory for catalog");
+ return -ENOMEM;
+ }
ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
s->catalog_size * 4);
diff --git a/block/cloop.c b/block/cloop.c
index 8457737922..f328be06f8 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -116,7 +116,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
"try increasing block size");
return -EINVAL;
}
- s->offsets = g_malloc(offsets_size);
+
+ s->offsets = g_try_malloc(offsets_size);
+ if (s->offsets == NULL) {
+ error_setg(errp, "Could not allocate offsets table");
+ return -ENOMEM;
+ }
ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
if (ret < 0) {
@@ -158,8 +163,20 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
}
/* initialize zlib engine */
- s->compressed_block = g_malloc(max_compressed_block_size + 1);
- s->uncompressed_block = g_malloc(s->block_size);
+ s->compressed_block = g_try_malloc(max_compressed_block_size + 1);
+ if (s->compressed_block == NULL) {
+ error_setg(errp, "Could not allocate compressed_block");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ s->uncompressed_block = g_try_malloc(s->block_size);
+ if (s->uncompressed_block == NULL) {
+ error_setg(errp, "Could not allocate uncompressed_block");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
if (inflateInit(&s->zstream) != Z_OK) {
ret = -EINVAL;
goto fail;
diff --git a/block/curl.c b/block/curl.c
index 79ff2f1e41..d4b85d20a5 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -640,7 +640,13 @@ static void curl_readv_bh_cb(void *p)
state->buf_start = start;
state->buf_len = acb->end + s->readahead_size;
end = MIN(start + state->buf_len, s->len) - 1;
- state->orig_buf = g_malloc(state->buf_len);
+ state->orig_buf = g_try_malloc(state->buf_len);
+ if (state->buf_len && state->orig_buf == NULL) {
+ curl_clean_state(state);
+ acb->common.cb(acb->common.opaque, -ENOMEM);
+ qemu_aio_release(acb);
+ return;
+ }
state->acb[0] = acb;
snprintf(state->range, 127, "%zd-%zd", start, end);
diff --git a/block/dmg.c b/block/dmg.c
index 1e153cd76d..e455886d2b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -284,8 +284,15 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
/* initialize zlib engine */
- s->compressed_chunk = g_malloc(max_compressed_size + 1);
- s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
+ s->compressed_chunk = qemu_try_blockalign(bs->file,
+ max_compressed_size + 1);
+ s->uncompressed_chunk = qemu_try_blockalign(bs->file,
+ 512 * max_sectors_per_chunk);
+ if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
if (inflateInit(&s->zstream) != Z_OK) {
ret = -EINVAL;
goto fail;
@@ -302,8 +309,8 @@ fail:
g_free(s->lengths);
g_free(s->sectors);
g_free(s->sectorcounts);
- g_free(s->compressed_chunk);
- g_free(s->uncompressed_chunk);
+ qemu_vfree(s->compressed_chunk);
+ qemu_vfree(s->uncompressed_chunk);
return ret;
}
@@ -426,8 +433,8 @@ static void dmg_close(BlockDriverState *bs)
g_free(s->lengths);
g_free(s->sectors);
g_free(s->sectorcounts);
- g_free(s->compressed_chunk);
- g_free(s->uncompressed_chunk);
+ qemu_vfree(s->compressed_chunk);
+ qemu_vfree(s->uncompressed_chunk);
inflateEnd(&s->zstream);
}
diff --git a/block/iscsi.c b/block/iscsi.c
index a7bb6970ac..2c9cfc18eb 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -893,7 +893,10 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
if (iscsilun->zeroblock == NULL) {
- iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
+ iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
+ if (iscsilun->zeroblock == NULL) {
+ return -ENOMEM;
+ }
}
iscsi_co_init_iscsitask(iscsilun, &iTask);
diff --git a/block/mirror.c b/block/mirror.c
index c7a655fc58..5e7a166b39 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -367,7 +367,12 @@ static void coroutine_fn mirror_run(void *opaque)
}
end = s->common.len >> BDRV_SECTOR_BITS;
- s->buf = qemu_blockalign(bs, s->buf_size);
+ s->buf = qemu_try_blockalign(bs, s->buf_size);
+ if (s->buf == NULL) {
+ ret = -ENOMEM;
+ goto immediate_exit;
+ }
+
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
mirror_free_init(s);
diff --git a/block/nfs.c b/block/nfs.c
index 8439e0d389..fe46c33709 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -172,7 +172,11 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
nfs_co_init_task(client, &task);
- buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+ buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+ if (nb_sectors && buf == NULL) {
+ return -ENOMEM;
+ }
+
qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
if (nfs_pwrite_async(client->context, client->fh,
diff --git a/block/parallels.c b/block/parallels.c
index 1a5bd350b3..7325678a4d 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -105,7 +105,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EFBIG;
goto fail;
}
- s->catalog_bitmap = g_malloc(s->catalog_size * 4);
+ s->catalog_bitmap = g_try_malloc(s->catalog_size * 4);
+ if (s->catalog_size && s->catalog_bitmap == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);
if (ret < 0) {
diff --git a/block/qapi.c b/block/qapi.c
index f44f6b4012..79d1e6a9f4 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -28,6 +28,13 @@
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
#include "qapi/qmp/types.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#endif
+#endif
BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
{
@@ -165,19 +172,28 @@ void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
Error **errp)
{
- uint64_t total_sectors;
+ int64_t size;
const char *backing_filename;
char backing_filename2[1024];
BlockDriverInfo bdi;
int ret;
Error *err = NULL;
- ImageInfo *info = g_new0(ImageInfo, 1);
-
- bdrv_get_geometry(bs, &total_sectors);
+ ImageInfo *info;
+#ifdef __linux__
+ int fd, attr;
+#endif
+
+ size = bdrv_getlength(bs);
+ if (size < 0) {
+ error_setg_errno(errp, -size, "Can't get size of device '%s'",
+ bdrv_get_device_name(bs));
+ return;
+ }
+ info = g_new0(ImageInfo, 1);
info->filename = g_strdup(bs->filename);
info->format = g_strdup(bdrv_get_format_name(bs));
- info->virtual_size = total_sectors * 512;
+ info->virtual_size = size;
info->actual_size = bdrv_get_allocated_file_size(bs);
info->has_actual_size = info->actual_size >= 0;
if (bdrv_is_encrypted(bs)) {
@@ -195,6 +211,18 @@ void bdrv_query_image_info(BlockDriverState *bs,
info->format_specific = bdrv_get_specific_info(bs);
info->has_format_specific = info->format_specific != NULL;
+#ifdef __linux__
+ /* get NOCOW info */
+ fd = qemu_open(bs->filename, O_RDONLY | O_NONBLOCK);
+ if (fd >= 0) {
+ if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0 && (attr & FS_NOCOW_FL)) {
+ info->has_nocow = true;
+ info->nocow = true;
+ }
+ qemu_close(fd);
+ }
+#endif
+
backing_filename = bs->backing_file;
if (backing_filename[0] != '\0') {
info->backing_filename = g_strdup(backing_filename);
@@ -625,4 +653,8 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
func_fprintf(f, "Format specific information:\n");
bdrv_image_info_specific_dump(func_fprintf, f, info->format_specific);
}
+
+ if (info->has_nocow && info->nocow) {
+ func_fprintf(f, "NOCOW flag: set\n");
+ }
}
diff --git a/block/qcow.c b/block/qcow.c
index a874056cf3..67332f03c1 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -182,7 +182,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
}
s->l1_table_offset = header.l1_table_offset;
- s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+ s->l1_table = g_try_malloc(s->l1_size * sizeof(uint64_t));
+ if (s->l1_table == NULL) {
+ error_setg(errp, "Could not allocate memory for L1 table");
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
@@ -193,8 +198,16 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
}
- /* alloc L2 cache */
- s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+
+ /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
+ s->l2_cache =
+ qemu_try_blockalign(bs->file,
+ s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+ if (s->l2_cache == NULL) {
+ error_setg(errp, "Could not allocate L2 table cache");
+ ret = -ENOMEM;
+ goto fail;
+ }
s->cluster_cache = g_malloc(s->cluster_size);
s->cluster_data = g_malloc(s->cluster_size);
s->cluster_cache_offset = -1;
@@ -226,7 +239,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
fail:
g_free(s->l1_table);
- g_free(s->l2_cache);
+ qemu_vfree(s->l2_cache);
g_free(s->cluster_cache);
g_free(s->cluster_data);
return ret;
@@ -517,7 +530,10 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
void *orig_buf;
if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
+ if (buf == NULL) {
+ return -ENOMEM;
+ }
} else {
orig_buf = NULL;
buf = (uint8_t *)qiov->iov->iov_base;
@@ -619,7 +635,10 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
s->cluster_cache_offset = -1; /* disable compressed cache */
if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
+ if (buf == NULL) {
+ return -ENOMEM;
+ }
qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
} else {
orig_buf = NULL;
@@ -685,7 +704,7 @@ static void qcow_close(BlockDriverState *bs)
BDRVQcowState *s = bs->opaque;
g_free(s->l1_table);
- g_free(s->l2_cache);
+ qemu_vfree(s->l2_cache);
g_free(s->cluster_cache);
g_free(s->cluster_data);
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 8ecbb5bc00..5353b44828 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -53,10 +53,21 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
for (i = 0; i < c->size; i++) {
- c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
+ c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size);
+ if (c->entries[i].table == NULL) {
+ goto fail;
+ }
}
return c;
+
+fail:
+ for (i = 0; i < c->size; i++) {
+ qemu_vfree(c->entries[i].table);
+ }
+ g_free(c->entries);
+ g_free(c);
+ return NULL;
}
int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 4208dc08b5..5b36018b3e 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -72,14 +72,20 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
+ new_l1_table = qemu_try_blockalign(bs->file,
+ align_offset(new_l1_size2, 512));
+ if (new_l1_table == NULL) {
+ return -ENOMEM;
+ }
+ memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
+
memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
/* write new table (align to cluster) */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
if (new_l1_table_offset < 0) {
- g_free(new_l1_table);
+ qemu_vfree(new_l1_table);
return new_l1_table_offset;
}
@@ -113,7 +119,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
if (ret < 0) {
goto fail;
}
- g_free(s->l1_table);
+ qemu_vfree(s->l1_table);
old_l1_table_offset = s->l1_table_offset;
s->l1_table_offset = new_l1_table_offset;
s->l1_table = new_l1_table;
@@ -123,7 +129,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
QCOW2_DISCARD_OTHER);
return 0;
fail:
- g_free(new_l1_table);
+ qemu_vfree(new_l1_table);
qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
QCOW2_DISCARD_OTHER);
return ret;
@@ -372,7 +378,10 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
}
iov.iov_len = n * BDRV_SECTOR_SIZE;
- iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+ iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
+ if (iov.iov_base == NULL) {
+ return -ENOMEM;
+ }
qemu_iovec_init_external(&qiov, &iov, 1);
@@ -702,7 +711,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
assert(m->nb_clusters > 0);
- old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+ old_cluster = g_try_malloc(m->nb_clusters * sizeof(uint64_t));
+ if (old_cluster == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
/* copy content of unmodified sectors */
ret = perform_cow(bs, m, &m->cow_start);
@@ -1106,6 +1119,17 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
return 0;
}
+ /* !*host_offset would overwrite the image header and is reserved for "no
+ * host offset preferred". If 0 was a valid host offset, it'd trigger the
+ * following overlap check; do that now to avoid having an invalid value in
+ * *host_offset. */
+ if (!alloc_cluster_offset) {
+ ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
+ nb_clusters * s->cluster_size);
+ assert(ret < 0);
+ goto fail;
+ }
+
/*
* Save info needed for meta data update.
*
@@ -1562,7 +1586,10 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
if (!is_active_l1) {
/* inactive L2 tables require a buffer to be stored in when loading
* them from disk */
- l2_table = qemu_blockalign(bs, s->cluster_size);
+ l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+ if (l2_table == NULL) {
+ return -ENOMEM;
+ }
}
for (i = 0; i < l1_size; i++) {
@@ -1740,7 +1767,11 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs)
nb_clusters = size_to_clusters(s, bs->file->total_sectors *
BDRV_SECTOR_SIZE);
- expanded_clusters = g_malloc0((nb_clusters + 7) / 8);
+ expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8);
+ if (expanded_clusters == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
&expanded_clusters, &nb_clusters);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index cc6cf743d6..d60e2feb10 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -46,19 +46,25 @@ int qcow2_refcount_init(BlockDriverState *bs)
assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t));
refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
- s->refcount_table = g_malloc(refcount_table_size2);
+ s->refcount_table = g_try_malloc(refcount_table_size2);
+
if (s->refcount_table_size > 0) {
+ if (s->refcount_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
ret = bdrv_pread(bs->file, s->refcount_table_offset,
s->refcount_table, refcount_table_size2);
- if (ret != refcount_table_size2)
+ if (ret < 0) {
goto fail;
+ }
for(i = 0; i < s->refcount_table_size; i++)
be64_to_cpus(&s->refcount_table[i]);
}
return 0;
fail:
- return -ENOMEM;
+ return ret;
}
void qcow2_refcount_close(BlockDriverState *bs)
@@ -344,8 +350,14 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
s->cluster_size;
uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
- uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
- uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
+ uint64_t *new_table = g_try_malloc0(table_size * sizeof(uint64_t));
+ uint16_t *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);
+
+ assert(table_size > 0 && blocks_clusters > 0);
+ if (new_table == NULL || new_blocks == NULL) {
+ ret = -ENOMEM;
+ goto fail_table;
+ }
/* Fill the new refcount table */
memcpy(new_table, s->refcount_table,
@@ -424,6 +436,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
return -EAGAIN;
fail_table:
+ g_free(new_blocks);
g_free(new_table);
fail_block:
if (*refcount_block != NULL) {
@@ -847,7 +860,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend)
{
BDRVQcowState *s = bs->opaque;
- uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
+ uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
+ bool l1_allocated = false;
int64_t old_offset, old_l2_offset;
int i, j, l1_modified = 0, nb_csectors, refcount;
int ret;
@@ -862,8 +876,12 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
* l1_table_offset when it is the current s->l1_table_offset! Be careful
* when changing this! */
if (l1_table_offset != s->l1_table_offset) {
- l1_table = g_malloc0(align_offset(l1_size2, 512));
- l1_allocated = 1;
+ l1_table = g_try_malloc0(align_offset(l1_size2, 512));
+ if (l1_size2 && l1_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ l1_allocated = true;
ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
@@ -875,7 +893,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
} else {
assert(l1_size == s->l1_size);
l1_table = s->l1_table;
- l1_allocated = 0;
+ l1_allocated = false;
}
for(i = 0; i < l1_size; i++) {
@@ -1197,7 +1215,11 @@ static int check_refcounts_l1(BlockDriverState *bs,
if (l1_size2 == 0) {
l1_table = NULL;
} else {
- l1_table = g_malloc(l1_size2);
+ l1_table = g_try_malloc(l1_size2);
+ if (l1_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
if (bdrv_pread(bs->file, l1_table_offset,
l1_table, l1_size2) != l1_size2)
goto fail;
@@ -1501,7 +1523,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
return -EFBIG;
}
- refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
+ refcount_table = g_try_malloc0(nb_clusters * sizeof(uint16_t));
+ if (nb_clusters && refcount_table == NULL) {
+ res->check_errors++;
+ return -ENOMEM;
+ }
res->bfi.total_clusters =
size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
@@ -1753,9 +1779,13 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
uint32_t l1_sz = s->snapshots[i].l1_size;
uint64_t l1_sz2 = l1_sz * sizeof(uint64_t);
- uint64_t *l1 = g_malloc(l1_sz2);
+ uint64_t *l1 = g_try_malloc(l1_sz2);
int ret;
+ if (l1_sz2 && l1 == NULL) {
+ return -ENOMEM;
+ }
+
ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
if (ret < 0) {
g_free(l1);
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 0aa9defbe2..f67b47282f 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -381,7 +381,12 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
sn->l1_table_offset = l1_table_offset;
sn->l1_size = s->l1_size;
- l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+ l1_table = g_try_malloc(s->l1_size * sizeof(uint64_t));
+ if (s->l1_size && l1_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
for(i = 0; i < s->l1_size; i++) {
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
@@ -499,7 +504,11 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
* Decrease the refcount referenced by the old one only when the L1
* table is overwritten.
*/
- sn_l1_table = g_malloc0(cur_l1_bytes);
+ sn_l1_table = g_try_malloc0(cur_l1_bytes);
+ if (cur_l1_bytes && sn_l1_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
if (ret < 0) {
@@ -698,17 +707,21 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
return -EFBIG;
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
- new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
+ new_l1_table = qemu_try_blockalign(bs->file,
+ align_offset(new_l1_bytes, 512));
+ if (new_l1_table == NULL) {
+ return -ENOMEM;
+ }
ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");
- g_free(new_l1_table);
+ qemu_vfree(new_l1_table);
return ret;
}
/* Switch the L1 table */
- g_free(s->l1_table);
+ qemu_vfree(s->l1_table);
s->l1_size = sn->l1_size;
s->l1_table_offset = sn->l1_table_offset;
diff --git a/block/qcow2.c b/block/qcow2.c
index 1e3ab6bd02..435e0e11d0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -688,8 +688,13 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (s->l1_size > 0) {
- s->l1_table = g_malloc0(
+ s->l1_table = qemu_try_blockalign(bs->file,
align_offset(s->l1_size * sizeof(uint64_t), 512));
+ if (s->l1_table == NULL) {
+ error_setg(errp, "Could not allocate L1 table");
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
@@ -704,11 +709,22 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
/* alloc L2 table/refcount block cache */
s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
+ if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) {
+ error_setg(errp, "Could not allocate metadata caches");
+ ret = -ENOMEM;
+ goto fail;
+ }
s->cluster_cache = g_malloc(s->cluster_size);
/* one more sector for decompressed data alignment */
- s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
- + 512);
+ s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+ * s->cluster_size + 512);
+ if (s->cluster_data == NULL) {
+ error_setg(errp, "Could not allocate temporary cluster buffer");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
s->cluster_cache_offset = -1;
s->flags = flags;
@@ -852,7 +868,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
qcow2_refcount_close(bs);
- g_free(s->l1_table);
+ qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
if (s->l2_table_cache) {
@@ -1082,7 +1098,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
*/
if (!cluster_data) {
cluster_data =
- qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+ * s->cluster_size);
+ if (cluster_data == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
}
assert(cur_nr_sectors <=
@@ -1182,8 +1203,13 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
if (s->crypt_method) {
if (!cluster_data) {
- cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
- s->cluster_size);
+ cluster_data = qemu_try_blockalign(bs->file,
+ QCOW_MAX_CRYPT_CLUSTERS
+ * s->cluster_size);
+ if (cluster_data == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
}
assert(hd_qiov.size <=
@@ -1270,7 +1296,7 @@ fail:
static void qcow2_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
- g_free(s->l1_table);
+ qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
@@ -1557,7 +1583,7 @@ static int preallocate(BlockDriverState *bs)
int ret;
QCowL2Meta *meta;
- nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+ nb_sectors = bdrv_nb_sectors(bs);
offset = 0;
while (nb_sectors) {
@@ -1947,7 +1973,6 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
cluster_offset = bdrv_getlength(bs->file);
- cluster_offset = (cluster_offset + 511) & ~511;
bdrv_truncate(bs->file, cluster_offset);
return 0;
}
diff --git a/block/qed-check.c b/block/qed-check.c
index b473dcd61f..40a882cc93 100644
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -227,8 +227,11 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
};
int ret;
- check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
- sizeof(check.used_clusters[0]));
+ check.used_clusters = g_try_malloc0(((check.nclusters + 31) / 32) *
+ sizeof(check.used_clusters[0]));
+ if (check.nclusters && check.used_clusters == NULL) {
+ return -ENOMEM;
+ }
check.result->bfi.total_clusters =
(s->header.image_size + s->header.cluster_size - 1) /
diff --git a/block/qed.c b/block/qed.c
index 7944832181..ba395af76a 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1240,7 +1240,11 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
struct iovec *iov = acb->qiov->iov;
if (!iov->iov_base) {
- iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
+ iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
+ if (iov->iov_base == NULL) {
+ qed_aio_complete(acb, -ENOMEM);
+ return;
+ }
memset(iov->iov_base, 0, iov->iov_len);
}
}
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 8e9758e920..1194eb00ad 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -798,7 +798,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
* Ok, we have to do it the hard way, copy all segments into
* a single aligned buffer.
*/
- buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
+ buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes);
+ if (buf == NULL) {
+ return -ENOMEM;
+ }
+
if (aiocb->aio_type & QEMU_AIO_WRITE) {
char *p = buf;
int i;
diff --git a/block/rbd.c b/block/rbd.c
index 2b797d3e8b..4459102adf 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -617,7 +617,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCmd cmd)
{
RBDAIOCB *acb;
- RADOSCB *rcb;
+ RADOSCB *rcb = NULL;
rbd_completion_t c;
int64_t off, size;
char *buf;
@@ -631,7 +631,10 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
- acb->bounce = qemu_blockalign(bs, qiov->size);
+ acb->bounce = qemu_try_blockalign(bs, qiov->size);
+ if (acb->bounce == NULL) {
+ goto failed;
+ }
}
acb->ret = 0;
acb->error = 0;
diff --git a/block/vdi.c b/block/vdi.c
index 197bd77c97..adc6aa9a5f 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -53,13 +53,6 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "migration/migration.h"
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#ifndef FS_NOCOW_FL
-#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
-#endif
-#endif
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
@@ -299,7 +292,12 @@ static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res,
return -ENOTSUP;
}
- bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t));
+ bmap = g_try_malloc(s->header.blocks_in_image * sizeof(uint32_t));
+ if (s->header.blocks_in_image && bmap == NULL) {
+ res->check_errors++;
+ return -ENOMEM;
+ }
+
memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));
/* Check block map and value of blocks_allocated. */
@@ -357,23 +355,23 @@ static int vdi_make_empty(BlockDriverState *bs)
static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
{
const VdiHeader *header = (const VdiHeader *)buf;
- int result = 0;
+ int ret = 0;
logout("\n");
if (buf_size < sizeof(*header)) {
/* Header too small, no VDI. */
} else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
- result = 100;
+ ret = 100;
}
- if (result == 0) {
+ if (ret == 0) {
logout("no vdi image\n");
} else {
logout("%s", header->text);
}
- return result;
+ return ret;
}
static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
@@ -478,7 +476,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
bmap_size = header.blocks_in_image * sizeof(uint32_t);
bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
- s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
+ s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
+ if (s->bmap == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
if (ret < 0) {
goto fail_free_bmap;
@@ -493,7 +496,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
return 0;
fail_free_bmap:
- g_free(s->bmap);
+ qemu_vfree(s->bmap);
fail:
return ret;
@@ -681,8 +684,7 @@ static int vdi_co_write(BlockDriverState *bs,
static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
{
- int fd;
- int result = 0;
+ int ret = 0;
uint64_t bytes = 0;
uint32_t blocks;
size_t block_size = DEFAULT_CLUSTER_SIZE;
@@ -690,7 +692,10 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
VdiHeader header;
size_t i;
size_t bmap_size;
- bool nocow = false;
+ int64_t offset = 0;
+ Error *local_err = NULL;
+ BlockDriverState *bs = NULL;
+ uint32_t *bmap = NULL;
logout("\n");
@@ -707,37 +712,25 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
image_type = VDI_TYPE_STATIC;
}
#endif
- nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
if (bytes > VDI_DISK_SIZE_MAX) {
- result = -ENOTSUP;
+ ret = -ENOTSUP;
error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
", max supported is 0x%" PRIx64 ")",
bytes, VDI_DISK_SIZE_MAX);
goto exit;
}
- fd = qemu_open(filename,
- O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
- 0644);
- if (fd < 0) {
- result = -errno;
+ ret = bdrv_create_file(filename, opts, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
goto exit;
}
-
- if (nocow) {
-#ifdef __linux__
- /* Set NOCOW flag to solve performance issue on fs like btrfs.
- * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
- * be ignored since any failure of this operation should not block the
- * left work.
- */
- int attr;
- if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
- attr |= FS_NOCOW_FL;
- ioctl(fd, FS_IOC_SETFLAGS, &attr);
- }
-#endif
+ ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+ NULL, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
+ goto exit;
}
/* We need enough blocks to store the given disk size,
@@ -769,13 +762,20 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
- if (write(fd, &header, sizeof(header)) < 0) {
- result = -errno;
- goto close_and_exit;
+ ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
+ if (ret < 0) {
+ error_setg(errp, "Error writing header to %s", filename);
+ goto exit;
}
+ offset += sizeof(header);
if (bmap_size > 0) {
- uint32_t *bmap = g_malloc0(bmap_size);
+ bmap = g_try_malloc0(bmap_size);
+ if (bmap == NULL) {
+ ret = -ENOMEM;
+ error_setg(errp, "Could not allocate bmap");
+ goto exit;
+ }
for (i = 0; i < blocks; i++) {
if (image_type == VDI_TYPE_STATIC) {
bmap[i] = i;
@@ -783,35 +783,33 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
bmap[i] = VDI_UNALLOCATED;
}
}
- if (write(fd, bmap, bmap_size) < 0) {
- result = -errno;
- g_free(bmap);
- goto close_and_exit;
+ ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
+ if (ret < 0) {
+ error_setg(errp, "Error writing bmap to %s", filename);
+ goto exit;
}
- g_free(bmap);
+ offset += bmap_size;
}
if (image_type == VDI_TYPE_STATIC) {
- if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
- result = -errno;
- goto close_and_exit;
+ ret = bdrv_truncate(bs, offset + blocks * block_size);
+ if (ret < 0) {
+ error_setg(errp, "Failed to statically allocate %s", filename);
+ goto exit;
}
}
-close_and_exit:
- if ((close(fd) < 0) && !result) {
- result = -errno;
- }
-
exit:
- return result;
+ bdrv_unref(bs);
+ g_free(bmap);
+ return ret;
}
static void vdi_close(BlockDriverState *bs)
{
BDRVVdiState *s = bs->opaque;
- g_free(s->bmap);
+ qemu_vfree(s->bmap);
migrate_del_blocker(s->migration_blocker);
error_free(s->migration_blocker);
diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c
index fe879ed995..0640d3f4a9 100644
--- a/block/vhdx-endian.c
+++ b/block/vhdx-endian.c
@@ -82,8 +82,6 @@ void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
assert(d != NULL);
le32_to_cpus(&d->signature);
- le32_to_cpus(&d->trailing_bytes);
- le64_to_cpus(&d->leading_bytes);
le64_to_cpus(&d->file_offset);
le64_to_cpus(&d->sequence_number);
}
@@ -99,6 +97,15 @@ void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
cpu_to_le64s(&d->sequence_number);
}
+void vhdx_log_data_le_import(VHDXLogDataSector *d)
+{
+ assert(d != NULL);
+
+ le32_to_cpus(&d->data_signature);
+ le32_to_cpus(&d->sequence_high);
+ le32_to_cpus(&d->sequence_low);
+}
+
void vhdx_log_data_le_export(VHDXLogDataSector *d)
{
assert(d != NULL);
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index a77c040ee0..eb5c7a097b 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -84,6 +84,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
if (ret < 0) {
goto exit;
}
+ vhdx_log_entry_hdr_le_import(hdr);
exit:
return ret;
@@ -211,7 +212,7 @@ static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
{
int valid = false;
- if (memcmp(&hdr->signature, "loge", 4)) {
+ if (hdr->signature != VHDX_LOG_SIGNATURE) {
goto exit;
}
@@ -275,12 +276,12 @@ static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
goto exit;
}
- if (!memcmp(&desc->signature, "zero", 4)) {
+ if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
/* valid */
ret = true;
}
- } else if (!memcmp(&desc->signature, "desc", 4)) {
+ } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
/* valid */
ret = true;
}
@@ -327,13 +328,15 @@ static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
* passed into this function. Each descriptor will also be validated,
* and error returned if any are invalid. */
static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
- VHDXLogEntries *log, VHDXLogDescEntries **buffer)
+ VHDXLogEntries *log, VHDXLogDescEntries **buffer,
+ bool convert_endian)
{
int ret = 0;
uint32_t desc_sectors;
uint32_t sectors_read;
VHDXLogEntryHeader hdr;
VHDXLogDescEntries *desc_entries = NULL;
+ VHDXLogDescriptor desc;
int i;
assert(*buffer == NULL);
@@ -342,14 +345,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
if (ret < 0) {
goto exit;
}
- vhdx_log_entry_hdr_le_import(&hdr);
+
if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
ret = -EINVAL;
goto exit;
}
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
- desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);
+ desc_entries = qemu_try_blockalign(bs->file,
+ desc_sectors * VHDX_LOG_SECTOR_SIZE);
+ if (desc_entries == NULL) {
+ ret = -ENOMEM;
+ goto exit;
+ }
ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
desc_sectors, false);
@@ -363,12 +371,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
/* put in proper endianness, and validate each desc */
for (i = 0; i < hdr.descriptor_count; i++) {
- vhdx_log_desc_le_import(&desc_entries->desc[i]);
- if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
+ desc = desc_entries->desc[i];
+ vhdx_log_desc_le_import(&desc);
+ if (convert_endian) {
+ desc_entries->desc[i] = desc;
+ }
+ if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
ret = -EINVAL;
goto free_and_exit;
}
}
+ if (convert_endian) {
+ desc_entries->hdr = hdr;
+ }
*buffer = desc_entries;
goto exit;
@@ -403,7 +418,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
- if (!memcmp(&desc->signature, "desc", 4)) {
+ if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
/* data sector */
if (data == NULL) {
ret = -EFAULT;
@@ -431,10 +446,15 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
memcpy(buffer+offset, &desc->trailing_bytes, 4);
- } else if (!memcmp(&desc->signature, "zero", 4)) {
+ } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
/* write 'count' sectors of sector */
memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
+ } else {
+ error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
+ desc->signature);
+ ret = -EINVAL;
+ goto exit;
}
file_offset = desc->file_offset;
@@ -493,13 +513,13 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
goto exit;
}
- ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
+ ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
if (ret < 0) {
goto exit;
}
for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
- if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
+ if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
/* data sector, so read a sector to flush */
ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
data, 1, false);
@@ -510,6 +530,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
ret = -EINVAL;
goto exit;
}
+ vhdx_log_data_le_import(data);
}
ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
@@ -558,9 +579,6 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
goto inc_and_exit;
}
- vhdx_log_entry_hdr_le_import(&hdr);
-
-
if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
goto inc_and_exit;
}
@@ -573,13 +591,13 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
- /* Read desc sectors, and calculate log checksum */
+ /* Read all log sectors, and calculate log checksum */
total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
/* read_desc() will increment the read idx */
- ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
+ ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
if (ret < 0) {
goto free_and_exit;
}
@@ -602,7 +620,7 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
}
}
crc ^= 0xffffffff;
- if (crc != desc_buffer->hdr.checksum) {
+ if (crc != hdr.checksum) {
goto free_and_exit;
}
@@ -962,7 +980,6 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
* last data sector */
vhdx_update_checksum(buffer, total_length,
offsetof(VHDXLogEntryHeader, checksum));
- cpu_to_le32s((uint32_t *)(buffer + 4));
/* now write to the log */
ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
diff --git a/block/vhdx.c b/block/vhdx.c
index fedcf9f9ca..f666940db7 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -135,10 +135,8 @@ typedef struct VHDXSectorInfo {
* buf: buffer pointer
* size: size of buffer (must be > crc_offset+4)
*
- * Note: The resulting checksum is in the CPU endianness, not necessarily
- * in the file format endianness (LE). Any header export to disk should
- * make sure that vhdx_header_le_export() is used to convert to the
- * correct endianness
+ * Note: The buffer should have all multi-byte data in little-endian format,
+ * and the resulting checksum is in little endian format.
*/
uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
{
@@ -149,6 +147,7 @@ uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
memset(buf + crc_offset, 0, sizeof(crc));
crc = crc32c(0xffffffff, buf, size);
+ cpu_to_le32s(&crc);
memcpy(buf + crc_offset, &crc, sizeof(crc));
return crc;
@@ -300,7 +299,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
{
uint8_t *buffer = NULL;
int ret;
- VHDXHeader header_le;
+ VHDXHeader *header_le;
assert(bs_file != NULL);
assert(hdr != NULL);
@@ -321,11 +320,12 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
}
/* overwrite the actual VHDXHeader portion */
- memcpy(buffer, hdr, sizeof(VHDXHeader));
- hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
- offsetof(VHDXHeader, checksum));
- vhdx_header_le_export(hdr, &header_le);
- ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader));
+ header_le = (VHDXHeader *)buffer;
+ memcpy(header_le, hdr, sizeof(VHDXHeader));
+ vhdx_header_le_export(hdr, header_le);
+ vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
+ offsetof(VHDXHeader, checksum));
+ ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
exit:
qemu_vfree(buffer);
@@ -432,13 +432,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
/* copy over just the relevant portion that we need */
memcpy(header1, buffer, sizeof(VHDXHeader));
- vhdx_header_le_import(header1);
- if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
- !memcmp(&header1->signature, "head", 4) &&
- header1->version == 1) {
- h1_seq = header1->sequence_number;
- h1_valid = true;
+ if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
+ vhdx_header_le_import(header1);
+ if (header1->signature == VHDX_HEADER_SIGNATURE &&
+ header1->version == 1) {
+ h1_seq = header1->sequence_number;
+ h1_valid = true;
+ }
}
ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
@@ -447,13 +448,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
/* copy over just the relevant portion that we need */
memcpy(header2, buffer, sizeof(VHDXHeader));
- vhdx_header_le_import(header2);
- if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
- !memcmp(&header2->signature, "head", 4) &&
- header2->version == 1) {
- h2_seq = header2->sequence_number;
- h2_valid = true;
+ if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
+ vhdx_header_le_import(header2);
+ if (header2->signature == VHDX_HEADER_SIGNATURE &&
+ header2->version == 1) {
+ h2_seq = header2->sequence_number;
+ h2_valid = true;
+ }
}
/* If there is only 1 valid header (or no valid headers), we
@@ -519,15 +521,21 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
goto fail;
}
memcpy(&s->rt, buffer, sizeof(s->rt));
- vhdx_region_header_le_import(&s->rt);
offset += sizeof(s->rt);
- if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
- memcmp(&s->rt.signature, "regi", 4)) {
+ if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4)) {
ret = -EINVAL;
goto fail;
}
+ vhdx_region_header_le_import(&s->rt);
+
+ if (s->rt.signature != VHDX_REGION_SIGNATURE) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+
/* Per spec, maximum region table entry count is 2047 */
if (s->rt.entry_count > 2047) {
ret = -EINVAL;
@@ -630,7 +638,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
vhdx_metadata_header_le_import(&s->metadata_hdr);
- if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
+ if (s->metadata_hdr.signature != VHDX_METADATA_SIGNATURE) {
ret = -EINVAL;
goto exit;
}
@@ -950,7 +958,11 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
}
/* s->bat is freed in vhdx_close() */
- s->bat = qemu_blockalign(bs, s->bat_rt.length);
+ s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
+ if (s->bat == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
if (ret < 0) {
@@ -1540,7 +1552,8 @@ exit:
*/
static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
uint64_t image_size, VHDXImageType type,
- bool use_zero_blocks, VHDXRegionTableEntry *rt_bat)
+ bool use_zero_blocks, uint64_t file_offset,
+ uint32_t length)
{
int ret = 0;
uint64_t data_file_offset;
@@ -1555,7 +1568,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
/* this gives a data start after BAT/bitmap entries, and well
* past any metadata entries (with a 4 MB buffer for future
* expansion */
- data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB;
+ data_file_offset = file_offset + length + 5 * MiB;
total_sectors = image_size >> s->logical_sector_size_bits;
if (type == VHDX_TYPE_DYNAMIC) {
@@ -1579,7 +1592,11 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
use_zero_blocks ||
bdrv_has_zero_init(bs) == 0) {
/* for a fixed file, the default BAT entry is not zero */
- s->bat = g_malloc0(rt_bat->length);
+ s->bat = g_try_malloc0(length);
+ if (length && s->bat != NULL) {
+ ret = -ENOMEM;
+ goto exit;
+ }
block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT :
PAYLOAD_BLOCK_NOT_PRESENT;
block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state;
@@ -1594,7 +1611,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
cpu_to_le64s(&s->bat[sinfo.bat_idx]);
sector_num += s->sectors_per_block;
}
- ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length);
+ ret = bdrv_pwrite(bs, file_offset, s->bat, length);
if (ret < 0) {
goto exit;
}
@@ -1626,6 +1643,8 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
int ret = 0;
uint32_t offset = 0;
void *buffer = NULL;
+ uint64_t bat_file_offset;
+ uint32_t bat_length;
BDRVVHDXState *s = NULL;
VHDXRegionTableHeader *region_table;
VHDXRegionTableEntry *rt_bat;
@@ -1674,19 +1693,26 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
rt_metadata->length = 1 * MiB; /* min size, and more than enough */
*metadata_offset = rt_metadata->file_offset;
+ bat_file_offset = rt_bat->file_offset;
+ bat_length = rt_bat->length;
+
+ vhdx_region_header_le_export(region_table);
+ vhdx_region_entry_le_export(rt_bat);
+ vhdx_region_entry_le_export(rt_metadata);
+
vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE,
offsetof(VHDXRegionTableHeader, checksum));
/* The region table gives us the data we need to create the BAT,
* so do that now */
- ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat);
+ ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
+ bat_file_offset, bat_length);
+ if (ret < 0) {
+ goto exit;
+ }
/* Now write out the region headers to disk */
- vhdx_region_header_le_export(region_table);
- vhdx_region_entry_le_export(rt_bat);
- vhdx_region_entry_le_export(rt_metadata);
-
ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
if (ret < 0) {
diff --git a/block/vhdx.h b/block/vhdx.h
index 5370010c59..b4a12a081e 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -435,6 +435,7 @@ void vhdx_header_le_import(VHDXHeader *h);
void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
+void vhdx_log_data_le_import(VHDXLogDataSector *d);
void vhdx_log_data_le_export(VHDXLogDataSector *d);
void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
diff --git a/block/vmdk.c b/block/vmdk.c
index 0517bbaf91..01412a8939 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -106,6 +106,7 @@ typedef struct VmdkExtent {
uint32_t l2_cache_counts[L2_CACHE_SIZE];
int64_t cluster_sectors;
+ int64_t next_cluster_sector;
char *type;
} VmdkExtent;
@@ -124,7 +125,6 @@ typedef struct BDRVVmdkState {
} BDRVVmdkState;
typedef struct VmdkMetaData {
- uint32_t offset;
unsigned int l1_index;
unsigned int l2_index;
unsigned int l2_offset;
@@ -397,6 +397,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
{
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
+ int64_t length;
if (cluster_sectors > 0x200000) {
/* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
@@ -412,6 +413,11 @@ static int vmdk_add_extent(BlockDriverState *bs,
return -EFBIG;
}
+ length = bdrv_getlength(file);
+ if (length < 0) {
+ return length;
+ }
+
s->extents = g_realloc(s->extents,
(s->num_extents + 1) * sizeof(VmdkExtent));
extent = &s->extents[s->num_extents];
@@ -427,6 +433,8 @@ static int vmdk_add_extent(BlockDriverState *bs,
extent->l1_entry_sectors = l2_size * cluster_sectors;
extent->l2_size = l2_size;
extent->cluster_sectors = flat ? sectors : cluster_sectors;
+ extent->next_cluster_sector =
+ ROUND_UP(DIV_ROUND_UP(length, BDRV_SECTOR_SIZE), cluster_sectors);
if (s->num_extents > 1) {
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
@@ -448,7 +456,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
/* read the L1 table */
l1_size = extent->l1_size * sizeof(uint32_t);
- extent->l1_table = g_malloc(l1_size);
+ extent->l1_table = g_try_malloc(l1_size);
+ if (l1_size && extent->l1_table == NULL) {
+ return -ENOMEM;
+ }
+
ret = bdrv_pread(extent->file,
extent->l1_table_offset,
extent->l1_table,
@@ -464,7 +476,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
}
if (extent->l1_backup_table_offset) {
- extent->l1_backup_table = g_malloc(l1_size);
+ extent->l1_backup_table = g_try_malloc(l1_size);
+ if (l1_size && extent->l1_backup_table == NULL) {
+ ret = -ENOMEM;
+ goto fail_l1;
+ }
ret = bdrv_pread(extent->file,
extent->l1_backup_table_offset,
extent->l1_backup_table,
@@ -669,8 +685,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
- if (bdrv_getlength(file) <
- le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
+ if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
(int64_t)(le64_to_cpu(header.grain_offset)
* BDRV_SECTOR_SIZE));
@@ -952,57 +967,97 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
}
}
+/**
+ * get_whole_cluster
+ *
+ * Copy backing file's cluster that covers @sector_num, otherwise write zero,
+ * to the cluster at @cluster_sector_num.
+ *
+ * If @skip_start_sector < @skip_end_sector, the relative range
+ * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
+ * it for call to write user data in the request.
+ */
static int get_whole_cluster(BlockDriverState *bs,
- VmdkExtent *extent,
- uint64_t cluster_offset,
- uint64_t offset,
- bool allocate)
+ VmdkExtent *extent,
+ uint64_t cluster_sector_num,
+ uint64_t sector_num,
+ uint64_t skip_start_sector,
+ uint64_t skip_end_sector)
{
int ret = VMDK_OK;
- uint8_t *whole_grain = NULL;
+ int64_t cluster_bytes;
+ uint8_t *whole_grain;
+ /* For COW, align request sector_num to cluster start */
+ sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
+ cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
+ whole_grain = qemu_blockalign(bs, cluster_bytes);
+
+ if (!bs->backing_hd) {
+ memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS);
+ memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
+ cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
+ }
+
+ assert(skip_end_sector <= extent->cluster_sectors);
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
- if (bs->backing_hd) {
- whole_grain =
- qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
- if (!vmdk_is_cid_valid(bs)) {
- ret = VMDK_ERROR;
- goto exit;
- }
+ if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
+ ret = VMDK_ERROR;
+ goto exit;
+ }
- /* floor offset to cluster */
- offset -= offset % (extent->cluster_sectors * 512);
- ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
- extent->cluster_sectors);
+ /* Read backing data before skip range */
+ if (skip_start_sector > 0) {
+ if (bs->backing_hd) {
+ ret = bdrv_read(bs->backing_hd, sector_num,
+ whole_grain, skip_start_sector);
+ if (ret < 0) {
+ ret = VMDK_ERROR;
+ goto exit;
+ }
+ }
+ ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
+ skip_start_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
-
- /* Write grain only into the active image */
- ret = bdrv_write(extent->file, cluster_offset, whole_grain,
- extent->cluster_sectors);
+ }
+ /* Read backing data after skip range */
+ if (skip_end_sector < extent->cluster_sectors) {
+ if (bs->backing_hd) {
+ ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
+ whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
+ extent->cluster_sectors - skip_end_sector);
+ if (ret < 0) {
+ ret = VMDK_ERROR;
+ goto exit;
+ }
+ }
+ ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
+ whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
+ extent->cluster_sectors - skip_end_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
+
exit:
qemu_vfree(whole_grain);
return ret;
}
-static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
+static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
+ uint32_t offset)
{
- uint32_t offset;
- QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
- offset = cpu_to_le32(m_data->offset);
+ offset = cpu_to_le32(offset);
/* update L2 table */
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
- + (m_data->l2_index * sizeof(m_data->offset)),
+ + (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
@@ -1012,7 +1067,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
- + (m_data->l2_index * sizeof(m_data->offset)),
+ + (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
@@ -1024,17 +1079,41 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
return VMDK_OK;
}
+/**
+ * get_cluster_offset
+ *
+ * Look up cluster offset in extent file by sector number, and store in
+ * @cluster_offset.
+ *
+ * For flat extents, the start offset as parsed from the description file is
+ * returned.
+ *
+ * For sparse extents, look up in L1, L2 table. If allocate is true, return an
+ * offset for a new cluster and update L2 cache. If there is a backing file,
+ * COW is done before returning; otherwise, zeroes are written to the allocated
+ * cluster. Both COW and zero writing skips the sector range
+ * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller
+ * has new data to write there.
+ *
+ * Returns: VMDK_OK if cluster exists and mapped in the image.
+ * VMDK_UNALLOC if cluster is not mapped and @allocate is false.
+ * VMDK_ERROR if failed.
+ */
static int get_cluster_offset(BlockDriverState *bs,
- VmdkExtent *extent,
- VmdkMetaData *m_data,
- uint64_t offset,
- int allocate,
- uint64_t *cluster_offset)
+ VmdkExtent *extent,
+ VmdkMetaData *m_data,
+ uint64_t offset,
+ bool allocate,
+ uint64_t *cluster_offset,
+ uint64_t skip_start_sector,
+ uint64_t skip_end_sector)
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
uint32_t min_count, *l2_table;
bool zeroed = false;
+ int64_t ret;
+ int32_t cluster_sector;
if (m_data) {
m_data->valid = 0;
@@ -1088,52 +1167,41 @@ static int get_cluster_offset(BlockDriverState *bs,
extent->l2_cache_counts[min_index] = 1;
found:
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
- *cluster_offset = le32_to_cpu(l2_table[l2_index]);
+ cluster_sector = le32_to_cpu(l2_table[l2_index]);
if (m_data) {
m_data->valid = 1;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
- m_data->offset = *cluster_offset;
m_data->l2_offset = l2_offset;
m_data->l2_cache_entry = &l2_table[l2_index];
}
- if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
zeroed = true;
}
- if (!*cluster_offset || zeroed) {
+ if (!cluster_sector || zeroed) {
if (!allocate) {
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
}
- /* Avoid the L2 tables update for the images that have snapshots. */
- *cluster_offset = bdrv_getlength(extent->file);
- if (!extent->compressed) {
- bdrv_truncate(
- extent->file,
- *cluster_offset + (extent->cluster_sectors << 9)
- );
- }
-
- *cluster_offset >>= 9;
- l2_table[l2_index] = cpu_to_le32(*cluster_offset);
+ cluster_sector = extent->next_cluster_sector;
+ extent->next_cluster_sector += extent->cluster_sectors;
/* First of all we write grain itself, to avoid race condition
* that may to corrupt the image.
* This problem may occur because of insufficient space on host disk
* or inappropriate VM shutdown.
*/
- if (get_whole_cluster(
- bs, extent, *cluster_offset, offset, allocate) == -1) {
- return VMDK_ERROR;
- }
-
- if (m_data) {
- m_data->offset = *cluster_offset;
+ ret = get_whole_cluster(bs, extent,
+ cluster_sector,
+ offset >> BDRV_SECTOR_BITS,
+ skip_start_sector, skip_end_sector);
+ if (ret) {
+ return ret;
}
}
- *cluster_offset <<= 9;
+ *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
return VMDK_OK;
}
@@ -1168,7 +1236,8 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
}
qemu_co_mutex_lock(&s->lock);
ret = get_cluster_offset(bs, extent, NULL,
- sector_num * 512, 0, &offset);
+ sector_num * 512, false, &offset,
+ 0, 0);
qemu_co_mutex_unlock(&s->lock);
switch (ret) {
@@ -1321,9 +1390,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return -EIO;
}
- ret = get_cluster_offset(
- bs, extent, NULL,
- sector_num << 9, 0, &cluster_offset);
+ ret = get_cluster_offset(bs, extent, NULL,
+ sector_num << 9, false, &cluster_offset,
+ 0, 0);
extent_begin_sector = extent->end_sector - extent->sectors;
extent_relative_sector_num = sector_num - extent_begin_sector;
index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
@@ -1404,12 +1473,17 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return -EIO;
}
- ret = get_cluster_offset(
- bs,
- extent,
- &m_data,
- sector_num << 9, !extent->compressed,
- &cluster_offset);
+ extent_begin_sector = extent->end_sector - extent->sectors;
+ extent_relative_sector_num = sector_num - extent_begin_sector;
+ index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
+ n = extent->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
+ ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ !(extent->compressed || zeroed),
+ &cluster_offset,
+ index_in_cluster, index_in_cluster + n);
if (extent->compressed) {
if (ret == VMDK_OK) {
/* Refuse write to allocated cluster for streamOptimized */
@@ -1418,24 +1492,13 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return -EIO;
} else {
/* allocate */
- ret = get_cluster_offset(
- bs,
- extent,
- &m_data,
- sector_num << 9, 1,
- &cluster_offset);
+ ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ true, &cluster_offset, 0, 0);
}
}
if (ret == VMDK_ERROR) {
return -EINVAL;
}
- extent_begin_sector = extent->end_sector - extent->sectors;
- extent_relative_sector_num = sector_num - extent_begin_sector;
- index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
if (zeroed) {
/* Do zeroed write, buf is ignored */
if (extent->has_zero_grain &&
@@ -1443,9 +1506,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
n >= extent->cluster_sectors) {
n = extent->cluster_sectors;
if (!zero_dry_run) {
- m_data.offset = VMDK_GTE_ZEROED;
/* update L2 tables */
- if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+ if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
+ != VMDK_OK) {
return -EIO;
}
}
@@ -1461,7 +1524,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
}
if (m_data.valid) {
/* update L2 tables */
- if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+ if (vmdk_L2update(extent, &m_data,
+ cluster_offset >> BDRV_SECTOR_BITS)
+ != VMDK_OK) {
return -EIO;
}
}
@@ -1999,7 +2064,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
int64_t sector_num = 0;
- int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
+ int64_t total_sectors = bdrv_nb_sectors(bs);
int ret;
uint64_t cluster_offset;
@@ -2020,7 +2085,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
}
ret = get_cluster_offset(bs, extent, NULL,
sector_num << BDRV_SECTOR_BITS,
- 0, &cluster_offset);
+ false, &cluster_offset, 0, 0);
if (ret == VMDK_ERROR) {
fprintf(stderr,
"ERROR: could not get cluster_offset for sector %"
diff --git a/block/vpc.c b/block/vpc.c
index 8b376a40be..055efc42d2 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -29,13 +29,6 @@
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
#endif
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#ifndef FS_NOCOW_FL
-#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
-#endif
-#endif
/**************************************************************/
@@ -276,7 +269,11 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4);
+ s->pagetable = qemu_try_blockalign(bs->file, s->max_table_entries * 4);
+ if (s->pagetable == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
@@ -656,39 +653,41 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
return 0;
}
-static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
+static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
+ int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
(VHDDynDiskHeader *) buf;
size_t block_size, num_bat_entries;
int i;
- int ret = -EIO;
+ int ret;
+ int64_t offset = 0;
// Write the footer (twice: at the beginning and at the end)
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
- if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
+ ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+ if (ret) {
goto fail;
}
- if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0) {
- goto fail;
- }
- if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
+ offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
+ ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+ if (ret < 0) {
goto fail;
}
// Write the initial BAT
- if (lseek(fd, 3 * 512, SEEK_SET) < 0) {
- goto fail;
- }
+ offset = 3 * 512;
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
- if (write(fd, buf, 512) != 512) {
+ ret = bdrv_pwrite_sync(bs, offset, buf, 512);
+ if (ret < 0) {
goto fail;
}
+ offset += 512;
}
// Prepare the Dynamic Disk Header
@@ -709,39 +708,35 @@ static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024));
// Write the header
- if (lseek(fd, 512, SEEK_SET) < 0) {
- goto fail;
- }
+ offset = 512;
- if (write(fd, buf, 1024) != 1024) {
+ ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
+ if (ret < 0) {
goto fail;
}
- ret = 0;
fail:
return ret;
}
-static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
+static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
+ int64_t total_size)
{
- int ret = -EIO;
+ int ret;
/* Add footer to total size */
- total_size += 512;
- if (ftruncate(fd, total_size) != 0) {
- ret = -errno;
- goto fail;
- }
- if (lseek(fd, -512, SEEK_END) < 0) {
- goto fail;
- }
- if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
- goto fail;
+ total_size += HEADER_SIZE;
+
+ ret = bdrv_truncate(bs, total_size);
+ if (ret < 0) {
+ return ret;
}
- ret = 0;
+ ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+ if (ret < 0) {
+ return ret;
+ }
- fail:
return ret;
}
@@ -750,7 +745,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
uint8_t buf[1024];
VHDFooter *footer = (VHDFooter *) buf;
char *disk_type_param;
- int fd, i;
+ int i;
uint16_t cyls = 0;
uint8_t heads = 0;
uint8_t secs_per_cyl = 0;
@@ -758,7 +753,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size;
int disk_type;
int ret = -EIO;
- bool nocow = false;
+ Error *local_err = NULL;
+ BlockDriverState *bs = NULL;
/* Read out options */
total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
@@ -775,28 +771,17 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
} else {
disk_type = VHD_DYNAMIC;
}
- nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
- /* Create the file */
- fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
- if (fd < 0) {
- ret = -EIO;
+ ret = bdrv_create_file(filename, opts, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
goto out;
}
-
- if (nocow) {
-#ifdef __linux__
- /* Set NOCOW flag to solve performance issue on fs like btrfs.
- * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
- * be ignored since any failure of this operation should not block the
- * left work.
- */
- int attr;
- if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
- attr |= FS_NOCOW_FL;
- ioctl(fd, FS_IOC_SETFLAGS, &attr);
- }
-#endif
+ ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+ NULL, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
+ goto out;
}
/*
@@ -810,7 +795,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
&secs_per_cyl))
{
ret = -EFBIG;
- goto fail;
+ goto out;
}
}
@@ -856,14 +841,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
if (disk_type == VHD_DYNAMIC) {
- ret = create_dynamic_disk(fd, buf, total_sectors);
+ ret = create_dynamic_disk(bs, buf, total_sectors);
} else {
- ret = create_fixed_disk(fd, buf, total_size);
+ ret = create_fixed_disk(bs, buf, total_size);
}
-fail:
- qemu_close(fd);
out:
+ bdrv_unref(bs);
g_free(disk_type_param);
return ret;
}
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 8e417f70ae..5030e3274d 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -139,7 +139,10 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
waiocb->is_read = (type == QEMU_AIO_READ);
if (qiov->niov > 1) {
- waiocb->buf = qemu_blockalign(bs, qiov->size);
+ waiocb->buf = qemu_try_blockalign(bs, qiov->size);
+ if (waiocb->buf == NULL) {
+ goto out;
+ }
if (type & QEMU_AIO_WRITE) {
iov_to_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size);
}
@@ -168,6 +171,7 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
out_dec_count:
aio->count--;
+out:
qemu_aio_release(waiocb);
return NULL;
}
diff --git a/configure b/configure
index f49e61888e..283c71cb7a 100755
--- a/configure
+++ b/configure
@@ -326,6 +326,7 @@ seccomp=""
glusterfs=""
glusterfs_discard="no"
glusterfs_zerofill="no"
+archipelago=""
virtio_blk_data_plane=""
gtk=""
gtkabi=""
@@ -1087,6 +1088,10 @@ for opt do
;;
--enable-glusterfs) glusterfs="yes"
;;
+ --disable-archipelago) archipelago="no"
+ ;;
+ --enable-archipelago) archipelago="yes"
+ ;;
--disable-virtio-blk-data-plane) virtio_blk_data_plane="no"
;;
--enable-virtio-blk-data-plane) virtio_blk_data_plane="yes"
@@ -1382,6 +1387,8 @@ Advanced options (experts only):
--enable-coroutine-pool enable coroutine freelist (better performance)
--enable-glusterfs enable GlusterFS backend
--disable-glusterfs disable GlusterFS backend
+ --enable-archipelago enable Archipelago backend
+ --disable-archipelago disable Archipelago backend
--enable-gcov enable test coverage analysis with gcov
--gcov=GCOV use specified gcov [$gcov_tool]
--disable-tpm disable TPM support
@@ -3072,6 +3079,33 @@ EOF
fi
fi
+
+##########################################
+# archipelago probe
+if test "$archipelago" != "no" ; then
+ cat > $TMPC <<EOF
+#include <stdio.h>
+#include <xseg/xseg.h>
+#include <xseg/protocol.h>
+int main(void) {
+ xseg_initialize();
+ return 0;
+}
+EOF
+ archipelago_libs=-lxseg
+ if compile_prog "" "$archipelago_libs"; then
+ archipelago="yes"
+ libs_tools="$archipelago_libs $libs_tools"
+ libs_softmmu="$archipelago_libs $libs_softmmu"
+ else
+ if test "$archipelago" = "yes" ; then
+ feature_not_found "Archipelago backend support" "Install libxseg devel"
+ fi
+ archipelago="no"
+ fi
+fi
+
+
##########################################
# glusterfs probe
if test "$glusterfs" != "no" ; then
@@ -3087,7 +3121,8 @@ if test "$glusterfs" != "no" ; then
fi
else
if test "$glusterfs" = "yes" ; then
- feature_not_found "GlusterFS backend support" "Install glusterfs-api devel"
+ feature_not_found "GlusterFS backend support" \
+ "Install glusterfs-api devel >= 3"
fi
glusterfs="no"
fi
@@ -3532,7 +3567,8 @@ EOF
spice_server_version=$($pkg_config --modversion spice-server)
else
if test "$spice" = "yes" ; then
- feature_not_found "spice" "Install spice-server and spice-protocol devel"
+ feature_not_found "spice" \
+ "Install spice-server(>=0.12.0) and spice-protocol(>=0.12.3) devel"
fi
spice="no"
fi
@@ -3563,7 +3599,7 @@ EOF
smartcard_nss="yes"
else
if test "$smartcard_nss" = "yes"; then
- feature_not_found "nss"
+ feature_not_found "nss" "Install nss devel >= 3.12.8"
fi
smartcard_nss="no"
fi
@@ -3579,7 +3615,7 @@ if test "$libusb" != "no" ; then
libs_softmmu="$libs_softmmu $libusb_libs"
else
if test "$libusb" = "yes"; then
- feature_not_found "libusb" "Install libusb devel"
+ feature_not_found "libusb" "Install libusb devel >= 1.0.13"
fi
libusb="no"
fi
@@ -4004,7 +4040,7 @@ if test "$libnfs" != "no" ; then
LIBS="$LIBS $libnfs_libs"
else
if test "$libnfs" = "yes" ; then
- feature_not_found "libnfs"
+ feature_not_found "libnfs" "Install libnfs devel >= 1.9.3"
fi
libnfs="no"
fi
@@ -4251,6 +4287,7 @@ echo "seccomp support $seccomp"
echo "coroutine backend $coroutine"
echo "coroutine pool $coroutine_pool"
echo "GlusterFS support $glusterfs"
+echo "Archipelago support $archipelago"
echo "virtio-blk-data-plane $virtio_blk_data_plane"
echo "gcov $gcov_tool"
echo "gcov enabled $gcov"
@@ -4689,6 +4726,11 @@ if test "$glusterfs_zerofill" = "yes" ; then
echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak
fi
+if test "$archipelago" = "yes" ; then
+ echo "CONFIG_ARCHIPELAGO=m" >> $config_host_mak
+ echo "ARCHIPELAGO_LIBS=$archipelago_libs" >> $config_host_mak
+fi
+
if test "$libssh2" = "yes" ; then
echo "CONFIG_LIBSSH2=m" >> $config_host_mak
echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
diff --git a/cpu-exec.c b/cpu-exec.c
index cbc8067b37..c6aad742e1 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -18,6 +18,7 @@
*/
#include "config.h"
#include "cpu.h"
+#include "trace.h"
#include "disas/disas.h"
#include "tcg.h"
#include "qemu/atomic.h"
@@ -168,6 +169,9 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
#endif /* DEBUG_DISAS */
next_tb = tcg_qemu_tb_exec(env, tb_ptr);
+ trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK),
+ next_tb & TB_EXIT_MASK);
+
if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
/* We didn't start executing this TB (eg because the instruction
* counter hit zero); we must restore the guest PC to the address
@@ -208,6 +212,7 @@ static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
max_cycles);
cpu->current_tb = tb;
/* execute the generated code */
+ trace_exec_tb_nocache(tb, tb->pc);
cpu_tb_exec(cpu, tb->tc_ptr);
cpu->current_tb = NULL;
tb_phys_invalidate(tb, -1);
@@ -749,6 +754,7 @@ int cpu_exec(CPUArchState *env)
cpu->current_tb = tb;
barrier();
if (likely(!cpu->exit_request)) {
+ trace_exec_tb(tb, tb->pc);
tc_ptr = tb->tc_ptr;
/* execute the generated code */
next_tb = cpu_tb_exec(cpu, tc_ptr);
diff --git a/docs/multiple-iothreads.txt b/docs/multiple-iothreads.txt
new file mode 100644
index 0000000000..40b8419916
--- /dev/null
+++ b/docs/multiple-iothreads.txt
@@ -0,0 +1,134 @@
+Copyright (c) 2014 Red Hat Inc.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later. See
+the COPYING file in the top-level directory.
+
+
+This document explains the IOThread feature and how to write code that runs
+outside the QEMU global mutex.
+
+The main loop and IOThreads
+---------------------------
+QEMU is an event-driven program that can do several things at once using an
+event loop. The VNC server and the QMP monitor are both processed from the
+same event loop, which monitors their file descriptors until they become
+readable and then invokes a callback.
+
+The default event loop is called the main loop (see main-loop.c). It is
+possible to create additional event loop threads using -object
+iothread,id=my-iothread.
+
+Side note: The main loop and IOThread are both event loops but their code is
+not shared completely. Sometimes it is useful to remember that although they
+are conceptually similar they are currently not interchangeable.
+
+Why IOThreads are useful
+------------------------
+IOThreads allow the user to control the placement of work. The main loop is a
+scalability bottleneck on hosts with many CPUs. Work can be spread across
+several IOThreads instead of just one main loop. When set up correctly this
+can improve I/O latency and reduce jitter seen by the guest.
+
+The main loop is also deeply associated with the QEMU global mutex, which is a
+scalability bottleneck in itself. vCPU threads and the main loop use the QEMU
+global mutex to serialize execution of QEMU code. This mutex is necessary
+because a lot of QEMU's code historically was not thread-safe.
+
+The fact that all I/O processing is done in a single main loop and that the
+QEMU global mutex is contended by all vCPU threads and the main loop explain
+why it is desirable to place work into IOThreads.
+
+The experimental virtio-blk data-plane implementation has been benchmarked and
+shows these effects:
+ftp://public.dhe.ibm.com/linux/pdfs/KVM_Virtualized_IO_Performance_Paper.pdf
+
+How to program for IOThreads
+----------------------------
+The main difference between legacy code and new code that can run in an
+IOThread is dealing explicitly with the event loop object, AioContext
+(see include/block/aio.h). Code that only works in the main loop
+implicitly uses the main loop's AioContext. Code that supports running
+in IOThreads must be aware of its AioContext.
+
+AioContext supports the following services:
+ * File descriptor monitoring (read/write/error on POSIX hosts)
+ * Event notifiers (inter-thread signalling)
+ * Timers
+ * Bottom Halves (BH) deferred callbacks
+
+There are several old APIs that use the main loop AioContext:
+ * LEGACY qemu_aio_set_fd_handler() - monitor a file descriptor
+ * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
+ * LEGACY timer_new_ms() - create a timer
+ * LEGACY qemu_bh_new() - create a BH
+ * LEGACY qemu_aio_wait() - run an event loop iteration
+
+Since they implicitly work on the main loop they cannot be used in code that
+runs in an IOThread. They might cause a crash or deadlock if called from an
+IOThread since the QEMU global mutex is not held.
+
+Instead, use the AioContext functions directly (see include/block/aio.h):
+ * aio_set_fd_handler() - monitor a file descriptor
+ * aio_set_event_notifier() - monitor an event notifier
+ * aio_timer_new() - create a timer
+ * aio_bh_new() - create a BH
+ * aio_poll() - run an event loop iteration
+
+The AioContext can be obtained from the IOThread using
+iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
+Code that takes an AioContext argument works both in IOThreads or the main
+loop, depending on which AioContext instance the caller passes in.
+
+How to synchronize with an IOThread
+-----------------------------------
+AioContext is not thread-safe so some rules must be followed when using file
+descriptors, event notifiers, timers, or BHs across threads:
+
+1. AioContext functions can be called safely from file descriptor, event
+notifier, timer, or BH callbacks invoked by the AioContext. No locking is
+necessary.
+
+2. Other threads wishing to access the AioContext must use
+aio_context_acquire()/aio_context_release() for mutual exclusion. Once the
+context is acquired no other thread can access it or run event loop iterations
+in this AioContext.
+
+aio_context_acquire()/aio_context_release() calls may be nested. This
+means you can call them if you're not sure whether #1 applies.
+
+There is currently no lock ordering rule if a thread needs to acquire multiple
+AioContexts simultaneously. Therefore, it is only safe for code holding the
+QEMU global mutex to acquire other AioContexts.
+
+Side note: the best way to schedule a function call across threads is to create
+a BH in the target AioContext beforehand and then call qemu_bh_schedule(). No
+acquire/release or locking is needed for the qemu_bh_schedule() call. But be
+sure to acquire the AioContext for aio_bh_new() if necessary.
+
+The relationship between AioContext and the block layer
+-------------------------------------------------------
+The AioContext originates from the QEMU block layer because it provides a
+scoped way of running event loop iterations until all work is done. This
+feature is used to complete all in-flight block I/O requests (see
+bdrv_drain_all()). Nowadays AioContext is a generic event loop that can be
+used by any QEMU subsystem.
+
+The block layer has support for AioContext integrated. Each BlockDriverState
+is associated with an AioContext using bdrv_set_aio_context() and
+bdrv_get_aio_context(). This allows block layer code to process I/O inside the
+right AioContext. Other subsystems may wish to follow a similar approach.
+
+Block layer code must therefore expect to run in an IOThread and avoid using
+old APIs that implicitly use the main loop. See the "How to program for
+IOThreads" above for information on how to do that.
+
+If main loop code such as a QMP function wishes to access a BlockDriverState it
+must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure the
+IOThread does not run in parallel.
+
+Long-running jobs (usually in the form of coroutines) are best scheduled in the
+BlockDriverState's AioContext to avoid the need to acquire/release around each
+bdrv_*() call. Be aware that there is currently no mechanism to get notified
+when bdrv_set_aio_context() moves this BlockDriverState to a different
+AioContext (see bdrv_detach_aio_context()/bdrv_attach_aio_context()), so you
+may need to add this if you want to support long-running jobs.
diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt
index 3f713a6447..cfbc8b070c 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -135,12 +135,12 @@ be stored. Each extension has a structure like the following:
Unless stated otherwise, each header extension type shall appear at most once
in the same image.
-The remaining space between the end of the header extension area and the end of
-the first cluster can be used for the backing file name. It is not allowed to
-store other data here, so that an implementation can safely modify the header
-and add extensions without harming data of compatible features that it
-doesn't support. Compatible features that need space for additional data can
-use a header extension.
+If the image has a backing file then the backing file name should be stored in
+the remaining space between the end of the header extension area and the end of
+the first cluster. It is not allowed to store other data here, so that an
+implementation can safely modify the header and add extensions without harming
+data of compatible features that it doesn't support. Compatible features that
+need space for additional data can use a header extension.
== Feature name table ==
diff --git a/docs/tracing.txt b/docs/tracing.txt
index c6ab1c11c1..2e035a5b3c 100644
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -307,3 +307,43 @@ guard such computations and avoid its compilation when the event is disabled:
You can check both if the event has been disabled and is dynamically enabled at
the same time using the 'trace_event_get_state' routine (see header
"trace/control.h" for more information).
+
+=== "tcg" ===
+
+Guest code generated by TCG can be traced by defining an event with the "tcg"
+event property. Internally, this property generates two events:
+"<eventname>_trans" to trace the event at translation time, and
+"<eventname>_exec" to trace the event at execution time.
+
+Instead of using these two events, you should instead use the function
+"trace_<eventname>_tcg" during translation (TCG code generation). This function
+will automatically call "trace_<eventname>_trans", and will generate the
+necessary TCG code to call "trace_<eventname>_exec" during guest code execution.
+
+Events with the "tcg" property can be declared in the "trace-events" file with a
+mix of native and TCG types, and "trace_<eventname>_tcg" will gracefully forward
+them to the "<eventname>_trans" and "<eventname>_exec" events. Since TCG values
+are not known at translation time, these are ignored by the "<eventname>_trans"
+event. Because of this, the entry in the "trace-events" file needs two printing
+formats (separated by a comma):
+
+ tcg foo(uint8_t a1, TCGv_i32 a2) "a1=%d", "a1=%d a2=%d"
+
+For example:
+
+ #include "trace-tcg.h"
+
+ void some_disassembly_func (...)
+ {
+ uint8_t a1 = ...;
+ TCGv_i32 a2 = ...;
+ trace_foo_tcg(a1, a2);
+ }
+
+This will immediately call:
+
+ void trace_foo_trans(uint8_t a1);
+
+and will generate the TCG code to call:
+
+ void trace_foo(uint8_t a1, uint32_t a2);
diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
index aa49b47482..0ac911eada 100644
--- a/hw/audio/intel-hda.c
+++ b/hw/audio/intel-hda.c
@@ -187,6 +187,7 @@ struct IntelHDAState {
/* properties */
uint32_t debug;
uint32_t msi;
+ bool old_msi_addr;
};
#define TYPE_INTEL_HDA_GENERIC "intel-hda-generic"
@@ -1141,7 +1142,7 @@ static int intel_hda_init(PCIDevice *pci)
"intel-hda", 0x4000);
pci_register_bar(&d->pci, 0, 0, &d->mmio);
if (d->msi) {
- msi_init(&d->pci, 0x50, 1, true, false);
+ msi_init(&d->pci, d->old_msi_addr ? 0x50 : 0x60, 1, true, false);
}
hda_codec_bus_init(DEVICE(pci), &d->codecs, sizeof(d->codecs),
@@ -1236,6 +1237,7 @@ static const VMStateDescription vmstate_intel_hda = {
static Property intel_hda_properties[] = {
DEFINE_PROP_UINT32("debug", IntelHDAState, debug, 0),
DEFINE_PROP_UINT32("msi", IntelHDAState, msi, 1),
+ DEFINE_PROP_BOOL("old_msi_addr", IntelHDAState, old_msi_addr, false),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index aed5b5b3e9..a221d0bfca 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -589,6 +589,7 @@ static int blk_send_response_one(struct ioreq *ioreq)
break;
default:
dst = NULL;
+ return 0;
}
memcpy(dst, &resp, sizeof(resp));
blkdev->rings.common.rsp_prod_pvt++;
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 816c6d9b47..10b84d039b 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1393,7 +1393,7 @@ build_rsdp(GArray *rsdp_table, GArray *linker, unsigned rsdt)
{
AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp);
- bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, 1,
+ bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, 16,
true /* fseg memory */);
memcpy(&rsdp->signature, "RSD PTR ", 8);
diff --git a/hw/i386/acpi-dsdt.dsl b/hw/i386/acpi-dsdt.dsl
index 6ba017014a..559f4b6653 100644
--- a/hw/i386/acpi-dsdt.dsl
+++ b/hw/i386/acpi-dsdt.dsl
@@ -302,7 +302,7 @@ DefinitionBlock (
/****************************************************************
* General purpose events
****************************************************************/
- External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
+ External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
Scope(\_GPE) {
Name(_HID, "ACPI0006")
@@ -321,7 +321,7 @@ DefinitionBlock (
}
Method(_E03) {
// Memory hotplug event
- \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
+ \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
}
Method(_L04) {
}
diff --git a/hw/i386/acpi-dsdt.hex.generated b/hw/i386/acpi-dsdt.hex.generated
index 6c8a1fcdfb..a21bf410e5 100644
--- a/hw/i386/acpi-dsdt.hex.generated
+++ b/hw/i386/acpi-dsdt.hex.generated
@@ -8,7 +8,7 @@ static unsigned char AcpiDsdtAmlCode[] = {
0x0,
0x0,
0x1,
-0x2e,
+0x1f,
0x42,
0x58,
0x50,
@@ -31,9 +31,9 @@ static unsigned char AcpiDsdtAmlCode[] = {
0x4e,
0x54,
0x4c,
-0x13,
-0x9,
-0x12,
+0x28,
+0x5,
+0x10,
0x20,
0x10,
0x49,
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 9e589825f0..8fa8d2f781 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1066,35 +1066,6 @@ typedef struct PcRomPciInfo {
uint64_t w64_max;
} PcRomPciInfo;
-static void pc_fw_cfg_guest_info(PcGuestInfo *guest_info)
-{
- PcRomPciInfo *info;
- Object *pci_info;
- bool ambiguous = false;
-
- if (!guest_info->has_pci_info || !guest_info->fw_cfg) {
- return;
- }
- pci_info = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous);
- g_assert(!ambiguous);
- if (!pci_info) {
- return;
- }
-
- info = g_malloc(sizeof *info);
- info->w32_min = cpu_to_le64(object_property_get_int(pci_info,
- PCI_HOST_PROP_PCI_HOLE_START, NULL));
- info->w32_max = cpu_to_le64(object_property_get_int(pci_info,
- PCI_HOST_PROP_PCI_HOLE_END, NULL));
- info->w64_min = cpu_to_le64(object_property_get_int(pci_info,
- PCI_HOST_PROP_PCI_HOLE64_START, NULL));
- info->w64_max = cpu_to_le64(object_property_get_int(pci_info,
- PCI_HOST_PROP_PCI_HOLE64_END, NULL));
- /* Pass PCI hole info to guest via a side channel.
- * Required so guest PCI enumeration does the right thing. */
- fw_cfg_add_file(guest_info->fw_cfg, "etc/pci-info", info, sizeof *info);
-}
-
typedef struct PcGuestInfoState {
PcGuestInfo info;
Notifier machine_done;
@@ -1106,7 +1077,6 @@ void pc_guest_info_machine_done(Notifier *notifier, void *data)
PcGuestInfoState *guest_info_state = container_of(notifier,
PcGuestInfoState,
machine_done);
- pc_fw_cfg_guest_info(&guest_info_state->info);
acpi_setup(&guest_info_state->info);
}
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4f22be85da..47ac1b528d 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -59,7 +59,6 @@ static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
-static bool has_pci_info;
static bool has_acpi_build = true;
static int legacy_acpi_table_size;
static bool smbios_defaults = true;
@@ -166,7 +165,6 @@ static void pc_init1(MachineState *machine,
guest_info->has_acpi_build = has_acpi_build;
guest_info->legacy_acpi_table_size = legacy_acpi_table_size;
- guest_info->has_pci_info = has_pci_info;
guest_info->isapc_ram_fw = !pci_enabled;
guest_info->has_reserved_memory = has_reserved_memory;
@@ -340,7 +338,6 @@ static void pc_compat_1_7(MachineState *machine)
static void pc_compat_1_6(MachineState *machine)
{
pc_compat_1_7(machine);
- has_pci_info = false;
rom_file_has_mr = false;
has_acpi_build = false;
}
@@ -422,7 +419,6 @@ static void pc_init_pci_no_kvmclock(MachineState *machine)
static void pc_init_isa(MachineState *machine)
{
- has_pci_info = false;
has_acpi_build = false;
smbios_defaults = false;
gigabyte_align = false;
@@ -457,16 +453,28 @@ static void pc_xen_hvm_init(MachineState *machine)
.desc = "Standard PC (i440FX + PIIX, 1996)", \
.hot_add_cpu = pc_hot_add_cpu
-#define PC_I440FX_2_1_MACHINE_OPTIONS \
+#define PC_I440FX_2_2_MACHINE_OPTIONS \
PC_I440FX_MACHINE_OPTIONS, \
.default_machine_opts = "firmware=bios-256k.bin"
+static QEMUMachine pc_i440fx_machine_v2_2 = {
+ PC_I440FX_2_2_MACHINE_OPTIONS,
+ .name = "pc-i440fx-2.2",
+ .alias = "pc",
+ .init = pc_init_pci,
+ .is_default = 1,
+};
+
+#define PC_I440FX_2_1_MACHINE_OPTIONS PC_I440FX_2_2_MACHINE_OPTIONS
+
static QEMUMachine pc_i440fx_machine_v2_1 = {
PC_I440FX_2_1_MACHINE_OPTIONS,
.name = "pc-i440fx-2.1",
- .alias = "pc",
.init = pc_init_pci,
- .is_default = 1,
+ .compat_props = (GlobalProperty[]) {
+ PC_COMPAT_2_1,
+ { /* end of list */ }
+ },
};
#define PC_I440FX_2_0_MACHINE_OPTIONS PC_I440FX_2_1_MACHINE_OPTIONS
@@ -903,6 +911,7 @@ static QEMUMachine xenfv_machine = {
static void pc_machine_init(void)
{
+ qemu_register_pc_machine(&pc_i440fx_machine_v2_2);
qemu_register_pc_machine(&pc_i440fx_machine_v2_1);
qemu_register_pc_machine(&pc_i440fx_machine_v2_0);
qemu_register_pc_machine(&pc_i440fx_machine_v1_7);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index c39ee98933..43350d7bcc 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -49,7 +49,6 @@
/* ICH9 AHCI has 6 ports */
#define MAX_SATA_PORTS 6
-static bool has_pci_info;
static bool has_acpi_build = true;
static bool smbios_defaults = true;
static bool smbios_legacy_mode;
@@ -150,7 +149,6 @@ static void pc_q35_init(MachineState *machine)
}
guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size);
- guest_info->has_pci_info = has_pci_info;
guest_info->isapc_ram_fw = false;
guest_info->has_acpi_build = has_acpi_build;
guest_info->has_reserved_memory = has_reserved_memory;
@@ -296,7 +294,6 @@ static void pc_compat_1_7(MachineState *machine)
static void pc_compat_1_6(MachineState *machine)
{
pc_compat_1_7(machine);
- has_pci_info = false;
rom_file_has_mr = false;
has_acpi_build = false;
}
@@ -348,15 +345,27 @@ static void pc_q35_init_1_4(MachineState *machine)
.desc = "Standard PC (Q35 + ICH9, 2009)", \
.hot_add_cpu = pc_hot_add_cpu
-#define PC_Q35_2_1_MACHINE_OPTIONS \
+#define PC_Q35_2_2_MACHINE_OPTIONS \
PC_Q35_MACHINE_OPTIONS, \
.default_machine_opts = "firmware=bios-256k.bin"
+static QEMUMachine pc_q35_machine_v2_2 = {
+ PC_Q35_2_2_MACHINE_OPTIONS,
+ .name = "pc-q35-2.2",
+ .alias = "q35",
+ .init = pc_q35_init,
+};
+
+#define PC_Q35_2_1_MACHINE_OPTIONS PC_Q35_2_2_MACHINE_OPTIONS
+
static QEMUMachine pc_q35_machine_v2_1 = {
PC_Q35_2_1_MACHINE_OPTIONS,
.name = "pc-q35-2.1",
- .alias = "q35",
.init = pc_q35_init,
+ .compat_props = (GlobalProperty[]) {
+ PC_COMPAT_2_1,
+ { /* end of list */ }
+ },
};
#define PC_Q35_2_0_MACHINE_OPTIONS PC_Q35_2_1_MACHINE_OPTIONS
@@ -421,6 +430,7 @@ static QEMUMachine pc_q35_machine_v1_4 = {
static void pc_q35_machine_init(void)
{
+ qemu_register_pc_machine(&pc_q35_machine_v2_2);
qemu_register_pc_machine(&pc_q35_machine_v2_1);
qemu_register_pc_machine(&pc_q35_machine_v2_0);
qemu_register_pc_machine(&pc_q35_machine_v1_7);
diff --git a/hw/i386/q35-acpi-dsdt.dsl b/hw/i386/q35-acpi-dsdt.dsl
index 8c3eae73bf..054b035b08 100644
--- a/hw/i386/q35-acpi-dsdt.dsl
+++ b/hw/i386/q35-acpi-dsdt.dsl
@@ -410,7 +410,7 @@ DefinitionBlock (
/****************************************************************
* General purpose events
****************************************************************/
- External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
+ External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
Scope(\_GPE) {
Name(_HID, "ACPI0006")
@@ -425,7 +425,7 @@ DefinitionBlock (
}
Method(_E03) {
// Memory hotplug event
- \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
+ \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
}
Method(_L04) {
}
diff --git a/hw/i386/ssdt-mem.dsl b/hw/i386/ssdt-mem.dsl
index 8e17bd1f97..22ff5ddfc3 100644
--- a/hw/i386/ssdt-mem.dsl
+++ b/hw/i386/ssdt-mem.dsl
@@ -39,10 +39,10 @@ ACPI_EXTRACT_ALL_CODE ssdm_mem_aml
DefinitionBlock ("ssdt-mem.aml", "SSDT", 0x02, "BXPC", "CSSDT", 0x1)
{
- External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD, MethodObj)
- External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD, MethodObj)
- External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD, MethodObj)
- External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD, MethodObj)
+ External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_CRS_METHOD, MethodObj)
+ External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD, MethodObj)
+ External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_OST_METHOD, MethodObj)
+ External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD, MethodObj)
Scope(\_SB) {
/* v------------------ DO NOT EDIT ------------------v */
@@ -58,19 +58,19 @@ DefinitionBlock ("ssdt-mem.aml", "SSDT", 0x02, "BXPC", "CSSDT", 0x1)
Name(_HID, EISAID("PNP0C80"))
Method(_CRS, 0) {
- Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD(_UID))
+ Return(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_CRS_METHOD(_UID))
}
Method(_STA, 0) {
- Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD(_UID))
+ Return(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD(_UID))
}
Method(_PXM, 0) {
- Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD(_UID))
+ Return(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD(_UID))
}
Method(_OST, 3) {
- \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD(_UID, Arg0, Arg1, Arg2)
+ \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_OST_METHOD(_UID, Arg0, Arg1, Arg2)
}
}
}
diff --git a/hw/i386/ssdt-misc.dsl b/hw/i386/ssdt-misc.dsl
index d329b8ba57..0fd448000b 100644
--- a/hw/i386/ssdt-misc.dsl
+++ b/hw/i386/ssdt-misc.dsl
@@ -120,7 +120,7 @@ DefinitionBlock ("ssdt-misc.aml", "SSDT", 0x01, "BXPC", "BXSSDTSUSP", 0x1)
External(MEMORY_SLOT_NOTIFY_METHOD, MethodObj)
Scope(\_SB.PCI0) {
- Device(MEMORY_HOPTLUG_DEVICE) {
+ Device(MEMORY_HOTPLUG_DEVICE) {
Name(_HID, "PNP0A06")
Name(_UID, "Memory hotplug resources")
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 08f49ed53b..5bfc5b7483 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -252,6 +252,12 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp)
error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
return;
}
+ if (dimm->node >= nb_numa_nodes) {
+ error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
+ PRIu32 "' which exceeds the number of numa nodes: %d",
+ dimm->node, nb_numa_nodes);
+ return;
+ }
}
static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm)
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 2be4b86726..d3e1195066 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -483,8 +483,8 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
"ivshmem.bar2", s->ivshmem_size, map_ptr);
vmstate_register_ram(&s->ivshmem, DEVICE(s));
- IVSHMEM_DPRINTF("guest h/w addr = %" PRIu64 ", size = %" PRIu64 "\n",
- s->ivshmem_offset, s->ivshmem_size);
+ IVSHMEM_DPRINTF("guest h/w addr = %p, size = %" PRIu64 "\n",
+ map_ptr, s->ivshmem_size);
memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 0fc29a0ae3..a2c4608601 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -186,21 +186,31 @@ e1000_link_up(E1000State *s)
s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
}
+static bool
+have_autoneg(E1000State *s)
+{
+ return (s->compat_flags & E1000_FLAG_AUTONEG) &&
+ (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
+}
+
static void
set_phy_ctrl(E1000State *s, int index, uint16_t val)
{
+ /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
+ s->phy_reg[PHY_CTRL] = val & ~(0x3f |
+ MII_CR_RESET |
+ MII_CR_RESTART_AUTO_NEG);
+
/*
* QEMU 1.3 does not support link auto-negotiation emulation, so if we
* migrate during auto negotiation, after migration the link will be
* down.
*/
- if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
- return;
- }
- if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
+ if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
e1000_link_down(s);
DBGOUT(PHY, "Start link auto negotiation\n");
- timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
+ timer_mod(s->autoneg_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
}
}
@@ -223,13 +233,30 @@ static const char phy_regcap[0x20] = {
/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
static const uint16_t phy_reg_init[] = {
- [PHY_CTRL] = 0x1140,
- [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
- [PHY_ID1] = 0x141, /* [PHY_ID2] configured per DevId, from e1000_reset() */
- [PHY_1000T_CTRL] = 0x0e00, [M88E1000_PHY_SPEC_CTRL] = 0x360,
- [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60, [PHY_AUTONEG_ADV] = 0xde1,
- [PHY_LP_ABILITY] = 0x1e0, [PHY_1000T_STATUS] = 0x3c00,
+ [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB |
+ MII_CR_FULL_DUPLEX |
+ MII_CR_AUTO_NEG_EN,
+
+ [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
+ MII_SR_LINK_STATUS | /* link initially up */
+ MII_SR_AUTONEG_CAPS |
+ /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
+ MII_SR_PREAMBLE_SUPPRESS |
+ MII_SR_EXTENDED_STATUS |
+ MII_SR_10T_HD_CAPS |
+ MII_SR_10T_FD_CAPS |
+ MII_SR_100X_HD_CAPS |
+ MII_SR_100X_FD_CAPS,
+
+ [PHY_ID1] = 0x141,
+ /* [PHY_ID2] configured per DevId, from e1000_reset() */
+ [PHY_AUTONEG_ADV] = 0xde1,
+ [PHY_LP_ABILITY] = 0x1e0,
+ [PHY_1000T_CTRL] = 0x0e00,
+ [PHY_1000T_STATUS] = 0x3c00,
+ [M88E1000_PHY_SPEC_CTRL] = 0x360,
[M88E1000_PHY_SPEC_STATUS] = 0xac00,
+ [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
static const uint32_t mac_reg_init[] = {
@@ -446,8 +473,9 @@ set_mdic(E1000State *s, int index, uint32_t val)
} else {
if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
phyreg_writeops[addr](s, index, data);
+ } else {
+ s->phy_reg[addr] = data;
}
- s->phy_reg[addr] = data;
}
}
s->mac_reg[MDIC] = val | E1000_MDIC_READY;
@@ -848,14 +876,6 @@ receive_filter(E1000State *s, const uint8_t *buf, int size)
return 0;
}
-static bool
-have_autoneg(E1000State *s)
-{
- return (s->compat_flags & E1000_FLAG_AUTONEG) &&
- (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN) &&
- (s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG);
-}
-
static void
e1000_set_link_status(NetClientState *nc)
{
diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index a4a3040d4d..52d23130d9 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -291,7 +291,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
"notify vector 0x%x"
" address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
vector, msg.address, msg.data);
- stl_le_phys(&address_space_memory, msg.address, msg.data);
+ stl_le_phys(&dev->bus_master_as, msg.address, msg.data);
}
/* Normally called by pci_default_write_config(). */
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 5c49bfc304..20ae47632f 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -439,7 +439,7 @@ void msix_notify(PCIDevice *dev, unsigned vector)
msg = msix_get_message(dev, vector);
- stl_le_phys(&address_space_memory, msg.address, msg.data);
+ stl_le_phys(&dev->bus_master_as, msg.address, msg.data);
}
void msix_reset(PCIDevice *dev)
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index 03fd04a1e5..e85a979754 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -16,6 +16,7 @@
#include "hw/virtio/virtio-rng.h"
#include "sysemu/rng.h"
#include "qom/object_interfaces.h"
+#include "trace.h"
static bool is_guest_ready(VirtIORNG *vrng)
{
@@ -24,6 +25,7 @@ static bool is_guest_ready(VirtIORNG *vrng)
&& (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
return true;
}
+ trace_virtio_rng_guest_not_ready(vrng);
return false;
}
@@ -62,6 +64,7 @@ static void chr_read(void *opaque, const void *buf, size_t size)
offset += len;
virtqueue_push(vrng->vq, &elem, len);
+ trace_virtio_rng_pushed(vrng, len);
}
virtio_notify(vdev, vrng->vq);
}
@@ -81,6 +84,9 @@ static void virtio_rng_process(VirtIORNG *vrng)
quota = MIN((uint64_t)vrng->quota_remaining, (uint64_t)UINT32_MAX);
}
size = get_request_size(vrng->vq, quota);
+
+ trace_virtio_rng_request(vrng, size, quota);
+
size = MIN(vrng->quota_remaining, size);
if (size) {
rng_backend_request_entropy(vrng->rng, size, chr_read, vrng);
diff --git a/include/block/block.h b/include/block/block.h
index f08471d7e1..e94b701667 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -275,6 +275,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
int bdrv_get_backing_file_depth(BlockDriverState *bs);
int bdrv_truncate(BlockDriverState *bs, int64_t offset);
+int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
@@ -454,6 +455,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
size_t bdrv_opt_mem_align(BlockDriverState *bs);
void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
struct HBitmapIter;
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index b408f96b82..b9b7f488c9 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -223,4 +223,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
* Note that this function clobbers the handlers for the file descriptor.
*/
void coroutine_fn yield_until_fd_readable(int fd);
+
+/**
+ * Add or subtract from the coroutine pool size
+ *
+ * The coroutine implementation keeps a pool of coroutines to be reused by
+ * qemu_coroutine_create(). This makes coroutine creation cheap. Heavy
+ * coroutine users should call this to reserve pool space. Call it again with
+ * a negative number to release pool space.
+ */
+void qemu_coroutine_adjust_pool_size(int n);
+
#endif /* QEMU_COROUTINE_H */
diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h
index a04a0341e2..0d0da3aeb3 100644
--- a/include/exec/helper-gen.h
+++ b/include/exec/helper-gen.h
@@ -57,6 +57,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
}
#include "helper.h"
+#include "trace/generated-helpers.h"
+#include "trace/generated-helpers-wrappers.h"
#include "tcg-runtime.h"
#undef DEF_HELPER_FLAGS_0
diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
index 828951c609..effdd4383a 100644
--- a/include/exec/helper-proto.h
+++ b/include/exec/helper-proto.h
@@ -27,6 +27,7 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
dh_ctype(t4), dh_ctype(t5));
#include "helper.h"
+#include "trace/generated-helpers.h"
#include "tcg-runtime.h"
#undef DEF_HELPER_FLAGS_0
diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h
index d704c81126..79fa3c8c81 100644
--- a/include/exec/helper-tcg.h
+++ b/include/exec/helper-tcg.h
@@ -36,6 +36,7 @@
| dh_sizemask(t5, 5) },
#include "helper.h"
+#include "trace/generated-helpers.h"
#include "tcg-runtime.h"
#undef DEF_HELPER_FLAGS_0
diff --git a/include/hw/acpi/pc-hotplug.h b/include/hw/acpi/pc-hotplug.h
index bf5157d7c3..b9db29576c 100644
--- a/include/hw/acpi/pc-hotplug.h
+++ b/include/hw/acpi/pc-hotplug.h
@@ -32,7 +32,7 @@
#define ACPI_MEMORY_HOTPLUG_IO_LEN 24
#define ACPI_MEMORY_HOTPLUG_BASE 0x0a00
-#define MEMORY_HOPTLUG_DEVICE MHPD
+#define MEMORY_HOTPLUG_DEVICE MHPD
#define MEMORY_SLOTS_NUMBER MDNR
#define MEMORY_HOTPLUG_IO_REGION HPMR
#define MEMORY_SLOT_ADDR_LOW MRBL
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 863eefbf8c..0fca9e3fd7 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -85,7 +85,6 @@ typedef struct PcPciInfo {
#define ACPI_PM_PROP_GPE0_BLK_LEN "gpe0_blk_len"
struct PcGuestInfo {
- bool has_pci_info;
bool isapc_ram_fw;
hwaddr ram_size, ram_size_below_4g;
unsigned apic_id_limit;
@@ -300,7 +299,15 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
int e820_get_num_entries(void);
bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
+#define PC_COMPAT_2_1 \
+ {\
+ .driver = "intel-hda",\
+ .property = "old_msi_addr",\
+ .value = "on",\
+ }
+
#define PC_COMPAT_2_0 \
+ PC_COMPAT_2_1, \
{\
.driver = "virtio-scsi-pci",\
.property = "any_layout",\
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 8480d523f0..9dd43fc2db 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -95,6 +95,7 @@ typedef signed int int_fast16_t;
#define qemu_printf printf
int qemu_daemon(int nochdir, int noclose);
+void *qemu_try_memalign(size_t alignment, size_t size);
void *qemu_memalign(size_t alignment, size_t size);
void *qemu_anon_ram_alloc(size_t size);
void qemu_vfree(void *ptr);
diff --git a/include/trace-tcg.h b/include/trace-tcg.h
new file mode 100644
index 0000000000..6f6bdbb44a
--- /dev/null
+++ b/include/trace-tcg.h
@@ -0,0 +1,7 @@
+#ifndef TRACE_TCG_H
+#define TRACE_TCG_H
+
+#include "trace/generated-tcg-tracers.h"
+#include "trace/generated-events.h"
+
+#endif /* TRACE_TCG_H */
diff --git a/include/trace.h b/include/trace.h
index c15f498128..44a1f1f8c7 100644
--- a/include/trace.h
+++ b/include/trace.h
@@ -2,5 +2,6 @@
#define TRACE_H
#include "trace/generated-tracers.h"
+#include "trace/generated-events.h"
#endif /* TRACE_H */
diff --git a/numa.c b/numa.c
index 7bf7834b7f..c78cec96a8 100644
--- a/numa.c
+++ b/numa.c
@@ -210,8 +210,8 @@ void set_numa_nodes(void)
numa_total += numa_info[i].node_mem;
}
if (numa_total != ram_size) {
- error_report("total memory for NUMA nodes (%" PRIu64 ")"
- " should equal RAM size (" RAM_ADDR_FMT ")",
+ error_report("total memory for NUMA nodes (0x%" PRIx64 ")"
+ " should equal RAM size (0x" RAM_ADDR_FMT ")",
numa_total, ram_size);
exit(1);
}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e378653a77..fb74c56e32 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -115,6 +115,8 @@
# @format-specific: #optional structure supplying additional format-specific
# information (since 1.7)
#
+# @nocow: #optional info of whether NOCOW flag is set or not. (since 2.2)
+#
# Since: 1.3
#
##
@@ -126,7 +128,8 @@
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
'*backing-image': 'ImageInfo',
- '*format-specific': 'ImageInfoSpecific' } }
+ '*format-specific': 'ImageInfoSpecific',
+ '*nocow': 'bool' } }
##
# @ImageCheck:
@@ -194,6 +197,7 @@
# 'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
# 'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
# 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
+# 2.2: 'archipelago'
#
# @backing_file: #optional the name of the backing file (for copy-on-write)
#
@@ -1143,7 +1147,7 @@
# Since: 2.0
##
{ 'enum': 'BlockdevDriver',
- 'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy',
+ 'data': [ 'archipelago', 'file', 'host_device', 'host_cdrom', 'host_floppy',
'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
@@ -1273,6 +1277,37 @@
'*pass-discard-snapshot': 'bool',
'*pass-discard-other': 'bool' } }
+
+##
+# @BlockdevOptionsArchipelago
+#
+# Driver specific block device options for Archipelago.
+#
+# @volume: Name of the Archipelago volume image
+#
+# @mport: #optional The port number on which mapperd is
+# listening. This is optional
+# and if not specified, QEMU will make Archipelago
+# use the default port (1001).
+#
+# @vport: #optional The port number on which vlmcd is
+# listening. This is optional
+# and if not specified, QEMU will make Archipelago
+# use the default port (501).
+#
+# @segment: #optional The name of the shared memory segment
+# Archipelago stack is using. This is optional
+# and if not specified, QEMU will make Archipelago
+# use the default value, 'archipelago'.
+# Since: 2.2
+##
+{ 'type': 'BlockdevOptionsArchipelago',
+ 'data': { 'volume': 'str',
+ '*mport': 'int',
+ '*vport': 'int',
+ '*segment': 'str' } }
+
+
##
# @BlkdebugEvent
#
@@ -1416,6 +1451,7 @@
'base': 'BlockdevOptionsBase',
'discriminator': 'driver',
'data': {
+ 'archipelago':'BlockdevOptionsArchipelago',
'file': 'BlockdevOptionsFile',
'host_device':'BlockdevOptionsFile',
'host_cdrom': 'BlockdevOptionsFile',
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 81a4e9b27f..fb9ee24b3a 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -182,9 +182,10 @@ static const char *find_typename_by_alias(const char *alias)
int qdev_device_help(QemuOpts *opts)
{
+ Error *local_err = NULL;
const char *driver;
- Property *prop;
- ObjectClass *klass;
+ DevicePropertyInfoList *prop_list;
+ DevicePropertyInfoList *prop;
driver = qemu_opt_get(opts, "driver");
if (driver && is_help_option(driver)) {
@@ -196,35 +197,28 @@ int qdev_device_help(QemuOpts *opts)
return 0;
}
- klass = object_class_by_name(driver);
- if (!klass) {
+ if (!object_class_by_name(driver)) {
const char *typename = find_typename_by_alias(driver);
if (typename) {
driver = typename;
- klass = object_class_by_name(driver);
}
}
- if (!object_class_dynamic_cast(klass, TYPE_DEVICE)) {
- return 0;
+ prop_list = qmp_device_list_properties(driver, &local_err);
+ if (!prop_list) {
+ error_printf("%s\n", error_get_pretty(local_err));
+ error_free(local_err);
+ return 1;
}
- do {
- for (prop = DEVICE_CLASS(klass)->props; prop && prop->name; prop++) {
- /*
- * TODO Properties without a parser are just for dirty hacks.
- * qdev_prop_ptr is the only such PropertyInfo. It's marked
- * for removal. This conditional should be removed along with
- * it.
- */
- if (!prop->info->set) {
- continue; /* no way to set it, don't show */
- }
- error_printf("%s.%s=%s\n", driver, prop->name,
- prop->info->legacy_name ?: prop->info->name);
- }
- klass = object_class_get_parent(klass);
- } while (klass != object_class_by_name(TYPE_DEVICE));
+
+ for (prop = prop_list; prop; prop = prop->next) {
+ error_printf("%s.%s=%s\n", driver,
+ prop->value->name,
+ prop->value->type);
+ }
+
+ qapi_free_DevicePropertyInfoList(prop_list);
return 1;
}
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
index 470852100a..bd574aa1b5 100644
--- a/qemu-coroutine.c
+++ b/qemu-coroutine.c
@@ -19,14 +19,14 @@
#include "block/coroutine_int.h"
enum {
- /* Maximum free pool size prevents holding too many freed coroutines */
- POOL_MAX_SIZE = 64,
+ POOL_DEFAULT_SIZE = 64,
};
/** Free list to speed up creation */
static QemuMutex pool_lock;
static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
static unsigned int pool_size;
+static unsigned int pool_max_size = POOL_DEFAULT_SIZE;
Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
{
@@ -55,7 +55,7 @@ static void coroutine_delete(Coroutine *co)
{
if (CONFIG_COROUTINE_POOL) {
qemu_mutex_lock(&pool_lock);
- if (pool_size < POOL_MAX_SIZE) {
+ if (pool_size < pool_max_size) {
QSLIST_INSERT_HEAD(&pool, co, pool_next);
co->caller = NULL;
pool_size++;
@@ -137,3 +137,23 @@ void coroutine_fn qemu_coroutine_yield(void)
self->caller = NULL;
coroutine_swap(self, to);
}
+
+void qemu_coroutine_adjust_pool_size(int n)
+{
+ qemu_mutex_lock(&pool_lock);
+
+ pool_max_size += n;
+
+ /* Callers should never take away more than they added */
+ assert(pool_max_size >= POOL_DEFAULT_SIZE);
+
+ /* Trim oversized pool down to new max */
+ while (pool_size > pool_max_size) {
+ Coroutine *co = QSLIST_FIRST(&pool);
+ QSLIST_REMOVE_HEAD(&pool, pool_next);
+ pool_size--;
+ qemu_coroutine_delete(co);
+ }
+
+ qemu_mutex_unlock(&pool_lock);
+}
diff --git a/qemu-img.c b/qemu-img.c
index 19495bb594..18caa4b450 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -960,7 +960,6 @@ static int img_compare(int argc, char **argv)
int64_t sector_num = 0;
int64_t nb_sectors;
int c, pnum;
- uint64_t bs_sectors;
uint64_t progress_base;
for (;;) {
@@ -1022,10 +1021,20 @@ static int img_compare(int argc, char **argv)
buf1 = qemu_blockalign(bs1, IO_BUF_SIZE);
buf2 = qemu_blockalign(bs2, IO_BUF_SIZE);
- bdrv_get_geometry(bs1, &bs_sectors);
- total_sectors1 = bs_sectors;
- bdrv_get_geometry(bs2, &bs_sectors);
- total_sectors2 = bs_sectors;
+ total_sectors1 = bdrv_nb_sectors(bs1);
+ if (total_sectors1 < 0) {
+ error_report("Can't get size of %s: %s",
+ filename1, strerror(-total_sectors1));
+ ret = 4;
+ goto out;
+ }
+ total_sectors2 = bdrv_nb_sectors(bs2);
+ if (total_sectors2 < 0) {
+ error_report("Can't get size of %s: %s",
+ filename2, strerror(-total_sectors2));
+ ret = 4;
+ goto out;
+ }
total_sectors = MIN(total_sectors1, total_sectors2);
progress_base = MAX(total_sectors1, total_sectors2);
@@ -1187,7 +1196,7 @@ static int img_convert(int argc, char **argv)
BlockDriver *drv, *proto_drv;
BlockDriverState **bs = NULL, *out_bs = NULL;
int64_t total_sectors, nb_sectors, sector_num, bs_offset;
- uint64_t bs_sectors;
+ int64_t *bs_sectors = NULL;
uint8_t * buf = NULL;
size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
const uint8_t *buf1;
@@ -1328,7 +1337,8 @@ static int img_convert(int argc, char **argv)
qemu_progress_print(0, 100);
- bs = g_malloc0(bs_n * sizeof(BlockDriverState *));
+ bs = g_new0(BlockDriverState *, bs_n);
+ bs_sectors = g_new(int64_t, bs_n);
total_sectors = 0;
for (bs_i = 0; bs_i < bs_n; bs_i++) {
@@ -1342,8 +1352,14 @@ static int img_convert(int argc, char **argv)
ret = -1;
goto out;
}
- bdrv_get_geometry(bs[bs_i], &bs_sectors);
- total_sectors += bs_sectors;
+ bs_sectors[bs_i] = bdrv_nb_sectors(bs[bs_i]);
+ if (bs_sectors[bs_i] < 0) {
+ error_report("Could not get size of %s: %s",
+ argv[optind + bs_i], strerror(-bs_sectors[bs_i]));
+ ret = -1;
+ goto out;
+ }
+ total_sectors += bs_sectors[bs_i];
}
if (sn_opts) {
@@ -1461,7 +1477,6 @@ static int img_convert(int argc, char **argv)
bs_i = 0;
bs_offset = 0;
- bdrv_get_geometry(bs[0], &bs_sectors);
/* increase bufsectors from the default 4096 (2M) if opt_transfer_length
* or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
@@ -1474,13 +1489,13 @@ static int img_convert(int argc, char **argv)
buf = qemu_blockalign(out_bs, bufsectors * BDRV_SECTOR_SIZE);
if (skip_create) {
- int64_t output_length = bdrv_getlength(out_bs);
- if (output_length < 0) {
+ int64_t output_sectors = bdrv_nb_sectors(out_bs);
+ if (output_sectors < 0) {
error_report("unable to get output image length: %s\n",
- strerror(-output_length));
+ strerror(-output_sectors));
ret = -1;
goto out;
- } else if (output_length < total_sectors << BDRV_SECTOR_BITS) {
+ } else if (output_sectors < total_sectors) {
error_report("output file is smaller than input file");
ret = -1;
goto out;
@@ -1528,19 +1543,19 @@ static int img_convert(int argc, char **argv)
buf2 = buf;
while (remainder > 0) {
int nlow;
- while (bs_num == bs_sectors) {
+ while (bs_num == bs_sectors[bs_i]) {
+ bs_offset += bs_sectors[bs_i];
bs_i++;
assert (bs_i < bs_n);
- bs_offset += bs_sectors;
- bdrv_get_geometry(bs[bs_i], &bs_sectors);
bs_num = 0;
/* printf("changing part: sector_num=%" PRId64 ", "
"bs_i=%d, bs_offset=%" PRId64 ", bs_sectors=%" PRId64
- "\n", sector_num, bs_i, bs_offset, bs_sectors); */
+ "\n", sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
}
- assert (bs_num < bs_sectors);
+ assert (bs_num < bs_sectors[bs_i]);
- nlow = (remainder > bs_sectors - bs_num) ? bs_sectors - bs_num : remainder;
+ nlow = remainder > bs_sectors[bs_i] - bs_num
+ ? bs_sectors[bs_i] - bs_num : remainder;
ret = bdrv_read(bs[bs_i], bs_num, buf2, nlow);
if (ret < 0) {
@@ -1601,14 +1616,13 @@ restart:
break;
}
- while (sector_num - bs_offset >= bs_sectors) {
+ while (sector_num - bs_offset >= bs_sectors[bs_i]) {
+ bs_offset += bs_sectors[bs_i];
bs_i ++;
assert (bs_i < bs_n);
- bs_offset += bs_sectors;
- bdrv_get_geometry(bs[bs_i], &bs_sectors);
/* printf("changing part: sector_num=%" PRId64 ", bs_i=%d, "
"bs_offset=%" PRId64 ", bs_sectors=%" PRId64 "\n",
- sector_num, bs_i, bs_offset, bs_sectors); */
+ sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
}
if ((out_baseimg || has_zero_init) &&
@@ -1661,7 +1675,7 @@ restart:
}
}
- n = MIN(n, bs_sectors - (sector_num - bs_offset));
+ n = MIN(n, bs_sectors[bs_i] - (sector_num - bs_offset));
sectors_read += n;
if (count_allocated_sectors) {
@@ -1719,6 +1733,7 @@ out:
}
g_free(bs);
}
+ g_free(bs_sectors);
fail_getopt:
g_free(options);
@@ -2418,9 +2433,9 @@ static int img_rebase(int argc, char **argv)
* the image is the same as the original one at any time.
*/
if (!unsafe) {
- uint64_t num_sectors;
- uint64_t old_backing_num_sectors;
- uint64_t new_backing_num_sectors = 0;
+ int64_t num_sectors;
+ int64_t old_backing_num_sectors;
+ int64_t new_backing_num_sectors = 0;
uint64_t sector;
int n;
uint8_t * buf_old;
@@ -2430,10 +2445,31 @@ static int img_rebase(int argc, char **argv)
buf_old = qemu_blockalign(bs, IO_BUF_SIZE);
buf_new = qemu_blockalign(bs, IO_BUF_SIZE);
- bdrv_get_geometry(bs, &num_sectors);
- bdrv_get_geometry(bs_old_backing, &old_backing_num_sectors);
+ num_sectors = bdrv_nb_sectors(bs);
+ if (num_sectors < 0) {
+ error_report("Could not get size of '%s': %s",
+ filename, strerror(-num_sectors));
+ ret = -1;
+ goto out;
+ }
+ old_backing_num_sectors = bdrv_nb_sectors(bs_old_backing);
+ if (old_backing_num_sectors < 0) {
+ char backing_name[1024];
+
+ bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
+ error_report("Could not get size of '%s': %s",
+ backing_name, strerror(-old_backing_num_sectors));
+ ret = -1;
+ goto out;
+ }
if (bs_new_backing) {
- bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors);
+ new_backing_num_sectors = bdrv_nb_sectors(bs_new_backing);
+ if (new_backing_num_sectors < 0) {
+ error_report("Could not get size of '%s': %s",
+ out_baseimg, strerror(-new_backing_num_sectors));
+ ret = -1;
+ goto out;
+ }
}
if (num_sectors != 0) {
diff --git a/qmp.c b/qmp.c
index 0d2553abf5..c6767c4df3 100644
--- a/qmp.c
+++ b/qmp.c
@@ -509,6 +509,7 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename,
if (strcmp(prop->name, "type") == 0 ||
strcmp(prop->name, "realized") == 0 ||
strcmp(prop->name, "hotpluggable") == 0 ||
+ strcmp(prop->name, "hotplugged") == 0 ||
strcmp(prop->name, "parent_bus") == 0) {
continue;
}
diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 1aa9460e49..3916c6d14a 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -58,8 +58,8 @@ def read_record(edict, fobj):
rechdr = read_header(fobj, rec_header_fmt)
return get_record(edict, rechdr, fobj) # return tuple of record elements
-def read_trace_file(edict, fobj):
- """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6)."""
+def read_trace_header(fobj):
+ """Read and verify trace file header"""
header = read_header(fobj, log_header_fmt)
if header is None or \
header[0] != header_event_id or \
@@ -73,6 +73,8 @@ def read_trace_file(edict, fobj):
raise ValueError('Log format %d not supported with this QEMU release!'
% log_version)
+def read_trace_records(edict, fobj):
+ """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6)."""
while True:
rec = read_record(edict, fobj)
if rec is None:
@@ -102,13 +104,16 @@ class Analyzer(object):
"""Called at the end of the trace."""
pass
-def process(events, log, analyzer):
+def process(events, log, analyzer, read_header=True):
"""Invoke an analyzer on each event in a log."""
if isinstance(events, str):
events = _read_events(open(events, 'r'))
if isinstance(log, str):
log = open(log, 'rb')
+ if read_header:
+ read_trace_header(log)
+
dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)")
edict = {dropped_event_id: dropped_event}
@@ -137,7 +142,7 @@ def process(events, log, analyzer):
analyzer.begin()
fn_cache = {}
- for rec in read_trace_file(edict, log):
+ for rec in read_trace_records(edict, log):
event_num = rec[0]
event = edict[event_num]
if event_num not in fn_cache:
@@ -152,12 +157,17 @@ def run(analyzer):
advanced scripts will want to call process() instead."""
import sys
- if len(sys.argv) != 3:
- sys.stderr.write('usage: %s <trace-events> <trace-file>\n' % sys.argv[0])
+ read_header = True
+ if len(sys.argv) == 4 and sys.argv[1] == '--no-header':
+ read_header = False
+ del sys.argv[1]
+ elif len(sys.argv) != 3:
+ sys.stderr.write('usage: %s [--no-header] <trace-events> ' \
+ '<trace-file>\n' % sys.argv[0])
sys.exit(1)
events = _read_events(open(sys.argv[1], 'r'))
- process(events, sys.argv[2], analyzer)
+ process(events, sys.argv[2], analyzer, read_header=read_header)
if __name__ == '__main__':
class Formatter(Analyzer):
diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py
index e8e8edcc4c..36c789de8f 100644
--- a/scripts/tracetool/__init__.py
+++ b/scripts/tracetool/__init__.py
@@ -15,9 +15,11 @@ __email__ = "stefanha@linux.vnet.ibm.com"
import re
import sys
+import weakref
import tracetool.format
import tracetool.backend
+import tracetool.transform
def error_write(*lines):
@@ -108,6 +110,18 @@ class Arguments:
"""List of argument types."""
return [ type_ for type_, _ in self._args ]
+ def transform(self, *trans):
+ """Return a new Arguments instance with transformed types.
+
+ The types in the resulting Arguments instance are transformed according
+ to tracetool.transform.transform_type.
+ """
+ res = []
+ for type_, name in self._args:
+ res.append((tracetool.transform.transform_type(type_, *trans),
+ name))
+ return Arguments(res)
+
class Event(object):
"""Event description.
@@ -122,13 +136,21 @@ class Event(object):
Properties of the event.
args : Arguments
The event arguments.
+ arg_fmts : str
+ The format strings for each argument.
"""
- _CRE = re.compile("((?P<props>.*)\s+)?(?P<name>[^(\s]+)\((?P<args>[^)]*)\)\s*(?P<fmt>\".*)?")
+ _CRE = re.compile("((?P<props>.*)\s+)?"
+ "(?P<name>[^(\s]+)"
+ "\((?P<args>[^)]*)\)"
+ "\s*"
+ "(?:(?:(?P<fmt_trans>\".+),)?\s*(?P<fmt>\".+))?"
+ "\s*")
+ _FMT = re.compile("(%\w+|%.*PRI\S+)")
- _VALID_PROPS = set(["disable"])
+ _VALID_PROPS = set(["disable", "tcg", "tcg-trans", "tcg-exec"])
- def __init__(self, name, props, fmt, args):
+ def __init__(self, name, props, fmt, args, arg_fmts, orig=None):
"""
Parameters
----------
@@ -136,20 +158,32 @@ class Event(object):
Event name.
props : list of str
Property names.
- fmt : str
- Event printing format.
+ fmt : str, list of str
+ Event printing format (or formats).
args : Arguments
Event arguments.
+ arg_fmts : list of str
+ Format strings for each argument.
+ orig : Event or None
+ Original Event before transformation.
+
"""
self.name = name
self.properties = props
self.fmt = fmt
self.args = args
+ self.arg_fmts = arg_fmts
+
+ if orig is None:
+ self.original = weakref.ref(self)
+ else:
+ self.original = orig
unknown_props = set(self.properties) - self._VALID_PROPS
if len(unknown_props) > 0:
raise ValueError("Unknown properties: %s"
% ", ".join(unknown_props))
+ assert isinstance(self.fmt, str) or len(self.fmt) == 2
def copy(self):
"""Create a new copy."""
@@ -172,24 +206,50 @@ class Event(object):
name = groups["name"]
props = groups["props"].split()
fmt = groups["fmt"]
+ fmt_trans = groups["fmt_trans"]
+ if len(fmt_trans) > 0:
+ fmt = [fmt_trans, fmt]
args = Arguments.build(groups["args"])
+ arg_fmts = Event._FMT.findall(fmt)
+
+ if "tcg-trans" in props:
+ raise ValueError("Invalid property 'tcg-trans'")
+ if "tcg-exec" in props:
+ raise ValueError("Invalid property 'tcg-exec'")
+ if "tcg" not in props and not isinstance(fmt, str):
+ raise ValueError("Only events with 'tcg' property can have two formats")
+ if "tcg" in props and isinstance(fmt, str):
+ raise ValueError("Events with 'tcg' property must have two formats")
- return Event(name, props, fmt, args)
+ return Event(name, props, fmt, args, arg_fmts)
def __repr__(self):
"""Evaluable string representation for this object."""
+ if isinstance(self.fmt, str):
+ fmt = self.fmt
+ else:
+ fmt = "%s, %s" % (self.fmt[0], self.fmt[1])
return "Event('%s %s(%s) %s')" % (" ".join(self.properties),
self.name,
self.args,
- self.fmt)
+ fmt)
QEMU_TRACE = "trace_%(name)s"
+ QEMU_TRACE_TCG = QEMU_TRACE + "_tcg"
def api(self, fmt=None):
if fmt is None:
fmt = Event.QEMU_TRACE
return fmt % {"name": self.name}
+ def transform(self, *trans):
+ """Return a new Event with transformed Arguments."""
+ return Event(self.name,
+ list(self.properties),
+ self.fmt,
+ self.args.transform(*trans),
+ self)
+
def _read_events(fobj):
res = []
@@ -272,4 +332,35 @@ def generate(fevents, format, backends,
events = _read_events(fevents)
+ # transform TCG-enabled events
+ new_events = []
+ for event in events:
+ if "tcg" not in event.properties:
+ new_events.append(event)
+ else:
+ event_trans = event.copy()
+ event_trans.name += "_trans"
+ event_trans.properties += ["tcg-trans"]
+ event_trans.fmt = event.fmt[0]
+ args_trans = []
+ for atrans, aorig in zip(
+ event_trans.transform(tracetool.transform.TCG_2_HOST).args,
+ event.args):
+ if atrans == aorig:
+ args_trans.append(atrans)
+ event_trans.args = Arguments(args_trans)
+ event_trans = event_trans.copy()
+
+ event_exec = event.copy()
+ event_exec.name += "_exec"
+ event_exec.properties += ["tcg-exec"]
+ event_exec.fmt = event.fmt[1]
+ event_exec = event_exec.transform(tracetool.transform.TCG_2_HOST)
+
+ new_event = [event_trans, event_exec]
+ event.event_trans, event.event_exec = new_event
+
+ new_events.extend(new_event)
+ events = new_events
+
tracetool.format.generate(events, format, backend)
diff --git a/scripts/tracetool/format/events_h.py b/scripts/tracetool/format/events_h.py
index 25d913bb25..9f114a3497 100644
--- a/scripts/tracetool/format/events_h.py
+++ b/scripts/tracetool/format/events_h.py
@@ -40,6 +40,11 @@ def generate(events, backend):
enabled = 0
else:
enabled = 1
+ if "tcg-trans" in e.properties:
+ # a single define for the two "sub-events"
+ out('#define TRACE_%(name)s_ENABLED %(enabled)d',
+ name=e.original.original.name.upper(),
+ enabled=enabled)
out('#define TRACE_%s_ENABLED %d' % (e.name.upper(), enabled))
out('#include "trace/event-internal.h"',
diff --git a/scripts/tracetool/format/simpletrace_stap.py b/scripts/tracetool/format/simpletrace_stap.py
new file mode 100644
index 0000000000..7e44bc1811
--- /dev/null
+++ b/scripts/tracetool/format/simpletrace_stap.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Generate .stp file that outputs simpletrace binary traces (DTrace with SystemTAP only).
+"""
+
+__author__ = "Stefan Hajnoczi <redhat.com>"
+__copyright__ = "Copyright (C) 2014, Red Hat, Inc."
+__license__ = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__ = "stefanha@redhat.com"
+
+
+from tracetool import out
+from tracetool.backend.dtrace import binary, probeprefix
+from tracetool.backend.simple import is_string
+from tracetool.format.stap import stap_escape
+
+
+def generate(events, backend):
+ out('/* This file is autogenerated by tracetool, do not edit. */',
+ '')
+
+ for event_id, e in enumerate(events):
+ if 'disable' in e.properties:
+ continue
+
+ out('probe %(probeprefix)s.simpletrace.%(name)s = %(probeprefix)s.%(name)s ?',
+ '{',
+ probeprefix=probeprefix(),
+ name=e.name)
+
+ # Calculate record size
+ sizes = ['24'] # sizeof(TraceRecord)
+ for type_, name in e.args:
+ name = stap_escape(name)
+ if is_string(type_):
+ out(' try {',
+ ' arg%(name)s_str = %(name)s ? user_string_n(%(name)s, 512) : "<null>"',
+ ' } catch {}',
+ ' arg%(name)s_len = strlen(arg%(name)s_str)',
+ name=name)
+ sizes.append('4 + arg%s_len' % name)
+ else:
+ sizes.append('8')
+ sizestr = ' + '.join(sizes)
+
+ # Generate format string and value pairs for record header and arguments
+ fields = [('8b', str(event_id)),
+ ('8b', 'gettimeofday_ns()'),
+ ('4b', sizestr),
+ ('4b', 'pid()')]
+ for type_, name in e.args:
+ name = stap_escape(name)
+ if is_string(type_):
+ fields.extend([('4b', 'arg%s_len' % name),
+ ('.*s', 'arg%s_len, arg%s_str' % (name, name))])
+ else:
+ fields.append(('8b', name))
+
+ # Emit the entire record in a single SystemTap printf()
+ fmt_str = '%'.join(fmt for fmt, _ in fields)
+ arg_str = ', '.join(arg for _, arg in fields)
+ out(' printf("%%%(fmt_str)s", %(arg_str)s)',
+ fmt_str=fmt_str, arg_str=arg_str)
+
+ out('}')
+
+ out()
diff --git a/scripts/tracetool/format/stap.py b/scripts/tracetool/format/stap.py
index e24abf7f11..9e780f1b06 100644
--- a/scripts/tracetool/format/stap.py
+++ b/scripts/tracetool/format/stap.py
@@ -27,6 +27,13 @@ RESERVED_WORDS = (
)
+def stap_escape(identifier):
+ # Append underscore to reserved keywords
+ if identifier in RESERVED_WORDS:
+ return identifier + '_'
+ return identifier
+
+
def generate(events, backend):
events = [e for e in events
if "disable" not in e.properties]
@@ -45,9 +52,7 @@ def generate(events, backend):
i = 1
if len(e.args) > 0:
for name in e.args.names():
- # Append underscore to reserved keywords
- if name in RESERVED_WORDS:
- name += '_'
+ name = stap_escape(name)
out(' %s = $arg%d;' % (name, i))
i += 1
diff --git a/scripts/tracetool/format/tcg_h.py b/scripts/tracetool/format/tcg_h.py
new file mode 100644
index 0000000000..f676b66622
--- /dev/null
+++ b/scripts/tracetool/format/tcg_h.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Generate .h file for TCG code generation.
+"""
+
+__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>"
+__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>"
+__license__ = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__ = "stefanha@linux.vnet.ibm.com"
+
+
+from tracetool import out
+
+
+def generate(events, backend):
+ out('/* This file is autogenerated by tracetool, do not edit. */',
+ '/* You must include this file after the inclusion of helper.h */',
+ '',
+ '#ifndef TRACE__GENERATED_TCG_TRACERS_H',
+ '#define TRACE__GENERATED_TCG_TRACERS_H',
+ '',
+ '#include <stdint.h>',
+ '',
+ '#include "trace.h"',
+ '#include "exec/helper-proto.h"',
+ '',
+ )
+
+ for e in events:
+ # just keep one of them
+ if "tcg-trans" not in e.properties:
+ continue
+
+ # get the original event definition
+ e = e.original.original
+
+ out('static inline void %(name_tcg)s(%(args)s)',
+ '{',
+ name_tcg=e.api(e.QEMU_TRACE_TCG),
+ args=e.args)
+
+ if "disable" not in e.properties:
+ out(' %(name_trans)s(%(argnames_trans)s);',
+ ' gen_helper_%(name_exec)s(%(argnames_exec)s);',
+ name_trans=e.event_trans.api(e.QEMU_TRACE),
+ name_exec=e.event_exec.api(e.QEMU_TRACE),
+ argnames_trans=", ".join(e.event_trans.args.names()),
+ argnames_exec=", ".join(e.event_exec.args.names()))
+
+ out('}')
+
+ out('',
+ '#endif /* TRACE__GENERATED_TCG_TRACERS_H */')
diff --git a/scripts/tracetool/format/tcg_helper_c.py b/scripts/tracetool/format/tcg_helper_c.py
new file mode 100644
index 0000000000..96655a0590
--- /dev/null
+++ b/scripts/tracetool/format/tcg_helper_c.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Generate trace/generated-helpers.c.
+"""
+
+__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>"
+__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>"
+__license__ = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__ = "stefanha@linux.vnet.ibm.com"
+
+
+from tracetool import out
+from tracetool.transform import *
+
+
+def generate(events, backend):
+ events = [e for e in events
+ if "disable" not in e.properties]
+
+ out('/* This file is autogenerated by tracetool, do not edit. */',
+ '',
+ '#include "qemu-common.h"',
+ '#include "trace.h"',
+ '#include "exec/helper-proto.h"',
+ '',
+ )
+
+ for e in events:
+ if "tcg-exec" not in e.properties:
+ continue
+
+ # tracetool.generate always transforms types to host
+ e_args = e.original.args
+
+ values = ["(%s)%s" % (t, n)
+ for t, n in e.args.transform(TCG_2_TCG_HELPER_DEF)]
+
+ out('void %(name_tcg)s(%(args)s)',
+ '{',
+ ' %(name)s(%(values)s);',
+ '}',
+ name_tcg="helper_%s_proxy" % e.api(),
+ name=e.api(),
+ args=e_args.transform(HOST_2_TCG_COMPAT, TCG_2_TCG_HELPER_DEF),
+ values=", ".join(values),
+ )
diff --git a/scripts/tracetool/format/tcg_helper_h.py b/scripts/tracetool/format/tcg_helper_h.py
new file mode 100644
index 0000000000..a8ba7ba8e3
--- /dev/null
+++ b/scripts/tracetool/format/tcg_helper_h.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Generate trace/generated-helpers.h.
+"""
+
+__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>"
+__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>"
+__license__ = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__ = "stefanha@linux.vnet.ibm.com"
+
+
+from tracetool import out
+from tracetool.transform import *
+
+
+def generate(events, backend):
+ events = [e for e in events
+ if "disable" not in e.properties]
+
+ out('/* This file is autogenerated by tracetool, do not edit. */',
+ '',
+ )
+
+ for e in events:
+ if "tcg-exec" not in e.properties:
+ continue
+
+ # tracetool.generate always transforms types to host
+ e_args = e.original.args
+
+ # TCG helper proxy declaration
+ fmt = "DEF_HELPER_FLAGS_%(argc)d(%(name)s, %(flags)svoid%(types)s)"
+ args = e_args.transform(HOST_2_TCG_COMPAT, HOST_2_TCG,
+ TCG_2_TCG_HELPER_DECL)
+ types = ", ".join(args.types())
+ if types != "":
+ types = ", " + types
+
+ flags = "TCG_CALL_NO_RWG, "
+
+ out(fmt,
+ flags=flags,
+ argc=len(args),
+ name=e.api() + "_proxy",
+ types=types,
+ )
diff --git a/scripts/tracetool/format/tcg_helper_wrapper_h.py b/scripts/tracetool/format/tcg_helper_wrapper_h.py
new file mode 100644
index 0000000000..cac5a878f9
--- /dev/null
+++ b/scripts/tracetool/format/tcg_helper_wrapper_h.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Generate trace/generated-helpers-wrappers.h.
+"""
+
+__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>"
+__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>"
+__license__ = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__ = "stefanha@linux.vnet.ibm.com"
+
+
+from tracetool import out
+from tracetool.transform import *
+
+
+def generate(events, backend):
+ events = [e for e in events
+ if "disable" not in e.properties]
+
+ out('/* This file is autogenerated by tracetool, do not edit. */',
+ '',
+ '#define tcg_temp_new_nop(v) (v)',
+ '#define tcg_temp_free_nop(v)',
+ '',
+ )
+
+ for e in events:
+ if "tcg-exec" not in e.properties:
+ continue
+
+ # tracetool.generate always transforms types to host
+ e_args = e.original.args
+
+ # mixed-type to TCG helper bridge
+ args_tcg_compat = e_args.transform(HOST_2_TCG_COMPAT)
+
+ code_new = [
+ "%(tcg_type)s __%(name)s = %(tcg_func)s(%(name)s);" %
+ {"tcg_type": transform_type(type_, HOST_2_TCG),
+ "tcg_func": transform_type(type_, HOST_2_TCG_TMP_NEW),
+ "name": name}
+ for (type_, name) in args_tcg_compat
+ ]
+
+ code_free = [
+ "%(tcg_func)s(__%(name)s);" %
+ {"tcg_func": transform_type(type_, HOST_2_TCG_TMP_FREE),
+ "name": name}
+ for (type_, name) in args_tcg_compat
+ ]
+
+ gen_name = "gen_helper_" + e.api()
+
+ out('static inline void %(name)s(%(args)s)',
+ '{',
+ ' %(code_new)s',
+ ' %(proxy_name)s(%(tmp_names)s);',
+ ' %(code_free)s',
+ '}',
+ name=gen_name,
+ args=e_args,
+ proxy_name=gen_name + "_proxy",
+ code_new="\n ".join(code_new),
+ code_free="\n ".join(code_free),
+ tmp_names=", ".join(["__%s" % name for _, name in e_args]),
+ )
diff --git a/scripts/tracetool/format/ust_events_h.py b/scripts/tracetool/format/ust_events_h.py
index 5102565470..d18989942a 100644
--- a/scripts/tracetool/format/ust_events_h.py
+++ b/scripts/tracetool/format/ust_events_h.py
@@ -63,13 +63,20 @@ def generate(events, backend):
name=e.name,
args=", ".join(", ".join(i) for i in e.args))
- for t, n in e.args:
- if ('int' in t) or ('long' in t) or ('unsigned' in t) or ('size_t' in t):
+ types = e.args.types()
+ names = e.args.names()
+ fmts = e.arg_fmts
+ for t,n,f in zip(types, names, fmts):
+ if ('char *' in t) or ('char*' in t):
+ out(' ctf_string(' + n + ', ' + n + ')')
+ elif ("%p" in f) or ("x" in f) or ("PRIx" in f):
+ out(' ctf_integer_hex('+ t + ', ' + n + ', ' + n + ')')
+ elif ("ptr" in t) or ("*" in t):
+ out(' ctf_integer_hex('+ t + ', ' + n + ', ' + n + ')')
+ elif ('int' in t) or ('long' in t) or ('unsigned' in t) or ('size_t' in t):
out(' ctf_integer(' + t + ', ' + n + ', ' + n + ')')
elif ('double' in t) or ('float' in t):
out(' ctf_float(' + t + ', ' + n + ', ' + n + ')')
- elif ('char *' in t) or ('char*' in t):
- out(' ctf_string(' + n + ', ' + n + ')')
elif ('void *' in t) or ('void*' in t):
out(' ctf_integer_hex(unsigned long, ' + n + ', ' + n + ')')
diff --git a/scripts/tracetool/transform.py b/scripts/tracetool/transform.py
new file mode 100644
index 0000000000..fc5e679ed4
--- /dev/null
+++ b/scripts/tracetool/transform.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Type-transformation rules.
+"""
+
+__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>"
+__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>"
+__license__ = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Stefan Hajnoczi"
+__email__ = "stefanha@linux.vnet.ibm.com"
+
+
+def _transform_type(type_, trans):
+ if isinstance(trans, str):
+ return trans
+ elif isinstance(trans, dict):
+ if type_ in trans:
+ return _transform_type(type_, trans[type_])
+ elif None in trans:
+ return _transform_type(type_, trans[None])
+ else:
+ return type_
+ elif callable(trans):
+ return trans(type_)
+ else:
+ raise ValueError("Invalid type transformation rule: %s" % trans)
+
+
+def transform_type(type_, *trans):
+ """Return a new type transformed according to the given rules.
+
+ Applies each of the transformation rules in trans in order.
+
+ If an element of trans is a string, return it.
+
+ If an element of trans is a function, call it with type_ as its only
+ argument.
+
+ If an element of trans is a dict, search type_ in its keys. If type_ is
+ a key, use the value as a transformation rule for type_. Otherwise, if
+ None is a key use the value as a transformation rule for type_.
+
+ Otherwise, return type_.
+
+ Parameters
+ ----------
+ type_ : str
+ Type to transform.
+ trans : list of function or dict
+ Type transformation rules.
+ """
+ if len(trans) == 0:
+ raise ValueError
+ res = type_
+ for t in trans:
+ res = _transform_type(res, t)
+ return res
+
+
+##################################################
+# tcg -> host
+
+def _tcg_2_host(type_):
+ if type_ == "TCGv":
+ # force a fixed-size type (target-independent)
+ return "uint64_t"
+ else:
+ return type_
+
+TCG_2_HOST = {
+ "TCGv_i32": "uint32_t",
+ "TCGv_i64": "uint64_t",
+ "TCGv_ptr": "void *",
+ None: _tcg_2_host,
+ }
+
+
+##################################################
+# host -> host compatible with tcg sizes
+
+HOST_2_TCG_COMPAT = {
+ "uint8_t": "uint32_t",
+ }
+
+
+##################################################
+# host/tcg -> tcg
+
+def _host_2_tcg(type_):
+ if type_.startswith("TCGv"):
+ return type_
+ raise ValueError("Don't know how to translate '%s' into a TCG type\n" % type_)
+
+HOST_2_TCG = {
+ "uint32_t": "TCGv_i32",
+ "uint64_t": "TCGv_i64",
+ "void *" : "TCGv_ptr",
+ None: _host_2_tcg,
+ }
+
+
+##################################################
+# tcg -> tcg helper definition
+
+def _tcg_2_helper_def(type_):
+ if type_ == "TCGv":
+ return "target_ulong"
+ else:
+ return type_
+
+TCG_2_TCG_HELPER_DEF = {
+ "TCGv_i32": "uint32_t",
+ "TCGv_i64": "uint64_t",
+ "TCGv_ptr": "void *",
+ None: _tcg_2_helper_def,
+ }
+
+
+##################################################
+# tcg -> tcg helper declaration
+
+def _tcg_2_tcg_helper_decl_error(type_):
+ raise ValueError("Don't know how to translate type '%s' into a TCG helper declaration type\n" % type_)
+
+TCG_2_TCG_HELPER_DECL = {
+ "TCGv" : "tl",
+ "TCGv_ptr": "ptr",
+ "TCGv_i32": "i32",
+ "TCGv_i64": "i64",
+ None: _tcg_2_tcg_helper_decl_error,
+ }
+
+
+##################################################
+# host/tcg -> tcg temporal constant allocation
+
+def _host_2_tcg_tmp_new(type_):
+ if type_.startswith("TCGv"):
+ return "tcg_temp_new_nop"
+ raise ValueError("Don't know how to translate type '%s' into a TCG temporal allocation" % type_)
+
+HOST_2_TCG_TMP_NEW = {
+ "uint32_t": "tcg_const_i32",
+ "uint64_t": "tcg_const_i64",
+ "void *" : "tcg_const_ptr",
+ None: _host_2_tcg_tmp_new,
+ }
+
+
+##################################################
+# host/tcg -> tcg temporal constant deallocation
+
+def _host_2_tcg_tmp_free(type_):
+ if type_.startswith("TCGv"):
+ return "tcg_temp_free_nop"
+ raise ValueError("Don't know how to translate type '%s' into a TCG temporal deallocation" % type_)
+
+HOST_2_TCG_TMP_FREE = {
+ "uint32_t": "tcg_temp_free_i32",
+ "uint64_t": "tcg_temp_free_i64",
+ "void *" : "tcg_temp_free_ptr",
+ None: _host_2_tcg_tmp_free,
+ }
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index cc81e774df..76658a074a 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -26,6 +26,9 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
#undef ALPHA_DEBUG_DISAS
#define CONFIG_SOFTFLOAT_INLINE
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 33b5025fee..f04ca49631 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -35,6 +35,8 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;
static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index cf4e767ff8..40121858d6 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -35,6 +35,9 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
#define ENABLE_ARCH_4T arm_feature(env, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5 arm_feature(env, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
diff --git a/target-cris/translate.c b/target-cris/translate.c
index ab0e47962b..e37b04e69d 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -33,6 +33,9 @@
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
#define DISAS_CRIS 0
#if DISAS_CRIS
# define LOG_DIS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6fcd8245d2..418173e0ea 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -32,6 +32,9 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
#define PREFIX_REPZ 0x01
#define PREFIX_REPNZ 0x02
#define PREFIX_LOCK 0x04
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index a51ade9a15..8454e8b517 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -27,6 +27,9 @@
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
#define DISAS_LM32 1
#if DISAS_LM32
# define LOG_DIS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 50df4d3844..efd4cfc3c7 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -27,6 +27,9 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
//#define DEBUG_DISPATCH 1
/* Fake floating point. */
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 03ea15803b..fd2b771645 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -26,6 +26,9 @@
#include "exec/cpu_ldst.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
#define SIM_COMPAT 0
#define DISAS_GNU 1
#define DISAS_MB 1
diff --git a/target-mips/translate.c b/target-mips/translate.c
index c381366506..06db150325 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -30,6 +30,9 @@
#include "exec/helper-gen.h"
#include "sysemu/kvm.h"
+#include "trace-tcg.h"
+
+
#define MIPS_DEBUG_DISAS 0
//#define MIPS_DEBUG_SIGN_EXTENSIONS
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 55ff935d5e..407bd9762f 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -31,6 +31,9 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
#define OPENRISC_DISAS
#ifdef OPENRISC_DISAS
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index b23933f7bd..c07bb01a7a 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -27,6 +27,9 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
#define CPU_SINGLE_STEP 0x1
#define CPU_BRANCH_STEP 0x2
#define GDBSTUB_SINGLE_STEP 0x4
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index e2a1d05f15..0cb036f667 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -42,6 +42,8 @@ static TCGv_ptr cpu_env;
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
/* Information that (most) every instruction needs to manipulate. */
typedef struct DisasContext DisasContext;
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 8126818142..3088edc6a6 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -28,6 +28,9 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
typedef struct DisasContext {
struct TranslationBlock *tb;
target_ulong pc;
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 1ab07a18a2..78c4e21cff 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -32,6 +32,9 @@
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
#define DEBUG_DISAS
#define DYNAMIC_PC 1 /* dynamic pc value */
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index e3643c23cd..653c225187 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -23,6 +23,9 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
/* internal defines */
typedef struct DisasContext {
target_ulong pc;
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 2f22cce845..badca195f4 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -41,6 +41,9 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
+#include "trace-tcg.h"
+
+
typedef struct DisasContext {
const XtensaConfig *config;
TranslationBlock *tb;
diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059
index 26a2fd3e0e..3c053c29b4 100755
--- a/tests/qemu-iotests/059
+++ b/tests/qemu-iotests/059
@@ -114,6 +114,10 @@ echo
echo "=== Testing version 3 ==="
_use_sample_img iotest-version3.vmdk.bz2
_img_info
+for i in {0..99}; do
+ $QEMU_IO -r -c "read -P $(( i % 10 + 0x30 )) $(( i * 64 * 1024 * 10 + i * 512 )) 512" $TEST_IMG \
+ | _filter_qemu_io
+done
echo
echo "=== Testing 4TB monolithicFlat creation and IO ==="
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index eba0dedda0..0dadba658f 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2056,8 +2056,208 @@ wrote 512/512 bytes at offset 10240
=== Testing version 3 ===
image: TEST_DIR/iotest-version3.IMGFMT
file format: IMGFMT
-virtual size: 1.0G (1073741824 bytes)
+virtual size: 16G (17179869184 bytes)
cluster_size: 65536
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 655872
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 1311744
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 1967616
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2623488
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 3279360
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 3935232
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 4591104
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 5246976
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 5902848
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 6558720
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 7214592
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 7870464
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 8526336
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 9182208
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 9838080
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 10493952
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 11149824
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 11805696
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 12461568
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 13117440
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 13773312
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 14429184
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 15085056
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 15740928
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 16396800
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 17052672
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 17708544
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 18364416
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 19020288
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 19676160
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 20332032
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 20987904
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 21643776
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 22299648
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 22955520
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 23611392
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 24267264
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 24923136
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 25579008
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 26234880
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 26890752
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 27546624
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 28202496
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 28858368
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 29514240
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 30170112
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 30825984
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 31481856
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 32137728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 32793600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 33449472
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 34105344
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 34761216
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 35417088
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 36072960
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 36728832
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 37384704
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 38040576
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 38696448
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 39352320
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 40008192
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 40664064
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 41319936
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 41975808
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 42631680
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 43287552
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 43943424
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 44599296
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 45255168
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 45911040
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 46566912
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 47222784
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 47878656
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 48534528
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 49190400
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 49846272
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 50502144
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 51158016
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 51813888
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 52469760
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 53125632
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 53781504
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 54437376
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 55093248
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 55749120
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 56404992
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 57060864
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 57716736
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 58372608
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 59028480
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 59684352
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 60340224
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 60996096
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 61651968
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 62307840
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 62963712
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 63619584
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 64275456
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 64931328
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
=== Testing 4TB monolithicFlat creation and IO ===
Formatting 'TEST_DIR/iotest-version3.IMGFMT', fmt=IMGFMT size=4398046511104
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 3cffc12fec..830386fdaa 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -164,6 +164,15 @@ wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io
+echo
+echo "=== Testing unallocated image header ==="
+echo
+_make_test_img 64M
+# Create L1/L2
+$QEMU_IO -c "$OPEN_RW" -c "write 0 64k" | _filter_qemu_io
+poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
+$QEMU_IO -c "$OPEN_RW" -c "write 64k 64k" | _filter_qemu_io
+
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index a517948036..c27c952412 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -93,4 +93,12 @@ blkdebug: Suspended request '0'
write failed: Input/output error
blkdebug: Resuming request '0'
aio_write failed: No medium found
+
+=== Testing unallocated image header ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Preventing invalid write on metadata (overlaps with qcow2_header); image marked as corrupt.
+write failed: Input/output error
*** done
diff --git a/tests/qemu-iotests/084 b/tests/qemu-iotests/084
index cb4d7b729e..ae33c2cba4 100755
--- a/tests/qemu-iotests/084
+++ b/tests/qemu-iotests/084
@@ -1,6 +1,7 @@
#!/bin/bash
#
-# Test case for VDI header corruption; image too large, and too many blocks
+# Test case for VDI header corruption; image too large, and too many blocks.
+# Also simple test for creating dynamic and static VDI images.
#
# Copyright (C) 2013 Red Hat, Inc.
#
@@ -43,14 +44,25 @@ _supported_fmt vdi
_supported_proto generic
_supported_os Linux
+size=64M
ds_offset=368 # disk image size field offset
bs_offset=376 # block size field offset
bii_offset=384 # block in image field offset
echo
+echo "=== Statically allocated image creation ==="
+echo
+_make_test_img $size -o static
+_img_info
+stat -c"disk image file size in bytes: %s" "${TEST_IMG}"
+_cleanup_test_img
+
+echo
echo "=== Testing image size bounds ==="
echo
-_make_test_img 64M
+_make_test_img $size
+_img_info
+stat -c"disk image file size in bytes: %s" "${TEST_IMG}"
# check for image size too large
# poke max image size, and appropriate blocks_in_image value
diff --git a/tests/qemu-iotests/084.out b/tests/qemu-iotests/084.out
index c7120d9b0b..ea29ae0b9d 100644
--- a/tests/qemu-iotests/084.out
+++ b/tests/qemu-iotests/084.out
@@ -1,8 +1,22 @@
QA output created by 084
+=== Statically allocated image creation ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+cluster_size: 1048576
+disk image file size in bytes: 67109888
+
=== Testing image size bounds ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+cluster_size: 1048576
+disk image file size in bytes: 1024
Test 1: Maximum size (1024 TB):
qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Could not open 'TEST_DIR/t.IMGFMT': Invalid argument
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index e4083f4212..70df659d5b 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -152,6 +152,7 @@ check options
-nbd test nbd
-ssh test ssh
-nfs test nfs
+ -archipelago test archipelago
-xdiff graphical mode diff
-nocache use O_DIRECT on backing file
-misalign misalign memory allocations
@@ -263,6 +264,11 @@ testlist options
xpand=false
;;
+ -archipelago)
+ IMGPROTO=archipelago
+ xpand=false
+ ;;
+
-nocache)
CACHEMODE="none"
CACHEMODE_IS_DEFAULT=false
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index e0ea7e3a7d..3fd691e6cd 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -64,6 +64,8 @@ elif [ "$IMGPROTO" = "ssh" ]; then
elif [ "$IMGPROTO" = "nfs" ]; then
TEST_DIR="nfs://127.0.0.1/$TEST_DIR"
TEST_IMG=$TEST_DIR/t.$IMGFMT
+elif [ "$IMGPROTO" = "archipelago" ]; then
+ TEST_IMG="archipelago:at.$IMGFMT"
else
TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT
fi
@@ -163,7 +165,8 @@ _make_test_img()
-e "s# lazy_refcounts=\\(on\\|off\\)##g" \
-e "s# block_size=[0-9]\\+##g" \
-e "s# block_state_zero=\\(on\\|off\\)##g" \
- -e "s# log_size=[0-9]\\+##g"
+ -e "s# log_size=[0-9]\\+##g" \
+ -e "s/archipelago:a/TEST_DIR\//g"
# Start an NBD server on the image file, which is what we'll be talking to
if [ $IMGPROTO = "nbd" ]; then
@@ -206,6 +209,10 @@ _cleanup_test_img()
rbd --no-progress rm "$TEST_DIR/t.$IMGFMT" > /dev/null
;;
+ archipelago)
+ vlmc remove "at.$IMGFMT" > /dev/null
+ ;;
+
sheepdog)
collie vdi delete "$TEST_DIR/t.$IMGFMT"
;;
diff --git a/tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2 b/tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2
index 30abf217e7..619329a246 100644
--- a/tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2
+++ b/tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2
Binary files differ
diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c
index 760636ddc4..6e634f4a78 100644
--- a/tests/test-coroutine.c
+++ b/tests/test-coroutine.c
@@ -288,6 +288,29 @@ static void perf_yield(void)
maxcycles, duration);
}
+static __attribute__((noinline)) void dummy(unsigned *i)
+{
+ (*i)--;
+}
+
+static void perf_baseline(void)
+{
+ unsigned int i, maxcycles;
+ double duration;
+
+ maxcycles = 100000000;
+ i = maxcycles;
+
+ g_test_timer_start();
+ while (i > 0) {
+ dummy(&i);
+ }
+ duration = g_test_timer_elapsed();
+
+ g_test_message("Function call %u iterations: %f s\n",
+ maxcycles, duration);
+}
+
int main(int argc, char **argv)
{
g_test_init(&argc, &argv, NULL);
@@ -301,6 +324,7 @@ int main(int argc, char **argv)
g_test_add_func("/perf/lifecycle", perf_lifecycle);
g_test_add_func("/perf/nesting", perf_nesting);
g_test_add_func("/perf/yield", perf_yield);
+ g_test_add_func("/perf/function-call", perf_baseline);
}
return g_test_run();
}
diff --git a/thread-pool.c b/thread-pool.c
index dfb699dd94..23888dcfc4 100644
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -21,7 +21,6 @@
#include "block/coroutine.h"
#include "trace.h"
#include "block/block_int.h"
-#include "qemu/event_notifier.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
@@ -57,8 +56,8 @@ struct ThreadPoolElement {
};
struct ThreadPool {
- EventNotifier notifier;
AioContext *ctx;
+ QEMUBH *completion_bh;
QemuMutex lock;
QemuCond check_cancel;
QemuCond worker_stopped;
@@ -119,7 +118,7 @@ static void *worker_thread(void *opaque)
qemu_cond_broadcast(&pool->check_cancel);
}
- event_notifier_set(&pool->notifier);
+ qemu_bh_schedule(pool->completion_bh);
}
pool->cur_threads--;
@@ -168,12 +167,11 @@ static void spawn_thread(ThreadPool *pool)
}
}
-static void event_notifier_ready(EventNotifier *notifier)
+static void thread_pool_completion_bh(void *opaque)
{
- ThreadPool *pool = container_of(notifier, ThreadPool, notifier);
+ ThreadPool *pool = opaque;
ThreadPoolElement *elem, *next;
- event_notifier_test_and_clear(notifier);
restart:
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
@@ -187,6 +185,12 @@ restart:
QLIST_REMOVE(elem, all);
/* Read state before ret. */
smp_rmb();
+
+ /* Schedule ourselves in case elem->common.cb() calls aio_poll() to
+ * wait for another request that completed at the same time.
+ */
+ qemu_bh_schedule(pool->completion_bh);
+
elem->common.cb(elem->common.opaque, elem->ret);
qemu_aio_release(elem);
goto restart;
@@ -215,7 +219,7 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
qemu_sem_timedwait(&pool->sem, 0) == 0) {
QTAILQ_REMOVE(&pool->request_list, elem, reqs);
elem->state = THREAD_CANCELED;
- event_notifier_set(&pool->notifier);
+ qemu_bh_schedule(pool->completion_bh);
} else {
pool->pending_cancellations++;
while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
@@ -224,7 +228,7 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
pool->pending_cancellations--;
}
qemu_mutex_unlock(&pool->lock);
- event_notifier_ready(&pool->notifier);
+ thread_pool_completion_bh(pool);
}
static const AIOCBInfo thread_pool_aiocb_info = {
@@ -293,8 +297,8 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
}
memset(pool, 0, sizeof(*pool));
- event_notifier_init(&pool->notifier, false);
pool->ctx = ctx;
+ pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
qemu_mutex_init(&pool->lock);
qemu_cond_init(&pool->check_cancel);
qemu_cond_init(&pool->worker_stopped);
@@ -304,8 +308,6 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
QLIST_INIT(&pool->head);
QTAILQ_INIT(&pool->request_list);
-
- aio_set_event_notifier(ctx, &pool->notifier, event_notifier_ready);
}
ThreadPool *thread_pool_new(AioContext *ctx)
@@ -339,11 +341,10 @@ void thread_pool_free(ThreadPool *pool)
qemu_mutex_unlock(&pool->lock);
- aio_set_event_notifier(pool->ctx, &pool->notifier, NULL);
+ qemu_bh_delete(pool->completion_bh);
qemu_sem_destroy(&pool->sem);
qemu_cond_destroy(&pool->check_cancel);
qemu_cond_destroy(&pool->worker_stopped);
qemu_mutex_destroy(&pool->lock);
- event_notifier_cleanup(&pool->notifier);
g_free(pool);
}
diff --git a/trace-events b/trace-events
index 11a17a8a40..81bc915edd 100644
--- a/trace-events
+++ b/trace-events
@@ -41,6 +41,11 @@ virtio_irq(void *vq) "vq %p"
virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u"
+# hw/virtio/virtio-rng.c
+virtio_rng_guest_not_ready(void *rng) "rng %p: guest not ready"
+virtio_rng_pushed(void *rng, size_t len) "rng %p: %zd bytes pushed"
+virtio_rng_request(void *rng, size_t size, unsigned quota) "rng %p: %zd bytes requested, %u bytes quota left"
+
# hw/char/virtio-serial-bus.c
virtio_serial_send_control_event(unsigned int port, uint16_t event, uint16_t value) "port %u, event %u, value %u"
virtio_serial_throttle_port(unsigned int port, bool throttle) "port %u, throttle %d"
@@ -1265,6 +1270,15 @@ kvm_failed_spr_get(int str, const char *msg) "Warning: Unable to retrieve SPR %d
kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
+# TCG related tracing (mostly disabled by default)
+# cpu-exec.c
+disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
+disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
+disable exec_tb_exit(void *next_tb, unsigned int flags) "tb:%p flags=%x"
+
+# translate-all.c
+translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
+
# memory.c
memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
memory_region_ops_write(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
diff --git a/trace/Makefile.objs b/trace/Makefile.objs
index d7a86969a4..387f191fd4 100644
--- a/trace/Makefile.objs
+++ b/trace/Makefile.objs
@@ -49,6 +49,9 @@ util-obj-y += generated-events.o
######################################################################
# Auto-generated tracing routines
+##################################################
+# Execution level
+
$(obj)/generated-tracers.h: $(obj)/generated-tracers.h-timestamp
@cmp -s $< $@ || cp $< $@
$(obj)/generated-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
@@ -57,8 +60,8 @@ $(obj)/generated-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/conf
--backends=$(TRACE_BACKENDS) \
< $< > $@," GEN $(patsubst %-timestamp,%,$@)")
-######################################################################
-# Auto-generated tracing routines (non-DTrace)
+##############################
+# non-DTrace
$(obj)/generated-tracers.c: $(obj)/generated-tracers.c-timestamp
@cmp -s $< $@ || cp $< $@
@@ -70,9 +73,8 @@ $(obj)/generated-tracers.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/conf
$(obj)/generated-tracers.o: $(obj)/generated-tracers.c $(obj)/generated-tracers.h
-
-######################################################################
-# Auto-generated DTrace code
+##############################
+# DTrace
# Normal practice is to name DTrace probe file with a '.d' extension
# but that gets picked up by QEMU's Makefile as an external dependency
@@ -94,6 +96,47 @@ $(obj)/generated-tracers-dtrace.o: $(obj)/generated-tracers-dtrace.dtrace
util-obj-y += generated-tracers-dtrace.o
endif
+##################################################
+# Translation level
+
+$(obj)/generated-helpers-wrappers.h: $(obj)/generated-helpers-wrappers.h-timestamp
+$(obj)/generated-helpers-wrappers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
+ $(call quiet-command,$(TRACETOOL) \
+ --format=tcg-helper-wrapper-h \
+ --backend=$(TRACE_BACKENDS) \
+ < $< > $@," GEN $(patsubst %-timestamp,%,$@)")
+ @cmp -s $@ $(patsubst %-timestamp,%,$@) || cp $@ $(patsubst %-timestamp,%,$@)
+
+$(obj)/generated-helpers.h: $(obj)/generated-helpers.h-timestamp
+$(obj)/generated-helpers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
+ $(call quiet-command,$(TRACETOOL) \
+ --format=tcg-helper-h \
+ --backend=$(TRACE_BACKENDS) \
+ < $< > $@," GEN $(patsubst %-timestamp,%,$@)")
+ @cmp -s $@ $(patsubst %-timestamp,%,$@) || cp $@ $(patsubst %-timestamp,%,$@)
+
+$(obj)/generated-helpers.c: $(obj)/generated-helpers.c-timestamp
+$(obj)/generated-helpers.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
+ $(call quiet-command,$(TRACETOOL) \
+ --format=tcg-helper-c \
+ --backend=$(TRACE_BACKENDS) \
+ < $< > $@," GEN $(patsubst %-timestamp,%,$@)")
+ @cmp -s $@ $(patsubst %-timestamp,%,$@) || cp $@ $(patsubst %-timestamp,%,$@)
+
+$(obj)/generated-helpers.o: $(obj)/generated-helpers.c
+
+target-obj-y += generated-helpers.o
+
+
+$(obj)/generated-tcg-tracers.h: $(obj)/generated-tcg-tracers.h-timestamp
+$(obj)/generated-tcg-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
+ $(call quiet-command,$(TRACETOOL) \
+ --format=tcg-h \
+ --backend=$(TRACE_BACKENDS) \
+ < $< > $@," GEN $(patsubst %-timestamp,%,$@)")
+ @cmp -s $@ $(patsubst %-timestamp,%,$@) || cp $@ $(patsubst %-timestamp,%,$@)
+
+
######################################################################
# Backend code
diff --git a/translate-all.c b/translate-all.c
index 8f7e11b0a5..2e0265ae27 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -33,6 +33,7 @@
#include "qemu-common.h"
#define NO_CPU_IO_DEFS
#include "cpu.h"
+#include "trace.h"
#include "disas/disas.h"
#include "tcg.h"
#if defined(CONFIG_USER_ONLY)
@@ -158,6 +159,8 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
gen_intermediate_code(env, tb);
+ trace_translate_block(tb, tb->pc, tb->tc_ptr);
+
/* generate machine code */
gen_code_buf = tb->tc_ptr;
tb->tb_next_offset[0] = 0xffff;
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index cdbfb2e270..016a047cfc 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -94,7 +94,7 @@ void *qemu_oom_check(void *ptr)
return ptr;
}
-void *qemu_memalign(size_t alignment, size_t size)
+void *qemu_try_memalign(size_t alignment, size_t size)
{
void *ptr;
@@ -106,19 +106,23 @@ void *qemu_memalign(size_t alignment, size_t size)
int ret;
ret = posix_memalign(&ptr, alignment, size);
if (ret != 0) {
- fprintf(stderr, "Failed to allocate %zu B: %s\n",
- size, strerror(ret));
- abort();
+ errno = ret;
+ ptr = NULL;
}
#elif defined(CONFIG_BSD)
- ptr = qemu_oom_check(valloc(size));
+ ptr = valloc(size);
#else
- ptr = qemu_oom_check(memalign(alignment, size));
+ ptr = memalign(alignment, size);
#endif
trace_qemu_memalign(alignment, size, ptr);
return ptr;
}
+void *qemu_memalign(size_t alignment, size_t size)
+{
+ return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
/* alloc shared memory pages */
void *qemu_anon_ram_alloc(size_t size)
{
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 507cedd84d..a3eab4acba 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -50,18 +50,23 @@ void *qemu_oom_check(void *ptr)
return ptr;
}
-void *qemu_memalign(size_t alignment, size_t size)
+void *qemu_try_memalign(size_t alignment, size_t size)
{
void *ptr;
if (!size) {
abort();
}
- ptr = qemu_oom_check(VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE));
+ ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
trace_qemu_memalign(alignment, size, ptr);
return ptr;
}
+void *qemu_memalign(size_t alignment, size_t size)
+{
+ return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
void *qemu_anon_ram_alloc(size_t size)
{
void *ptr;