From 9117b47717ad208b12786ce88eacb013f9b3dd1c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 19 Aug 2013 10:38:01 +0200 Subject: qcow2: Change default for new images to compat=1.1 By the time that qemu 1.7 will be released, enough time will have passed since qemu 1.1, which is the first version to understand version 3 images, that changing the default shouldn't hurt many people any more and the benefits of using the new format outweigh the pain. qemu-iotests already runs with compat=1.1 by default. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/qcow2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index 78097e5173..5e5f4132d9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1429,7 +1429,9 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options) return -EINVAL; } } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) { - if (!options->value.s || !strcmp(options->value.s, "0.10")) { + if (!options->value.s) { + /* keep the default */ + } else if (!strcmp(options->value.s, "0.10")) { version = 2; } else if (!strcmp(options->value.s, "1.1")) { version = 3; -- cgit v1.2.1 From 09da4a72926e2d0af0e5f0cb967ab0dd345311f4 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 15 Apr 2013 10:59:42 +0200 Subject: block: Remove redundant assertion The failing condition is checked immediately before the assertion, so keeping the assertion is kind of redundant. Signed-off-by: Kevin Wolf --- block.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block.c b/block.c index a387c1ad68..26639e8b70 100644 --- a/block.c +++ b/block.c @@ -743,7 +743,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, ret = -EINVAL; goto free_and_fail; } - assert(file != NULL); bs->file = file; ret = drv->bdrv_open(bs, options, open_flags); } -- cgit v1.2.1 From 015370301fd90ea5d17522eba00ae2797569ce8b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 2 Jul 2013 12:18:18 +0200 Subject: qapi-types.py: Split off generate_struct_fields() Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- scripts/qapi-types.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py index 5ee46ea1b3..86de9800ea 100644 --- a/scripts/qapi-types.py +++ b/scripts/qapi-types.py @@ -57,12 +57,8 @@ typedef struct %(name)sList ''', name=name) -def generate_struct(structname, fieldname, members): - ret = mcgen(''' -struct %(name)s -{ -''', - name=structname) +def generate_struct_fields(members): + ret = '' for argname, argentry, optional, structured in parse_args(members): if optional: @@ -80,6 +76,17 @@ struct %(name)s ''', c_type=c_type(argentry), c_name=c_var(argname)) + return ret + +def generate_struct(structname, fieldname, members): + ret = mcgen(''' +struct %(name)s +{ +''', + name=structname) + + ret += generate_struct_fields(members) + if len(fieldname): fieldname = " " + fieldname ret += mcgen(''' -- cgit v1.2.1 From c0447d870b25cd95af2630ab3d376321bd6e820a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 28 Aug 2013 09:50:40 +0200 Subject: Revert "block: Disable driver-specific options for 1.6" This reverts commit 8afaefb8919dc8746a57c450a758717c516c7b0a. Signed-off-by: Kevin Wolf --- blockdev.c | 143 ----------------------------------------------- tests/qemu-iotests/group | 2 +- 2 files changed, 1 insertion(+), 144 deletions(-) diff --git a/blockdev.c b/blockdev.c index 121520ecbc..e70e16e4de 100644 --- a/blockdev.c +++ b/blockdev.c @@ -46,7 +46,6 @@ static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives); extern QemuOptsList qemu_common_drive_opts; -extern QemuOptsList qemu_old_drive_opts; static const char *const if_name[IF_COUNT] = { [IF_NONE] = "none", @@ -755,26 +754,6 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type) { const char *value; - /* - * Check that only old options are used by copying into a QemuOpts with - * stricter checks. Going through a QDict seems to be the easiest way to - * achieve this... - */ - QemuOpts* check_opts; - QDict *qdict; - Error *local_err = NULL; - - qdict = qemu_opts_to_qdict(all_opts, NULL); - check_opts = qemu_opts_from_qdict(&qemu_old_drive_opts, qdict, &local_err); - QDECREF(qdict); - - if (error_is_set(&local_err)) { - qerror_report_err(local_err); - error_free(local_err); - return NULL; - } - qemu_opts_del(check_opts); - /* Change legacy command line options into QMP ones */ qemu_opt_rename(all_opts, "iops", "throttling.iops-total"); qemu_opt_rename(all_opts, "iops_rd", "throttling.iops-read"); @@ -2001,128 +1980,6 @@ QemuOptsList qemu_common_drive_opts = { }, }; -QemuOptsList qemu_old_drive_opts = { - .name = "drive", - .head = QTAILQ_HEAD_INITIALIZER(qemu_old_drive_opts.head), - .desc = { - { - .name = "bus", - .type = QEMU_OPT_NUMBER, - .help = "bus number", - },{ - .name = "unit", - .type = QEMU_OPT_NUMBER, - .help = "unit number (i.e. lun for scsi)", - },{ - .name = "if", - .type = QEMU_OPT_STRING, - .help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)", - },{ - .name = "index", - .type = QEMU_OPT_NUMBER, - .help = "index number", - },{ - .name = "cyls", - .type = QEMU_OPT_NUMBER, - .help = "number of cylinders (ide disk geometry)", - },{ - .name = "heads", - .type = QEMU_OPT_NUMBER, - .help = "number of heads (ide disk geometry)", - },{ - .name = "secs", - .type = QEMU_OPT_NUMBER, - .help = "number of sectors (ide disk geometry)", - },{ - .name = "trans", - .type = QEMU_OPT_STRING, - .help = "chs translation (auto, lba. none)", - },{ - .name = "media", - .type = QEMU_OPT_STRING, - .help = "media type (disk, cdrom)", - },{ - .name = "snapshot", - .type = QEMU_OPT_BOOL, - .help = "enable/disable snapshot mode", - },{ - .name = "file", - .type = QEMU_OPT_STRING, - .help = "disk image", - },{ - .name = "discard", - .type = QEMU_OPT_STRING, - .help = "discard operation (ignore/off, unmap/on)", - },{ - .name = "cache", - .type = QEMU_OPT_STRING, - .help = "host cache usage (none, writeback, writethrough, " - "directsync, unsafe)", - },{ - .name = "aio", - .type = QEMU_OPT_STRING, - .help = "host AIO implementation (threads, native)", - },{ - .name = "format", - .type = QEMU_OPT_STRING, - .help = "disk format (raw, qcow2, ...)", - },{ - .name = "serial", - .type = QEMU_OPT_STRING, - .help = "disk serial number", - },{ - .name = "rerror", - .type = QEMU_OPT_STRING, - .help = "read error action", - },{ - .name = "werror", - .type = QEMU_OPT_STRING, - .help = "write error action", - },{ - .name = "addr", - .type = QEMU_OPT_STRING, - .help = "pci address (virtio only)", - },{ - .name = "readonly", - .type = QEMU_OPT_BOOL, - .help = "open drive file as read-only", - },{ - .name = "iops", - .type = QEMU_OPT_NUMBER, - .help = "limit total I/O operations per second", - },{ - .name = "iops_rd", - .type = QEMU_OPT_NUMBER, - .help = "limit read operations per second", - },{ - .name = "iops_wr", - .type = QEMU_OPT_NUMBER, - .help = "limit write operations per second", - },{ - .name = "bps", - .type = QEMU_OPT_NUMBER, - .help = "limit total bytes per second", - },{ - .name = "bps_rd", - .type = QEMU_OPT_NUMBER, - .help = "limit read bytes per second", - },{ - .name = "bps_wr", - .type = QEMU_OPT_NUMBER, - .help = "limit write bytes per second", - },{ - .name = "copy-on-read", - .type = QEMU_OPT_BOOL, - .help = "copy read data from backing file into image file", - },{ - .name = "boot", - .type = QEMU_OPT_BOOL, - .help = "(deprecated, ignored)", - }, - { /* end of list */ } - }, -}; - QemuOptsList qemu_drive_opts = { .name = "drive", .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head), diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 43c05d6f5c..93ace2e9b0 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -57,7 +57,7 @@ 048 img auto quick 049 rw auto 050 rw auto backing quick -#051 rw auto +051 rw auto 052 rw auto backing 053 rw auto 054 rw auto -- cgit v1.2.1 From cccc30b4ad5d9835f5525d94210c8de26f4f5f94 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 28 Aug 2013 16:12:20 +0200 Subject: qemu-iotests: Update reference output for 051 Signed-off-by: Kevin Wolf --- tests/qemu-iotests/051.out | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 5582ed3655..86e989cc6a 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -85,7 +85,6 @@ QEMU_PROG: -drive if=virtio: Device 'virtio-blk-pci' could not be initialized Testing: -drive if=scsi QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -drive if=scsi: Device needs media, but drive is empty -QEMU_PROG: -drive if=scsi: Device initialization failed. QEMU_PROG: Device initialization failed. QEMU_PROG: Initialization of device lsi53c895a failed -- cgit v1.2.1 From 127c84e1a52f11bf418cc2d3bf804da5091a190a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 23 Aug 2013 17:35:45 +0100 Subject: block/qcow2.h: Avoid "1LL << 63" (shifts into sign bit) The expression "1LL << 63" tries to shift the 1 into the sign bit of a 'long long', which provokes a clang sanitizer warning: runtime error: left shift of 1 by 63 places cannot be represented in type 'long long' Use "1ULL << 63" as the definition of QCOW_OFLAG_COPIED instead to avoid this. For consistency, we also update the other QCOW_OFLAG definitions to use the ULL suffix rather than LL, though only the shift by 63 is undefined behaviour. Signed-off-by: Peter Maydell Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/qcow2.h b/block/qcow2.h index dba9771419..365a17e4e8 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -40,11 +40,11 @@ #define QCOW_MAX_CRYPT_CLUSTERS 32 /* indicate that the refcount of the referenced cluster is exactly one. */ -#define QCOW_OFLAG_COPIED (1LL << 63) +#define QCOW_OFLAG_COPIED (1ULL << 63) /* indicate that the cluster is compressed (they never have the copied flag) */ -#define QCOW_OFLAG_COMPRESSED (1LL << 62) +#define QCOW_OFLAG_COMPRESSED (1ULL << 62) /* The cluster reads as all zeros */ -#define QCOW_OFLAG_ZERO (1LL << 0) +#define QCOW_OFLAG_ZERO (1ULL << 0) #define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */ -- cgit v1.2.1 From e1c66c6d82fe5988d66531febc27ef8480c44c8a Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 21 Aug 2013 12:41:17 +0200 Subject: add skeleton for BSD licensed "raw" BlockDriver On 08/05/13 15:03, Paolo Bonzini wrote: > > > ----- Original Message ----- >> From: "Laszlo Ersek" >> To: "Paolo Bonzini" >> Sent: Monday, August 5, 2013 2:43:46 PM >> Subject: Re: [PATCH 1/2] raw: add license header >> >> On 08/02/13 00:27, Paolo Bonzini wrote: >>> On 08/01/2013 10:13 AM, Christoph Hellwig wrote: >>>> On Wed, Jul 31, 2013 at 08:19:51AM +0200, Paolo Bonzini wrote: >>>>> Most of the block layer is under the BSD license, thus it is >>>>> reasonable to license block/raw.c the same way. CCed people should >>>>> ACK by replying with a Signed-off-by line. >>>> >>>> The coded was intended to be GPLv2. >>> >>> Laszlo, would you be willing to do clean-room reverse engineering? >>> >>> (No rants, please. :)) >> >> What's the scope exactly? > > It's quite small, it's a file full of forwarders like > > static void raw_foo(BlockDriverState *bs) > { > return bdrv_foo(bs->file); > } > > It's 170 lines of code, all as boring as this. I only picked you > because I'm quite certain you have never seen the file (and the answer > confirmed it). > > Basically: > > 1) BlockDriver is a struct in which these function members are > interesting: > > .bdrv_reopen_prepare > .bdrv_co_readv > .bdrv_co_writev > .bdrv_co_is_allocated > .bdrv_co_write_zeroes > .bdrv_co_discard > .bdrv_getlength > .bdrv_get_info > .bdrv_truncate > .bdrv_is_inserted > .bdrv_media_changed > .bdrv_eject > .bdrv_lock_medium > .bdrv_ioctl > .bdrv_aio_ioctl > .bdrv_has_zero_init > > They should be implemented as simple forwarders (see above). > There are 16 functions listed here, you can easily see how this > already accounts for 100+ SLOC roughly... > > The implementations of bdrv_co_readv and bdrv_co_writev should also > call BLKDBG_EVENT on bs->file too, before forwarding to bs->file. The > events to be generated are BLKDBG_READ_AIO and BLKDBG_WRITE_AIO. > > 2) This is also a simple forwarder function: > > .bdrv_create > > but there is no BlockDriverState argument so the forwarded-to function > does not have a bs->file argument either. The forwarded-to function > is bdrv_create_file. > > 3) These members are special > > .format_name is the string "raw" > .bdrv_open raw_open should set bs->sg to bs->file->sg and return 0 > .bdrv_close raw_close should do nothing > .bdrv_probe raw_probe should just return 1. > > 4) There is another member, .create_options, which is an array of > QEMUOptionParameter structs, terminated by an all-zero item. The only > option you need is for the virtual disk size. You will find something > to copy from in other block drivers, for example block/qcow2.c. > > 5) Formats are registered with bdrv_register (takes a BlockDriver*). > You also need to pass the caller of bdrv_register to block_init. > > 6) I'm not sure how to organize the patch series, so I'll leave this to > your creativity. I guess in this case move/copy detection of git should > be disabled. I would definitely include this spec in the commit > message as a proof of clean-room reverse engineering. > > 7) Remember a BSD header like the one in block.c. > > Paolo This patch implements the email up to the paragraph ending with "100+ SLOC roughly". The skeleton is generated from the list there, with a simple shell loop using "sed" and the raw_foo() template. The BSD license block is copied (and reflowed) from "util/qemu-progress.c". Signed-off-by: Laszlo Ersek Signed-off-by: Kevin Wolf --- block/raw_bsd.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 block/raw_bsd.c diff --git a/block/raw_bsd.c b/block/raw_bsd.c new file mode 100644 index 0000000000..5c17d538e5 --- /dev/null +++ b/block/raw_bsd.c @@ -0,0 +1,108 @@ +/* BlockDriver implementation for "raw" + * + * Copyright (C) 2013, Red Hat, Inc. + * + * Author: + * Laszlo Ersek + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "block/block_int.h" + +static TYPE raw_reopen_prepare(BlockDriverState *bs) +{ + return bdrv_reopen_prepare(bs->file); +} + +static TYPE raw_co_readv(BlockDriverState *bs) +{ + return bdrv_co_readv(bs->file); +} + +static TYPE raw_co_writev(BlockDriverState *bs) +{ + return bdrv_co_writev(bs->file); +} + +static TYPE raw_co_is_allocated(BlockDriverState *bs) +{ + return bdrv_co_is_allocated(bs->file); +} + +static TYPE raw_co_write_zeroes(BlockDriverState *bs) +{ + return bdrv_co_write_zeroes(bs->file); +} + +static TYPE raw_co_discard(BlockDriverState *bs) +{ + return bdrv_co_discard(bs->file); +} + +static TYPE raw_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file); +} + +static TYPE raw_get_info(BlockDriverState *bs) +{ + return bdrv_get_info(bs->file); +} + +static TYPE raw_truncate(BlockDriverState *bs) +{ + return bdrv_truncate(bs->file); +} + +static TYPE raw_is_inserted(BlockDriverState *bs) +{ + return bdrv_is_inserted(bs->file); +} + +static TYPE raw_media_changed(BlockDriverState *bs) +{ + return bdrv_media_changed(bs->file); +} + +static TYPE raw_eject(BlockDriverState *bs) +{ + return bdrv_eject(bs->file); +} + +static TYPE raw_lock_medium(BlockDriverState *bs) +{ + return bdrv_lock_medium(bs->file); +} + +static TYPE raw_ioctl(BlockDriverState *bs) +{ + return bdrv_ioctl(bs->file); +} + +static TYPE raw_aio_ioctl(BlockDriverState *bs) +{ + return bdrv_aio_ioctl(bs->file); +} + +static TYPE raw_has_zero_init(BlockDriverState *bs) +{ + return bdrv_has_zero_init(bs->file); +} + -- cgit v1.2.1 From 9eaafd90d14b6049cc1d0e0b6c712459d447363c Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 21 Aug 2013 12:41:18 +0200 Subject: raw_bsd: emit debug events in bdrv_co_readv() and bdrv_co_writev() On 08/05/13 15:03, Paolo Bonzini wrote: > > [...] > > 1) BlockDriver is a struct in which these function members are > interesting: > > .bdrv_reopen_prepare > .bdrv_co_readv > .bdrv_co_writev > .bdrv_co_is_allocated > .bdrv_co_write_zeroes > .bdrv_co_discard > .bdrv_getlength > .bdrv_get_info > .bdrv_truncate > .bdrv_is_inserted > .bdrv_media_changed > .bdrv_eject > .bdrv_lock_medium > .bdrv_ioctl > .bdrv_aio_ioctl > .bdrv_has_zero_init > > They should be implemented as simple forwarders (see above). There are > 16 functions listed here, you can easily see how this already accounts > for 100+ SLOC roughly... > > The implementations of bdrv_co_readv and bdrv_co_writev should also call > BLKDBG_EVENT on bs->file too, before forwarding to bs->file. The events > to be generated are BLKDBG_READ_AIO and BLKDBG_WRITE_AIO. Signed-off-by: Laszlo Ersek Signed-off-by: Kevin Wolf --- block/raw_bsd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 5c17d538e5..19091a3ba2 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -33,11 +33,13 @@ static TYPE raw_reopen_prepare(BlockDriverState *bs) static TYPE raw_co_readv(BlockDriverState *bs) { + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); return bdrv_co_readv(bs->file); } static TYPE raw_co_writev(BlockDriverState *bs) { + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); return bdrv_co_writev(bs->file); } -- cgit v1.2.1 From 1565262c370195f1d7781d98f78fa002ab16b385 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 21 Aug 2013 12:41:19 +0200 Subject: raw_bsd: add raw_create() On 08/05/13 15:03, Paolo Bonzini wrote: > > [...] > > 2) This is also a simple forwarder function: > > .bdrv_create > > but there is no BlockDriverState argument so the forwarded-to function > does not have a bs->file argument either. The forwarded-to function is > bdrv_create_file. Signed-off-by: Laszlo Ersek Signed-off-by: Kevin Wolf --- block/raw_bsd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 19091a3ba2..5bcbe71c00 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -108,3 +108,7 @@ static TYPE raw_has_zero_init(BlockDriverState *bs) return bdrv_has_zero_init(bs->file); } +static TYPE raw_create(void) +{ + return bdrv_create_file(); +} -- cgit v1.2.1 From 01dd96d8f4f253ee29b4e0362a73c5f43bdc0b18 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 21 Aug 2013 12:41:20 +0200 Subject: raw_bsd: introduce "special members" On 08/05/13 15:03, Paolo Bonzini wrote: > > [...] > > 3) These members are special > > .format_name is the string "raw" > .bdrv_open raw_open should set bs->sg to bs->file->sg and return 0 > .bdrv_close raw_close should do nothing > .bdrv_probe raw_probe should just return 1. v1->v2: On 08/20/13 10:11, Kevin Wolf wrote: > Am 16.08.2013 um 16:15 hat Laszlo Ersek geschrieben: >> +static int raw_probe(void) >> +{ >> + return 1; >> +} > > Maybe add a comment here like "smallest possible positive score so that > raw is used if and only if no other block driver works". Signed-off-by: Laszlo Ersek Signed-off-by: Kevin Wolf --- block/raw_bsd.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 5bcbe71c00..b1d7209b7c 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -112,3 +112,26 @@ static TYPE raw_create(void) { return bdrv_create_file(); } + +static const char *raw_format_name(void) +{ + return "raw"; +} + +static int raw_open(BlockDriverState *bs) +{ + bs->sg = bs->file->sg; + return 0; +} + +static void raw_close(void) +{ +} + +static int raw_probe(void) +{ + /* smallest possible positive score so that raw is used if and only if no + * other block driver works + */ + return 1; +} -- cgit v1.2.1 From ff369a483df89cc6ca510b0c3ab9afe9cf0bdfdc Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 21 Aug 2013 12:41:21 +0200 Subject: raw_bsd: add raw_create_options On 08/05/13 15:03, Paolo Bonzini wrote: > > [...] > > 4) There is another member, .create_options, which is an array of > QEMUOptionParameter structs, terminated by an all-zero item. The only > option you need is for the virtual disk size. You will find something > to copy from in other block drivers, for example block/qcow2.c. Code taken and adapted from "block/qcow2.c", as suggested. The code being copied/modified is blamed on commit 20d97356c9df6d68fbd37d6334fdb7063f24eab6 Author: Blue Swirl Date: Fri Apr 23 20:19:47 2010 +0000 Fix OpenBSD build and commit 7c80ab3f21f0b1342f23057d4345ae266c7348d9 Author: Jes Sorensen Date: Fri Dec 17 16:02:39 2010 +0100 block/qcow2.c: rename qcow_ functions to qcow2_ Signed-off-by: Laszlo Ersek Signed-off-by: Kevin Wolf --- block/raw_bsd.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index b1d7209b7c..b70245d123 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -1,6 +1,7 @@ /* BlockDriver implementation for "raw" * - * Copyright (C) 2013, Red Hat, Inc. + * Copyright (C) 2010, 2013, Red Hat, Inc. + * Copyright (C) 2010, Blue Swirl * * Author: * Laszlo Ersek @@ -25,6 +26,16 @@ */ #include "block/block_int.h" +#include "qemu/option.h" + +static const QEMUOptionParameter raw_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { 0 } +}; static TYPE raw_reopen_prepare(BlockDriverState *bs) { -- cgit v1.2.1 From 775d6afd5cd66f2154a81f5de9d3dd7297a35072 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 21 Aug 2013 12:41:22 +0200 Subject: raw_bsd: register bdrv_raw On 08/05/13 15:03, Paolo Bonzini wrote: > > [...] > > 5) Formats are registered with bdrv_register (takes a BlockDriver*). You > also need to pass the caller of bdrv_register to block_init. Fill in the BlockDriver structure with the raw_*() functions that have been added to "block/raw_bsd.c", in the order the fields are defined in "include/block/block_int.h". I needed more explanation / naming examples for registering the driver than what Paolo gave me, so I copied / adapted from "block/qcow2.c". The parts I took as basis for modification are blamed on commit 5efa9d5a8b18841c9c62208a494d7f519238979a Author: Anthony Liguori Date: Sat May 9 17:03:42 2009 -0500 Convert block infrastructure to use new module init functionality commit 20d97356c9df6d68fbd37d6334fdb7063f24eab6 Author: Blue Swirl Date: Fri Apr 23 20:19:47 2010 +0000 Fix OpenBSD build Signed-off-by: Laszlo Ersek Signed-off-by: Kevin Wolf --- block/raw_bsd.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index b70245d123..2dc1921739 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -2,6 +2,7 @@ * * Copyright (C) 2010, 2013, Red Hat, Inc. * Copyright (C) 2010, Blue Swirl + * Copyright (C) 2009, Anthony Liguori * * Author: * Laszlo Ersek @@ -124,11 +125,6 @@ static TYPE raw_create(void) return bdrv_create_file(); } -static const char *raw_format_name(void) -{ - return "raw"; -} - static int raw_open(BlockDriverState *bs) { bs->sg = bs->file->sg; @@ -146,3 +142,35 @@ static int raw_probe(void) */ return 1; } + +static BlockDriver bdrv_raw = { + .format_name = "raw", + .bdrv_probe = &raw_probe, + .bdrv_reopen_prepare = &raw_reopen_prepare, + .bdrv_open = &raw_open, + .bdrv_close = &raw_close, + .bdrv_create = &raw_create, + .bdrv_co_readv = &raw_co_readv, + .bdrv_co_writev = &raw_co_writev, + .bdrv_co_write_zeroes = &raw_co_write_zeroes, + .bdrv_co_discard = &raw_co_discard, + .bdrv_co_is_allocated = &raw_co_is_allocated, + .bdrv_truncate = &raw_truncate, + .bdrv_getlength = &raw_getlength, + .bdrv_get_info = &raw_get_info, + .bdrv_is_inserted = &raw_is_inserted, + .bdrv_media_changed = &raw_media_changed, + .bdrv_eject = &raw_eject, + .bdrv_lock_medium = &raw_lock_medium, + .bdrv_ioctl = &raw_ioctl, + .bdrv_aio_ioctl = &raw_aio_ioctl, + .create_options = &raw_create_options[0], + .bdrv_has_zero_init = &raw_has_zero_init +}; + +static void bdrv_raw_init(void) +{ + bdrv_register(&bdrv_raw); +} + +block_init(bdrv_raw_init); -- cgit v1.2.1 From 7a6d3fc594d1166ec78a6b74ba76753078de0de5 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 21 Aug 2013 12:41:23 +0200 Subject: switch raw block driver from "raw.o" to "raw_bsd.o" "Incoming" function prototypes and "outgoing" function calls must match reality. Implemented using the "struct BlockDriver" definition in "include/block/block_int.h", and gcc errors & warnings. v1->v2: On 08/20/13 09:51, Kevin Wolf wrote: > Am 18.08.2013 um 16:29 hat Paolo Bonzini geschrieben: >> Il 16/08/2013 16:15, Laszlo Ersek ha scritto: >>> +static int raw_reopen_prepare(BDRVReopenState *reopen_state, >>> + BlockReopenQueue *queue, Error **errp) >>> { >>> - return bdrv_reopen_prepare(bs->file); >>> + BDRVReopenState tmp = *reopen_state; >>> + >>> + tmp.bs = tmp.bs->file; >>> + return bdrv_reopen_prepare(&tmp, queue, errp); >>> } >> >> This should just return zero, my fault. > > Which is because bdrv_reopen_queue() already queues bs->file for reopen. > The simple return 0; implementation is shared by all other format drivers > that support reopening images. Signed-off-by: Laszlo Ersek Signed-off-by: Kevin Wolf --- block/Makefile.objs | 2 +- block/raw_bsd.c | 78 ++++++++++++++++++++++++++++++----------------------- 2 files changed, 45 insertions(+), 35 deletions(-) diff --git a/block/Makefile.objs b/block/Makefile.objs index 4cf9aa499f..3bb85b535c 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -1,4 +1,4 @@ -block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o +block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 2dc1921739..ab2b0fd7d2 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -29,7 +29,7 @@ #include "block/block_int.h" #include "qemu/option.h" -static const QEMUOptionParameter raw_create_options[] = { +static QEMUOptionParameter raw_create_options[] = { { .name = BLOCK_OPT_SIZE, .type = OPT_SIZE, @@ -38,104 +38,114 @@ static const QEMUOptionParameter raw_create_options[] = { { 0 } }; -static TYPE raw_reopen_prepare(BlockDriverState *bs) +static int raw_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) { - return bdrv_reopen_prepare(bs->file); + return 0; } -static TYPE raw_co_readv(BlockDriverState *bs) +static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) { BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - return bdrv_co_readv(bs->file); + return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov); } -static TYPE raw_co_writev(BlockDriverState *bs) +static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) { BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - return bdrv_co_writev(bs->file); + return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov); } -static TYPE raw_co_is_allocated(BlockDriverState *bs) +static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int *pnum) { - return bdrv_co_is_allocated(bs->file); + return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum); } -static TYPE raw_co_write_zeroes(BlockDriverState *bs) +static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { - return bdrv_co_write_zeroes(bs->file); + return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors); } -static TYPE raw_co_discard(BlockDriverState *bs) +static int coroutine_fn raw_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { - return bdrv_co_discard(bs->file); + return bdrv_co_discard(bs->file, sector_num, nb_sectors); } -static TYPE raw_getlength(BlockDriverState *bs) +static int64_t raw_getlength(BlockDriverState *bs) { return bdrv_getlength(bs->file); } -static TYPE raw_get_info(BlockDriverState *bs) +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { - return bdrv_get_info(bs->file); + return bdrv_get_info(bs->file, bdi); } -static TYPE raw_truncate(BlockDriverState *bs) +static int raw_truncate(BlockDriverState *bs, int64_t offset) { - return bdrv_truncate(bs->file); + return bdrv_truncate(bs->file, offset); } -static TYPE raw_is_inserted(BlockDriverState *bs) +static int raw_is_inserted(BlockDriverState *bs) { return bdrv_is_inserted(bs->file); } -static TYPE raw_media_changed(BlockDriverState *bs) +static int raw_media_changed(BlockDriverState *bs) { return bdrv_media_changed(bs->file); } -static TYPE raw_eject(BlockDriverState *bs) +static void raw_eject(BlockDriverState *bs, bool eject_flag) { - return bdrv_eject(bs->file); + bdrv_eject(bs->file, eject_flag); } -static TYPE raw_lock_medium(BlockDriverState *bs) +static void raw_lock_medium(BlockDriverState *bs, bool locked) { - return bdrv_lock_medium(bs->file); + bdrv_lock_medium(bs->file, locked); } -static TYPE raw_ioctl(BlockDriverState *bs) +static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) { - return bdrv_ioctl(bs->file); + return bdrv_ioctl(bs->file, req, buf); } -static TYPE raw_aio_ioctl(BlockDriverState *bs) +static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockDriverCompletionFunc *cb, + void *opaque) { - return bdrv_aio_ioctl(bs->file); + return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque); } -static TYPE raw_has_zero_init(BlockDriverState *bs) +static int raw_has_zero_init(BlockDriverState *bs) { return bdrv_has_zero_init(bs->file); } -static TYPE raw_create(void) +static int raw_create(const char *filename, QEMUOptionParameter *options) { - return bdrv_create_file(); + return bdrv_create_file(filename, options); } -static int raw_open(BlockDriverState *bs) +static int raw_open(BlockDriverState *bs, QDict *options, int flags) { bs->sg = bs->file->sg; return 0; } -static void raw_close(void) +static void raw_close(BlockDriverState *bs) { } -static int raw_probe(void) +static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) { /* smallest possible positive score so that raw is used if and only if no * other block driver works -- cgit v1.2.1 From e5b1d99f5528315dc77aab369ae060d7cbad1e2a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 28 Aug 2013 15:15:52 +0200 Subject: block: Remove old raw driver This is unused code now. Signed-off-by: Kevin Wolf --- block/raw.c | 192 ------------------------------------------------------------ 1 file changed, 192 deletions(-) delete mode 100644 block/raw.c diff --git a/block/raw.c b/block/raw.c deleted file mode 100644 index 47518253fe..0000000000 --- a/block/raw.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Block driver for RAW format - * - * Copyright (c) 2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu-common.h" -#include "block/block_int.h" -#include "qemu/module.h" - -static int raw_open(BlockDriverState *bs, QDict *options, int flags) -{ - bs->sg = bs->file->sg; - return 0; -} - -/* We have nothing to do for raw reopen, stubs just return - * success */ -static int raw_reopen_prepare(BDRVReopenState *state, - BlockReopenQueue *queue, Error **errp) -{ - return 0; -} - -static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov); -} - -static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov); -} - -static void raw_close(BlockDriverState *bs) -{ -} - -static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum) -{ - return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum); -} - -static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors) -{ - return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors); -} - -static int64_t raw_getlength(BlockDriverState *bs) -{ - return bdrv_getlength(bs->file); -} - -static int raw_truncate(BlockDriverState *bs, int64_t offset) -{ - return bdrv_truncate(bs->file, offset); -} - -static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - return 1; /* everything can be opened as raw image */ -} - -static int coroutine_fn raw_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) -{ - return bdrv_co_discard(bs->file, sector_num, nb_sectors); -} - -static int raw_is_inserted(BlockDriverState *bs) -{ - return bdrv_is_inserted(bs->file); -} - -static int raw_media_changed(BlockDriverState *bs) -{ - return bdrv_media_changed(bs->file); -} - -static void raw_eject(BlockDriverState *bs, bool eject_flag) -{ - bdrv_eject(bs->file, eject_flag); -} - -static void raw_lock_medium(BlockDriverState *bs, bool locked) -{ - bdrv_lock_medium(bs->file, locked); -} - -static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - return bdrv_ioctl(bs->file, req, buf); -} - -static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs, - unsigned long int req, void *buf, - BlockDriverCompletionFunc *cb, void *opaque) -{ - return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque); -} - -static int raw_create(const char *filename, QEMUOptionParameter *options) -{ - return bdrv_create_file(filename, options); -} - -static QEMUOptionParameter raw_create_options[] = { - { - .name = BLOCK_OPT_SIZE, - .type = OPT_SIZE, - .help = "Virtual disk size" - }, - { NULL } -}; - -static int raw_has_zero_init(BlockDriverState *bs) -{ - return bdrv_has_zero_init(bs->file); -} - -static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) -{ - return bdrv_get_info(bs->file, bdi); -} - -static BlockDriver bdrv_raw = { - .format_name = "raw", - - /* It's really 0, but we need to make g_malloc() happy */ - .instance_size = 1, - - .bdrv_open = raw_open, - .bdrv_close = raw_close, - - .bdrv_reopen_prepare = raw_reopen_prepare, - - .bdrv_co_readv = raw_co_readv, - .bdrv_co_writev = raw_co_writev, - .bdrv_co_is_allocated = raw_co_is_allocated, - .bdrv_co_write_zeroes = raw_co_write_zeroes, - .bdrv_co_discard = raw_co_discard, - - .bdrv_probe = raw_probe, - .bdrv_getlength = raw_getlength, - .bdrv_get_info = raw_get_info, - .bdrv_truncate = raw_truncate, - - .bdrv_is_inserted = raw_is_inserted, - .bdrv_media_changed = raw_media_changed, - .bdrv_eject = raw_eject, - .bdrv_lock_medium = raw_lock_medium, - - .bdrv_ioctl = raw_ioctl, - .bdrv_aio_ioctl = raw_aio_ioctl, - - .bdrv_create = raw_create, - .create_options = raw_create_options, - .bdrv_has_zero_init = raw_has_zero_init, -}; - -static void bdrv_raw_init(void) -{ - bdrv_register(&bdrv_raw); -} - -block_init(bdrv_raw_init); -- cgit v1.2.1 From 9faa574f7d07109e2256c0b4b63e8711d650f2d8 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Tue, 27 Aug 2013 13:45:41 +0530 Subject: gluster: Abort on AIO completion failure Currently if gluster AIO callback thread fails to notify the QEMU thread about AIO completion, we try graceful recovery by marking the disk drive as inaccessible. This error recovery code is race-prone as found by Asias and Stefan. However as found out by Paolo, this kind of error is impossible and hence simplify the code that handles this error recovery. Signed-off-by: Bharata B Rao Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/gluster.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 46f36f8cd6..dbb03f4de5 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -427,20 +427,9 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) /* * Gluster AIO callback thread failed to notify the waiting * QEMU thread about IO completion. - * - * Complete this IO request and make the disk inaccessible for - * subsequent reads and writes. */ - error_report("Gluster failed to notify QEMU about IO completion"); - - qemu_mutex_lock_iothread(); /* We are in gluster thread context */ - acb->common.cb(acb->common.opaque, -EIO); - qemu_aio_release(acb); - close(s->fds[GLUSTER_FD_READ]); - close(s->fds[GLUSTER_FD_WRITE]); - qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL); - bs->drv = NULL; /* Make the disk inaccessible */ - qemu_mutex_unlock_iothread(); + error_report("Gluster AIO completion failed: %s", strerror(errno)); + abort(); } } -- cgit v1.2.1 From d4ca092a423f1f853a99357bab01a168bb57d625 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 29 Aug 2013 11:15:44 +0200 Subject: option: Add assigned flag to QEMUOptionParameter Adds an "assigned" flag to QEMUOptionParameter which is cleared at the beginning of parse_option_parameters and set on (successful) set_option_parameter and set_option_parameter_int. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- include/qemu/option.h | 1 + util/qemu-option.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/include/qemu/option.h b/include/qemu/option.h index 7a58e477d9..63db4ccb9a 100644 --- a/include/qemu/option.h +++ b/include/qemu/option.h @@ -46,6 +46,7 @@ typedef struct QEMUOptionParameter { char* s; } value; const char *help; + bool assigned; } QEMUOptionParameter; diff --git a/util/qemu-option.c b/util/qemu-option.c index 4ebdc4c33c..e0844a966c 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -275,6 +275,8 @@ int set_option_parameter(QEMUOptionParameter *list, const char *name, return -1; } + list->assigned = true; + return 0; } @@ -306,6 +308,8 @@ int set_option_parameter_int(QEMUOptionParameter *list, const char *name, return -1; } + list->assigned = true; + return 0; } @@ -397,6 +401,7 @@ QEMUOptionParameter *parse_option_parameters(const char *param, char value[256]; char *param_delim, *value_delim; char next_delim; + int i; if (list == NULL) { return NULL; @@ -406,6 +411,10 @@ QEMUOptionParameter *parse_option_parameters(const char *param, dest = allocated = append_option_parameters(NULL, list); } + for (i = 0; dest[i].name; i++) { + dest[i].assigned = false; + } + while (*param) { // Find parameter name and value in the string -- cgit v1.2.1 From 8b81a7b6ba8686f35f9cb0acdd54004d63206f03 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 30 Aug 2013 10:40:14 +0200 Subject: qcow2-refcount: Snapshot update for zero clusters Account for all cluster types in qcow2_update_snapshot_refcounts; this prevents this function from updating the refcount of unallocated zero clusters which effectively led to wrong adjustments of the refcount of cluster 0 (the main qcow2 header). This in turn resulted in images with (unallocated) zero clusters having a cluster 0 refcount greater than one after creating a snapshot. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 52 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 1244693f39..a61224a414 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -861,11 +861,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } for(j = 0; j < s->l2_size; j++) { + uint64_t cluster_index; + offset = be64_to_cpu(l2_table[j]); - if (offset != 0) { - old_offset = offset; - offset &= ~QCOW_OFLAG_COPIED; - if (offset & QCOW_OFLAG_COMPRESSED) { + old_offset = offset; + offset &= ~QCOW_OFLAG_COPIED; + + switch (qcow2_get_cluster_type(offset)) { + case QCOW2_CLUSTER_COMPRESSED: nb_csectors = ((offset >> s->csize_shift) & s->csize_mask) + 1; if (addend != 0) { @@ -880,8 +883,16 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } /* compressed clusters are never modified */ refcount = 2; - } else { - uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits; + break; + + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_ZERO: + cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits; + if (!cluster_index) { + /* unallocated */ + refcount = 0; + break; + } if (addend != 0) { refcount = update_cluster_refcount(bs, cluster_index, addend, QCOW2_DISCARD_SNAPSHOT); @@ -893,19 +904,26 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, ret = refcount; goto fail; } - } + break; - if (refcount == 1) { - offset |= QCOW_OFLAG_COPIED; - } - if (offset != old_offset) { - if (addend > 0) { - qcow2_cache_set_dependency(bs, s->l2_table_cache, - s->refcount_block_cache); - } - l2_table[j] = cpu_to_be64(offset); - qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); + case QCOW2_CLUSTER_UNALLOCATED: + refcount = 0; + break; + + default: + abort(); + } + + if (refcount == 1) { + offset |= QCOW_OFLAG_COPIED; + } + if (offset != old_offset) { + if (addend > 0) { + qcow2_cache_set_dependency(bs, s->l2_table_cache, + s->refcount_block_cache); } + l2_table[j] = cpu_to_be64(offset); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); } } -- cgit v1.2.1 From 449df7063815489a0b091bcb3afa9ae80ae3acbf Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 30 Aug 2013 10:40:15 +0200 Subject: qemu-iotests: Snapshotting zero clusters This test creates an image with unallocated zero clusters, then creates a snapshot. Afterwards, there should be neither any errors nor leaks. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/062 | 64 ++++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/062.out | 9 +++++++ tests/qemu-iotests/group | 1 + 3 files changed, 74 insertions(+) create mode 100755 tests/qemu-iotests/062 create mode 100644 tests/qemu-iotests/062.out diff --git a/tests/qemu-iotests/062 b/tests/qemu-iotests/062 new file mode 100755 index 0000000000..0511246dee --- /dev/null +++ b/tests/qemu-iotests/062 @@ -0,0 +1,64 @@ +#!/bin/bash +# +# Test case for snapshotting images with unallocated zero clusters in +# qcow2 +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +# This tests qocw2-specific low-level functionality +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + +IMGOPTS="compat=1.1" +IMG_SIZE=64M + +echo +echo "=== Testing snapshotting an image with zero clusters ===" +echo +_make_test_img $IMG_SIZE +# Write some zero clusters +$QEMU_IO -c "write -z 0 256k" "$TEST_IMG" | _filter_qemu_io +# Create a snapshot +$QEMU_IMG snapshot -c foo "$TEST_IMG" +# Check the image (there shouldn't be any errors or leaks) +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/062.out b/tests/qemu-iotests/062.out new file mode 100644 index 0000000000..442d761959 --- /dev/null +++ b/tests/qemu-iotests/062.out @@ -0,0 +1,9 @@ +QA output created by 062 + +=== Testing snapshotting an image with zero clusters === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 262144/262144 bytes at offset 0 +256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 93ace2e9b0..fb137922d0 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -64,3 +64,4 @@ 055 rw auto 056 rw auto backing 059 rw auto +062 rw auto -- cgit v1.2.1 From 69c98726537627e708abb8fcb33e3a2b10e40bf1 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 30 Aug 2013 14:34:24 +0200 Subject: qcow2: Add corrupt bit This adds an incompatible bit indicating corruption to qcow2. Any image with this bit set may not be written to unless for repairing (and subsequently clearing the bit if the repair has been successful). Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 7 ++++++- docs/specs/qcow2.txt | 7 ++++++- tests/qemu-iotests/031.out | 12 ++++++------ tests/qemu-iotests/036.out | 2 +- 5 files changed, 66 insertions(+), 9 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 5e5f4132d9..fe915681b2 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -272,6 +272,37 @@ static int qcow2_mark_clean(BlockDriverState *bs) return 0; } +/* + * Marks the image as corrupt. + */ +int qcow2_mark_corrupt(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + + s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; + return qcow2_update_header(bs); +} + +/* + * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes + * before if necessary. + */ +int qcow2_mark_consistent(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + + if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { + int ret = bdrv_flush(bs); + if (ret < 0) { + return ret; + } + + s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT; + return qcow2_update_header(bs); + } + return 0; +} + static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix) { @@ -402,6 +433,17 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags) goto fail; } + if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { + /* Corrupt images may not be written to unless they are being repaired + */ + if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { + error_report("qcow2: Image is corrupt; cannot be opened " + "read/write."); + ret = -EACCES; + goto fail; + } + } + /* Check support for various header values */ if (header.refcount_order != 4) { report_unsupported(bs, "%d bit reference counts", @@ -1129,6 +1171,11 @@ int qcow2_update_header(BlockDriverState *bs) .bit = QCOW2_INCOMPAT_DIRTY_BITNR, .name = "dirty bit", }, + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, + .name = "corrupt bit", + }, { .type = QCOW2_FEAT_TYPE_COMPATIBLE, .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, diff --git a/block/qcow2.h b/block/qcow2.h index 365a17e4e8..32ecb338ab 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -119,9 +119,12 @@ enum { /* Incompatible feature bits */ enum { QCOW2_INCOMPAT_DIRTY_BITNR = 0, + QCOW2_INCOMPAT_CORRUPT_BITNR = 1, QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR, + QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR, - QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY, + QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY + | QCOW2_INCOMPAT_CORRUPT, }; /* Compatible feature bits */ @@ -361,6 +364,8 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); int qcow2_mark_dirty(BlockDriverState *bs); +int qcow2_mark_corrupt(BlockDriverState *bs); +int qcow2_mark_consistent(BlockDriverState *bs); int qcow2_update_header(BlockDriverState *bs); /* qcow2-refcount.c functions */ diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt index 36a559d886..33eca360cc 100644 --- a/docs/specs/qcow2.txt +++ b/docs/specs/qcow2.txt @@ -80,7 +80,12 @@ in the description of a field. tables to repair refcounts before accessing the image. - Bits 1-63: Reserved (set to 0) + Bit 1: Corrupt bit. If this bit is set then any data + structure may be corrupt and the image must not + be written to (unless for regaining + consistency). + + Bits 2-63: Reserved (set to 0) 80 - 87: compatible_features Bitmask of compatible features. An implementation can diff --git a/tests/qemu-iotests/031.out b/tests/qemu-iotests/031.out index 796c993df2..a94334478e 100644 --- a/tests/qemu-iotests/031.out +++ b/tests/qemu-iotests/031.out @@ -54,7 +54,7 @@ header_length 72 Header extension: magic 0x6803f857 -length 96 +length 144 data Header extension: @@ -68,7 +68,7 @@ No errors were found on the image. magic 0x514649fb version 2 -backing_file_offset 0xf8 +backing_file_offset 0x128 backing_file_size 0x17 cluster_bits 16 size 67108864 @@ -92,7 +92,7 @@ data 'host_device' Header extension: magic 0x6803f857 -length 96 +length 144 data Header extension: @@ -155,7 +155,7 @@ header_length 104 Header extension: magic 0x6803f857 -length 96 +length 144 data Header extension: @@ -169,7 +169,7 @@ No errors were found on the image. magic 0x514649fb version 3 -backing_file_offset 0x118 +backing_file_offset 0x148 backing_file_size 0x17 cluster_bits 16 size 67108864 @@ -193,7 +193,7 @@ data 'host_device' Header extension: magic 0x6803f857 -length 96 +length 144 data Header extension: diff --git a/tests/qemu-iotests/036.out b/tests/qemu-iotests/036.out index 063ca22d66..55a3e6e441 100644 --- a/tests/qemu-iotests/036.out +++ b/tests/qemu-iotests/036.out @@ -46,7 +46,7 @@ header_length 104 Header extension: magic 0x6803f857 -length 96 +length 144 data *** done -- cgit v1.2.1 From a40f1c2add4d5f58d594f810fe36cabcf32bc4b0 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 30 Aug 2013 14:34:25 +0200 Subject: qcow2: Metadata overlap checks Two new functions are added; the first one checks a given range in the image file for overlaps with metadata (main header, L1 tables, L2 tables, refcount table and blocks). The second one should be used immediately before writing to the image file as it calls the first function and, upon collision, marks the image as corrupt and makes the BDS unusable, thereby preventing further access. Both functions take a bitmask argument specifying the structures which should be checked for overlaps, making it possible to also check metadata writes against colliding with other structures. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 39 +++++++++++ include/monitor/monitor.h | 1 + monitor.c | 1 + 4 files changed, 213 insertions(+) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index a61224a414..8ee2f13604 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -25,6 +25,8 @@ #include "qemu-common.h" #include "block/block_int.h" #include "block/qcow2.h" +#include "qemu/range.h" +#include "qapi/qmp/types.h" static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size); static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, @@ -1390,3 +1392,173 @@ fail: return ret; } +#define overlaps_with(ofs, sz) \ + ranges_overlap(offset, size, ofs, sz) + +/* + * Checks if the given offset into the image file is actually free to use by + * looking for overlaps with important metadata sections (L1/L2 tables etc.), + * i.e. a sanity check without relying on the refcount tables. + * + * The chk parameter specifies exactly what checks to perform (being a bitmask + * of QCow2MetadataOverlap values). + * + * Returns: + * - 0 if writing to this offset will not affect the mentioned metadata + * - a positive QCow2MetadataOverlap value indicating one overlapping section + * - a negative value (-errno) indicating an error while performing a check, + * e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2 + */ +int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset, + int64_t size) +{ + BDRVQcowState *s = bs->opaque; + int i, j; + + if (!size) { + return 0; + } + + if (chk & QCOW2_OL_MAIN_HEADER) { + if (offset < s->cluster_size) { + return QCOW2_OL_MAIN_HEADER; + } + } + + /* align range to test to cluster boundaries */ + size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size); + offset = start_of_cluster(s, offset); + + if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) { + if (overlaps_with(s->l1_table_offset, s->l1_size * sizeof(uint64_t))) { + return QCOW2_OL_ACTIVE_L1; + } + } + + if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) { + if (overlaps_with(s->refcount_table_offset, + s->refcount_table_size * sizeof(uint64_t))) { + return QCOW2_OL_REFCOUNT_TABLE; + } + } + + if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) { + if (overlaps_with(s->snapshots_offset, s->snapshots_size)) { + return QCOW2_OL_SNAPSHOT_TABLE; + } + } + + if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) { + for (i = 0; i < s->nb_snapshots; i++) { + if (s->snapshots[i].l1_size && + overlaps_with(s->snapshots[i].l1_table_offset, + s->snapshots[i].l1_size * sizeof(uint64_t))) { + return QCOW2_OL_INACTIVE_L1; + } + } + } + + if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) { + for (i = 0; i < s->l1_size; i++) { + if ((s->l1_table[i] & L1E_OFFSET_MASK) && + overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK, + s->cluster_size)) { + return QCOW2_OL_ACTIVE_L2; + } + } + } + + if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) { + for (i = 0; i < s->refcount_table_size; i++) { + if ((s->refcount_table[i] & REFT_OFFSET_MASK) && + overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK, + s->cluster_size)) { + return QCOW2_OL_REFCOUNT_BLOCK; + } + } + } + + if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) { + for (i = 0; i < s->nb_snapshots; i++) { + uint64_t l1_ofs = s->snapshots[i].l1_table_offset; + uint32_t l1_sz = s->snapshots[i].l1_size; + uint64_t *l1 = g_malloc(l1_sz * sizeof(uint64_t)); + int ret; + + ret = bdrv_read(bs->file, l1_ofs / BDRV_SECTOR_SIZE, (uint8_t *)l1, + l1_sz * sizeof(uint64_t) / BDRV_SECTOR_SIZE); + + if (ret < 0) { + g_free(l1); + return ret; + } + + for (j = 0; j < l1_sz; j++) { + if ((l1[j] & L1E_OFFSET_MASK) && + overlaps_with(l1[j] & L1E_OFFSET_MASK, s->cluster_size)) { + g_free(l1); + return QCOW2_OL_INACTIVE_L2; + } + } + + g_free(l1); + } + } + + return 0; +} + +static const char *metadata_ol_names[] = { + [QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header", + [QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table", + [QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table", + [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table", + [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block", + [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table", + [QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table", + [QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table", +}; + +/* + * First performs a check for metadata overlaps (through + * qcow2_check_metadata_overlap); if that fails with a negative value (error + * while performing a check), that value is returned. If an impending overlap + * is detected, the BDS will be made unusable, the qcow2 file marked corrupt + * and -EIO returned. + * + * Returns 0 if there were neither overlaps nor errors while checking for + * overlaps; or a negative value (-errno) on error. + */ +int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset, + int64_t size) +{ + int ret = qcow2_check_metadata_overlap(bs, chk, offset, size); + + if (ret < 0) { + return ret; + } else if (ret > 0) { + int metadata_ol_bitnr = ffs(ret) - 1; + char *message; + QObject *data; + + assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR); + + fprintf(stderr, "qcow2: Preventing invalid write on metadata (overlaps " + "with %s); image marked as corrupt.\n", + metadata_ol_names[metadata_ol_bitnr]); + message = g_strdup_printf("Prevented %s overwrite", + metadata_ol_names[metadata_ol_bitnr]); + data = qobject_from_jsonf("{ 'device': %s, 'msg': %s, 'offset': %" + PRId64 ", 'size': %" PRId64 " }", bs->device_name, message, + offset, size); + monitor_protocol_event(QEVENT_BLOCK_IMAGE_CORRUPTED, data); + g_free(message); + qobject_decref(data); + + qcow2_mark_corrupt(bs); + bs->drv = NULL; /* make BDS unusable */ + return -EIO; + } + + return 0; +} diff --git a/block/qcow2.h b/block/qcow2.h index 32ecb338ab..d4448c6350 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -289,6 +289,40 @@ enum { QCOW2_CLUSTER_ZERO }; +typedef enum QCow2MetadataOverlap { + QCOW2_OL_MAIN_HEADER_BITNR = 0, + QCOW2_OL_ACTIVE_L1_BITNR = 1, + QCOW2_OL_ACTIVE_L2_BITNR = 2, + QCOW2_OL_REFCOUNT_TABLE_BITNR = 3, + QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4, + QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5, + QCOW2_OL_INACTIVE_L1_BITNR = 6, + QCOW2_OL_INACTIVE_L2_BITNR = 7, + + QCOW2_OL_MAX_BITNR = 8, + + QCOW2_OL_NONE = 0, + QCOW2_OL_MAIN_HEADER = (1 << QCOW2_OL_MAIN_HEADER_BITNR), + QCOW2_OL_ACTIVE_L1 = (1 << QCOW2_OL_ACTIVE_L1_BITNR), + QCOW2_OL_ACTIVE_L2 = (1 << QCOW2_OL_ACTIVE_L2_BITNR), + QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR), + QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR), + QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR), + QCOW2_OL_INACTIVE_L1 = (1 << QCOW2_OL_INACTIVE_L1_BITNR), + /* NOTE: Checking overlaps with inactive L2 tables will result in bdrv + * reads. */ + QCOW2_OL_INACTIVE_L2 = (1 << QCOW2_OL_INACTIVE_L2_BITNR), +} QCow2MetadataOverlap; + +/* Perform all overlap checks which don't require disk access */ +#define QCOW2_OL_CACHED \ + (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_ACTIVE_L2 | \ + QCOW2_OL_REFCOUNT_TABLE | QCOW2_OL_REFCOUNT_BLOCK | \ + QCOW2_OL_SNAPSHOT_TABLE | QCOW2_OL_INACTIVE_L1) + +/* The default checks to perform */ +#define QCOW2_OL_DEFAULT QCOW2_OL_CACHED + #define L1E_OFFSET_MASK 0x00ffffffffffff00ULL #define L2E_OFFSET_MASK 0x00ffffffffffff00ULL #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL @@ -390,6 +424,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, void qcow2_process_discards(BlockDriverState *bs, int ret); +int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset, + int64_t size); +int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset, + int64_t size); + /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h index 1942cc42fe..10fa0e390c 100644 --- a/include/monitor/monitor.h +++ b/include/monitor/monitor.h @@ -48,6 +48,7 @@ typedef enum MonitorEvent { QEVENT_BALLOON_CHANGE, QEVENT_SPICE_MIGRATE_COMPLETED, QEVENT_GUEST_PANICKED, + QEVENT_BLOCK_IMAGE_CORRUPTED, /* Add to 'monitor_event_names' array in monitor.c when * defining new events here */ diff --git a/monitor.c b/monitor.c index ee9744cfb6..2c542e1bca 100644 --- a/monitor.c +++ b/monitor.c @@ -504,6 +504,7 @@ static const char *monitor_event_names[] = { [QEVENT_BALLOON_CHANGE] = "BALLOON_CHANGE", [QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED", [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED", + [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED", }; QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX) -- cgit v1.2.1 From cf93980e775b709ec8f33f55846e6dcf1c7a612c Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 30 Aug 2013 14:34:26 +0200 Subject: qcow2: Employ metadata overlap checks The pre-write overlap check function is now called before most of the qcow2 writes (aborting it on collision or other error). Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-cache.c | 17 +++++++++++++++++ block/qcow2-cluster.c | 21 +++++++++++++++++++++ block/qcow2-snapshot.c | 22 ++++++++++++++++++++++ block/qcow2.c | 26 ++++++++++++++++++++++++++ 4 files changed, 86 insertions(+) diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 2f3114ecc2..7bcae09a69 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -114,6 +114,23 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) return ret; } + if (c == s->refcount_block_cache) { + ret = qcow2_pre_write_overlap_check(bs, + QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_BLOCK, + c->entries[i].offset, s->cluster_size); + } else if (c == s->l2_table_cache) { + ret = qcow2_pre_write_overlap_check(bs, + QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L2, + c->entries[i].offset, s->cluster_size); + } else { + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, + c->entries[i].offset, s->cluster_size); + } + + if (ret < 0) { + return ret; + } + if (c == s->refcount_block_cache) { BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); } else if (c == s->l2_table_cache) { diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index cca76d4fcd..7c248aaede 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -80,6 +80,14 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, goto fail; } + /* the L1 position has not yet been updated, so these clusters must + * indeed be completely free */ + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, + new_l1_table_offset, new_l1_size2); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); for(i = 0; i < s->l1_size; i++) new_l1_table[i] = cpu_to_be64(new_l1_table[i]); @@ -149,6 +157,13 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index) buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]); } + ret = qcow2_pre_write_overlap_check(bs, + QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1, + s->l1_table_offset + 8 * l1_start_index, sizeof(buf)); + if (ret < 0) { + return ret; + } + BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index, buf, sizeof(buf)); @@ -368,6 +383,12 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs, &s->aes_encrypt_key); } + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, + cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE); + if (ret < 0) { + goto out; + } + BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov); if (ret < 0) { diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 0caac9055f..e7e601301a 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -189,6 +189,15 @@ static int qcow2_write_snapshots(BlockDriverState *bs) return ret; } + /* The snapshot list position has not yet been updated, so these clusters + * must indeed be completely free */ + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset, + s->snapshots_size); + if (ret < 0) { + return ret; + } + + /* Write all snapshots to the new list */ for(i = 0; i < s->nb_snapshots; i++) { sn = s->snapshots + i; @@ -363,6 +372,12 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) l1_table[i] = cpu_to_be64(s->l1_table[i]); } + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, + sn->l1_table_offset, s->l1_size * sizeof(uint64_t)); + if (ret < 0) { + goto fail; + } + ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table, s->l1_size * sizeof(uint64_t)); if (ret < 0) { @@ -475,6 +490,13 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) goto fail; } + ret = qcow2_pre_write_overlap_check(bs, + QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1, + s->l1_table_offset, cur_l1_bytes); + if (ret < 0) { + goto fail; + } + ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table, cur_l1_bytes); if (ret < 0) { diff --git a/block/qcow2.c b/block/qcow2.c index fe915681b2..05e002d856 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -624,6 +624,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags) qcow2_free_snapshots(bs); qcow2_refcount_close(bs); g_free(s->l1_table); + /* else pre-write overlap checks in cache_destroy may crash */ + s->l1_table = NULL; if (s->l2_table_cache) { qcow2_cache_destroy(bs, s->l2_table_cache); } @@ -923,6 +925,13 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, cur_nr_sectors * 512); } + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, + cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE, + cur_nr_sectors * BDRV_SECTOR_SIZE); + if (ret < 0) { + goto fail; + } + qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); trace_qcow2_writev_data(qemu_coroutine_self(), @@ -989,6 +998,8 @@ static void qcow2_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; g_free(s->l1_table); + /* else pre-write overlap checks in cache_destroy may crash */ + s->l1_table = NULL; qcow2_cache_flush(bs, s->l2_table_cache); qcow2_cache_flush(bs, s->refcount_block_cache); @@ -1668,6 +1679,14 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, if (ret != Z_STREAM_END || out_len >= s->cluster_size) { /* could not compress: write normal cluster */ + + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, + sector_num * BDRV_SECTOR_SIZE, + s->cluster_sectors * BDRV_SECTOR_SIZE); + if (ret < 0) { + goto fail; + } + ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors); if (ret < 0) { goto fail; @@ -1680,6 +1699,13 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, goto fail; } cluster_offset &= s->cluster_offset_mask; + + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, + cluster_offset, out_len); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); if (ret < 0) { -- cgit v1.2.1 From 4f6ed88c03c4026e31ce152ea760a0da839f0dda Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 30 Aug 2013 14:34:27 +0200 Subject: qcow2-refcount: Move OFLAG_COPIED checks Move the OFLAG_COPIED checks out of check_refcounts_l1 and check_refcounts_l2 and after the actual refcount checks/fixes (since the refcounts might actually change there). Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 115 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 33 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 8ee2f13604..aa4b98d85a 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1053,7 +1053,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, BDRVQcowState *s = bs->opaque; uint64_t *l2_table, l2_entry; uint64_t next_contiguous_offset = 0; - int i, l2_size, nb_csectors, refcount; + int i, l2_size, nb_csectors; /* Read L2 table from disk */ l2_size = s->l2_size * sizeof(uint64_t); @@ -1105,23 +1105,8 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, case QCOW2_CLUSTER_NORMAL: { - /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ uint64_t offset = l2_entry & L2E_OFFSET_MASK; - if (flags & CHECK_OFLAG_COPIED) { - refcount = get_refcount(bs, offset >> s->cluster_bits); - if (refcount < 0) { - fprintf(stderr, "Can't get refcount for offset %" - PRIx64 ": %s\n", l2_entry, strerror(-refcount)); - goto fail; - } - if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { - fprintf(stderr, "ERROR OFLAG_COPIED: offset=%" - PRIx64 " refcount=%d\n", l2_entry, refcount); - res->corruptions++; - } - } - if (flags & CHECK_FRAG_INFO) { res->bfi.allocated_clusters++; if (next_contiguous_offset && @@ -1178,7 +1163,7 @@ static int check_refcounts_l1(BlockDriverState *bs, { BDRVQcowState *s = bs->opaque; uint64_t *l1_table, l2_offset, l1_size2; - int i, refcount, ret; + int i, ret; l1_size2 = l1_size * sizeof(uint64_t); @@ -1202,22 +1187,6 @@ static int check_refcounts_l1(BlockDriverState *bs, for(i = 0; i < l1_size; i++) { l2_offset = l1_table[i]; if (l2_offset) { - /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ - if (flags & CHECK_OFLAG_COPIED) { - refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) - >> s->cluster_bits); - if (refcount < 0) { - fprintf(stderr, "Can't get refcount for l2_offset %" - PRIx64 ": %s\n", l2_offset, strerror(-refcount)); - goto fail; - } - if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) { - fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64 - " refcount=%d\n", l2_offset, refcount); - res->corruptions++; - } - } - /* Mark L2 table as used */ l2_offset &= L1E_OFFSET_MASK; inc_refcounts(bs, res, refcount_table, refcount_table_size, @@ -1248,6 +1217,80 @@ fail: return -EIO; } +/* + * Checks the OFLAG_COPIED flag for all L1 and L2 entries. + * + * This function does not print an error message nor does it increment + * check_errors if get_refcount fails (this is because such an error will have + * been already detected and sufficiently signaled by the calling function + * (qcow2_check_refcounts) by the time this function is called). + */ +static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res) +{ + BDRVQcowState *s = bs->opaque; + uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size); + int ret; + int refcount; + int i, j; + + for (i = 0; i < s->l1_size; i++) { + uint64_t l1_entry = s->l1_table[i]; + uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK; + + if (!l2_offset) { + continue; + } + + refcount = get_refcount(bs, l2_offset >> s->cluster_bits); + if (refcount < 0) { + /* don't print message nor increment check_errors */ + continue; + } + if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) { + fprintf(stderr, "ERROR OFLAG_COPIED L2 cluster: l1_index=%d " + "l1_entry=%" PRIx64 " refcount=%d\n", + i, l1_entry, refcount); + res->corruptions++; + } + + ret = bdrv_pread(bs->file, l2_offset, l2_table, + s->l2_size * sizeof(uint64_t)); + if (ret < 0) { + fprintf(stderr, "ERROR: Could not read L2 table: %s\n", + strerror(-ret)); + res->check_errors++; + goto fail; + } + + for (j = 0; j < s->l2_size; j++) { + uint64_t l2_entry = be64_to_cpu(l2_table[j]); + uint64_t data_offset = l2_entry & L2E_OFFSET_MASK; + int cluster_type = qcow2_get_cluster_type(l2_entry); + + if ((cluster_type == QCOW2_CLUSTER_NORMAL) || + ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) { + refcount = get_refcount(bs, data_offset >> s->cluster_bits); + if (refcount < 0) { + /* don't print message nor increment check_errors */ + continue; + } + if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { + fprintf(stderr, "ERROR OFLAG_COPIED data cluster: " + "l2_entry=%" PRIx64 " refcount=%d\n", + l2_entry, refcount); + res->corruptions++; + } + } + } + } + + ret = 0; + +fail: + qemu_vfree(l2_table); + return ret; +} + /* * Checks an image for refcount consistency. * @@ -1383,6 +1426,12 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } } + /* check OFLAG_COPIED */ + ret = check_oflag_copied(bs, res); + if (ret < 0) { + goto fail; + } + res->image_end_offset = (highest_cluster + 1) * s->cluster_size; ret = 0; -- cgit v1.2.1 From e23e400ec62a03dea58ddb38479b4f1ef86f556d Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 30 Aug 2013 14:34:28 +0200 Subject: qcow2-refcount: Repair OFLAG_COPIED errors Since the OFLAG_COPIED checks are now executed after the refcounts have been repaired (if repairing), it is safe to assume that they are correct but the OFLAG_COPIED flag may be not. Therefore, if its value differs from what it should be (considering the according refcount), that discrepancy can be repaired by correctly setting (or clearing that flag. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 4 ++-- block/qcow2-refcount.c | 58 ++++++++++++++++++++++++++++++++++++++++++++------ block/qcow2.h | 1 + 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 7c248aaede..2d5aa92962 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -145,7 +145,7 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset, * and we really don't want bdrv_pread to perform a read-modify-write) */ #define L1_ENTRIES_PER_SECTOR (512 / 8) -static int write_l1_entry(BlockDriverState *bs, int l1_index) +int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) { BDRVQcowState *s = bs->opaque; uint64_t buf[L1_ENTRIES_PER_SECTOR]; @@ -254,7 +254,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* update the L1 entry */ trace_qcow2_l2_allocate_write_l1(bs, l1_index); s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; - ret = write_l1_entry(bs, l1_index); + ret = qcow2_write_l1_entry(bs, l1_index); if (ret < 0) { goto fail; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index aa4b98d85a..2276b6f7f5 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1225,7 +1225,8 @@ fail: * been already detected and sufficiently signaled by the calling function * (qcow2_check_refcounts) by the time this function is called). */ -static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res) +static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) { BDRVQcowState *s = bs->opaque; uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size); @@ -1236,6 +1237,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res) for (i = 0; i < s->l1_size; i++) { uint64_t l1_entry = s->l1_table[i]; uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK; + bool l2_dirty = false; if (!l2_offset) { continue; @@ -1247,10 +1249,24 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res) continue; } if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) { - fprintf(stderr, "ERROR OFLAG_COPIED L2 cluster: l1_index=%d " + fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d " "l1_entry=%" PRIx64 " refcount=%d\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : + "ERROR", i, l1_entry, refcount); - res->corruptions++; + if (fix & BDRV_FIX_ERRORS) { + s->l1_table[i] = refcount == 1 + ? l1_entry | QCOW_OFLAG_COPIED + : l1_entry & ~QCOW_OFLAG_COPIED; + ret = qcow2_write_l1_entry(bs, i); + if (ret < 0) { + res->check_errors++; + goto fail; + } + res->corruptions_fixed++; + } else { + res->corruptions++; + } } ret = bdrv_pread(bs->file, l2_offset, l2_table, @@ -1275,13 +1291,43 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res) continue; } if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { - fprintf(stderr, "ERROR OFLAG_COPIED data cluster: " + fprintf(stderr, "%s OFLAG_COPIED data cluster: " "l2_entry=%" PRIx64 " refcount=%d\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : + "ERROR", l2_entry, refcount); - res->corruptions++; + if (fix & BDRV_FIX_ERRORS) { + l2_table[j] = cpu_to_be64(refcount == 1 + ? l2_entry | QCOW_OFLAG_COPIED + : l2_entry & ~QCOW_OFLAG_COPIED); + l2_dirty = true; + res->corruptions_fixed++; + } else { + res->corruptions++; + } } } } + + if (l2_dirty) { + ret = qcow2_pre_write_overlap_check(bs, + QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L2, l2_offset, + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR: Could not write L2 table; metadata " + "overlap check failed: %s\n", strerror(-ret)); + res->check_errors++; + goto fail; + } + + ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR: Could not write L2 table: %s\n", + strerror(-ret)); + res->check_errors++; + goto fail; + } + } } ret = 0; @@ -1427,7 +1473,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } /* check OFLAG_COPIED */ - ret = check_oflag_copied(bs, res); + ret = check_oflag_copied(bs, res, fix); if (ret < 0) { goto fail; } diff --git a/block/qcow2.h b/block/qcow2.h index d4448c6350..1000239e4c 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -432,6 +432,7 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset, /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); +int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); void qcow2_l2_cache_reset(BlockDriverState *bs); int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, -- cgit v1.2.1 From afa50193cde574528a130a25544fd6f3aa8da069 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 2 Sep 2013 09:25:10 +0200 Subject: qcow2-refcount: Repair shared refcount blocks If the refcount of a refcount block is greater than one, we can at least try to repair that problem by duplicating the affected block. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/blkdebug.c | 1 + block/qcow2-refcount.c | 148 ++++++++++++++++++++++++++++++++++++++++++++++++- include/block/block.h | 1 + 3 files changed, 148 insertions(+), 2 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index ccb627ad93..5d33e03608 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -168,6 +168,7 @@ static const char *event_names[BLKDBG_EVENT_MAX] = { [BLKDBG_REFTABLE_LOAD] = "reftable_load", [BLKDBG_REFTABLE_GROW] = "reftable_grow", + [BLKDBG_REFTABLE_UPDATE] = "reftable_update", [BLKDBG_REFBLOCK_LOAD] = "refblock_load", [BLKDBG_REFBLOCK_UPDATE] = "refblock_update", diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 2276b6f7f5..ba129de478 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1337,6 +1337,121 @@ fail: return ret; } +/* + * Writes one sector of the refcount table to the disk + */ +#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t)) +static int write_reftable_entry(BlockDriverState *bs, int rt_index) +{ + BDRVQcowState *s = bs->opaque; + uint64_t buf[RT_ENTRIES_PER_SECTOR]; + int rt_start_index; + int i, ret; + + rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1); + for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) { + buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]); + } + + ret = qcow2_pre_write_overlap_check(bs, + QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_TABLE, + s->refcount_table_offset + rt_start_index * sizeof(uint64_t), + sizeof(buf)); + if (ret < 0) { + return ret; + } + + BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); + ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset + + rt_start_index * sizeof(uint64_t), buf, sizeof(buf)); + if (ret < 0) { + return ret; + } + + return 0; +} + +/* + * Allocates a new cluster for the given refcount block (represented by its + * offset in the image file) and copies the current content there. This function + * does _not_ decrement the reference count for the currently occupied cluster. + * + * This function prints an informative message to stderr on error (and returns + * -errno); on success, 0 is returned. + */ +static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index, + uint64_t offset) +{ + BDRVQcowState *s = bs->opaque; + int64_t new_offset = 0; + void *refcount_block = NULL; + int ret; + + /* allocate new refcount block */ + new_offset = qcow2_alloc_clusters(bs, s->cluster_size); + if (new_offset < 0) { + fprintf(stderr, "Could not allocate new cluster: %s\n", + strerror(-new_offset)); + ret = new_offset; + goto fail; + } + + /* fetch current refcount block content */ + ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block); + if (ret < 0) { + fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret)); + goto fail; + } + + /* new block has not yet been entered into refcount table, therefore it is + * no refcount block yet (regarding this check) */ + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, new_offset, + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "Could not write refcount block; metadata overlap " + "check failed: %s\n", strerror(-ret)); + /* the image will be marked corrupt, so don't even attempt on freeing + * the cluster */ + new_offset = 0; + goto fail; + } + + /* write to new block */ + ret = bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcount_block, + s->cluster_sectors); + if (ret < 0) { + fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret)); + goto fail; + } + + /* update refcount table */ + assert(!(new_offset & (s->cluster_size - 1))); + s->refcount_table[reftable_index] = new_offset; + ret = write_reftable_entry(bs, reftable_index); + if (ret < 0) { + fprintf(stderr, "Could not update refcount table: %s\n", + strerror(-ret)); + goto fail; + } + +fail: + if (new_offset && (ret < 0)) { + qcow2_free_clusters(bs, new_offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + if (refcount_block) { + if (ret < 0) { + qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + } else { + ret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + } + } + if (ret < 0) { + return ret; + } + return new_offset; +} + /* * Checks an image for refcount consistency. * @@ -1413,10 +1528,39 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, inc_refcounts(bs, res, refcount_table, nb_clusters, offset, s->cluster_size); if (refcount_table[cluster] != 1) { - fprintf(stderr, "ERROR refcount block %" PRId64 + fprintf(stderr, "%s refcount block %" PRId64 " refcount=%d\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : + "ERROR", i, refcount_table[cluster]); - res->corruptions++; + + if (fix & BDRV_FIX_ERRORS) { + int64_t new_offset; + + new_offset = realloc_refcount_block(bs, i, offset); + if (new_offset < 0) { + res->corruptions++; + continue; + } + + /* update refcounts */ + if ((new_offset >> s->cluster_bits) >= nb_clusters) { + /* increase refcount_table size if necessary */ + int old_nb_clusters = nb_clusters; + nb_clusters = (new_offset >> s->cluster_bits) + 1; + refcount_table = g_realloc(refcount_table, + nb_clusters * sizeof(uint16_t)); + memset(&refcount_table[old_nb_clusters], 0, (nb_clusters + - old_nb_clusters) * sizeof(uint16_t)); + } + refcount_table[cluster]--; + inc_refcounts(bs, res, refcount_table, nb_clusters, + new_offset, s->cluster_size); + + res->corruptions_fixed++; + } else { + res->corruptions++; + } } } } diff --git a/include/block/block.h b/include/block/block.h index 742fce5f7f..e6b391ce88 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -413,6 +413,7 @@ typedef enum { BLKDBG_REFTABLE_LOAD, BLKDBG_REFTABLE_GROW, + BLKDBG_REFTABLE_UPDATE, BLKDBG_REFBLOCK_LOAD, BLKDBG_REFBLOCK_UPDATE, -- cgit v1.2.1 From 24530f3e060c71b6c57c7a70336f08a13a8b0a3d Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 30 Aug 2013 14:34:30 +0200 Subject: qcow2_check: Mark image consistent If no corruptions remain after an image repair (and no errors have been encountered), clear the corrupt flag in qcow2_check. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/qcow2.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index 05e002d856..4bc679a155 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -312,7 +312,11 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, } if (fix && result->check_errors == 0 && result->corruptions == 0) { - return qcow2_mark_clean(bs); + ret = qcow2_mark_clean(bs); + if (ret < 0) { + return ret; + } + return qcow2_mark_consistent(bs); } return ret; } -- cgit v1.2.1 From ca0eca91b65c34d6e5f5c77d5c18ed3de5b26139 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 30 Aug 2013 14:34:31 +0200 Subject: qemu-iotests: Overlapping cluster allocations A new test on corrupted images with overlapping cluster allocations. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/060 | 111 +++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/060.out | 44 ++++++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 156 insertions(+) create mode 100755 tests/qemu-iotests/060 create mode 100644 tests/qemu-iotests/060.out diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 new file mode 100755 index 0000000000..65bb09f023 --- /dev/null +++ b/tests/qemu-iotests/060 @@ -0,0 +1,111 @@ +#!/bin/bash +# +# Test case for image corruption (overlapping data structures) in qcow2 +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +# This tests qocw2-specific low-level functionality +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + +rt_offset=65536 # 0x10000 (XXX: just an assumption) +rb_offset=131072 # 0x20000 (XXX: just an assumption) +l1_offset=196608 # 0x30000 (XXX: just an assumption) +l2_offset=262144 # 0x40000 (XXX: just an assumption) + +IMGOPTS="compat=1.1" + +echo +echo "=== Testing L2 reference into L1 ===" +echo +_make_test_img 64M +# Link first L1 entry (first L2 table) onto itself +# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any +# later write will result in a COW operation, effectively ruining this attempt +# on image corruption) +poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00" +_check_test_img + +# The corrupt bit should not be set anyway +./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features + +# Try to write something, thereby forcing the corrupt bit to be set +$QEMU_IO -c "write -P 0x2a 0 512" "$TEST_IMG" | _filter_qemu_io + +# The corrupt bit must now be set +./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features + +# Try to open the image R/W (which should fail) +$QEMU_IO -c "read 0 512" "$TEST_IMG" 2>&1 | _filter_qemu_io | sed -e "s/can't open device .*$/can't open device/" + +# Try to open it RO (which should succeed) +$QEMU_IO -c "read 0 512" -r "$TEST_IMG" | _filter_qemu_io + +# We could now try to fix the image, but this would probably fail (how should an +# L2 table linked onto the L1 table be fixed?) + +echo +echo "=== Testing cluster data reference into refcount block ===" +echo +_make_test_img 64M +# Allocate L2 table +truncate -s "$(($l2_offset+65536))" "$TEST_IMG" +poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00" +# Mark cluster as used +poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01" +# Redirect new data cluster onto refcount block +poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00" +_check_test_img +./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$QEMU_IO -c "write -P 0x2a 0 512" "$TEST_IMG" | _filter_qemu_io +./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features + +# Try to fix it +_check_test_img -r all + +# The corrupt bit should be cleared +./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features + +# Look if it's really really fixed +$QEMU_IO -c "write -P 0x2a 0 512" "$TEST_IMG" | _filter_qemu_io +./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out new file mode 100644 index 0000000000..ca4583a4a4 --- /dev/null +++ b/tests/qemu-iotests/060.out @@ -0,0 +1,44 @@ +QA output created by 060 + +=== Testing L2 reference into L1 === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +ERROR cluster 3 refcount=1 reference=3 + +1 errors were found on the image. +Data may be corrupted, or further writes to the image may corrupt it. +incompatible_features 0x0 +qcow2: Preventing invalid write on metadata (overlaps with active L1 table); image marked as corrupt. +write failed: Input/output error +incompatible_features 0x2 +qcow2: Image is corrupt; cannot be opened read/write. +qemu-io: can't open device +no file open, try 'help open' +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Testing cluster data reference into refcount block === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +ERROR refcount block 0 refcount=2 +ERROR cluster 2 refcount=1 reference=2 + +2 errors were found on the image. +Data may be corrupted, or further writes to the image may corrupt it. +incompatible_features 0x0 +qcow2: Preventing invalid write on metadata (overlaps with refcount block); image marked as corrupt. +write failed: Input/output error +incompatible_features 0x2 +Repairing refcount block 0 refcount=2 +The following inconsistencies were found and repaired: + + 0 leaked clusters + 1 corruptions + +Double checking the fixed image now... +No errors were found on the image. +incompatible_features 0x0 +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +incompatible_features 0x0 +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index fb137922d0..b6962421fa 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -64,4 +64,5 @@ 055 rw auto 056 rw auto backing 059 rw auto +060 rw auto 062 rw auto -- cgit v1.2.1