summary refs log tree commit diff
path: root/block
diff options
context:
space:
mode:
author Anthony Liguori <anthony@codemonkey.ws> 2013-10-31 17:02:26 +0100
committer Anthony Liguori <anthony@codemonkey.ws> 2013-10-31 17:02:26 +0100
commit a126050a103c924b03388a9a64ce9af8c96b0969 (patch)
tree c93b9c5cdb8b2a0256845e596e198f1c65c4fcde /block
parent ef5cfe5bbd8bb05a51afaf7ab313769eb9ef44b6 (diff)
parent f4c129a38a5430b7342a7a23f53a22831154612f (diff)
download qemu-a126050a103c924b03388a9a64ce9af8c96b0969.tar.gz
Merge remote-tracking branch 'kwolf/tags/for-anthony' into staging
Block patches for 1.7.0-rc0 (v2) # gpg: Signature made Thu 31 Oct 2013 04:44:39 PM CET using RSA key ID C88F2FD6 # gpg: Can't check signature: public key not found * kwolf/tags/for-anthony: (30 commits) vmdk: Implment bdrv_get_specific_info qapi: Add optional field 'compressed' to ImageInfo qemu-iotests: prefill some data to test image sheepdog: check simultaneous create in resend_aioreq sheepdog: cancel aio requests if possible sheepdog: make add_aio_request and send_aioreq void functions sheepdog: try to reconnect to sheepdog after network error coroutine: add co_aio_sleep_ns() to allow sleep in block drivers sheepdog: reload inode outside of resend_aioreq sheepdog: handle vdi objects in resend_aio_req sheepdog: check return values of qemu_co_recv/send correctly qemu-iotests: Test case for backing file deletion qemu-iotests: drop duplicated "create_image" qemu-iotests: Fix 051 reference output block: Avoid unecessary drv->bdrv_getlength() calls block: Disable BDRV_O_COPY_ON_READ for the backing file ahci: fix win7 hang on boot sheepdog: pass copy_policy in the request sheepdog: explicitly set copies as type uint8_t block: Don't copy backing file name on error ... Message-id: 1383064269-27720-1-git-send-email-kwolf@redhat.com Signed-off-by: Anthony Liguori <anthony@codemonkey.ws>
Diffstat (limited to 'block')
-rw-r--r-- block/qcow2.c 19
-rw-r--r-- block/raw-posix.c 9
-rw-r--r-- block/raw-win32.c 4
-rw-r--r-- block/raw_bsd.c 1
-rw-r--r-- block/sheepdog.c 352
-rw-r--r-- block/vmdk.c 68
-rw-r--r-- block/vpc.c 7
7 files changed, 325 insertions, 135 deletions
diff --git a/block/qcow2.c b/block/qcow2.c
index c1abaffa19..6e5d98dc48 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1584,6 +1584,16 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
}
+ bdrv_close(bs);
+
+ /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
+ ret = bdrv_open(bs, filename, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, &local_err);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ goto out;
+ }
+
ret = 0;
out:
bdrv_unref(bs);
@@ -1939,13 +1949,22 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
{
BDRVQcowState *s = bs->opaque;
+ int64_t total_sectors = bs->total_sectors;
int growable = bs->growable;
+ bool zero_beyond_eof = bs->zero_beyond_eof;
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
bs->growable = 1;
+ bs->zero_beyond_eof = false;
ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
bs->growable = growable;
+ bs->zero_beyond_eof = zero_beyond_eof;
+
+ /* bdrv_co_do_writev will have increased the total_sectors value to include
+ * the VM state - the VM state is however not an actual part of the block
+ * device, therefore, we need to restore the old value. */
+ bs->total_sectors = total_sectors;
return ret;
}
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 6f03fbf793..f6d48bbdb2 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1715,7 +1715,8 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_aio_flush = raw_aio_flush,
.bdrv_truncate = raw_truncate,
- .bdrv_getlength = raw_getlength,
+ .bdrv_getlength = raw_getlength,
+ .has_variable_length = true,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -1824,7 +1825,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_flush = raw_aio_flush,
.bdrv_truncate = raw_truncate,
- .bdrv_getlength = raw_getlength,
+ .bdrv_getlength = raw_getlength,
+ .has_variable_length = true,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -1951,7 +1953,8 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_flush = raw_aio_flush,
.bdrv_truncate = raw_truncate,
- .bdrv_getlength = raw_getlength,
+ .bdrv_getlength = raw_getlength,
+ .has_variable_length = true,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 676b5701db..2741e4de10 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -616,7 +616,9 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
- .bdrv_getlength = raw_getlength,
+ .bdrv_getlength = raw_getlength,
+ .has_variable_length = true,
+
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
};
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 0078c1baeb..2265dcc03f 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -178,6 +178,7 @@ static BlockDriver bdrv_raw = {
.bdrv_co_get_block_status = &raw_co_get_block_status,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
+ .has_variable_length = true,
.bdrv_get_info = &raw_get_info,
.bdrv_is_inserted = &raw_is_inserted,
.bdrv_media_changed = &raw_media_changed,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 5f81c93ee3..ef387de71f 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -125,8 +125,9 @@ typedef struct SheepdogObjReq {
uint32_t data_length;
uint64_t oid;
uint64_t cow_oid;
- uint32_t copies;
- uint32_t rsvd;
+ uint8_t copies;
+ uint8_t copy_policy;
+ uint8_t reserved[6];
uint64_t offset;
} SheepdogObjReq;
@@ -138,7 +139,9 @@ typedef struct SheepdogObjRsp {
uint32_t id;
uint32_t data_length;
uint32_t result;
- uint32_t copies;
+ uint8_t copies;
+ uint8_t copy_policy;
+ uint8_t reserved[2];
uint32_t pad[6];
} SheepdogObjRsp;
@@ -151,7 +154,9 @@ typedef struct SheepdogVdiReq {
uint32_t data_length;
uint64_t vdi_size;
uint32_t vdi_id;
- uint32_t copies;
+ uint8_t copies;
+ uint8_t copy_policy;
+ uint8_t reserved[2];
uint32_t snapid;
uint32_t pad[3];
} SheepdogVdiReq;
@@ -222,6 +227,11 @@ static inline uint64_t data_oid_to_idx(uint64_t oid)
return oid & (MAX_DATA_OBJS - 1);
}
+static inline uint32_t oid_to_vid(uint64_t oid)
+{
+ return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT;
+}
+
static inline uint64_t vid_to_vdi_oid(uint32_t vid)
{
return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);
@@ -289,11 +299,14 @@ struct SheepdogAIOCB {
Coroutine *coroutine;
void (*aio_done_func)(SheepdogAIOCB *);
- bool canceled;
+ bool cancelable;
+ bool *finished;
int nr_pending;
};
typedef struct BDRVSheepdogState {
+ BlockDriverState *bs;
+
SheepdogInode inode;
uint32_t min_dirty_data_idx;
@@ -313,8 +326,11 @@ typedef struct BDRVSheepdogState {
Coroutine *co_recv;
uint32_t aioreq_seq_num;
+
+ /* Every aio request must be linked to either of these queues. */
QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head;
+ QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
} BDRVSheepdogState;
static const char * sd_strerror(int err)
@@ -403,6 +419,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
{
SheepdogAIOCB *acb = aio_req->aiocb;
+ acb->cancelable = false;
QLIST_REMOVE(aio_req, aio_siblings);
g_free(aio_req);
@@ -411,23 +428,68 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
{
- if (!acb->canceled) {
- qemu_coroutine_enter(acb->coroutine, NULL);
+ qemu_coroutine_enter(acb->coroutine, NULL);
+ if (acb->finished) {
+ *acb->finished = true;
}
qemu_aio_release(acb);
}
+/*
+ * Check whether the specified acb can be canceled
+ *
+ * We can cancel aio only when every request belonging to the acb is:
+ * - Not processed by the sheepdog server.
+ * - Not linked to the inflight queue.
+ */
+static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
+{
+ BDRVSheepdogState *s = acb->common.bs->opaque;
+ AIOReq *aioreq;
+
+ if (!acb->cancelable) {
+ return false;
+ }
+
+ QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) {
+ if (aioreq->aiocb == acb) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
{
SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
+ BDRVSheepdogState *s = acb->common.bs->opaque;
+ AIOReq *aioreq, *next;
+ bool finished = false;
+
+ acb->finished = &finished;
+ while (!finished) {
+ if (sd_acb_cancelable(acb)) {
+ /* Remove outstanding requests from pending and failed queues. */
+ QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings,
+ next) {
+ if (aioreq->aiocb == acb) {
+ free_aio_req(s, aioreq);
+ }
+ }
+ QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
+ next) {
+ if (aioreq->aiocb == acb) {
+ free_aio_req(s, aioreq);
+ }
+ }
- /*
- * Sheepdog cannot cancel the requests which are already sent to
- * the servers, so we just complete the request with -EIO here.
- */
- acb->ret = -EIO;
- qemu_coroutine_enter(acb->coroutine, NULL);
- acb->canceled = true;
+ assert(acb->nr_pending == 0);
+ sd_finish_aiocb(acb);
+ return;
+ }
+ qemu_aio_wait();
+ }
}
static const AIOCBInfo sd_aiocb_info = {
@@ -448,7 +510,8 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
acb->nb_sectors = nb_sectors;
acb->aio_done_func = NULL;
- acb->canceled = false;
+ acb->cancelable = true;
+ acb->finished = NULL;
acb->coroutine = qemu_coroutine_self();
acb->ret = 0;
acb->nr_pending = 0;
@@ -489,13 +552,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
int ret;
ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
- if (ret < sizeof(*hdr)) {
+ if (ret != sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
return ret;
}
ret = qemu_co_send(sockfd, data, *wlen);
- if (ret < *wlen) {
+ if (ret != *wlen) {
error_report("failed to send a req, %s", strerror(errno));
}
@@ -541,7 +604,7 @@ static coroutine_fn void do_co_req(void *opaque)
qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
- if (ret < sizeof(*hdr)) {
+ if (ret != sizeof(*hdr)) {
error_report("failed to get a rsp, %s", strerror(errno));
ret = -errno;
goto out;
@@ -553,7 +616,7 @@ static coroutine_fn void do_co_req(void *opaque)
if (*rlen) {
ret = qemu_co_recv(sockfd, data, *rlen);
- if (ret < *rlen) {
+ if (ret != *rlen) {
error_report("failed to get the data, %s", strerror(errno));
ret = -errno;
goto out;
@@ -596,11 +659,13 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
return srco.ret;
}
-static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
+static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
struct iovec *iov, int niov, bool create,
enum AIOCBState aiocb_type);
-static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
-
+static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
+static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
+static int get_sheep_fd(BDRVSheepdogState *s);
+static void co_write_request(void *opaque);
static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid)
{
@@ -623,22 +688,59 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
{
AIOReq *aio_req;
SheepdogAIOCB *acb;
- int ret;
while ((aio_req = find_pending_req(s, oid)) != NULL) {
acb = aio_req->aiocb;
/* move aio_req from pending list to inflight one */
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- ret = add_aio_request(s, aio_req, acb->qiov->iov,
- acb->qiov->niov, false, acb->aiocb_type);
- if (ret < 0) {
- error_report("add_aio_request is failed");
- free_aio_req(s, aio_req);
- if (!acb->nr_pending) {
- sd_finish_aiocb(acb);
- }
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false,
+ acb->aiocb_type);
+ }
+}
+
+static coroutine_fn void reconnect_to_sdog(void *opaque)
+{
+ BDRVSheepdogState *s = opaque;
+ AIOReq *aio_req, *next;
+
+ qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+ close(s->fd);
+ s->fd = -1;
+
+ /* Wait for outstanding write requests to be completed. */
+ while (s->co_send != NULL) {
+ co_write_request(opaque);
+ }
+
+ /* Try to reconnect to the sheepdog server once every second. */
+ while (s->fd < 0) {
+ s->fd = get_sheep_fd(s);
+ if (s->fd < 0) {
+ DPRINTF("Wait for connection to be established\n");
+ co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME,
+ 1000000000ULL);
}
+ };
+
+ /*
+ * Now we have to resend all the requests in the inflight queue. However,
+ * resend_aioreq() can yield and newly created requests can be added to the
+ * inflight queue before the coroutine is resumed. To avoid mixing them, we
+ * have to move all the inflight requests to the failed queue before
+ * resend_aioreq() is called.
+ */
+ QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) {
+ QLIST_REMOVE(aio_req, aio_siblings);
+ QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings);
+ }
+
+ /* Resend all the failed aio requests. */
+ while (!QLIST_EMPTY(&s->failed_aio_head)) {
+ aio_req = QLIST_FIRST(&s->failed_aio_head);
+ QLIST_REMOVE(aio_req, aio_siblings);
+ QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
+ resend_aioreq(s, aio_req);
}
}
@@ -658,15 +760,11 @@ static void coroutine_fn aio_read_response(void *opaque)
SheepdogAIOCB *acb;
uint64_t idx;
- if (QLIST_EMPTY(&s->inflight_aio_head)) {
- goto out;
- }
-
/* read a header */
ret = qemu_co_recv(fd, &rsp, sizeof(rsp));
- if (ret < 0) {
+ if (ret != sizeof(rsp)) {
error_report("failed to get the header, %s", strerror(errno));
- goto out;
+ goto err;
}
/* find the right aio_req from the inflight aio list */
@@ -677,7 +775,7 @@ static void coroutine_fn aio_read_response(void *opaque)
}
if (!aio_req) {
error_report("cannot find aio_req %x", rsp.id);
- goto out;
+ goto err;
}
acb = aio_req->aiocb;
@@ -715,9 +813,9 @@ static void coroutine_fn aio_read_response(void *opaque)
case AIOCB_READ_UDATA:
ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov,
aio_req->iov_offset, rsp.data_length);
- if (ret < 0) {
+ if (ret != rsp.data_length) {
error_report("failed to get the data, %s", strerror(errno));
- goto out;
+ goto err;
}
break;
case AIOCB_FLUSH_CACHE:
@@ -748,11 +846,20 @@ static void coroutine_fn aio_read_response(void *opaque)
case SD_RES_SUCCESS:
break;
case SD_RES_READONLY:
- ret = resend_aioreq(s, aio_req);
- if (ret == SD_RES_SUCCESS) {
- goto out;
+ if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) {
+ ret = reload_inode(s, 0, "");
+ if (ret < 0) {
+ goto err;
+ }
}
- /* fall through */
+ if (is_data_obj(aio_req->oid)) {
+ aio_req->oid = vid_to_data_oid(s->inode.vdi_id,
+ data_oid_to_idx(aio_req->oid));
+ } else {
+ aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id);
+ }
+ resend_aioreq(s, aio_req);
+ goto out;
default:
acb->ret = -EIO;
error_report("%s", sd_strerror(rsp.result));
@@ -769,6 +876,10 @@ static void coroutine_fn aio_read_response(void *opaque)
}
out:
s->co_recv = NULL;
+ return;
+err:
+ s->co_recv = NULL;
+ reconnect_to_sdog(opaque);
}
static void co_read_response(void *opaque)
@@ -997,7 +1108,7 @@ out:
return ret;
}
-static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
+static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
struct iovec *iov, int niov, bool create,
enum AIOCBState aiocb_type)
{
@@ -1059,29 +1170,25 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
/* send a header */
ret = qemu_co_send(s->fd, &hdr, sizeof(hdr));
- if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
+ if (ret != sizeof(hdr)) {
error_report("failed to send a req, %s", strerror(errno));
- return -errno;
+ goto out;
}
if (wlen) {
ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen);
- if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
+ if (ret != wlen) {
error_report("failed to send a data, %s", strerror(errno));
- return -errno;
}
}
-
+out:
socket_set_cork(s->fd, 0);
qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s);
+ s->co_send = NULL;
qemu_co_mutex_unlock(&s->lock);
-
- return 0;
}
-static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
+static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
unsigned int datalen, uint64_t offset,
bool write, bool create, uint32_t cache_flags)
{
@@ -1129,7 +1236,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
}
}
-static int read_object(int fd, char *buf, uint64_t oid, int copies,
+static int read_object(int fd, char *buf, uint64_t oid, uint8_t copies,
unsigned int datalen, uint64_t offset,
uint32_t cache_flags)
{
@@ -1137,7 +1244,7 @@ static int read_object(int fd, char *buf, uint64_t oid, int copies,
false, cache_flags);
}
-static int write_object(int fd, char *buf, uint64_t oid, int copies,
+static int write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
unsigned int datalen, uint64_t offset, bool create,
uint32_t cache_flags)
{
@@ -1181,51 +1288,62 @@ out:
return ret;
}
-static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
+/* Return true if the specified request is linked to the pending list. */
+static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
{
- SheepdogAIOCB *acb = aio_req->aiocb;
- bool create = false;
- int ret;
-
- ret = reload_inode(s, 0, "");
- if (ret < 0) {
- return ret;
+ AIOReq *areq;
+ QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) {
+ if (areq != aio_req && areq->oid == aio_req->oid) {
+ /*
+ * Sheepdog cannot handle simultaneous create requests to the same
+ * object, so we cannot send the request until the previous request
+ * finishes.
+ */
+ DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid);
+ aio_req->flags = 0;
+ aio_req->base_oid = 0;
+ QLIST_REMOVE(aio_req, aio_siblings);
+ QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
+ return true;
+ }
}
- aio_req->oid = vid_to_data_oid(s->inode.vdi_id,
- data_oid_to_idx(aio_req->oid));
+ return false;
+}
+
+static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
+{
+ SheepdogAIOCB *acb = aio_req->aiocb;
+ bool create = false;
/* check whether this request becomes a CoW one */
- if (acb->aiocb_type == AIOCB_WRITE_UDATA) {
+ if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) {
int idx = data_oid_to_idx(aio_req->oid);
- AIOReq *areq;
- if (s->inode.data_vdi_id[idx] == 0) {
- create = true;
- goto out;
- }
if (is_data_obj_writable(&s->inode, idx)) {
goto out;
}
- /* link to the pending list if there is another CoW request to
- * the same object */
- QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) {
- if (areq != aio_req && areq->oid == aio_req->oid) {
- DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid);
- QLIST_REMOVE(aio_req, aio_siblings);
- QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
- return SD_RES_SUCCESS;
- }
+ if (check_simultaneous_create(s, aio_req)) {
+ return;
}
- aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
- aio_req->flags |= SD_FLAG_CMD_COW;
+ if (s->inode.data_vdi_id[idx]) {
+ aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
+ aio_req->flags |= SD_FLAG_CMD_COW;
+ }
create = true;
}
out:
- return add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
- create, acb->aiocb_type);
+ if (is_data_obj(aio_req->oid)) {
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+ acb->aiocb_type);
+ } else {
+ struct iovec iov;
+ iov.iov_base = &s->inode;
+ iov.iov_len = sizeof(s->inode);
+ add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+ }
}
/* TODO Convert to fine grained options */
@@ -1255,6 +1373,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
const char *filename;
+ s->bs = bs;
+
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
@@ -1268,6 +1388,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
QLIST_INIT(&s->inflight_aio_head);
QLIST_INIT(&s->pending_aio_head);
+ QLIST_INIT(&s->failed_aio_head);
s->fd = -1;
memset(vdi, 0, sizeof(vdi));
@@ -1344,7 +1465,8 @@ out:
}
static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
- uint32_t base_vid, uint32_t *vdi_id, int snapshot)
+ uint32_t base_vid, uint32_t *vdi_id, int snapshot,
+ uint8_t copy_policy)
{
SheepdogVdiReq hdr;
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
@@ -1374,6 +1496,7 @@ static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
hdr.data_length = wlen;
hdr.vdi_size = vdi_size;
+ hdr.copy_policy = copy_policy;
ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
@@ -1526,7 +1649,8 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
bdrv_unref(bs);
}
- ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0);
+ /* TODO: allow users to specify copy number */
+ ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0, 0);
if (!prealloc || ret) {
goto out;
}
@@ -1621,7 +1745,6 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
*/
static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
{
- int ret;
BDRVSheepdogState *s = acb->common.bs->opaque;
struct iovec iov;
AIOReq *aio_req;
@@ -1643,18 +1766,13 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
data_len, offset, 0, 0, offset);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- ret = add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
- if (ret) {
- free_aio_req(s, aio_req);
- acb->ret = -EIO;
- goto out;
- }
+ add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
acb->aio_done_func = sd_finish_aiocb;
acb->aiocb_type = AIOCB_WRITE_UDATA;
return;
}
-out:
+
sd_finish_aiocb(acb);
}
@@ -1716,7 +1834,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
*/
deleted = sd_delete(s);
ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid,
- !deleted);
+ !deleted, s->inode.copy_policy);
if (ret) {
goto out;
}
@@ -1840,35 +1958,16 @@ static int coroutine_fn sd_co_rw_vector(void *p)
}
aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
+ QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
if (create) {
- AIOReq *areq;
- QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) {
- if (areq->oid == oid) {
- /*
- * Sheepdog cannot handle simultaneous create
- * requests to the same object. So we cannot send
- * the request until the previous request
- * finishes.
- */
- aio_req->flags = 0;
- aio_req->base_oid = 0;
- QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req,
- aio_siblings);
- goto done;
- }
+ if (check_simultaneous_create(s, aio_req)) {
+ goto done;
}
}
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- ret = add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
- create, acb->aiocb_type);
- if (ret < 0) {
- error_report("add_aio_request is failed");
- free_aio_req(s, aio_req);
- acb->ret = -EIO;
- goto out;
- }
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+ acb->aiocb_type);
done:
offset = 0;
idx++;
@@ -1936,7 +2035,6 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
BDRVSheepdogState *s = bs->opaque;
SheepdogAIOCB *acb;
AIOReq *aio_req;
- int ret;
if (s->cache_flags != SD_FLAG_CMD_CACHE) {
return 0;
@@ -1949,13 +2047,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
0, 0, 0, 0, 0);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- ret = add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type);
- if (ret < 0) {
- error_report("add_aio_request is failed");
- free_aio_req(s, aio_req);
- qemu_aio_release(acb);
- return ret;
- }
+ add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type);
qemu_coroutine_yield();
return acb->ret;
@@ -2006,7 +2098,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid,
- 1);
+ 1, s->inode.copy_policy);
if (ret < 0) {
error_report("failed to create inode for snapshot. %s",
strerror(errno));
diff --git a/block/vmdk.c b/block/vmdk.c
index 32ec8b7766..a7ebd0f125 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -106,6 +106,7 @@ typedef struct VmdkExtent {
uint32_t l2_cache_counts[L2_CACHE_SIZE];
int64_t cluster_sectors;
+ char *type;
} VmdkExtent;
typedef struct BDRVVmdkState {
@@ -113,11 +114,13 @@ typedef struct BDRVVmdkState {
uint64_t desc_offset;
bool cid_updated;
bool cid_checked;
+ uint32_t cid;
uint32_t parent_cid;
int num_extents;
/* Extent array with num_extents entries, ascend ordered by address */
VmdkExtent *extents;
Error *migration_blocker;
+ char *create_type;
} BDRVVmdkState;
typedef struct VmdkMetaData {
@@ -214,6 +217,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
g_free(e->l1_table);
g_free(e->l2_cache);
g_free(e->l1_backup_table);
+ g_free(e->type);
if (e->file != bs->file) {
bdrv_unref(e->file);
}
@@ -534,6 +538,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
uint32_t l1_size, l1_entry_sectors;
VMDK4Header header;
VmdkExtent *extent;
+ BDRVVmdkState *s = bs->opaque;
int64_t l1_backup_offset = 0;
ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
@@ -549,6 +554,10 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
}
}
+ if (!s->create_type) {
+ s->create_type = g_strdup("monolithicSparse");
+ }
+
if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
/*
* The footer takes precedence over the header, so read it in. The
@@ -709,6 +718,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
int64_t flat_offset;
char extent_path[PATH_MAX];
BlockDriverState *extent_file;
+ BDRVVmdkState *s = bs->opaque;
+ VmdkExtent *extent;
while (*p) {
/* parse extent line:
@@ -751,7 +762,6 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
/* save to extents array */
if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) {
/* FLAT extent */
- VmdkExtent *extent;
ret = vmdk_add_extent(bs, extent_file, true, sectors,
0, 0, 0, 0, 0, &extent, errp);
@@ -766,10 +776,12 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
bdrv_unref(extent_file);
return ret;
}
+ extent = &s->extents[s->num_extents - 1];
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
return -ENOTSUP;
}
+ extent->type = g_strdup(type);
next_line:
/* move to next line */
while (*p) {
@@ -817,6 +829,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
ret = -ENOTSUP;
goto exit;
}
+ s->create_type = g_strdup(ct);
s->desc_offset = 0;
ret = vmdk_parse_extents(buf, bs, bs->file->filename, errp);
exit:
@@ -843,6 +856,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
if (ret) {
goto fail;
}
+ s->cid = vmdk_read_cid(bs, 0);
s->parent_cid = vmdk_read_cid(bs, 1);
qemu_co_mutex_init(&s->lock);
@@ -855,6 +869,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
return 0;
fail:
+ g_free(s->create_type);
+ s->create_type = NULL;
vmdk_free_extents(bs);
return ret;
}
@@ -1766,6 +1782,7 @@ static void vmdk_close(BlockDriverState *bs)
BDRVVmdkState *s = bs->opaque;
vmdk_free_extents(bs);
+ g_free(s->create_type);
migrate_del_blocker(s->migration_blocker);
error_free(s->migration_blocker);
@@ -1827,6 +1844,54 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
return 1;
}
+static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
+{
+ ImageInfo *info = g_new0(ImageInfo, 1);
+
+ *info = (ImageInfo){
+ .filename = g_strdup(extent->file->filename),
+ .format = g_strdup(extent->type),
+ .virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
+ .compressed = extent->compressed,
+ .has_compressed = extent->compressed,
+ .cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE,
+ .has_cluster_size = !extent->flat,
+ };
+
+ return info;
+}
+
+static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
+{
+ int i;
+ BDRVVmdkState *s = bs->opaque;
+ ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
+ ImageInfoList **next;
+
+ *spec_info = (ImageInfoSpecific){
+ .kind = IMAGE_INFO_SPECIFIC_KIND_VMDK,
+ {
+ .vmdk = g_new0(ImageInfoSpecificVmdk, 1),
+ },
+ };
+
+ *spec_info->vmdk = (ImageInfoSpecificVmdk) {
+ .create_type = g_strdup(s->create_type),
+ .cid = s->cid,
+ .parent_cid = s->parent_cid,
+ };
+
+ next = &spec_info->vmdk->extents;
+ for (i = 0; i < s->num_extents; i++) {
+ *next = g_new0(ImageInfoList, 1);
+ (*next)->value = vmdk_get_extent_info(&s->extents[i]);
+ (*next)->next = NULL;
+ next = &(*next)->next;
+ }
+
+ return spec_info;
+}
+
static QEMUOptionParameter vmdk_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -1879,6 +1944,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_co_get_block_status = vmdk_co_get_block_status,
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
.bdrv_has_zero_init = vmdk_has_zero_init,
+ .bdrv_get_specific_info = vmdk_get_specific_info,
.create_options = vmdk_create_options,
};
diff --git a/block/vpc.c b/block/vpc.c
index b5dca3961e..627d11cb9b 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -260,6 +260,13 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+ if (s->free_data_block_offset > bdrv_getlength(bs->file)) {
+ error_setg(errp, "block-vpc: free_data_block_offset points after "
+ "the end of file. The image has been truncated.");
+ ret = -EINVAL;
+ goto fail;
+ }
+
s->last_bitmap_offset = (int64_t) -1;
#ifdef CACHE