From 7c9e527659c67d4d7b41d9504f93d2d7ee482488 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 21 Aug 2017 18:58:56 +0200 Subject: scsi, file-posix: add support for persistent reservation management It is a common requirement for virtual machines to send persistent reservations, but this currently requires either running QEMU with CAP_SYS_RAWIO, or using out-of-tree patches that let an unprivileged QEMU bypass Linux's filter on SG_IO commands. As an alternative mechanism, the next patches will introduce a privileged helper to run persistent reservation commands without expanding QEMU's attack surface unnecessarily. The helper is invoked through a "pr-manager" QOM object, to which file-posix.c passes SG_IO requests for PERSISTENT RESERVE OUT and PERSISTENT RESERVE IN commands. For example: $ qemu-system-x86_64 -device virtio-scsi \ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock -drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 -device scsi-block,drive=hd or: $ qemu-system-x86_64 -device virtio-scsi \ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock -blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 -device scsi-block,drive=hd Multiple pr-manager implementations are conceivable and possible, though only one is implemented right now. For example, a pr-manager could: - talk directly to the multipath daemon from a privileged QEMU (i.e. QEMU links to libmpathpersist); this makes reservations work properly with multipath, but still requires CAP_SYS_RAWIO - use the Linux IOC_PR_* ioctls (they require CAP_SYS_ADMIN though) - more interestingly, implement reservations directly in QEMU through file system locks or a shared database (e.g. 
sqlite) Signed-off-by: Paolo Bonzini --- scsi/Makefile.objs | 2 + scsi/pr-manager.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++ scsi/trace-events | 3 ++ 3 files changed, 114 insertions(+) create mode 100644 scsi/pr-manager.c create mode 100644 scsi/trace-events (limited to 'scsi') diff --git a/scsi/Makefile.objs b/scsi/Makefile.objs index 31b82a5a36..5496d2ae6a 100644 --- a/scsi/Makefile.objs +++ b/scsi/Makefile.objs @@ -1 +1,3 @@ block-obj-y += utils.o + +block-obj-$(CONFIG_LINUX) += pr-manager.o diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c new file mode 100644 index 0000000000..87c45db5d4 --- /dev/null +++ b/scsi/pr-manager.c @@ -0,0 +1,109 @@ +/* + * Persistent reservation manager abstract class + * + * Copyright (c) 2017 Red Hat, Inc. + * + * Author: Paolo Bonzini + * + * This code is licensed under the LGPL. + * + */ + +#include "qemu/osdep.h" +#include + +#include "qapi/error.h" +#include "block/aio.h" +#include "block/thread-pool.h" +#include "scsi/pr-manager.h" +#include "trace.h" + +typedef struct PRManagerData { + PRManager *pr_mgr; + struct sg_io_hdr *hdr; + int fd; +} PRManagerData; + +static int pr_manager_worker(void *opaque) +{ + PRManagerData *data = opaque; + PRManager *pr_mgr = data->pr_mgr; + PRManagerClass *pr_mgr_class = + PR_MANAGER_GET_CLASS(pr_mgr); + struct sg_io_hdr *hdr = data->hdr; + int fd = data->fd; + int r; + + g_free(data); + trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]); + + /* The reference was taken in pr_manager_execute. 
*/ + r = pr_mgr_class->run(pr_mgr, fd, hdr); + object_unref(OBJECT(pr_mgr)); + return r; +} + + +BlockAIOCB *pr_manager_execute(PRManager *pr_mgr, + AioContext *ctx, int fd, + struct sg_io_hdr *hdr, + BlockCompletionFunc *complete, + void *opaque) +{ + PRManagerData *data = g_new(PRManagerData, 1); + ThreadPool *pool = aio_get_thread_pool(ctx); + + trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1], opaque); + data->pr_mgr = pr_mgr; + data->fd = fd; + data->hdr = hdr; + + /* The matching object_unref is in pr_manager_worker. */ + object_ref(OBJECT(pr_mgr)); + return thread_pool_submit_aio(pool, pr_manager_worker, + data, complete, opaque); +} + +static const TypeInfo pr_manager_info = { + .parent = TYPE_OBJECT, + .name = TYPE_PR_MANAGER, + .class_size = sizeof(PRManagerClass), + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +PRManager *pr_manager_lookup(const char *id, Error **errp) +{ + Object *obj; + PRManager *pr_mgr; + + obj = object_resolve_path_component(object_get_objects_root(), id); + if (!obj) { + error_setg(errp, "No persistent reservation manager with id '%s'", id); + return NULL; + } + + pr_mgr = (PRManager *) + object_dynamic_cast(obj, + TYPE_PR_MANAGER); + if (!pr_mgr) { + error_setg(errp, + "Object with id '%s' is not a persistent reservation manager", + id); + return NULL; + } + + return pr_mgr; +} + +static void +pr_manager_register_types(void) +{ + type_register_static(&pr_manager_info); +} + + +type_init(pr_manager_register_types); diff --git a/scsi/trace-events b/scsi/trace-events new file mode 100644 index 0000000000..45f5b6e49b --- /dev/null +++ b/scsi/trace-events @@ -0,0 +1,3 @@ +# scsi/pr-manager.c +pr_manager_execute(int fd, int cmd, int sa, void *opaque) "fd=%d cmd=0x%02x service action=0x%02x opaque=%p" +pr_manager_run(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x" -- cgit v1.2.1