summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2016-03-23 12:57:44 +0000
committerPeter Maydell <peter.maydell@linaro.org>2016-03-23 12:57:44 +0000
commit2538039f2c26d66053426fb547e4f25e669baf62 (patch)
tree28decfaf0c61e206bc286cfc7b014eee058be222
parentffa6564c9b13cea4b704e184d29d721f2cb061bb (diff)
parenta335c6f204eefba8ff935bcee8f31f51d2174119 (diff)
downloadqemu-2538039f2c26d66053426fb547e4f25e669baf62.tar.gz
Merge remote-tracking branch 'remotes/armbru/tags/pull-ivshmem-2016-03-18' into staging
ivshmem: Fixes, cleanups, device model split # gpg: Signature made Mon 21 Mar 2016 20:33:54 GMT using RSA key ID EB918653 # gpg: Good signature from "Markus Armbruster <armbru@redhat.com>" # gpg: aka "Markus Armbruster <armbru@pond.sub.org>" * remotes/armbru/tags/pull-ivshmem-2016-03-18: (40 commits) contrib/ivshmem-server: Print "not for production" warning ivshmem: Require master to have ID zero ivshmem: Drop ivshmem property x-memdev ivshmem: Clean up after the previous commit ivshmem: Split ivshmem-plain, ivshmem-doorbell off ivshmem ivshmem: Replace int role_val by OnOffAuto master qdev: New DEFINE_PROP_ON_OFF_AUTO ivshmem: Inline check_shm_size() into its only caller ivshmem: Simplify memory regions for BAR 2 (shared memory) ivshmem: Implement shm=... with a memory backend ivshmem: Tighten check of property "size" ivshmem: Simplify how we cope with short reads from server ivshmem: Drop the hackish test for UNIX domain chardev ivshmem: Rely on server sending the ID right after the version ivshmem: Propagate errors through ivshmem_recv_setup() ivshmem: Receive shared memory synchronously in realize() ivshmem: Plug leaks on unplug, fix peer disconnect ivshmem: Disentangle ivshmem_read() ivshmem: Simplify rejection of invalid peer ID from server ivshmem: Assert interrupts are set up once ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--contrib/ivshmem-server/ivshmem-server.c56
-rw-r--r--contrib/ivshmem-server/ivshmem-server.h4
-rw-r--r--contrib/ivshmem-server/main.c100
-rw-r--r--default-configs/pci.mak2
-rw-r--r--docs/specs/ivshmem-spec.txt254
-rw-r--r--docs/specs/ivshmem_device_spec.txt161
-rw-r--r--hw/core/qdev-properties.c10
-rw-r--r--hw/misc/ivshmem.c1084
-rw-r--r--include/hw/qdev-properties.h3
-rw-r--r--qemu-doc.texi49
-rw-r--r--target-ppc/kvm.c6
-rw-r--r--tests/Makefile2
-rw-r--r--tests/ivshmem-test.c95
-rw-r--r--tests/libqos/pci-pc.c8
-rw-r--r--util/event_notifier-posix.c6
15 files changed, 1021 insertions, 819 deletions
diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c
index bfd0fad49a..172db78b37 100644
--- a/contrib/ivshmem-server/ivshmem-server.c
+++ b/contrib/ivshmem-server/ivshmem-server.c
@@ -12,9 +12,6 @@
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
-#ifdef CONFIG_LINUX
-#include <sys/vfs.h>
-#endif
#include "ivshmem-server.h"
@@ -257,7 +254,8 @@ ivshmem_server_ftruncate(int fd, unsigned shmsize)
/* Init a new ivshmem server */
int
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
- const char *shm_path, size_t shm_size, unsigned n_vectors,
+ const char *shm_path, bool use_shm_open,
+ size_t shm_size, unsigned n_vectors,
bool verbose)
{
int ret;
@@ -278,6 +276,7 @@ ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
return -1;
}
+ server->use_shm_open = use_shm_open;
server->shm_size = shm_size;
server->n_vectors = n_vectors;
@@ -286,31 +285,6 @@ ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
return 0;
}
-#ifdef CONFIG_LINUX
-
-#define HUGETLBFS_MAGIC 0x958458f6
-
-static long gethugepagesize(const char *path)
-{
- struct statfs fs;
- int ret;
-
- do {
- ret = statfs(path, &fs);
- } while (ret != 0 && errno == EINTR);
-
- if (ret != 0) {
- return -1;
- }
-
- if (fs.f_type != HUGETLBFS_MAGIC) {
- return -1;
- }
-
- return fs.f_bsize;
-}
-#endif
-
/* open shm, create and bind to the unix socket */
int
ivshmem_server_start(IvshmemServer *server)
@@ -319,27 +293,17 @@ ivshmem_server_start(IvshmemServer *server)
int shm_fd, sock_fd, ret;
/* open shm file */
-#ifdef CONFIG_LINUX
- long hpagesize;
-
- hpagesize = gethugepagesize(server->shm_path);
- if (hpagesize < 0 && errno != ENOENT) {
- IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n",
- server->shm_path, strerror(errno));
- }
-
- if (hpagesize > 0) {
+ if (server->use_shm_open) {
+ IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
+ server->shm_path);
+ shm_fd = shm_open(server->shm_path, O_CREAT | O_RDWR, S_IRWXU);
+ } else {
gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
- IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path);
+ IVSHMEM_SERVER_DEBUG(server, "Using file-backed shared memory: %s\n",
+ server->shm_path);
shm_fd = mkstemp(filename);
unlink(filename);
g_free(filename);
- } else
-#endif
- {
- IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
- server->shm_path);
- shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
}
if (shm_fd < 0) {
diff --git a/contrib/ivshmem-server/ivshmem-server.h b/contrib/ivshmem-server/ivshmem-server.h
index e9de8a369d..3851639618 100644
--- a/contrib/ivshmem-server/ivshmem-server.h
+++ b/contrib/ivshmem-server/ivshmem-server.h
@@ -66,6 +66,7 @@ typedef struct IvshmemServer {
char unix_sock_path[PATH_MAX]; /**< path to unix socket */
int sock_fd; /**< unix sock file descriptor */
char shm_path[PATH_MAX]; /**< path to shm */
+ bool use_shm_open;
size_t shm_size; /**< size of shm */
int shm_fd; /**< shm file descriptor */
unsigned n_vectors; /**< number of vectors */
@@ -89,7 +90,8 @@ typedef struct IvshmemServer {
*/
int
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
- const char *shm_path, size_t shm_size, unsigned n_vectors,
+ const char *shm_path, bool use_shm_open,
+ size_t shm_size, unsigned n_vectors,
bool verbose);
/**
diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
index cca1061f0e..dc64a1832c 100644
--- a/contrib/ivshmem-server/main.c
+++ b/contrib/ivshmem-server/main.c
@@ -29,35 +29,38 @@ typedef struct IvshmemServerArgs {
const char *pid_file;
const char *unix_socket_path;
const char *shm_path;
+ bool use_shm_open;
uint64_t shm_size;
unsigned n_vectors;
} IvshmemServerArgs;
-/* show ivshmem_server_usage and exit with given error code */
static void
-ivshmem_server_usage(const char *name, int code)
+ivshmem_server_usage(const char *progname)
{
- fprintf(stderr, "%s [opts]\n", name);
- fprintf(stderr, " -h: show this help\n");
- fprintf(stderr, " -v: verbose mode\n");
- fprintf(stderr, " -F: foreground mode (default is to daemonize)\n");
- fprintf(stderr, " -p <pid_file>: path to the PID file (used in daemon\n"
- " mode only).\n"
- " Default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
- fprintf(stderr, " -S <unix_socket_path>: path to the unix socket\n"
- " to listen to.\n"
- " Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
- fprintf(stderr, " -m <shm_path>: path to the shared memory.\n"
- " The path corresponds to a POSIX shm name or a\n"
- " hugetlbfs mount point.\n"
- " default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
- fprintf(stderr, " -l <size>: size of shared memory in bytes. The suffix\n"
- " K, M and G can be used (ex: 1K means 1024).\n"
- " default=%u\n", IVSHMEM_SERVER_DEFAULT_SHM_SIZE);
- fprintf(stderr, " -n <n_vects>: number of vectors.\n"
- " default=%u\n", IVSHMEM_SERVER_DEFAULT_N_VECTORS);
-
- exit(code);
+ printf("Usage: %s [OPTION]...\n"
+ " -h: show this help\n"
+ " -v: verbose mode\n"
+ " -F: foreground mode (default is to daemonize)\n"
+ " -p <pid-file>: path to the PID file (used in daemon mode only)\n"
+ " default " IVSHMEM_SERVER_DEFAULT_PID_FILE "\n"
+ " -S <unix-socket-path>: path to the unix socket to listen to\n"
+ " default " IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "\n"
+ " -M <shm-name>: POSIX shared memory object to use\n"
+ " default " IVSHMEM_SERVER_DEFAULT_SHM_PATH "\n"
+ " -m <dir-name>: where to create shared memory\n"
+ " -l <size>: size of shared memory in bytes\n"
+ " suffixes K, M and G can be used, e.g. 1K means 1024\n"
+ " default %u\n"
+ " -n <nvectors>: number of vectors\n"
+ " default %u\n",
+ progname, IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
+ IVSHMEM_SERVER_DEFAULT_N_VECTORS);
+}
+
+static void
+ivshmem_server_help(const char *progname)
+{
+ fprintf(stderr, "Try '%s -h' for more information.\n", progname);
}
/* parse the program arguments, exit on error */
@@ -68,20 +71,12 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
unsigned long long v;
Error *err = NULL;
- while ((c = getopt(argc, argv,
- "h" /* help */
- "v" /* verbose */
- "F" /* foreground */
- "p:" /* pid_file */
- "S:" /* unix_socket_path */
- "m:" /* shm_path */
- "l:" /* shm_size */
- "n:" /* n_vectors */
- )) != -1) {
+ while ((c = getopt(argc, argv, "hvFp:S:m:M:l:n:")) != -1) {
switch (c) {
case 'h': /* help */
- ivshmem_server_usage(argv[0], 0);
+ ivshmem_server_usage(argv[0]);
+ exit(0);
break;
case 'v': /* verbose */
@@ -92,36 +87,41 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
args->foreground = 1;
break;
- case 'p': /* pid_file */
+ case 'p': /* pid file */
args->pid_file = optarg;
break;
- case 'S': /* unix_socket_path */
+ case 'S': /* unix socket path */
args->unix_socket_path = optarg;
break;
- case 'm': /* shm_path */
+ case 'M': /* shm name */
+ case 'm': /* dir name */
args->shm_path = optarg;
+ args->use_shm_open = c == 'M';
break;
- case 'l': /* shm_size */
+ case 'l': /* shm size */
parse_option_size("shm_size", optarg, &args->shm_size, &err);
if (err) {
error_report_err(err);
- ivshmem_server_usage(argv[0], 1);
+ ivshmem_server_help(argv[0]);
+ exit(1);
}
break;
- case 'n': /* n_vectors */
+ case 'n': /* number of vectors */
if (parse_uint_full(optarg, &v, 0) < 0) {
fprintf(stderr, "cannot parse n_vectors\n");
- ivshmem_server_usage(argv[0], 1);
+ ivshmem_server_help(argv[0]);
+ exit(1);
}
args->n_vectors = v;
break;
default:
- ivshmem_server_usage(argv[0], 1);
+ ivshmem_server_usage(argv[0]);
+ exit(1);
break;
}
}
@@ -129,12 +129,14 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
fprintf(stderr, "too many requested vectors (max is %d)\n",
IVSHMEM_SERVER_MAX_VECTORS);
- ivshmem_server_usage(argv[0], 1);
+ ivshmem_server_help(argv[0]);
+ exit(1);
}
if (args->verbose == 1 && args->foreground == 0) {
fprintf(stderr, "cannot use verbose in daemon mode\n");
- ivshmem_server_usage(argv[0], 1);
+ ivshmem_server_help(argv[0]);
+ exit(1);
}
}
@@ -192,11 +194,18 @@ main(int argc, char *argv[])
.pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
.unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
.shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
+ .use_shm_open = true,
.shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
.n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
};
int ret = 1;
+ /*
+ * Do not remove this notice without adding proper error handling!
+ * Start with handling ivshmem_server_send_one_msg() failure.
+ */
+ printf("*** Example code, do not use in production ***\n");
+
/* parse arguments, will exit on error */
ivshmem_server_parse_args(&args, argc, argv);
@@ -219,7 +228,8 @@ main(int argc, char *argv[])
}
/* init the ivshms structure */
- if (ivshmem_server_init(&server, args.unix_socket_path, args.shm_path,
+ if (ivshmem_server_init(&server, args.unix_socket_path,
+ args.shm_path, args.use_shm_open,
args.shm_size, args.n_vectors, args.verbose) < 0) {
fprintf(stderr, "cannot init server\n");
goto err;
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 4fa9a28ef6..9c8bc68c4c 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -36,5 +36,5 @@ CONFIG_SDHCI=y
CONFIG_EDU=y
CONFIG_VGA=y
CONFIG_VGA_PCI=y
-CONFIG_IVSHMEM=$(CONFIG_POSIX)
+CONFIG_IVSHMEM=$(CONFIG_EVENTFD)
CONFIG_ROCKER=y
diff --git a/docs/specs/ivshmem-spec.txt b/docs/specs/ivshmem-spec.txt
new file mode 100644
index 0000000000..a1f5499796
--- /dev/null
+++ b/docs/specs/ivshmem-spec.txt
@@ -0,0 +1,254 @@
+= Device Specification for Inter-VM shared memory device =
+
+The Inter-VM shared memory device (ivshmem) is designed to share a
+memory region between multiple QEMU processes running different guests
+and the host. In order for all guests to be able to pick up the
+shared memory area, it is modeled by QEMU as a PCI device exposing
+said memory to the guest as a PCI BAR.
+
+The device can use a shared memory object on the host directly, or it
+can obtain one from an ivshmem server.
+
+In the latter case, the device can additionally interrupt its peers, and
+get interrupted by its peers.
+
+
+== Configuring the ivshmem PCI device ==
+
+There are two basic configurations:
+
+- Just shared memory: -device ivshmem-plain,memdev=HMB,...
+
+ This uses host memory backend HMB. It should have option "share"
+ set.
+
+- Shared memory plus interrupts: -device ivshmem,chardev=CHR,vectors=N,...
+
+ An ivshmem server must already be running on the host. The device
+ connects to the server's UNIX domain socket via character device
+ CHR.
+
+ Each peer gets assigned a unique ID by the server. IDs must be
+ between 0 and 65535.
+
+ Interrupts are message-signaled (MSI-X). vectors=N configures the
+ number of vectors to use.
+
+For more details on ivshmem device properties, see The QEMU Emulator
+User Documentation (qemu-doc.*).
+
+
+== The ivshmem PCI device's guest interface ==
+
+The device has vendor ID 1af4, device ID 1110, revision 1. Before
+QEMU 2.6.0, it had revision 0.
+
+=== PCI BARs ===
+
+The ivshmem PCI device has two or three BARs:
+
+- BAR0 holds device registers (256 Byte MMIO)
+- BAR1 holds MSI-X table and PBA (only ivshmem-doorbell)
+- BAR2 maps the shared memory object
+
+There are two ways to use this device:
+
+- If you only need the shared memory part, BAR2 suffices. This way,
+ you have access to the shared memory in the guest and can use it as
+ you see fit. Memnic, for example, uses ivshmem this way from guest
+ user space (see http://dpdk.org/browse/memnic).
+
+- If you additionally need the capability for peers to interrupt each
+ other, you need BAR0 and BAR1. You will most likely want to write a
+ kernel driver to handle interrupts. Requires the device to be
+ configured for interrupts, obviously.
+
+Before QEMU 2.6.0, BAR2 can initially be invalid if the device is
+configured for interrupts. It becomes safely accessible only after
+the ivshmem server provided the shared memory. These devices have PCI
+revision 0 rather than 1. Guest software should wait for the
+IVPosition register (described below) to become non-negative before
+accessing BAR2.
+
+Revision 0 of the device is not capable to tell guest software whether
+it is configured for interrupts.
+
+=== PCI device registers ===
+
+BAR 0 contains the following registers:
+
+ Offset Size Access On reset Function
+ 0 4 read/write 0 Interrupt Mask
+ bit 0: peer interrupt (rev 0)
+ reserved (rev 1)
+ bit 1..31: reserved
+ 4 4 read/write 0 Interrupt Status
+ bit 0: peer interrupt (rev 0)
+ reserved (rev 1)
+ bit 1..31: reserved
+ 8 4 read-only 0 or ID IVPosition
+ 12 4 write-only N/A Doorbell
+ bit 0..15: vector
+ bit 16..31: peer ID
+ 16 240 none N/A reserved
+
+Software should only access the registers as specified in column
+"Access". Reserved bits should be ignored on read, and preserved on
+write.
+
+In revision 0 of the device, Interrupt Status and Mask Register
+together control the legacy INTx interrupt when the device has no
+MSI-X capability: INTx is asserted when the bit-wise AND of Status and
+Mask is non-zero and the device has no MSI-X capability. Interrupt
+Status Register bit 0 becomes 1 when an interrupt request from a peer
+is received. Reading the register clears it.
+
+IVPosition Register: if the device is not configured for interrupts,
+this is zero. Else, it is the device's ID (between 0 and 65535).
+
+Before QEMU 2.6.0, the register may read -1 for a short while after
+reset. These devices have PCI revision 0 rather than 1.
+
+There is no good way for software to find out whether the device is
+configured for interrupts. A positive IVPosition means interrupts,
+but zero could be either.
+
+Doorbell Register: writing this register requests to interrupt a peer.
+The written value's high 16 bits are the ID of the peer to interrupt,
+and its low 16 bits select an interrupt vector.
+
+If the device is not configured for interrupts, the write is ignored.
+
+If the interrupt hasn't completed setup, the write is ignored. The
+device is not capable to tell guest software whether setup is
+complete. Interrupts can regress to this state on migration.
+
+If the peer with the requested ID isn't connected, or it has fewer
+interrupt vectors connected, the write is ignored. The device is not
+capable to tell guest software what peers are connected, or how many
+interrupt vectors are connected.
+
+The peer's interrupt for this vector then becomes pending. There is
+no way for software to clear the pending bit, and a polling mode of
+operation is therefore impossible.
+
+If the peer is a revision 0 device without MSI-X capability, its
+Interrupt Status register is set to 1. This asserts INTx unless
+masked by the Interrupt Mask register. The device is not capable to
+communicate the interrupt vector to guest software then.
+
+With multiple MSI-X vectors, different vectors can be used to indicate
+different events have occurred. The semantics of interrupt vectors
+are left to the application.
+
+
+== Interrupt infrastructure ==
+
+When configured for interrupts, the peers share eventfd objects in
+addition to shared memory. The shared resources are managed by an
+ivshmem server.
+
+=== The ivshmem server ===
+
+The server listens on a UNIX domain socket.
+
+For each new client that connects to the server, the server
+- picks an ID,
+- creates eventfd file descriptors for the interrupt vectors,
+- sends the ID and the file descriptor for the shared memory to the
+ new client,
+- sends connect notifications for the new client to the other clients
+ (these contain file descriptors for sending interrupts),
+- sends connect notifications for the other clients to the new client,
+ and
+- sends interrupt setup messages to the new client (these contain file
+ descriptors for receiving interrupts).
+
+The first client to connect to the server receives ID zero.
+
+When a client disconnects from the server, the server sends disconnect
+notifications to the other clients.
+
+The next section describes the protocol in detail.
+
+If the server terminates without sending disconnect notifications for
+its connected clients, the clients can elect to continue. They can
+communicate with each other normally, but won't receive disconnect
+notification on disconnect, and no new clients can connect. There is
+no way for the clients to connect to a restarted server. The device
+is not capable to tell guest software whether the server is still up.
+
+Example server code is in contrib/ivshmem-server/. Not to be used in
+production. It assumes all clients use the same number of interrupt
+vectors.
+
+A standalone client is in contrib/ivshmem-client/. It can be useful
+for debugging.
+
+=== The ivshmem Client-Server Protocol ===
+
+An ivshmem device configured for interrupts connects to an ivshmem
+server. This section details the protocol between the two.
+
+The connection is one-way: the server sends messages to the client.
+Each message consists of a single 8 byte little-endian signed number,
+and may be accompanied by a file descriptor via SCM_RIGHTS. Both
+client and server close the connection on error.
+
+Note: QEMU currently doesn't close the connection right on error, but
+only when the character device is destroyed.
+
+On connect, the server sends the following messages in order:
+
+1. The protocol version number, currently zero. The client should
+ close the connection on receipt of versions it can't handle.
+
+2. The client's ID. This is unique among all clients of this server.
+ IDs must be between 0 and 65535, because the Doorbell register
+ provides only 16 bits for them.
+
+3. The number -1, accompanied by the file descriptor for the shared
+ memory.
+
+4. Connect notifications for existing other clients, if any. This is
+ a peer ID (number between 0 and 65535 other than the client's ID),
+ repeated N times. Each repetition is accompanied by one file
+ descriptor. These are for interrupting the peer with that ID using
+ vector 0,..,N-1, in order. If the client is configured for fewer
+ vectors, it closes the extra file descriptors. If it is configured
+ for more, the extra vectors remain unconnected.
+
+5. Interrupt setup. This is the client's own ID, repeated N times.
+ Each repetition is accompanied by one file descriptor. These are
+ for receiving interrupts from peers using vector 0,..,N-1, in
+ order. If the client is configured for fewer vectors, it closes
+ the extra file descriptors. If it is configured for more, the
+ extra vectors remain unconnected.
+
+From then on, the server sends these kinds of messages:
+
+6. Connection / disconnection notification. This is a peer ID.
+
+ - If the number comes with a file descriptor, it's a connection
+ notification, exactly like in step 4.
+
+ - Else, it's a disconnection notification for the peer with that ID.
+
+Known bugs:
+
+* The protocol changed incompatibly in QEMU 2.5. Before, messages
+ were native endian long, and there was no version number.
+
+* The protocol is poorly designed.
+
+=== The ivshmem Client-Client Protocol ===
+
+An ivshmem device configured for interrupts receives eventfd file
+descriptors for interrupting peers and getting interrupted by peers
+from the server, as explained in the previous section.
+
+To interrupt a peer, the device writes the 8-byte integer 1 in native
+byte order to the respective file descriptor.
+
+To receive an interrupt, the device reads and discards as many 8-byte
+integers as it can.
diff --git a/docs/specs/ivshmem_device_spec.txt b/docs/specs/ivshmem_device_spec.txt
deleted file mode 100644
index d318d65c32..0000000000
--- a/docs/specs/ivshmem_device_spec.txt
+++ /dev/null
@@ -1,161 +0,0 @@
-
-Device Specification for Inter-VM shared memory device
-------------------------------------------------------
-
-The Inter-VM shared memory device is designed to share a memory region (created
-on the host via the POSIX shared memory API) between multiple QEMU processes
-running different guests. In order for all guests to be able to pick up the
-shared memory area, it is modeled by QEMU as a PCI device exposing said memory
-to the guest as a PCI BAR.
-The memory region does not belong to any guest, but is a POSIX memory object on
-the host. The host can access this shared memory if needed.
-
-The device also provides an optional communication mechanism between guests
-sharing the same memory object. More details about that in the section 'Guest to
-guest communication' section.
-
-
-The Inter-VM PCI device
------------------------
-
-From the VM point of view, the ivshmem PCI device supports three BARs.
-
-- BAR0 is a 1 Kbyte MMIO region to support registers and interrupts when MSI is
- not used.
-- BAR1 is used for MSI-X when it is enabled in the device.
-- BAR2 is used to access the shared memory object.
-
-It is your choice how to use the device but you must choose between two
-behaviors :
-
-- basically, if you only need the shared memory part, you will map BAR2.
- This way, you have access to the shared memory in guest and can use it as you
- see fit (memnic, for example, uses it in userland
- http://dpdk.org/browse/memnic).
-
-- BAR0 and BAR1 are used to implement an optional communication mechanism
- through interrupts in the guests. If you need an event mechanism between the
- guests accessing the shared memory, you will most likely want to write a
- kernel driver that will handle interrupts. See details in the section 'Guest
- to guest communication' section.
-
-The behavior is chosen when starting your QEMU processes:
-- no communication mechanism needed, the first QEMU to start creates the shared
- memory on the host, subsequent QEMU processes will use it.
-
-- communication mechanism needed, an ivshmem server must be started before any
- QEMU processes, then each QEMU process connects to the server unix socket.
-
-For more details on the QEMU ivshmem parameters, see qemu-doc documentation.
-
-
-Guest to guest communication
-----------------------------
-
-This section details the communication mechanism between the guests accessing
-the ivhsmem shared memory.
-
-*ivshmem server*
-
-This server code is available in qemu.git/contrib/ivshmem-server.
-
-The server must be started on the host before any guest.
-It creates a shared memory object then waits for clients to connect on a unix
-socket. All the messages are little-endian int64_t integer.
-
-For each client (QEMU process) that connects to the server:
-- the server sends a protocol version, if client does not support it, the client
- closes the communication,
-- the server assigns an ID for this client and sends this ID to him as the first
- message,
-- the server sends a fd to the shared memory object to this client,
-- the server creates a new set of host eventfds associated to the new client and
- sends this set to all already connected clients,
-- finally, the server sends all the eventfds sets for all clients to the new
- client.
-
-The server signals all clients when one of them disconnects.
-
-The client IDs are limited to 16 bits because of the current implementation (see
-Doorbell register in 'PCI device registers' subsection). Hence only 65536
-clients are supported.
-
-All the file descriptors (fd to the shared memory, eventfds for each client)
-are passed to clients using SCM_RIGHTS over the server unix socket.
-
-Apart from the current ivshmem implementation in QEMU, an ivshmem client has
-been provided in qemu.git/contrib/ivshmem-client for debug.
-
-*QEMU as an ivshmem client*
-
-At initialisation, when creating the ivshmem device, QEMU first receives a
-protocol version and closes communication with server if it does not match.
-Then, QEMU gets its ID from the server then makes it available through BAR0
-IVPosition register for the VM to use (see 'PCI device registers' subsection).
-QEMU then uses the fd to the shared memory to map it to BAR2.
-eventfds for all other clients received from the server are stored to implement
-BAR0 Doorbell register (see 'PCI device registers' subsection).
-Finally, eventfds assigned to this QEMU process are used to send interrupts in
-this VM.
-
-*PCI device registers*
-
-From the VM point of view, the ivshmem PCI device supports 4 registers of
-32-bits each.
-
-enum ivshmem_registers {
- IntrMask = 0,
- IntrStatus = 4,
- IVPosition = 8,
- Doorbell = 12
-};
-
-The first two registers are the interrupt mask and status registers. Mask and
-status are only used with pin-based interrupts. They are unused with MSI
-interrupts.
-
-Status Register: The status register is set to 1 when an interrupt occurs.
-
-Mask Register: The mask register is bitwise ANDed with the interrupt status
-and the result will raise an interrupt if it is non-zero. However, since 1 is
-the only value the status will be set to, it is only the first bit of the mask
-that has any effect. Therefore interrupts can be masked by setting the first
-bit to 0 and unmasked by setting the first bit to 1.
-
-IVPosition Register: The IVPosition register is read-only and reports the
-guest's ID number. The guest IDs are non-negative integers. When using the
-server, since the server is a separate process, the VM ID will only be set when
-the device is ready (shared memory is received from the server and accessible
-via the device). If the device is not ready, the IVPosition will return -1.
-Applications should ensure that they have a valid VM ID before accessing the
-shared memory.
-
-Doorbell Register: To interrupt another guest, a guest must write to the
-Doorbell register. The doorbell register is 32-bits, logically divided into
-two 16-bit fields. The high 16-bits are the guest ID to interrupt and the low
-16-bits are the interrupt vector to trigger. The semantics of the value
-written to the doorbell depends on whether the device is using MSI or a regular
-pin-based interrupt. In short, MSI uses vectors while regular interrupts set
-the status register.
-
-Regular Interrupts
-
-If regular interrupts are used (due to either a guest not supporting MSI or the
-user specifying not to use them on startup) then the value written to the lower
-16-bits of the Doorbell register results is arbitrary and will trigger an
-interrupt in the destination guest.
-
-Message Signalled Interrupts
-
-An ivshmem device may support multiple MSI vectors. If so, the lower 16-bits
-written to the Doorbell register must be between 0 and the maximum number of
-vectors the guest supports. The lower 16 bits written to the doorbell is the
-MSI vector that will be raised in the destination guest. The number of MSI
-vectors is configurable but it is set when the VM is started.
-
-The important thing to remember with MSI is that it is only a signal, no status
-is set (since MSI interrupts are not shared). All information other than the
-interrupt itself should be communicated via the shared memory region. Devices
-supporting multiple MSI vectors can use different vectors to indicate different
-events have occurred. The semantics of interrupt vectors are left to the
-user's discretion.
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index bc89800246..d2f5a08af6 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -516,6 +516,16 @@ PropertyInfo qdev_prop_macaddr = {
.set = set_mac,
};
+/* --- on/off/auto --- */
+
+PropertyInfo qdev_prop_on_off_auto = {
+ .name = "OnOffAuto",
+ .description = "on/off/auto",
+ .enum_table = OnOffAuto_lookup,
+ .get = get_enum,
+ .set = set_enum,
+};
+
/* --- lost tick policy --- */
QEMU_BUILD_BUG_ON(sizeof(LostTickPolicy) != sizeof(int));
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 1838bc8506..132387f04b 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -26,9 +26,10 @@
#include "migration/migration.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
-#include "qemu/fifo8.h"
+#include "qom/object_interfaces.h"
#include "sysemu/char.h"
#include "sysemu/hostmem.h"
+#include "sysemu/qtest.h"
#include "qapi/visitor.h"
#include "exec/ram_addr.h"
@@ -39,22 +40,31 @@
#define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM 0x1110
-#define IVSHMEM_MAX_PEERS G_MAXUINT16
+#define IVSHMEM_MAX_PEERS UINT16_MAX
#define IVSHMEM_IOEVENTFD 0
#define IVSHMEM_MSI 1
-#define IVSHMEM_PEER 0
-#define IVSHMEM_MASTER 1
-
#define IVSHMEM_REG_BAR_SIZE 0x100
-//#define DEBUG_IVSHMEM
-#ifdef DEBUG_IVSHMEM
-#define IVSHMEM_DPRINTF(fmt, ...) \
- do {printf("IVSHMEM: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define IVSHMEM_DPRINTF(fmt, ...)
-#endif
+#define IVSHMEM_DEBUG 0
+#define IVSHMEM_DPRINTF(fmt, ...) \
+ do { \
+ if (IVSHMEM_DEBUG) { \
+ printf("IVSHMEM: " fmt, ## __VA_ARGS__); \
+ } \
+ } while (0)
+
+#define TYPE_IVSHMEM_COMMON "ivshmem-common"
+#define IVSHMEM_COMMON(obj) \
+ OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON)
+
+#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
+#define IVSHMEM_PLAIN(obj) \
+ OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN)
+
+#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
+#define IVSHMEM_DOORBELL(obj) \
+ OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL)
#define TYPE_IVSHMEM "ivshmem"
#define IVSHMEM(obj) \
@@ -75,38 +85,40 @@ typedef struct IVShmemState {
PCIDevice parent_obj;
/*< public >*/
- HostMemoryBackend *hostmem;
+ uint32_t features;
+
+ /* exactly one of these two may be set */
+ HostMemoryBackend *hostmem; /* with interrupts */
+ CharDriverState *server_chr; /* without interrupts */
+
+ /* registers */
uint32_t intrmask;
uint32_t intrstatus;
+ int vm_id;
- CharDriverState **eventfd_chr;
- CharDriverState *server_chr;
- Fifo8 incoming_fifo;
- MemoryRegion ivshmem_mmio;
-
- /* We might need to register the BAR before we actually have the memory.
- * So prepare a container MemoryRegion for the BAR immediately and
- * add a subregion when we have the memory.
- */
- MemoryRegion bar;
- MemoryRegion ivshmem;
- uint64_t ivshmem_size; /* size of shared memory region */
- uint32_t ivshmem_64bit;
+ /* BARs */
+ MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */
+ MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
+ MemoryRegion server_bar2; /* used with server_chr */
+ /* interrupt support */
Peer *peers;
- int nb_peers; /* how many peers we have space for */
-
- int vm_id;
+ int nb_peers; /* space in @peers[] */
uint32_t vectors;
- uint32_t features;
MSIVector *msi_vectors;
+ uint64_t msg_buf; /* buffer for receiving server messages */
+ int msg_buffered_bytes; /* #bytes in @msg_buf */
+ /* migration stuff */
+ OnOffAuto master;
Error *migration_blocker;
- char * shmobj;
- char * sizearg;
- char * role;
- int role_val; /* scalar to avoid multiple string comparisons */
+ /* legacy cruft */
+ char *role;
+ char *shmobj;
+ char *sizearg;
+ size_t legacy_size;
+ uint32_t not_legacy_32bit;
} IVShmemState;
/* registers for the Inter-VM shared memory device */
@@ -122,12 +134,34 @@ static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
return (ivs->features & (1 << feature));
}
-/* accessing registers - based on rtl8139 */
+static inline bool ivshmem_is_master(IVShmemState *s)
+{
+ assert(s->master != ON_OFF_AUTO_AUTO);
+ return s->master == ON_OFF_AUTO_ON;
+}
+
static void ivshmem_update_irq(IVShmemState *s)
{
PCIDevice *d = PCI_DEVICE(s);
- int isr;
- isr = (s->intrstatus & s->intrmask) & 0xffffffff;
+ uint32_t isr = s->intrstatus & s->intrmask;
+
+ /*
+ * Do nothing unless the device actually uses INTx. Here's how
+ * the device variants signal interrupts, what they put in PCI
+ * config space:
+ * Device variant Interrupt Interrupt Pin MSI-X cap.
+ * ivshmem-plain none 0 no
+ * ivshmem-doorbell MSI-X 1 yes(1)
+ * ivshmem,msi=off INTx 1 no
+ * ivshmem,msi=on MSI-X 1(2) yes(1)
+ * (1) if guest enabled MSI-X
+ * (2) the device lies
+ * Leads to the condition for doing nothing:
+ */
+ if (ivshmem_has_feature(s, IVSHMEM_MSI)
+ || !d->config[PCI_INTERRUPT_PIN]) {
+ return;
+ }
/* don't print ISR resets */
if (isr) {
@@ -135,7 +169,7 @@ static void ivshmem_update_irq(IVShmemState *s)
isr ? 1 : 0, s->intrstatus, s->intrmask);
}
- pci_set_irq(d, (isr != 0));
+ pci_set_irq(d, isr != 0);
}
static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
@@ -143,7 +177,6 @@ static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
s->intrmask = val;
-
ivshmem_update_irq(s);
}
@@ -152,7 +185,6 @@ static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
uint32_t ret = s->intrmask;
IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
-
return ret;
}
@@ -161,7 +193,6 @@ static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
s->intrstatus = val;
-
ivshmem_update_irq(s);
}
@@ -171,9 +202,7 @@ static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
/* reading ISR clears all interrupts */
s->intrstatus = 0;
-
ivshmem_update_irq(s);
-
return ret;
}
@@ -237,12 +266,7 @@ static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
break;
case IVPOSITION:
- /* return my VM ID if the memory is mapped */
- if (memory_region_is_mapped(&s->ivshmem)) {
- ret = s->vm_id;
- } else {
- ret = -1;
- }
+ ret = s->vm_id;
break;
default:
@@ -263,21 +287,11 @@ static const MemoryRegionOps ivshmem_mmio_ops = {
},
};
-static int ivshmem_can_receive(void * opaque)
-{
- return sizeof(int64_t);
-}
-
-static void ivshmem_event(void *opaque, int event)
-{
- IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
-}
-
static void ivshmem_vector_notify(void *opaque)
{
MSIVector *entry = opaque;
PCIDevice *pdev = entry->pdev;
- IVShmemState *s = IVSHMEM(pdev);
+ IVShmemState *s = IVSHMEM_COMMON(pdev);
int vector = entry - s->msi_vectors;
EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
@@ -287,7 +301,9 @@ static void ivshmem_vector_notify(void *opaque)
IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
- msix_notify(pdev, vector);
+ if (msix_enabled(pdev)) {
+ msix_notify(pdev, vector);
+ }
} else {
ivshmem_IntrStatus_write(s, 1);
}
@@ -296,7 +312,7 @@ static void ivshmem_vector_notify(void *opaque)
static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
MSIMessage msg)
{
- IVShmemState *s = IVSHMEM(dev);
+ IVShmemState *s = IVSHMEM_COMMON(dev);
EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
MSIVector *v = &s->msi_vectors[vector];
int ret;
@@ -313,7 +329,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
{
- IVShmemState *s = IVSHMEM(dev);
+ IVShmemState *s = IVSHMEM_COMMON(dev);
EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
int ret;
@@ -330,7 +346,7 @@ static void ivshmem_vector_poll(PCIDevice *dev,
unsigned int vector_start,
unsigned int vector_end)
{
- IVShmemState *s = IVSHMEM(dev);
+ IVShmemState *s = IVSHMEM_COMMON(dev);
unsigned int vector;
IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
@@ -355,61 +371,13 @@ static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
{
int eventfd = event_notifier_get_fd(n);
- /* if MSI is supported we need multiple interrupts */
+ assert(!s->msi_vectors[vector].pdev);
s->msi_vectors[vector].pdev = PCI_DEVICE(s);
qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
NULL, &s->msi_vectors[vector]);
}
-static int check_shm_size(IVShmemState *s, int fd, Error **errp)
-{
- /* check that the guest isn't going to try and map more memory than the
- * the object has allocated return -1 to indicate error */
-
- struct stat buf;
-
- if (fstat(fd, &buf) < 0) {
- error_setg(errp, "exiting: fstat on fd %d failed: %s",
- fd, strerror(errno));
- return -1;
- }
-
- if (s->ivshmem_size > buf.st_size) {
- error_setg(errp, "Requested memory size greater"
- " than shared object size (%" PRIu64 " > %" PRIu64")",
- s->ivshmem_size, (uint64_t)buf.st_size);
- return -1;
- } else {
- return 0;
- }
-}
-
-/* create the shared memory BAR when we are not using the server, so we can
- * create the BAR and map the memory immediately */
-static int create_shared_memory_BAR(IVShmemState *s, int fd, uint8_t attr,
- Error **errp)
-{
- void * ptr;
-
- ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
- if (ptr == MAP_FAILED) {
- error_setg_errno(errp, errno, "Failed to mmap shared memory");
- return -1;
- }
-
- memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s), "ivshmem.bar2",
- s->ivshmem_size, ptr);
- qemu_set_ram_fd(memory_region_get_ram_addr(&s->ivshmem), fd);
- vmstate_register_ram(&s->ivshmem, DEVICE(s));
- memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
-
- /* region for shared memory */
- pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar);
-
- return 0;
-}
-
static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
{
memory_region_add_eventfd(&s->ivshmem_mmio,
@@ -434,21 +402,17 @@ static void close_peer_eventfds(IVShmemState *s, int posn)
{
int i, n;
- if (!ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
- return;
- }
- if (posn < 0 || posn >= s->nb_peers) {
- error_report("invalid peer %d", posn);
- return;
- }
-
+ assert(posn >= 0 && posn < s->nb_peers);
n = s->peers[posn].nb_eventfds;
- memory_region_transaction_begin();
- for (i = 0; i < n; i++) {
- ivshmem_del_eventfd(s, posn, i);
+ if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
+ memory_region_transaction_begin();
+ for (i = 0; i < n; i++) {
+ ivshmem_del_eventfd(s, posn, i);
+ }
+ memory_region_transaction_commit();
}
- memory_region_transaction_commit();
+
for (i = 0; i < n; i++) {
event_notifier_cleanup(&s->peers[posn].eventfds[i]);
}
@@ -457,286 +421,320 @@ static void close_peer_eventfds(IVShmemState *s, int posn)
s->peers[posn].nb_eventfds = 0;
}
-/* this function increase the dynamic storage need to store data about other
- * peers */
-static int resize_peers(IVShmemState *s, int new_min_size)
-{
-
- int j, old_size;
-
- /* limit number of max peers */
- if (new_min_size <= 0 || new_min_size > IVSHMEM_MAX_PEERS) {
- return -1;
- }
- if (new_min_size <= s->nb_peers) {
- return 0;
- }
-
- old_size = s->nb_peers;
- s->nb_peers = new_min_size;
-
- IVSHMEM_DPRINTF("bumping storage to %d peers\n", s->nb_peers);
-
- s->peers = g_realloc(s->peers, s->nb_peers * sizeof(Peer));
-
- for (j = old_size; j < s->nb_peers; j++) {
- s->peers[j].eventfds = g_new0(EventNotifier, s->vectors);
- s->peers[j].nb_eventfds = 0;
- }
-
- return 0;
-}
-
-static bool fifo_update_and_get(IVShmemState *s, const uint8_t *buf, int size,
- void *data, size_t len)
+static void resize_peers(IVShmemState *s, int nb_peers)
{
- const uint8_t *p;
- uint32_t num;
-
- assert(len <= sizeof(int64_t)); /* limitation of the fifo */
- if (fifo8_is_empty(&s->incoming_fifo) && size == len) {
- memcpy(data, buf, size);
- return true;
- }
-
- IVSHMEM_DPRINTF("short read of %d bytes\n", size);
-
- num = MIN(size, sizeof(int64_t) - fifo8_num_used(&s->incoming_fifo));
- fifo8_push_all(&s->incoming_fifo, buf, num);
-
- if (fifo8_num_used(&s->incoming_fifo) < len) {
- assert(num == 0);
- return false;
- }
-
- size -= num;
- buf += num;
- p = fifo8_pop_buf(&s->incoming_fifo, len, &num);
- assert(num == len);
-
- memcpy(data, p, len);
+ int old_nb_peers = s->nb_peers;
+ int i;
- if (size > 0) {
- fifo8_push_all(&s->incoming_fifo, buf, size);
- }
+ assert(nb_peers > old_nb_peers);
+ IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);
- return true;
-}
+ s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
+ s->nb_peers = nb_peers;
-static bool fifo_update_and_get_i64(IVShmemState *s,
- const uint8_t *buf, int size, int64_t *i64)
-{
- if (fifo_update_and_get(s, buf, size, i64, sizeof(*i64))) {
- *i64 = GINT64_FROM_LE(*i64);
- return true;
+ for (i = old_nb_peers; i < nb_peers; i++) {
+ s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
+ s->peers[i].nb_eventfds = 0;
}
-
- return false;
}
-static int ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector)
+static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
+ Error **errp)
{
PCIDevice *pdev = PCI_DEVICE(s);
MSIMessage msg = msix_get_message(pdev, vector);
int ret;
IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
-
- if (s->msi_vectors[vector].pdev != NULL) {
- return 0;
- }
+ assert(!s->msi_vectors[vector].pdev);
ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev);
if (ret < 0) {
- error_report("ivshmem: kvm_irqchip_add_msi_route failed");
- return -1;
+ error_setg(errp, "kvm_irqchip_add_msi_route failed");
+ return;
}
s->msi_vectors[vector].virq = ret;
s->msi_vectors[vector].pdev = pdev;
-
- return 0;
}
-static void setup_interrupt(IVShmemState *s, int vector)
+static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
{
EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
ivshmem_has_feature(s, IVSHMEM_MSI);
PCIDevice *pdev = PCI_DEVICE(s);
+ Error *err = NULL;
IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
if (!with_irqfd) {
- IVSHMEM_DPRINTF("with eventfd");
+ IVSHMEM_DPRINTF("with eventfd\n");
watch_vector_notifier(s, n, vector);
} else if (msix_enabled(pdev)) {
- IVSHMEM_DPRINTF("with irqfd");
- if (ivshmem_add_kvm_msi_virq(s, vector) < 0) {
+ IVSHMEM_DPRINTF("with irqfd\n");
+ ivshmem_add_kvm_msi_virq(s, vector, &err);
+ if (err) {
+ error_propagate(errp, err);
return;
}
if (!msix_is_masked(pdev, vector)) {
kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
s->msi_vectors[vector].virq);
+ /* TODO handle error */
}
} else {
/* it will be delayed until msix is enabled, in write_config */
- IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled");
+ IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
}
}
-static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
+static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
{
- IVShmemState *s = opaque;
- int incoming_fd;
- int new_eventfd;
- int64_t incoming_posn;
- Error *err = NULL;
- Peer *peer;
+ struct stat buf;
+ size_t size;
+ void *ptr;
- if (!fifo_update_and_get_i64(s, buf, size, &incoming_posn)) {
+ if (s->ivshmem_bar2) {
+ error_setg(errp, "server sent unexpected shared memory message");
+ close(fd);
return;
}
- if (incoming_posn < -1) {
- IVSHMEM_DPRINTF("invalid incoming_posn %" PRId64 "\n", incoming_posn);
+ if (fstat(fd, &buf) < 0) {
+ error_setg_errno(errp, errno,
+ "can't determine size of shared memory sent by server");
+ close(fd);
return;
}
- /* pick off s->server_chr->msgfd and store it, posn should accompany msg */
- incoming_fd = qemu_chr_fe_get_msgfd(s->server_chr);
- IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n",
- incoming_posn, incoming_fd);
-
- /* make sure we have enough space for this peer */
- if (incoming_posn >= s->nb_peers) {
- if (resize_peers(s, incoming_posn + 1) < 0) {
- error_report("failed to resize peers array");
- if (incoming_fd != -1) {
- close(incoming_fd);
- }
+ size = buf.st_size;
+
+ /* Legacy cruft */
+ if (s->legacy_size != SIZE_MAX) {
+ if (size < s->legacy_size) {
+ error_setg(errp, "server sent only %zd bytes of shared memory",
+ (size_t)buf.st_size);
+ close(fd);
return;
}
+ size = s->legacy_size;
}
- peer = &s->peers[incoming_posn];
+ /* mmap the region and map into the BAR2 */
+ ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (ptr == MAP_FAILED) {
+ error_setg_errno(errp, errno, "Failed to mmap shared memory");
+ close(fd);
+ return;
+ }
+ memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s),
+ "ivshmem.bar2", size, ptr);
+ qemu_set_ram_fd(memory_region_get_ram_addr(&s->server_bar2), fd);
+ s->ivshmem_bar2 = &s->server_bar2;
+}
- if (incoming_fd == -1) {
- /* if posn is positive and unseen before then this is our posn*/
- if (incoming_posn >= 0 && s->vm_id == -1) {
- /* receive our posn */
- s->vm_id = incoming_posn;
- } else {
- /* otherwise an fd == -1 means an existing peer has gone away */
- IVSHMEM_DPRINTF("posn %" PRId64 " has gone away\n", incoming_posn);
- close_peer_eventfds(s, incoming_posn);
- }
+static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
+ Error **errp)
+{
+ IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
+ if (posn >= s->nb_peers || posn == s->vm_id) {
+ error_setg(errp, "invalid peer %d", posn);
return;
}
+ close_peer_eventfds(s, posn);
+}
- /* if the position is -1, then it's shared memory region fd */
- if (incoming_posn == -1) {
- void * map_ptr;
+static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
+ Error **errp)
+{
+ Peer *peer = &s->peers[posn];
+ int vector;
- if (memory_region_is_mapped(&s->ivshmem)) {
- error_report("shm already initialized");
- close(incoming_fd);
- return;
- }
+ /*
+ * The N-th connect message for this peer comes with the file
+ * descriptor for vector N-1. Count messages to find the vector.
+ */
+ if (peer->nb_eventfds >= s->vectors) {
+ error_setg(errp, "Too many eventfd received, device has %d vectors",
+ s->vectors);
+ close(fd);
+ return;
+ }
+ vector = peer->nb_eventfds++;
- if (check_shm_size(s, incoming_fd, &err) == -1) {
- error_report_err(err);
- close(incoming_fd);
- return;
- }
+ IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
+ event_notifier_init_fd(&peer->eventfds[vector], fd);
+ fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */
- /* mmap the region and map into the BAR2 */
- map_ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED,
- incoming_fd, 0);
- if (map_ptr == MAP_FAILED) {
- error_report("Failed to mmap shared memory %s", strerror(errno));
- close(incoming_fd);
- return;
- }
- memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s),
- "ivshmem.bar2", s->ivshmem_size, map_ptr);
- qemu_set_ram_fd(memory_region_get_ram_addr(&s->ivshmem),
- incoming_fd);
- vmstate_register_ram(&s->ivshmem, DEVICE(s));
+ if (posn == s->vm_id) {
+ setup_interrupt(s, vector, errp);
+ /* TODO do we need to handle the error? */
+ }
- IVSHMEM_DPRINTF("guest h/w addr = %p, size = %" PRIu64 "\n",
- map_ptr, s->ivshmem_size);
+ if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
+ ivshmem_add_eventfd(s, posn, vector);
+ }
+}
- memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
+static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
+{
+ IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
+ if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
+ error_setg(errp, "server sent invalid message %" PRId64, msg);
+ close(fd);
return;
}
- /* each peer has an associated array of eventfds, and we keep
- * track of how many eventfds received so far */
- /* get a new eventfd: */
- if (peer->nb_eventfds >= s->vectors) {
- error_report("Too many eventfd received, device has %d vectors",
- s->vectors);
- close(incoming_fd);
+ if (msg == -1) {
+ process_msg_shmem(s, fd, errp);
return;
}
- new_eventfd = peer->nb_eventfds++;
+ if (msg >= s->nb_peers) {
+ resize_peers(s, msg + 1);
+ }
- /* this is an eventfd for a particular peer VM */
- IVSHMEM_DPRINTF("eventfds[%" PRId64 "][%d] = %d\n", incoming_posn,
- new_eventfd, incoming_fd);
- event_notifier_init_fd(&peer->eventfds[new_eventfd], incoming_fd);
- fcntl_setfl(incoming_fd, O_NONBLOCK); /* msix/irqfd poll non block */
+ if (fd >= 0) {
+ process_msg_connect(s, msg, fd, errp);
+ } else {
+ process_msg_disconnect(s, msg, errp);
+ }
+}
+
+static int ivshmem_can_receive(void *opaque)
+{
+ IVShmemState *s = opaque;
+
+ assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
+ return sizeof(s->msg_buf) - s->msg_buffered_bytes;
+}
+
+static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
+{
+ IVShmemState *s = opaque;
+ Error *err = NULL;
+ int fd;
+ int64_t msg;
- if (incoming_posn == s->vm_id) {
- setup_interrupt(s, new_eventfd);
+ assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
+ memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
+ s->msg_buffered_bytes += size;
+ if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
+ return;
}
+ msg = le64_to_cpu(s->msg_buf);
+ s->msg_buffered_bytes = 0;
- if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
- ivshmem_add_eventfd(s, incoming_posn, new_eventfd);
+ fd = qemu_chr_fe_get_msgfd(s->server_chr);
+ IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
+
+ process_msg(s, msg, fd, &err);
+ if (err) {
+ error_report_err(err);
}
}
-static void ivshmem_check_version(void *opaque, const uint8_t * buf, int size)
+static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
{
- IVShmemState *s = opaque;
- int tmp;
- int64_t version;
+ int64_t msg;
+ int n, ret;
+
+ n = 0;
+ do {
+ ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n,
+ sizeof(msg) - n);
+ if (ret < 0 && ret != -EINTR) {
+ error_setg_errno(errp, -ret, "read from server failed");
+ return INT64_MIN;
+ }
+ n += ret;
+ } while (n < sizeof(msg));
- if (!fifo_update_and_get_i64(s, buf, size, &version)) {
+ *pfd = qemu_chr_fe_get_msgfd(s->server_chr);
+ return msg;
+}
+
+static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
+{
+ Error *err = NULL;
+ int64_t msg;
+ int fd;
+
+ msg = ivshmem_recv_msg(s, &fd, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+ if (msg != IVSHMEM_PROTOCOL_VERSION) {
+ error_setg(errp, "server sent version %" PRId64 ", expecting %d",
+ msg, IVSHMEM_PROTOCOL_VERSION);
+ return;
+ }
+ if (fd != -1) {
+ error_setg(errp, "server sent invalid version message");
return;
}
- tmp = qemu_chr_fe_get_msgfd(s->server_chr);
- if (tmp != -1 || version != IVSHMEM_PROTOCOL_VERSION) {
- fprintf(stderr, "incompatible version, you are connecting to a ivshmem-"
- "server using a different protocol please check your setup\n");
- qemu_chr_delete(s->server_chr);
- s->server_chr = NULL;
+ /*
+ * ivshmem-server sends the remaining initial messages in a fixed
+ * order, but the device has always accepted them in any order.
+ * Stay as compatible as practical, just in case people use
+ * servers that behave differently.
+ */
+
+ /*
+ * ivshmem_device_spec.txt has always required the ID message
+ * right here, and ivshmem-server has always complied. However,
+ * older versions of the device accepted it out of order, but
+ * broke when an interrupt setup message arrived before it.
+ */
+ msg = ivshmem_recv_msg(s, &fd, &err);
+ if (err) {
+ error_propagate(errp, err);
return;
}
+ if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
+ error_setg(errp, "server sent invalid ID message");
+ return;
+ }
+ s->vm_id = msg;
+
+ /*
+ * Receive more messages until we got shared memory.
+ */
+ do {
+ msg = ivshmem_recv_msg(s, &fd, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+ process_msg(s, msg, fd, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+ } while (msg != -1);
- IVSHMEM_DPRINTF("version check ok, switch to real chardev handler\n");
- qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read,
- ivshmem_event, s);
+ /*
+ * This function must either map the shared memory or fail. The
+ * loop above ensures that: it terminates normally only after it
+ * successfully processed the server's shared memory message.
+ * Assert that actually mapped the shared memory:
+ */
+ assert(s->ivshmem_bar2);
}
/* Select the MSI-X vectors used by device.
* ivshmem maps events to vectors statically, so
* we just enable all vectors on init and after reset. */
-static void ivshmem_use_msix(IVShmemState * s)
+static void ivshmem_msix_vector_use(IVShmemState *s)
{
PCIDevice *d = PCI_DEVICE(s);
int i;
- IVSHMEM_DPRINTF("%s, msix present: %d\n", __func__, msix_present(d));
- if (!msix_present(d)) {
- return;
- }
-
for (i = 0; i < s->vectors; i++) {
msix_vector_use(d, i);
}
@@ -744,11 +742,13 @@ static void ivshmem_use_msix(IVShmemState * s)
static void ivshmem_reset(DeviceState *d)
{
- IVShmemState *s = IVSHMEM(d);
+ IVShmemState *s = IVSHMEM_COMMON(d);
s->intrstatus = 0;
s->intrmask = 0;
- ivshmem_use_msix(s);
+ if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
+ ivshmem_msix_vector_use(s);
+ }
}
static int ivshmem_setup_interrupts(IVShmemState *s)
@@ -762,7 +762,7 @@ static int ivshmem_setup_interrupts(IVShmemState *s)
}
IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
- ivshmem_use_msix(s);
+ ivshmem_msix_vector_use(s);
}
return 0;
@@ -774,7 +774,13 @@ static void ivshmem_enable_irqfd(IVShmemState *s)
int i;
for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
- ivshmem_add_kvm_msi_virq(s, i);
+ Error *err = NULL;
+
+ ivshmem_add_kvm_msi_virq(s, i, &err);
+ if (err) {
+ error_report_err(err);
+ /* TODO do we need to handle the error? */
+ }
}
if (msix_set_vector_notifiers(pdev,
@@ -814,13 +820,13 @@ static void ivshmem_disable_irqfd(IVShmemState *s)
static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
uint32_t val, int len)
{
- IVShmemState *s = IVSHMEM(pdev);
+ IVShmemState *s = IVSHMEM_COMMON(pdev);
int is_enabled, was_enabled = msix_enabled(pdev);
pci_default_write_config(pdev, address, val, len);
is_enabled = msix_enabled(pdev);
- if (kvm_msi_via_irqfd_enabled() && s->vm_id != -1) {
+ if (kvm_msi_via_irqfd_enabled()) {
if (!was_enabled && is_enabled) {
ivshmem_enable_irqfd(s);
} else if (was_enabled && !is_enabled) {
@@ -829,42 +835,14 @@ static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
}
}
-static void pci_ivshmem_realize(PCIDevice *dev, Error **errp)
+static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
{
- IVShmemState *s = IVSHMEM(dev);
+ IVShmemState *s = IVSHMEM_COMMON(dev);
+ Error *err = NULL;
uint8_t *pci_conf;
uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_PREFETCH;
- if (!!s->server_chr + !!s->shmobj + !!s->hostmem != 1) {
- error_setg(errp,
- "You must specify either 'shm', 'chardev' or 'x-memdev'");
- return;
- }
-
- if (s->hostmem) {
- MemoryRegion *mr;
-
- if (s->sizearg) {
- g_warning("size argument ignored with hostmem");
- }
-
- mr = host_memory_backend_get_memory(s->hostmem, errp);
- s->ivshmem_size = memory_region_size(mr);
- } else if (s->sizearg == NULL) {
- s->ivshmem_size = 4 << 20; /* 4 MB default */
- } else {
- char *end;
- int64_t size = qemu_strtosz(s->sizearg, &end);
- if (size < 0 || *end != '\0' || !is_power_of_2(size)) {
- error_setg(errp, "Invalid size %s", s->sizearg);
- return;
- }
- s->ivshmem_size = size;
- }
-
- fifo8_create(&s->incoming_fifo, sizeof(int64_t));
-
/* IRQFD requires MSI */
if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
!ivshmem_has_feature(s, IVSHMEM_MSI)) {
@@ -872,31 +850,9 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp)
return;
}
- /* check that role is reasonable */
- if (s->role) {
- if (strncmp(s->role, "peer", 5) == 0) {
- s->role_val = IVSHMEM_PEER;
- } else if (strncmp(s->role, "master", 7) == 0) {
- s->role_val = IVSHMEM_MASTER;
- } else {
- error_setg(errp, "'role' must be 'peer' or 'master'");
- return;
- }
- } else {
- s->role_val = IVSHMEM_MASTER; /* default */
- }
-
- if (s->role_val == IVSHMEM_PEER) {
- error_setg(&s->migration_blocker,
- "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
- migrate_add_blocker(s->migration_blocker);
- }
-
pci_conf = dev->config;
pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
- pci_config_set_interrupt_pin(pci_conf, 1);
-
memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
"ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);
@@ -904,116 +860,87 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp)
pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
&s->ivshmem_mmio);
- memory_region_init(&s->bar, OBJECT(s), "ivshmem-bar2-container", s->ivshmem_size);
- if (s->ivshmem_64bit) {
+ if (!s->not_legacy_32bit) {
attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
}
if (s->hostmem != NULL) {
- MemoryRegion *mr;
-
IVSHMEM_DPRINTF("using hostmem\n");
- mr = host_memory_backend_get_memory(MEMORY_BACKEND(s->hostmem), errp);
- vmstate_register_ram(mr, DEVICE(s));
- memory_region_add_subregion(&s->bar, 0, mr);
- pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar);
- } else if (s->server_chr != NULL) {
- /* FIXME do not rely on what chr drivers put into filename */
- if (strncmp(s->server_chr->filename, "unix:", 5)) {
- error_setg(errp, "chardev is not a unix client socket");
- return;
- }
-
- /* if we get a UNIX socket as the parameter we will talk
- * to the ivshmem server to receive the memory region */
-
+ s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem,
+ &error_abort);
+ } else {
IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
s->server_chr->filename);
- if (ivshmem_setup_interrupts(s) < 0) {
- error_setg(errp, "failed to initialize interrupts");
- return;
- }
-
/* we allocate enough space for 16 peers and grow as needed */
resize_peers(s, 16);
- s->vm_id = -1;
- pci_register_bar(dev, 2, attr, &s->bar);
+ /*
+ * Receive setup messages from server synchronously.
+ * Older versions did it asynchronously, but that creates a
+ * number of entertaining race conditions.
+ */
+ ivshmem_recv_setup(s, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
- s->eventfd_chr = g_malloc0(s->vectors * sizeof(CharDriverState *));
+ if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
+ error_setg(errp,
+ "master must connect to the server before any peers");
+ return;
+ }
qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive,
- ivshmem_check_version, ivshmem_event, s);
- } else {
- /* just map the file immediately, we're not using a server */
- int fd;
-
- IVSHMEM_DPRINTF("using shm_open (shm object = %s)\n", s->shmobj);
-
- /* try opening with O_EXCL and if it succeeds zero the memory
- * by truncating to 0 */
- if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR|O_EXCL,
- S_IRWXU|S_IRWXG|S_IRWXO)) > 0) {
- /* truncate file to length PCI device's memory */
- if (ftruncate(fd, s->ivshmem_size) != 0) {
- error_report("could not truncate shared file");
- }
+ ivshmem_read, NULL, s);
- } else if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR,
- S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
- error_setg(errp, "could not open shared file");
+ if (ivshmem_setup_interrupts(s) < 0) {
+ error_setg(errp, "failed to initialize interrupts");
return;
}
+ }
- if (check_shm_size(s, fd, errp) == -1) {
- return;
- }
+ vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
+ pci_register_bar(PCI_DEVICE(s), 2, attr, s->ivshmem_bar2);
- create_shared_memory_BAR(s, fd, attr, errp);
+ if (s->master == ON_OFF_AUTO_AUTO) {
+ s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+ }
+
+ if (!ivshmem_is_master(s)) {
+ error_setg(&s->migration_blocker,
+ "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
+ migrate_add_blocker(s->migration_blocker);
}
}
-static void pci_ivshmem_exit(PCIDevice *dev)
+static void ivshmem_exit(PCIDevice *dev)
{
- IVShmemState *s = IVSHMEM(dev);
+ IVShmemState *s = IVSHMEM_COMMON(dev);
int i;
- fifo8_destroy(&s->incoming_fifo);
-
if (s->migration_blocker) {
migrate_del_blocker(s->migration_blocker);
error_free(s->migration_blocker);
}
- if (memory_region_is_mapped(&s->ivshmem)) {
+ if (memory_region_is_mapped(s->ivshmem_bar2)) {
if (!s->hostmem) {
- void *addr = memory_region_get_ram_ptr(&s->ivshmem);
+ void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
int fd;
- if (munmap(addr, s->ivshmem_size) == -1) {
+ if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) {
error_report("Failed to munmap shared memory %s",
strerror(errno));
}
- fd = qemu_get_ram_fd(memory_region_get_ram_addr(&s->ivshmem));
- if (fd != -1) {
- close(fd);
- }
+ fd = qemu_get_ram_fd(memory_region_get_ram_addr(s->ivshmem_bar2));
+ close(fd);
}
- vmstate_unregister_ram(&s->ivshmem, DEVICE(dev));
- memory_region_del_subregion(&s->bar, &s->ivshmem);
- }
-
- if (s->eventfd_chr) {
- for (i = 0; i < s->vectors; i++) {
- if (s->eventfd_chr[i]) {
- qemu_chr_free(s->eventfd_chr[i]);
- }
- }
- g_free(s->eventfd_chr);
+ vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
}
if (s->peers) {
@@ -1030,23 +957,11 @@ static void pci_ivshmem_exit(PCIDevice *dev)
g_free(s->msi_vectors);
}
-static bool test_msix(void *opaque, int version_id)
-{
- IVShmemState *s = opaque;
-
- return ivshmem_has_feature(s, IVSHMEM_MSI);
-}
-
-static bool test_no_msix(void *opaque, int version_id)
-{
- return !test_msix(opaque, version_id);
-}
-
static int ivshmem_pre_load(void *opaque)
{
IVShmemState *s = opaque;
- if (s->role_val == IVSHMEM_PEER) {
+ if (!ivshmem_is_master(s)) {
error_report("'peer' devices are not migratable");
return -EINVAL;
}
@@ -1059,12 +974,145 @@ static int ivshmem_post_load(void *opaque, int version_id)
IVShmemState *s = opaque;
if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
- ivshmem_use_msix(s);
+ ivshmem_msix_vector_use(s);
}
-
return 0;
}
+static void ivshmem_common_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+ k->realize = ivshmem_common_realize;
+ k->exit = ivshmem_exit;
+ k->config_write = ivshmem_write_config;
+ k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
+ k->device_id = PCI_DEVICE_ID_IVSHMEM;
+ k->class_id = PCI_CLASS_MEMORY_RAM;
+ k->revision = 1;
+ dc->reset = ivshmem_reset;
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+ dc->desc = "Inter-VM shared memory";
+}
+
+static const TypeInfo ivshmem_common_info = {
+ .name = TYPE_IVSHMEM_COMMON,
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(IVShmemState),
+ .abstract = true,
+ .class_init = ivshmem_common_class_init,
+};
+
+static void ivshmem_check_memdev_is_busy(Object *obj, const char *name,
+ Object *val, Error **errp)
+{
+ MemoryRegion *mr;
+
+ mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &error_abort);
+ if (memory_region_is_mapped(mr)) {
+ char *path = object_get_canonical_path_component(val);
+ error_setg(errp, "can't use already busy memdev: %s", path);
+ g_free(path);
+ } else {
+ qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
+ }
+}
+
+static const VMStateDescription ivshmem_plain_vmsd = {
+ .name = TYPE_IVSHMEM_PLAIN,
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .pre_load = ivshmem_pre_load,
+ .post_load = ivshmem_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
+ VMSTATE_UINT32(intrstatus, IVShmemState),
+ VMSTATE_UINT32(intrmask, IVShmemState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static Property ivshmem_plain_properties[] = {
+ DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void ivshmem_plain_init(Object *obj)
+{
+ IVShmemState *s = IVSHMEM_PLAIN(obj);
+
+ object_property_add_link(obj, "memdev", TYPE_MEMORY_BACKEND,
+ (Object **)&s->hostmem,
+ ivshmem_check_memdev_is_busy,
+ OBJ_PROP_LINK_UNREF_ON_RELEASE,
+ &error_abort);
+}
+
+static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->props = ivshmem_plain_properties;
+ dc->vmsd = &ivshmem_plain_vmsd;
+}
+
+static const TypeInfo ivshmem_plain_info = {
+ .name = TYPE_IVSHMEM_PLAIN,
+ .parent = TYPE_IVSHMEM_COMMON,
+ .instance_size = sizeof(IVShmemState),
+ .instance_init = ivshmem_plain_init,
+ .class_init = ivshmem_plain_class_init,
+};
+
+static const VMStateDescription ivshmem_doorbell_vmsd = {
+ .name = TYPE_IVSHMEM_DOORBELL,
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .pre_load = ivshmem_pre_load,
+ .post_load = ivshmem_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
+ VMSTATE_MSIX(parent_obj, IVShmemState),
+ VMSTATE_UINT32(intrstatus, IVShmemState),
+ VMSTATE_UINT32(intrmask, IVShmemState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static Property ivshmem_doorbell_properties[] = {
+ DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
+ DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
+ DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
+ true),
+ DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void ivshmem_doorbell_init(Object *obj)
+{
+ IVShmemState *s = IVSHMEM_DOORBELL(obj);
+
+ s->features |= (1 << IVSHMEM_MSI);
+ s->legacy_size = SIZE_MAX; /* whatever the server sends */
+}
+
+static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->props = ivshmem_doorbell_properties;
+ dc->vmsd = &ivshmem_doorbell_vmsd;
+}
+
+static const TypeInfo ivshmem_doorbell_info = {
+ .name = TYPE_IVSHMEM_DOORBELL,
+ .parent = TYPE_IVSHMEM_COMMON,
+ .instance_size = sizeof(IVShmemState),
+ .instance_init = ivshmem_doorbell_init,
+ .class_init = ivshmem_doorbell_class_init,
+};
+
static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
{
IVShmemState *s = opaque;
@@ -1077,9 +1125,9 @@ static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
return -EINVAL;
}
- if (s->role_val == IVSHMEM_PEER) {
- error_report("'peer' devices are not migratable");
- return -EINVAL;
+ ret = ivshmem_pre_load(s);
+ if (ret) {
+ return ret;
}
ret = pci_device_load(pdev, f);
@@ -1089,7 +1137,7 @@ static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
msix_load(pdev, f);
- ivshmem_use_msix(s);
+ ivshmem_msix_vector_use(s);
} else {
s->intrstatus = qemu_get_be32(f);
s->intrmask = qemu_get_be32(f);
@@ -1098,6 +1146,18 @@ static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
return 0;
}
+static bool test_msix(void *opaque, int version_id)
+{
+ IVShmemState *s = opaque;
+
+ return ivshmem_has_feature(s, IVSHMEM_MSI);
+}
+
+static bool test_no_msix(void *opaque, int version_id)
+{
+ return !test_msix(opaque, version_id);
+}
+
static const VMStateDescription ivshmem_vmsd = {
.name = "ivshmem",
.version_id = 1,
@@ -1121,68 +1181,110 @@ static Property ivshmem_properties[] = {
DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
DEFINE_PROP_STRING("size", IVShmemState, sizearg),
DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
- DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, false),
+ DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
+ false),
DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true),
DEFINE_PROP_STRING("shm", IVShmemState, shmobj),
DEFINE_PROP_STRING("role", IVShmemState, role),
- DEFINE_PROP_UINT32("use64", IVShmemState, ivshmem_64bit, 1),
+ DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1),
DEFINE_PROP_END_OF_LIST(),
};
-static void ivshmem_class_init(ObjectClass *klass, void *data)
+static void desugar_shm(IVShmemState *s)
{
- DeviceClass *dc = DEVICE_CLASS(klass);
- PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
-
- k->realize = pci_ivshmem_realize;
- k->exit = pci_ivshmem_exit;
- k->config_write = ivshmem_write_config;
- k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
- k->device_id = PCI_DEVICE_ID_IVSHMEM;
- k->class_id = PCI_CLASS_MEMORY_RAM;
- dc->reset = ivshmem_reset;
- dc->props = ivshmem_properties;
- dc->vmsd = &ivshmem_vmsd;
- set_bit(DEVICE_CATEGORY_MISC, dc->categories);
- dc->desc = "Inter-VM shared memory";
+ Object *obj;
+ char *path;
+
+ obj = object_new("memory-backend-file");
+ path = g_strdup_printf("/dev/shm/%s", s->shmobj);
+ object_property_set_str(obj, path, "mem-path", &error_abort);
+ g_free(path);
+ object_property_set_int(obj, s->legacy_size, "size", &error_abort);
+ object_property_set_bool(obj, true, "share", &error_abort);
+ object_property_add_child(OBJECT(s), "internal-shm-backend", obj,
+ &error_abort);
+ user_creatable_complete(obj, &error_abort);
+ s->hostmem = MEMORY_BACKEND(obj);
}
-static void ivshmem_check_memdev_is_busy(Object *obj, const char *name,
- Object *val, Error **errp)
+static void ivshmem_realize(PCIDevice *dev, Error **errp)
{
- MemoryRegion *mr;
+ IVShmemState *s = IVSHMEM_COMMON(dev);
- mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp);
- if (memory_region_is_mapped(mr)) {
- char *path = object_get_canonical_path_component(val);
- error_setg(errp, "can't use already busy memdev: %s", path);
- g_free(path);
+ if (!qtest_enabled()) {
+ error_report("ivshmem is deprecated, please use ivshmem-plain"
+ " or ivshmem-doorbell instead");
+ }
+
+ if (!!s->server_chr + !!s->shmobj != 1) {
+ error_setg(errp, "You must specify either 'shm' or 'chardev'");
+ return;
+ }
+
+ if (s->sizearg == NULL) {
+ s->legacy_size = 4 << 20; /* 4 MB default */
} else {
- qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
+ char *end;
+ int64_t size = qemu_strtosz(s->sizearg, &end);
+ if (size < 0 || (size_t)size != size || *end != '\0'
+ || !is_power_of_2(size)) {
+ error_setg(errp, "Invalid size %s", s->sizearg);
+ return;
+ }
+ s->legacy_size = size;
}
+
+ /* check that role is reasonable */
+ if (s->role) {
+ if (strncmp(s->role, "peer", 5) == 0) {
+ s->master = ON_OFF_AUTO_OFF;
+ } else if (strncmp(s->role, "master", 7) == 0) {
+ s->master = ON_OFF_AUTO_ON;
+ } else {
+ error_setg(errp, "'role' must be 'peer' or 'master'");
+ return;
+ }
+ } else {
+ s->master = ON_OFF_AUTO_AUTO;
+ }
+
+ if (s->shmobj) {
+ desugar_shm(s);
+ }
+
+ /*
+ * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a
+ * bald-faced lie then. But it's a backwards compatible lie.
+ */
+ pci_config_set_interrupt_pin(dev->config, 1);
+
+ ivshmem_common_realize(dev, errp);
}
-static void ivshmem_init(Object *obj)
+static void ivshmem_class_init(ObjectClass *klass, void *data)
{
- IVShmemState *s = IVSHMEM(obj);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
- object_property_add_link(obj, "x-memdev", TYPE_MEMORY_BACKEND,
- (Object **)&s->hostmem,
- ivshmem_check_memdev_is_busy,
- OBJ_PROP_LINK_UNREF_ON_RELEASE,
- &error_abort);
+ k->realize = ivshmem_realize;
+ k->revision = 0;
+ dc->desc = "Inter-VM shared memory (legacy)";
+ dc->props = ivshmem_properties;
+ dc->vmsd = &ivshmem_vmsd;
}
static const TypeInfo ivshmem_info = {
.name = TYPE_IVSHMEM,
- .parent = TYPE_PCI_DEVICE,
+ .parent = TYPE_IVSHMEM_COMMON,
.instance_size = sizeof(IVShmemState),
- .instance_init = ivshmem_init,
.class_init = ivshmem_class_init,
};
static void ivshmem_register_types(void)
{
+ type_register_static(&ivshmem_common_info);
+ type_register_static(&ivshmem_plain_info);
+ type_register_static(&ivshmem_doorbell_info);
type_register_static(&ivshmem_info);
}
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 03a1b91f31..0586cacceb 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -18,6 +18,7 @@ extern PropertyInfo qdev_prop_string;
extern PropertyInfo qdev_prop_chr;
extern PropertyInfo qdev_prop_ptr;
extern PropertyInfo qdev_prop_macaddr;
+extern PropertyInfo qdev_prop_on_off_auto;
extern PropertyInfo qdev_prop_losttickpolicy;
extern PropertyInfo qdev_prop_bios_chs_trans;
extern PropertyInfo qdev_prop_fdc_drive_type;
@@ -155,6 +156,8 @@ extern PropertyInfo qdev_prop_arraylen;
DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *)
#define DEFINE_PROP_MACADDR(_n, _s, _f) \
DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr)
+#define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \
+ DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_on_off_auto, OnOffAuto)
#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
LostTickPolicy)
diff --git a/qemu-doc.texi b/qemu-doc.texi
index bc9dd13cc9..79141d3582 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -1262,13 +1262,18 @@ basic example.
@subsection Inter-VM Shared Memory device
-With KVM enabled on a Linux host, a shared memory device is available. Guests
-map a POSIX shared memory region into the guest as a PCI device that enables
-zero-copy communication to the application level of the guests. The basic
-syntax is:
+On Linux hosts, a shared memory device is available. The basic syntax
+is:
@example
-qemu-system-i386 -device ivshmem,size=@var{size},shm=@var{shm-name}
+qemu-system-x86_64 -device ivshmem-plain,memdev=@var{hostmem}
+@end example
+
+where @var{hostmem} names a host memory backend. For a POSIX shared
+memory backend, use something like
+
+@example
+-object memory-backend-file,size=1M,share,mem-path=/dev/shm/ivshmem,id=@var{hostmem}
@end example
If desired, interrupts can be sent between guest VMs accessing the same shared
@@ -1282,28 +1287,24 @@ memory server is:
ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors}
# Then start your qemu instances with matching arguments
-qemu-system-i386 -device ivshmem,size=@var{shm-size},vectors=@var{vectors},chardev=@var{id}
- [,msi=on][,ioeventfd=on][,role=peer|master]
+qemu-system-x86_64 -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id}
-chardev socket,path=@var{path},id=@var{id}
@end example
When using the server, the guest will be assigned a VM ID (>=0) that allows guests
using the same server to communicate via interrupts. Guests can read their
-VM ID from a device register (see example code). Since receiving the shared
-memory region from the server is asynchronous, there is a (small) chance the
-guest may boot before the shared memory is attached. To allow an application
-to ensure shared memory is attached, the VM ID register will return -1 (an
-invalid VM ID) until the memory is attached. Once the shared memory is
-attached, the VM ID will return the guest's valid VM ID. With these semantics,
-the guest application can check to ensure the shared memory is attached to the
-guest before proceeding.
-
-The @option{role} argument can be set to either master or peer and will affect
-how the shared memory is migrated. With @option{role=master}, the guest will
-copy the shared memory on migration to the destination host. With
-@option{role=peer}, the guest will not be able to migrate with the device attached.
-With the @option{peer} case, the device should be detached and then reattached
-after migration using the PCI hotplug support.
+VM ID from a device register (see ivshmem-spec.txt).
+
+@subsubsection Migration with ivshmem
+
+With device property @option{master=on}, the guest will copy the shared
+memory on migration to the destination host. With @option{master=off},
+the guest will not be able to migrate with the device attached. In the
+latter case, the device should be detached and then reattached after
+migration using the PCI hotplug support.
+
+At most one of the devices sharing the same memory can be master. The
+master must complete migration before you plug back the other devices.
@subsubsection ivshmem and hugepages
@@ -1311,8 +1312,8 @@ Instead of specifying the <shm size> using POSIX shm, you may specify
a memory backend that has hugepage support:
@example
-qemu-system-i386 -object memory-backend-file,size=1G,mem-path=/mnt/hugepages/my-shmem-file,id=mb1
- -device ivshmem,x-memdev=mb1
+qemu-system-x86_64 -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1
+ -device ivshmem-plain,memdev=mb1
@end example
ivshmem-server also supports hugepages mount points with the
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 776336b8b4..2fc993143e 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -333,6 +333,12 @@ static long gethugepagesize(const char *mem_path)
return fs.f_bsize;
}
+/*
+ * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
+ * may or may not name the same files / on the same filesystem now as
+ * when we actually open and map them. Iterate over the file
+ * descriptors instead, and use qemu_fd_getpagesize().
+ */
static int find_max_supported_pagesize(Object *obj, void *opaque)
{
char *mem_path;
diff --git a/tests/Makefile b/tests/Makefile
index 60371ca008..d1ff18200f 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -166,7 +166,7 @@ gcov-files-pci-y += hw/display/virtio-gpu-pci.c
gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
-check-qtest-pci-$(CONFIG_POSIX) += tests/ivshmem-test$(EXESUF)
+check-qtest-pci-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF)
gcov-files-pci-y += hw/misc/ivshmem.c
check-qtest-i386-y = tests/endianness-test$(EXESUF)
diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c
index e184c67a1d..c027ff1e09 100644
--- a/tests/ivshmem-test.c
+++ b/tests/ivshmem-test.c
@@ -110,25 +110,26 @@ static void setup_vm_cmd(IVState *s, const char *cmd, bool msix)
s->pcibus = qpci_init_pc();
s->dev = get_device(s->pcibus);
- /* FIXME: other bar order fails, mappings changes */
- s->mem_base = qpci_iomap(s->dev, 2, &barsize);
- g_assert_nonnull(s->mem_base);
- g_assert_cmpuint(barsize, ==, TMPSHMSIZE);
+ s->reg_base = qpci_iomap(s->dev, 0, &barsize);
+ g_assert_nonnull(s->reg_base);
+ g_assert_cmpuint(barsize, ==, 256);
if (msix) {
qpci_msix_enable(s->dev);
}
- s->reg_base = qpci_iomap(s->dev, 0, &barsize);
- g_assert_nonnull(s->reg_base);
- g_assert_cmpuint(barsize, ==, 256);
+ s->mem_base = qpci_iomap(s->dev, 2, &barsize);
+ g_assert_nonnull(s->mem_base);
+ g_assert_cmpuint(barsize, ==, TMPSHMSIZE);
qpci_device_enable(s->dev);
}
static void setup_vm(IVState *s)
{
- char *cmd = g_strdup_printf("-device ivshmem,shm=%s,size=1M", tmpshm);
+ char *cmd = g_strdup_printf("-object memory-backend-file"
+ ",id=mb1,size=1M,share,mem-path=/dev/shm%s"
+ " -device ivshmem-plain,memdev=mb1", tmpshm);
setup_vm_cmd(s, cmd, false);
@@ -144,32 +145,41 @@ static void test_ivshmem_single(void)
setup_vm(&state);
s = &state;
- /* valid io */
- out_reg(s, INTRMASK, 0);
- in_reg(s, INTRSTATUS);
- in_reg(s, IVPOSITION);
+ /* initial state of readable registers */
+ g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0);
+ g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 0);
+ g_assert_cmpuint(in_reg(s, IVPOSITION), ==, 0);
+ /* trigger interrupt via registers */
out_reg(s, INTRMASK, 0xffffffff);
g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0xffffffff);
out_reg(s, INTRSTATUS, 1);
- /* XXX: intercept IRQ, not seen in resp */
+ /* check interrupt status */
g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 1);
+ /* reading clears */
+ g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 0);
+ /* TODO intercept actual interrupt (needs qtest work) */
- /* invalid io */
+ /* invalid register access */
out_reg(s, IVPOSITION, 1);
+ in_reg(s, DOORBELL);
+
+ /* ring the (non-functional) doorbell */
out_reg(s, DOORBELL, 8 << 16);
+ /* write shared memory */
for (i = 0; i < G_N_ELEMENTS(data); i++) {
data[i] = i;
}
qtest_memwrite(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
+ /* verify write */
for (i = 0; i < G_N_ELEMENTS(data); i++) {
g_assert_cmpuint(((uint32_t *)tmpshmem)[i], ==, i);
}
+ /* read it back and verify read */
memset(data, 0, sizeof(data));
-
qtest_memread(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
for (i = 0; i < G_N_ELEMENTS(data); i++) {
g_assert_cmpuint(data[i], ==, i);
@@ -276,8 +286,10 @@ static void *server_thread(void *data)
static void setup_vm_with_server(IVState *s, int nvectors, bool msi)
{
char *cmd = g_strdup_printf("-chardev socket,id=chr0,path=%s,nowait "
- "-device ivshmem,size=1M,chardev=chr0,vectors=%d,msi=%s",
- tmpserver, nvectors, msi ? "true" : "false");
+ "-device ivshmem%s,chardev=chr0,vectors=%d",
+ tmpserver,
+ msi ? "-doorbell" : ",size=1M,msi=off",
+ nvectors);
setup_vm_cmd(s, cmd, msi);
@@ -293,8 +305,7 @@ static void test_ivshmem_server(bool msi)
int nvectors = 2;
guint64 end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
- memset(tmpshmem, 0x42, TMPSHMSIZE);
- ret = ivshmem_server_init(&server, tmpserver, tmpshm,
+ ret = ivshmem_server_init(&server, tmpserver, tmpshm, true,
TMPSHMSIZE, nvectors,
g_test_verbose());
g_assert_cmpint(ret, ==, 0);
@@ -302,49 +313,39 @@ static void test_ivshmem_server(bool msi)
ret = ivshmem_server_start(&server);
g_assert_cmpint(ret, ==, 0);
- setup_vm_with_server(&state1, nvectors, msi);
- s1 = &state1;
- setup_vm_with_server(&state2, nvectors, msi);
- s2 = &state2;
-
- g_assert_cmpuint(in_reg(s1, IVPOSITION), ==, 0xffffffff);
- g_assert_cmpuint(in_reg(s2, IVPOSITION), ==, 0xffffffff);
-
- g_assert_cmpuint(qtest_readb(s1->qtest, (uintptr_t)s1->mem_base), ==, 0x00);
-
thread.server = &server;
ret = pipe(thread.pipe);
g_assert_cmpint(ret, ==, 0);
thread.thread = g_thread_new("ivshmem-server", server_thread, &thread);
g_assert(thread.thread != NULL);
- /* waiting until mapping is done */
- while (g_get_monotonic_time() < end_time) {
- g_usleep(1000);
-
- if (qtest_readb(s1->qtest, (uintptr_t)s1->mem_base) == 0x42 &&
- qtest_readb(s2->qtest, (uintptr_t)s2->mem_base) == 0x42) {
- break;
- }
- }
+ setup_vm_with_server(&state1, nvectors, msi);
+ s1 = &state1;
+ setup_vm_with_server(&state2, nvectors, msi);
+ s2 = &state2;
/* check got different VM ids */
vm1 = in_reg(s1, IVPOSITION);
vm2 = in_reg(s2, IVPOSITION);
- g_assert_cmpuint(vm1, !=, vm2);
+ g_assert_cmpint(vm1, >=, 0);
+ g_assert_cmpint(vm2, >=, 0);
+ g_assert_cmpint(vm1, !=, vm2);
+ /* check number of MSI-X vectors */
global_qtest = s1->qtest;
if (msi) {
ret = qpci_msix_table_size(s1->dev);
g_assert_cmpuint(ret, ==, nvectors);
}
- /* ping vm2 -> vm1 */
+ /* TODO test behavior before MSI-X is enabled */
+
+ /* ping vm2 -> vm1 on vector 0 */
if (msi) {
ret = qpci_msix_pending(s1->dev, 0);
g_assert_cmpuint(ret, ==, 0);
} else {
- out_reg(s1, INTRSTATUS, 0);
+ g_assert_cmpuint(in_reg(s1, INTRSTATUS), ==, 0);
}
out_reg(s2, DOORBELL, vm1 << 16);
do {
@@ -353,18 +354,18 @@ static void test_ivshmem_server(bool msi)
} while (ret == 0 && g_get_monotonic_time() < end_time);
g_assert_cmpuint(ret, !=, 0);
- /* ping vm1 -> vm2 */
+ /* ping vm1 -> vm2 on vector 1 */
global_qtest = s2->qtest;
if (msi) {
- ret = qpci_msix_pending(s2->dev, 0);
+ ret = qpci_msix_pending(s2->dev, 1);
g_assert_cmpuint(ret, ==, 0);
} else {
- out_reg(s2, INTRSTATUS, 0);
+ g_assert_cmpuint(in_reg(s2, INTRSTATUS), ==, 0);
}
- out_reg(s1, DOORBELL, vm2 << 16);
+ out_reg(s1, DOORBELL, vm2 << 16 | 1);
do {
g_usleep(10000);
- ret = msi ? qpci_msix_pending(s2->dev, 0) : in_reg(s2, INTRSTATUS);
+ ret = msi ? qpci_msix_pending(s2->dev, 1) : in_reg(s2, INTRSTATUS);
} while (ret == 0 && g_get_monotonic_time() < end_time);
g_assert_cmpuint(ret, !=, 0);
@@ -415,7 +416,7 @@ static void test_ivshmem_memdev(void)
/* just for the sake of checking memory-backend property */
setup_vm_cmd(&state, "-object memory-backend-ram,size=1M,id=mb1"
- " -device ivshmem,x-memdev=mb1", false);
+ " -device ivshmem-plain,memdev=mb1", false);
cleanup_vm(&state);
}
diff --git a/tests/libqos/pci-pc.c b/tests/libqos/pci-pc.c
index 08167c09fe..77f15e5a0e 100644
--- a/tests/libqos/pci-pc.c
+++ b/tests/libqos/pci-pc.c
@@ -184,7 +184,9 @@ static void *qpci_pc_iomap(QPCIBus *bus, QPCIDevice *dev, int barno, uint64_t *s
if (io_type == PCI_BASE_ADDRESS_SPACE_IO) {
uint16_t loc;
- g_assert((s->pci_iohole_alloc + size) <= s->pci_iohole_size);
+ g_assert(QEMU_ALIGN_UP(s->pci_iohole_alloc, size) + size
+ <= s->pci_iohole_size);
+ s->pci_iohole_alloc = QEMU_ALIGN_UP(s->pci_iohole_alloc, size);
loc = s->pci_iohole_start + s->pci_iohole_alloc;
s->pci_iohole_alloc += size;
@@ -194,7 +196,9 @@ static void *qpci_pc_iomap(QPCIBus *bus, QPCIDevice *dev, int barno, uint64_t *s
} else {
uint64_t loc;
- g_assert((s->pci_hole_alloc + size) <= s->pci_hole_size);
+ g_assert(QEMU_ALIGN_UP(s->pci_hole_alloc, size) + size
+ <= s->pci_hole_size);
+ s->pci_hole_alloc = QEMU_ALIGN_UP(s->pci_hole_alloc, size);
loc = s->pci_hole_start + s->pci_hole_alloc;
s->pci_hole_alloc += size;
diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index 2e30e74bd6..c9657a61ae 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c
@@ -20,11 +20,17 @@
#include <sys/eventfd.h>
#endif
+#ifdef CONFIG_EVENTFD
+/*
+ * Initialize @e with existing file descriptor @fd.
+ * @fd must be a genuine eventfd object, emulation with pipe won't do.
+ */
void event_notifier_init_fd(EventNotifier *e, int fd)
{
e->rfd = fd;
e->wfd = fd;
}
+#endif
int event_notifier_init(EventNotifier *e, int active)
{