From 817c60457f41e8643b612d451b3737433e9c7e0a Mon Sep 17 00:00:00 2001
From: Juan Quintela <quintela@redhat.com>
Date: Mon, 11 Feb 2013 15:11:10 +0100
Subject: migration: Improve QMP documentation

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 qmp-commands.hx | 54 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/qmp-commands.hx b/qmp-commands.hx
index b370060848..5e847b18d9 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -644,7 +644,7 @@ EQMP
 
 SQMP
 migrate-set-cache-size
----------------------
+----------------------
 
 Set cache size to be used by XBZRLE migration, the cache size will be rounded
 down to the nearest power of 2
@@ -667,7 +667,7 @@ EQMP
 
 SQMP
 query-migrate-cache-size
----------------------
+------------------------
 
 Show cache size to be used by XBZRLE migration
 
@@ -2431,32 +2431,42 @@ The main json-object contains the following:
      - Possible values: "active", "completed", "failed", "cancelled"
 - "total-time": total amount of ms since migration started.  If
                 migration has ended, it returns the total migration
-		 time (json-int)
+                time (json-int)
 - "downtime": only present when migration has finished correctly
               total amount in ms for downtime that happened (json-int)
 - "expected-downtime": only present while migration is active
                 total amount in ms for downtime that was calculated on
-		the last bitmap round (json-int)
+                the last bitmap round (json-int)
 - "ram": only present if "status" is "active", it is a json-object with the
-  following RAM information (in bytes):
-         - "transferred": amount transferred (json-int)
-         - "remaining": amount remaining (json-int)
-         - "total": total (json-int)
-         - "duplicate": number of duplicated pages (json-int)
-         - "normal" : number of normal pages transferred (json-int)
-         - "normal-bytes" : number of normal bytes transferred (json-int)
+  following RAM information:
+         - "transferred": amount transferred in bytes (json-int)
+         - "remaining": amount remaining to transfer in bytes (json-int)
+         - "total": total amount of memory in bytes (json-int)
+         - "duplicate": number of pages filled entirely with the same
+            byte (json-int)
+            These are sent over the wire much more efficiently.
+         - "normal" : number of whole pages transferred.  I.e. they
+            were not sent as duplicate or xbzrle pages (json-int)
+         - "normal-bytes" : number of bytes transferred in whole
+            pages. This is just normal pages times size of one page,
+            but this way upper levels don't need to care about page
+            size (json-int)
 - "disk": only present if "status" is "active" and it is a block migration,
-  it is a json-object with the following disk information (in bytes):
-         - "transferred": amount transferred (json-int)
-         - "remaining": amount remaining (json-int)
-         - "total": total (json-int)
+  it is a json-object with the following disk information:
+         - "transferred": amount transferred in bytes (json-int)
+         - "remaining": amount remaining to transfer in bytes (json-int)
+         - "total": total disk size in bytes (json-int)
 - "xbzrle-cache": only present if XBZRLE is active.
   It is a json-object with the following XBZRLE information:
-         - "cache-size": XBZRLE cache size
-         - "bytes": total XBZRLE bytes transferred
+         - "cache-size": XBZRLE cache size in bytes
+         - "bytes": number of bytes transferred for XBZRLE compressed pages
          - "pages": number of XBZRLE compressed pages
-         - "cache-miss": number of cache misses
-         - "overflow": number of XBZRLE overflows
+         - "cache-miss": number of XBZRLE page cache misses
+         - "overflow": number of times XBZRLE overflows.  This means
+           that the XBZRLE encoding was bigger than just sending the
+           whole page, so we sent the whole page instead (as a
+           normal page).
+
 Examples:
 
 1. Before the first migration
@@ -2567,11 +2577,11 @@ EQMP
 
 SQMP
 migrate-set-capabilities
--------
+------------------------
 
 Enable/Disable migration capabilities
 
-- "xbzrle": xbzrle support
+- "xbzrle": XBZRLE support
 
 Arguments:
 
@@ -2590,7 +2600,7 @@ EQMP
     },
 SQMP
 query-migrate-capabilities
--------
+--------------------------
 
 Query current migration capabilities
 
-- 
cgit v1.2.1


From e344b8a16de429ada3d9126f26e2a96d71348356 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Tue, 12 Mar 2013 14:06:00 +1100
Subject: savevm: Add VMSTATE_UINT64_EQUAL helpers

The savevm code already includes a number of *_EQUAL helpers which act as
sanity checks verifying that the configuration of the saved state matches
that of the machine we're loading into.  Variants already exist
for 8 bit, 16 bit and 32 bit integers, but not 64 bit integers.  This patch
fills that hole, adding a UINT64 version.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/vmstate.h |  7 +++++++
 savevm.c                    | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 6666d27b25..f12ad79aba 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -149,6 +149,7 @@ extern const VMStateInfo vmstate_info_uint8_equal;
 extern const VMStateInfo vmstate_info_uint16_equal;
 extern const VMStateInfo vmstate_info_int32_equal;
 extern const VMStateInfo vmstate_info_uint32_equal;
+extern const VMStateInfo vmstate_info_uint64_equal;
 extern const VMStateInfo vmstate_info_int32_le;
 
 extern const VMStateInfo vmstate_info_uint8;
@@ -521,6 +522,12 @@ extern const VMStateInfo vmstate_info_bitmap;
 #define VMSTATE_UINT32_EQUAL(_f, _s)                                   \
     VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint32_equal, uint32_t)
 
+#define VMSTATE_UINT64_EQUAL_V(_f, _s, _v)                            \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64_equal, uint64_t)
+
+#define VMSTATE_UINT64_EQUAL(_f, _s)                                  \
+    VMSTATE_UINT64_EQUAL_V(_f, _s, 0)
+
 #define VMSTATE_INT32_LE(_f, _s)                                   \
     VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t)
 
diff --git a/savevm.c b/savevm.c
index 35c8d1e445..cd98b0db1d 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1072,6 +1072,27 @@ const VMStateInfo vmstate_info_uint64 = {
     .put  = put_uint64,
 };
 
+/* 64 bit unsigned int. See that the received value is the same than the one
+   in the field */
+
+static int get_uint64_equal(QEMUFile *f, void *pv, size_t size)
+{
+    uint64_t *v = pv;           /* value currently held in the field */
+    uint64_t v2;                /* value received from the stream */
+    qemu_get_be64s(f, &v2);
+
+    if (*v == v2) {
+        return 0;
+    }
+    return -EINVAL;             /* received value differs from the field */
+}
+
+const VMStateInfo vmstate_info_uint64_equal = {
+    .name = "uint64 equal",
+    .get  = get_uint64_equal,
+    .put  = put_uint64,
+};
+
 /* 8 bit int. See that the received value is the same than the one
    in the field */
 
-- 
cgit v1.2.1


From d58f5598342ffebe6c6278d8b90792060fca4792 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Tue, 12 Mar 2013 14:06:01 +1100
Subject: savevm: Add VMSTATE_UINTTL_EQUAL helper

This adds an _EQUAL VMSTATE helper for target_ulongs, defined in terms of
VMSTATE_UINT32_EQUAL or VMSTATE_UINT64_EQUAL as appropriate.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 hw/hw.h                     | 6 ++++++
 include/migration/vmstate.h | 7 +++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/hw/hw.h b/hw/hw.h
index 1553e54aa7..1fb9afa322 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -52,16 +52,22 @@ int qemu_boot_set(const char *boot_devices);
 #if TARGET_LONG_BITS == 64
 #define VMSTATE_UINTTL_V(_f, _s, _v)                                  \
     VMSTATE_UINT64_V(_f, _s, _v)
+#define VMSTATE_UINTTL_EQUAL_V(_f, _s, _v)                            \
+    VMSTATE_UINT64_EQUAL_V(_f, _s, _v)
 #define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v)                        \
     VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v)
 #else
 #define VMSTATE_UINTTL_V(_f, _s, _v)                                  \
     VMSTATE_UINT32_V(_f, _s, _v)
+#define VMSTATE_UINTTL_EQUAL_V(_f, _s, _v)                            \
+    VMSTATE_UINT32_EQUAL_V(_f, _s, _v)
 #define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v)                        \
     VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v)
 #endif
 #define VMSTATE_UINTTL(_f, _s)                                        \
     VMSTATE_UINTTL_V(_f, _s, 0)
+#define VMSTATE_UINTTL_EQUAL(_f, _s)                                  \
+    VMSTATE_UINTTL_EQUAL_V(_f, _s, 0)
 #define VMSTATE_UINTTL_ARRAY(_f, _s, _n)                              \
     VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, 0)
 
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index f12ad79aba..294d455b7b 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -519,8 +519,11 @@ extern const VMStateInfo vmstate_info_bitmap;
 #define VMSTATE_INT32_EQUAL(_f, _s)                                   \
     VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_equal, int32_t)
 
-#define VMSTATE_UINT32_EQUAL(_f, _s)                                   \
-    VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint32_equal, uint32_t)
+#define VMSTATE_UINT32_EQUAL_V(_f, _s, _v)                            \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32_equal, uint32_t)
+
+#define VMSTATE_UINT32_EQUAL(_f, _s)                                  \
+    VMSTATE_UINT32_EQUAL_V(_f, _s, 0)
 
 #define VMSTATE_UINT64_EQUAL_V(_f, _s, _v)                            \
     VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64_equal, uint64_t)
-- 
cgit v1.2.1


From 213945e4d753b5f214468ff746d65fa76e21dbd1 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Tue, 12 Mar 2013 14:06:02 +1100
Subject: savevm: Add VMSTATE_FLOAT64 helpers

The current savevm code includes VMSTATE helpers for a number of commonly
used data types, but not for the float64 type used by the internal floating
point emulation code.  This patch fixes the deficiency.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/vmstate.h | 15 +++++++++++++++
 savevm.c                    | 23 +++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 294d455b7b..5c31ff1c9b 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -157,6 +157,8 @@ extern const VMStateInfo vmstate_info_uint16;
 extern const VMStateInfo vmstate_info_uint32;
 extern const VMStateInfo vmstate_info_uint64;
 
+extern const VMStateInfo vmstate_info_float64;
+
 extern const VMStateInfo vmstate_info_timer;
 extern const VMStateInfo vmstate_info_buffer;
 extern const VMStateInfo vmstate_info_unused_buffer;
@@ -543,6 +545,13 @@ extern const VMStateInfo vmstate_info_bitmap;
 #define VMSTATE_UINT32_TEST(_f, _s, _t)                                  \
     VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint32, uint32_t)
 
+
+#define VMSTATE_FLOAT64_V(_f, _s, _v)                                 \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64(_f, _s)                                       \
+    VMSTATE_FLOAT64_V(_f, _s, 0)
+
 #define VMSTATE_TIMER_TEST(_f, _s, _test)                             \
     VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *)
 
@@ -609,6 +618,12 @@ extern const VMStateInfo vmstate_info_bitmap;
 #define VMSTATE_INT64_ARRAY(_f, _s, _n)                               \
     VMSTATE_INT64_ARRAY_V(_f, _s, _n, 0)
 
+#define VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, _v)                       \
+    VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64_ARRAY(_f, _s, _n)                             \
+    VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, 0)
+
 #define VMSTATE_BUFFER_V(_f, _s, _v)                                  \
     VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f)))
 
diff --git a/savevm.c b/savevm.c
index cd98b0db1d..8f1344a959 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1133,6 +1133,29 @@ const VMStateInfo vmstate_info_uint16_equal = {
     .put  = put_uint16,
 };
 
+/* floating point */
+
+static int get_float64(QEMUFile *f, void *pv, size_t size)
+{
+    float64 *v = pv;
+
+    *v = make_float64(qemu_get_be64(f));
+    return 0;
+}
+
+static void put_float64(QEMUFile *f, void *pv, size_t size)
+{
+    float64 *v = pv; /* float64_val() expects a float64, not a uint64_t */
+
+    qemu_put_be64(f, float64_val(*v));
+}
+
+const VMStateInfo vmstate_info_float64 = {
+    .name = "float64",
+    .get  = get_float64,
+    .put  = put_float64,
+};
+
 /* timers  */
 
 static int get_timer(QEMUFile *f, void *pv, size_t size)
-- 
cgit v1.2.1


From 8474a9dd6757be064bf4b35f422b4640d1cca0a5 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Tue, 12 Mar 2013 14:06:03 +1100
Subject: savevm: Add VMSTATE_STRUCT_VARRAY_POINTER_UINT32

Currently the savevm code contains a VMSTATE_STRUCT_VARRAY_POINTER_INT32
helper (a variably sized array with the number of elements in an int32_t),
but not VMSTATE_STRUCT_VARRAY_POINTER_UINT32 (... with the number of
elements in a uint32_t).  This patch (trivially) fixes the deficiency.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/vmstate.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 5c31ff1c9b..d8af9f2cb3 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -343,6 +343,16 @@ extern const VMStateInfo vmstate_info_bitmap;
     .offset     = vmstate_offset_pointer(_state, _field, _type),     \
 }
 
+#define VMSTATE_STRUCT_VARRAY_POINTER_UINT32(_field, _state, _field_num, _vmsd, _type) { \
+    .name       = (stringify(_field)),                               \
+    .version_id = 0,                                                 \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+    .size       = sizeof(_type),                                     \
+    .vmsd       = &(_vmsd),                                          \
+    .flags      = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT, /* NOTE(review): count field is uint32_t but the flag is _INT32 - confirm */ \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),     \
+}
+
 #define VMSTATE_STRUCT_VARRAY_POINTER_UINT16(_field, _state, _field_num, _vmsd, _type) { \
     .name       = (stringify(_field)),                               \
     .version_id = 0,                                                 \
-- 
cgit v1.2.1


From 377e2cb96b76c2b0023c1acc7230bf3a9e9f9f40 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Tue, 12 Mar 2013 14:06:04 +1100
Subject: savevm: Fix bugs in the VMSTATE_VBUFFER_MULTIPLY definition

The VMSTATE_BUFFER_MULTIPLY macro is misnamed - it actually specifies
a variably sized buffer with VMS_VBUFFER, so should be named
VMSTATE_VBUFFER_MULTIPLY.  This patch fixes this (the macro had no current
users under either name).

In addition, unlike the other VMSTATE_VBUFFER variants, this macro did not
specify VMS_POINTER.  This patch fixes this bug as well.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/vmstate.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index d8af9f2cb3..65918a9abe 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -393,14 +393,14 @@ extern const VMStateInfo vmstate_info_bitmap;
     .offset       = vmstate_offset_buffer(_state, _field) + _start,  \
 }
 
-#define VMSTATE_BUFFER_MULTIPLY(_field, _state, _version, _test, _start, _field_size, _multiply) { \
+#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, _start, _field_size, _multiply) { \
     .name         = (stringify(_field)),                             \
     .version_id   = (_version),                                      \
     .field_exists = (_test),                                         \
     .size_offset  = vmstate_offset_value(_state, _field_size, uint32_t),\
     .size         = (_multiply),                                      \
     .info         = &vmstate_info_buffer,                            \
-    .flags        = VMS_VBUFFER|VMS_MULTIPLY,                        \
+    .flags        = VMS_VBUFFER|VMS_POINTER|VMS_MULTIPLY,            \
     .offset       = offsetof(_state, _field),                        \
     .start        = (_start),                                        \
 }
-- 
cgit v1.2.1


From c61ca00ada744eb24825be2ba4d6ba8fe3a870a4 Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Tue, 26 Mar 2013 10:58:30 +0100
Subject: move vector definitions to qemu-common.h

vector optimizations will now be used at various places
not just in is_dup_page() in arch_init.c

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 arch_init.c           | 20 --------------------
 include/qemu-common.h | 21 +++++++++++++++++++++
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index e8ade9e639..35974c2828 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -116,26 +116,6 @@ const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
 
-#ifdef __ALTIVEC__
-#include <altivec.h>
-#define VECTYPE        vector unsigned char
-#define SPLAT(p)       vec_splat(vec_ld(0, p), 0)
-#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
-/* altivec.h may redefine the bool macro as vector type.
- * Reset it to POSIX semantics. */
-#undef bool
-#define bool _Bool
-#elif defined __SSE2__
-#include <emmintrin.h>
-#define VECTYPE        __m128i
-#define SPLAT(p)       _mm_set1_epi8(*(p))
-#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
-#else
-#define VECTYPE        unsigned long
-#define SPLAT(p)       (*(p) * (~0UL / 255))
-#define ALL_EQ(v1, v2) ((v1) == (v2))
-#endif
-
 
 static struct defconfig_file {
     const char *filename;
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 2371132c11..d7ad3a70f1 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -448,4 +448,25 @@ int uleb128_decode_small(const uint8_t *in, uint32_t *n);
 
 void hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
 
+/* vector definitions */
+#ifdef __ALTIVEC__
+#include <altivec.h>
+#define VECTYPE        vector unsigned char
+#define SPLAT(p)       vec_splat(vec_ld(0, p), 0)
+#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
+/* altivec.h may redefine the bool macro as vector type.
+ * Reset it to POSIX semantics. */
+#undef bool
+#define bool _Bool
+#elif defined __SSE2__
+#include <emmintrin.h>
+#define VECTYPE        __m128i
+#define SPLAT(p)       _mm_set1_epi8(*(p))
+#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
+#else
+#define VECTYPE        unsigned long
+#define SPLAT(p)       (*(p) * (~0UL / 255))
+#define ALL_EQ(v1, v2) ((v1) == (v2))
+#endif
+
 #endif
-- 
cgit v1.2.1


From 41a259bd2b1796ddabdae600ee539269a7ddb6a5 Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Tue, 26 Mar 2013 10:58:32 +0100
Subject: cutils: add a function to find non-zero content in a buffer

this adds buffer_find_nonzero_offset() which is a SSE2/Altivec
optimized function that searches for non-zero content in a
buffer.

the function starts full unrolling only after the first few chunks have
been checked one by one. analyzing real memory page data has revealed
that non-zero pages are non-zero within the first 256-512 bits in
most cases. as this function is also heavily used to check for zero memory
pages this tweak has been made to avoid the high setup costs of the fully
unrolled check for non-zero pages.

due to the optimizations used in the function there are restrictions
on buffer address and search length. the function
can_use_buffer_find_nonzero_offset() can be used to check if
the function can be used safely.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/qemu-common.h | 10 ++++++++++
 util/cutils.c         | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/include/qemu-common.h b/include/qemu-common.h
index d7ad3a70f1..31fff22f32 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -469,4 +469,14 @@ void hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
 #define ALL_EQ(v1, v2) ((v1) == (v2))
 #endif
 
+#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8
+static inline bool
+can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+    return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
+                   * sizeof(VECTYPE)) == 0
+            && ((uintptr_t) buf) % sizeof(VECTYPE) == 0);
+}
+size_t buffer_find_nonzero_offset(const void *buf, size_t len);
+
 #endif
diff --git a/util/cutils.c b/util/cutils.c
index 1439da4f99..0696a3ba5e 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -142,6 +142,61 @@ int qemu_fdatasync(int fd)
 #endif
 }
 
+/*
+ * Searches for an area with non-zero content in a buffer
+ *
+ * Attention! The len must be a multiple of
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * and buf must be a multiple of sizeof(VECTYPE) due to
+ * restriction of optimizations in this function.
+ *
+ * can_use_buffer_find_nonzero_offset() can be used to check
+ * these requirements.
+ *
+ * The return value is the offset of the non-zero area rounded
+ * down to a multiple of sizeof(VECTYPE) for the first
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * afterwards.
+ *
+ * If the buffer is all zero the return value is equal to len.
+ */
+
+size_t buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+    const VECTYPE *p = buf;
+    const VECTYPE zero = (VECTYPE){0};
+    size_t i;
+
+    assert(can_use_buffer_find_nonzero_offset(buf, len));
+
+    if (!len) {
+        return 0;   /* empty buffer: 0 == len, i.e. "all zero" */
+    }
+
+    for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
+        if (!ALL_EQ(p[i], zero)) {
+            return i * sizeof(VECTYPE);   /* exact chunk offset */
+        }
+    }
+
+    for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
+         i < len / sizeof(VECTYPE);
+         i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
+        VECTYPE tmp0 = p[i + 0] | p[i + 1];
+        VECTYPE tmp1 = p[i + 2] | p[i + 3];
+        VECTYPE tmp2 = p[i + 4] | p[i + 5];
+        VECTYPE tmp3 = p[i + 6] | p[i + 7];   /* body hard-codes 8 chunks */
+        VECTYPE tmp01 = tmp0 | tmp1;
+        VECTYPE tmp23 = tmp2 | tmp3;
+        if (!ALL_EQ(tmp01 | tmp23, zero)) {
+            break;   /* some chunk in this group of 8 is non-zero */
+        }
+    }
+
+    return i * sizeof(VECTYPE);   /* equals len when no break occurred */
+}
+
 /*
  * Checks if a buffer is all zeroes
  *
-- 
cgit v1.2.1


From 56ded708ec38e4cb75a7c7357480ca34c0dc6875 Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Tue, 26 Mar 2013 10:58:33 +0100
Subject: buffer_is_zero: use vector optimizations if possible

performance gain on SSE2 is approx. 20-25%. altivec
is not tested. performance for unsigned long arithmetic
is unchanged.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 util/cutils.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/util/cutils.c b/util/cutils.c
index 0696a3ba5e..5024253405 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -215,6 +215,11 @@ bool buffer_is_zero(const void *buf, size_t len)
     long d0, d1, d2, d3;
     const long * const data = buf;
 
+    /* use vector optimized zero check if possible */
+    if (can_use_buffer_find_nonzero_offset(buf, len)) {
+        return buffer_find_nonzero_offset(buf, len) == len;
+    }
+
     assert(len % (4 * sizeof(long)) == 0);
     len /= sizeof(long);
 
-- 
cgit v1.2.1


From 49f676a00ab540fac1d2008be26434cf85607722 Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Tue, 26 Mar 2013 10:58:34 +0100
Subject: bitops: unroll while loop in find_next_bit()

this patch adopts the loop unrolling idea of bitmap_is_zero() to
speed up the skipping of large areas with zeros in find_next_bit().

this routine is extensively used to find dirty pages in
live migration.

testing only the find_next_bit performance on a zeroed bitfield
the loop unrolling decreased execution time by approx. 50% on x86_64.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 util/bitops.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/util/bitops.c b/util/bitops.c
index e72237ab2b..227c38b883 100644
--- a/util/bitops.c
+++ b/util/bitops.c
@@ -42,7 +42,23 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
         size -= BITS_PER_LONG;
         result += BITS_PER_LONG;
     }
-    while (size & ~(BITS_PER_LONG-1)) {
+    while (size >= 4*BITS_PER_LONG) {
+        unsigned long d1, d2, d3;
+        tmp = *p;
+        d1 = *(p+1);
+        d2 = *(p+2);
+        d3 = *(p+3);
+        if (tmp) {
+            goto found_middle;
+        }
+        if (d1 | d2 | d3) {
+            break;
+        }
+        p += 4;
+        result += 4*BITS_PER_LONG;
+        size -= 4*BITS_PER_LONG;
+    }
+    while (size >= BITS_PER_LONG) {
         if ((tmp = *(p++))) {
             goto found_middle;
         }
-- 
cgit v1.2.1


From 3edcd7e6ebae3ef0ac178eed5f4225803159562d Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Tue, 26 Mar 2013 10:58:35 +0100
Subject: migration: search for zero instead of dup pages

virtually all dup pages are zero pages. remove
the special is_dup_page() function and use the
optimized buffer_find_nonzero_offset() function
instead.

here buffer_find_nonzero_offset() is used directly
to avoid the unnecessary additional checks in
buffer_is_zero().

raw performance gain checking 1 GByte zeroed memory
over is_dup_page() is approx. 10-12% with SSE2
and 8-10% with unsigned long arithmetic.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 arch_init.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 35974c2828..dd5deffa91 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -146,19 +146,10 @@ int qemu_read_default_config_files(bool userconfig)
     return 0;
 }
 
-static int is_dup_page(uint8_t *page)
+static inline bool is_zero_page(uint8_t *p)
 {
-    VECTYPE *p = (VECTYPE *)page;
-    VECTYPE val = SPLAT(page);
-    int i;
-
-    for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
-        if (!ALL_EQ(val, p[i])) {
-            return 0;
-        }
-    }
-
-    return 1;
+    return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
+        TARGET_PAGE_SIZE;
 }
 
 /* struct contains XBZRLE cache and a static page
@@ -445,12 +436,12 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 
             /* In doubt sent page as normal */
             bytes_sent = -1;
-            if (is_dup_page(p)) {
+            if (is_zero_page(p)) {
                 acct_info.dup_pages++;
                 bytes_sent = save_block_hdr(f, block, offset, cont,
                                             RAM_SAVE_FLAG_COMPRESS);
-                qemu_put_byte(f, *p);
-                bytes_sent += 1;
+                qemu_put_byte(f, 0);
+                bytes_sent++;
             } else if (migrate_use_xbzrle()) {
                 current_addr = block->offset + offset;
                 bytes_sent = save_xbzrle_page(f, p, current_addr, block,
-- 
cgit v1.2.1


From 78d07ae7ac74bcc7f79aeefbaff17fb142f44b4d Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Tue, 26 Mar 2013 10:58:36 +0100
Subject: migration: add an indicator for bulk state of ram migration

the first round of ram transfer is special since all pages
are dirty and thus all memory pages are transferred to
the target. this patch adds a boolean variable to track
this stage.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 arch_init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch_init.c b/arch_init.c
index dd5deffa91..1291bd2b3a 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -319,6 +319,7 @@ static ram_addr_t last_offset;
 static unsigned long *migration_bitmap;
 static uint64_t migration_dirty_pages;
 static uint32_t last_version;
+static bool ram_bulk_stage;
 
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
@@ -426,6 +427,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             if (!block) {
                 block = QTAILQ_FIRST(&ram_list.blocks);
                 complete_round = true;
+                ram_bulk_stage = false;
             }
         } else {
             uint8_t *p;
@@ -529,6 +531,7 @@ static void reset_ram_globals(void)
     last_sent_block = NULL;
     last_offset = 0;
     last_version = ram_list.version;
+    ram_bulk_stage = true;
 }
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
-- 
cgit v1.2.1


From f1c72795af573b24a7da5eb52375c9aba8a37972 Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Tue, 26 Mar 2013 10:58:37 +0100
Subject: migration: do not sent zero pages in bulk stage

during bulk stage of ram migration if a page is a
zero page do not send it at all.
the memory at the destination reads as zero anyway.

even if there is an madvise with QEMU_MADV_DONTNEED
at the target upon receipt of a zero page I have observed
that the target starts swapping if the memory is overcommitted.
it seems that the pages are dropped asynchronously.

this patch also updates QMP to return the number of
skipped pages in MigrationStats.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 arch_init.c                   | 24 ++++++++++++++++++++----
 hmp.c                         |  2 ++
 include/migration/migration.h |  2 ++
 migration.c                   |  3 ++-
 qapi-schema.json              |  8 +++++---
 qmp-commands.hx               |  1 +
 6 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 1291bd2b3a..3a0d02eafa 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -183,6 +183,7 @@ int64_t xbzrle_cache_resize(int64_t new_size)
 /* accounting for migration statistics */
 typedef struct AccountingInfo {
     uint64_t dup_pages;
+    uint64_t skipped_pages;
     uint64_t norm_pages;
     uint64_t iterations;
     uint64_t xbzrle_bytes;
@@ -208,6 +209,16 @@ uint64_t dup_mig_pages_transferred(void)
     return acct_info.dup_pages;
 }
 
+uint64_t skipped_mig_bytes_transferred(void)
+{
+    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t skipped_mig_pages_transferred(void)
+{
+    return acct_info.skipped_pages;
+}
+
 uint64_t norm_mig_bytes_transferred(void)
 {
     return acct_info.norm_pages * TARGET_PAGE_SIZE;
@@ -440,10 +451,15 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             bytes_sent = -1;
             if (is_zero_page(p)) {
                 acct_info.dup_pages++;
-                bytes_sent = save_block_hdr(f, block, offset, cont,
-                                            RAM_SAVE_FLAG_COMPRESS);
-                qemu_put_byte(f, 0);
-                bytes_sent++;
+                if (!ram_bulk_stage) {
+                    bytes_sent = save_block_hdr(f, block, offset, cont,
+                                                RAM_SAVE_FLAG_COMPRESS);
+                    qemu_put_byte(f, 0);
+                    bytes_sent++;
+                } else {
+                    acct_info.skipped_pages++;
+                    bytes_sent = 0;
+                }
             } else if (migrate_use_xbzrle()) {
                 current_addr = block->offset + offset;
                 bytes_sent = save_xbzrle_page(f, p, current_addr, block,
diff --git a/hmp.c b/hmp.c
index b0a861cfbb..e3e833edf4 100644
--- a/hmp.c
+++ b/hmp.c
@@ -173,6 +173,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
                        info->ram->total >> 10);
         monitor_printf(mon, "duplicate: %" PRIu64 " pages\n",
                        info->ram->duplicate);
+        monitor_printf(mon, "skipped: %" PRIu64 " pages\n",
+                       info->ram->skipped);
         monitor_printf(mon, "normal: %" PRIu64 " pages\n",
                        info->ram->normal);
         monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n",
diff --git a/include/migration/migration.h b/include/migration/migration.h
index bb617fdacf..e2acec64c0 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -96,6 +96,8 @@ extern SaveVMHandlers savevm_ram_handlers;
 
 uint64_t dup_mig_bytes_transferred(void);
 uint64_t dup_mig_pages_transferred(void);
+uint64_t skipped_mig_bytes_transferred(void);
+uint64_t skipped_mig_pages_transferred(void);
 uint64_t norm_mig_bytes_transferred(void);
 uint64_t norm_mig_pages_transferred(void);
 uint64_t xbzrle_mig_bytes_transferred(void);
diff --git a/migration.c b/migration.c
index 185d11260d..7fb2147391 100644
--- a/migration.c
+++ b/migration.c
@@ -197,11 +197,11 @@ MigrationInfo *qmp_query_migrate(Error **errp)
         info->ram->remaining = ram_bytes_remaining();
         info->ram->total = ram_bytes_total();
         info->ram->duplicate = dup_mig_pages_transferred();
+        info->ram->skipped = skipped_mig_pages_transferred();
         info->ram->normal = norm_mig_pages_transferred();
         info->ram->normal_bytes = norm_mig_bytes_transferred();
         info->ram->dirty_pages_rate = s->dirty_pages_rate;
 
-
         if (blk_mig_active()) {
             info->has_disk = true;
             info->disk = g_malloc0(sizeof(*info->disk));
@@ -227,6 +227,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
         info->ram->remaining = 0;
         info->ram->total = ram_bytes_total();
         info->ram->duplicate = dup_mig_pages_transferred();
+        info->ram->skipped = skipped_mig_pages_transferred();
         info->ram->normal = norm_mig_pages_transferred();
         info->ram->normal_bytes = norm_mig_bytes_transferred();
         break;
diff --git a/qapi-schema.json b/qapi-schema.json
index 088f4e150f..6c4966be43 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -496,7 +496,9 @@
 #
 # @total: total amount of bytes involved in the migration process
 #
-# @duplicate: number of duplicate pages (since 1.2)
+# @duplicate: number of duplicate (zero) pages (since 1.2)
+#
+# @skipped: number of skipped zero pages (since 1.5)
 #
 # @normal : number of normal pages (since 1.2)
 #
@@ -509,8 +511,8 @@
 ##
 { 'type': 'MigrationStats',
   'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' ,
-           'duplicate': 'int', 'normal': 'int', 'normal-bytes': 'int',
-           'dirty-pages-rate' : 'int' } }
+           'duplicate': 'int', 'skipped': 'int', 'normal': 'int',
+           'normal-bytes': 'int', 'dirty-pages-rate' : 'int' } }
 
 ##
 # @XBZRLECacheStats
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 5e847b18d9..fdc40486f6 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2445,6 +2445,7 @@ The main json-object contains the following:
          - "duplicate": number of pages filled entirely with the same
             byte (json-int)
             These are sent over the wire much more efficiently.
+         - "skipped": number of skipped zero pages (json-int)
          - "normal" : number of whole pages transfered.  I.e. they
             were not sent as duplicate or xbzrle pages (json-int)
          - "normal-bytes" : number of bytes transferred in whole
-- 
cgit v1.2.1


From 70c8652bf3c1fea79b7b68864e86926715c49261 Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Tue, 26 Mar 2013 10:58:38 +0100
Subject: migration: do not search dirty pages in bulk stage

Avoid searching for dirty pages during the bulk stage; just increment
the page offset. All pages are dirty anyway.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 arch_init.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch_init.c b/arch_init.c
index 3a0d02eafa..a522735dd1 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -340,7 +340,13 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
     unsigned long nr = base + (start >> TARGET_PAGE_BITS);
     unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
 
-    unsigned long next = find_next_bit(migration_bitmap, size, nr);
+    unsigned long next;
+
+    if (ram_bulk_stage && nr > base) {
+        next = nr + 1;
+    } else {
+        next = find_next_bit(migration_bitmap, size, nr);
+    }
 
     if (next < size) {
         clear_bit(next, migration_bitmap);
-- 
cgit v1.2.1


From 5cc11c46cf187c7d5306b68e730ec0d372cd7ef0 Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Tue, 26 Mar 2013 10:58:39 +0100
Subject: migration: use XBZRLE only after bulk stage

at the beginning of migration all pages are marked dirty and
in the first round a bulk migration of all pages is performed.

currently all these pages are copied to the page cache regardless
of whether they are frequently updated or not. this doesn't make sense
since most of these pages are never transferred again.

this patch changes the XBZRLE transfer to only be used after
the bulk stage has been completed. that means a page is added
to the page cache the second time it is transferred and XBZRLE
can benefit from the third time of transfer.

since the page cache is likely smaller than the number of pages
it's also likely that in the second round the page is missing in the
cache due to collisions in the bulk phase.

on the other hand a lot of unnecessary mallocs, memdups and frees
are saved.

the following results have been taken earlier while executing
the test program from docs/xbzrle.txt. (+) with the patch and (-)
without. (thanks to Eric Blake for reformatting and comments)

+ total time: 22185 milliseconds
- total time: 22410 milliseconds

Shaved about 0.2 seconds (225 ms), roughly 1%!

+ downtime: 29 milliseconds
- downtime: 21 milliseconds

Not sure why downtime seemed worse, but probably not the end of the world.

+ transferred ram: 706034 kbytes
- transferred ram: 721318 kbytes

Fewer bytes sent - good.

+ remaining ram: 0 kbytes
- remaining ram: 0 kbytes
+ total ram: 1057216 kbytes
- total ram: 1057216 kbytes
+ duplicate: 108556 pages
- duplicate: 105553 pages
+ normal: 175146 pages
- normal: 179589 pages
+ normal bytes: 700584 kbytes
- normal bytes: 718356 kbytes

Fewer normal bytes...

+ cache size: 67108864 bytes
- cache size: 67108864 bytes
+ xbzrle transferred: 3127 kbytes
- xbzrle transferred: 630 kbytes

...and more compressed pages sent - good.

+ xbzrle pages: 117811 pages
- xbzrle pages: 21527 pages
+ xbzrle cache miss: 18750
- xbzrle cache miss: 179589

And very good improvement on the cache miss rate.

+ xbzrle overflow : 0
- xbzrle overflow : 0

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 arch_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch_init.c b/arch_init.c
index a522735dd1..e1af89875e 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -466,7 +466,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                     acct_info.skipped_pages++;
                     bytes_sent = 0;
                 }
-            } else if (migrate_use_xbzrle()) {
+            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
                 current_addr = block->offset + offset;
                 bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                               offset, cont, last_stage);
-- 
cgit v1.2.1


From d913829f0fd8451abcb1fd9d6dfce5586d9d7e10 Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Fri, 22 Mar 2013 16:47:57 +0200
Subject: Add QemuFileWritevBuffer QemuFileOps

This will allow us to write an iovec

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/qemu-file.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index df812617f8..8b8070fbbf 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -51,11 +51,18 @@ typedef int (QEMUFileCloseFunc)(void *opaque);
  */
 typedef int (QEMUFileGetFD)(void *opaque);
 
+/*
+ * This function writes an iovec to file.
+ */
+typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
+                                           int iovcnt);
+
 typedef struct QEMUFileOps {
     QEMUFilePutBufferFunc *put_buffer;
     QEMUFileGetBufferFunc *get_buffer;
     QEMUFileCloseFunc *close;
     QEMUFileGetFD *get_fd;
+    QEMUFileWritevBufferFunc *writev_buffer;
 } QEMUFileOps;
 
 QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
-- 
cgit v1.2.1


From 28085f7b4d06970efa004257fcef013caf495a08 Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Fri, 22 Mar 2013 16:47:58 +0200
Subject: Add socket_writev_buffer function

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 savevm.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/savevm.c b/savevm.c
index 8f1344a959..3466ef4aac 100644
--- a/savevm.c
+++ b/savevm.c
@@ -39,6 +39,7 @@
 #include "qmp-commands.h"
 #include "trace.h"
 #include "qemu/bitops.h"
+#include "qemu/iov.h"
 
 #define SELF_ANNOUNCE_ROUNDS 5
 
@@ -171,6 +172,19 @@ static void coroutine_fn yield_until_fd_readable(int fd)
     qemu_coroutine_yield();
 }
 
+static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt)
+{
+    QEMUFileSocket *s = opaque;
+    ssize_t len;
+    ssize_t size = iov_size(iov, iovcnt);
+
+    len = iov_send(s->fd, iov, iovcnt, 0, size);
+    if (len < size) {
+        len = -socket_error();
+    }
+    return len;
+}
+
 static int socket_get_fd(void *opaque)
 {
     QEMUFileSocket *s = opaque;
@@ -387,6 +401,7 @@ static const QEMUFileOps socket_read_ops = {
 static const QEMUFileOps socket_write_ops = {
     .get_fd =     socket_get_fd,
     .put_buffer = socket_put_buffer,
+    .writev_buffer = socket_writev_buffer,
     .close =      socket_close
 };
 
-- 
cgit v1.2.1


From 7d8a30bb98e89c203b3d2289ab0638c38bbeb7c1 Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Fri, 22 Mar 2013 16:47:59 +0200
Subject: Update bytes_xfer in qemu_put_byte

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 savevm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/savevm.c b/savevm.c
index 3466ef4aac..5ab256c97d 100644
--- a/savevm.c
+++ b/savevm.c
@@ -648,6 +648,8 @@ void qemu_put_byte(QEMUFile *f, int v)
 
     f->buf[f->buf_index++] = v;
     f->is_write = 1;
+    f->bytes_xfer++;
+
     if (f->buf_index >= IO_BUF_SIZE) {
         qemu_fflush(f);
     }
-- 
cgit v1.2.1


From b3ea2bdb792f6d961ba3adf45cf1f0c63c61e09d Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Fri, 22 Mar 2013 16:48:00 +0200
Subject: Store the data to send also in iovec

All data is still copied into the static buffer.
Adjacent iovecs are coalesced so we send one big buffer
instead of many small buffers.

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 savevm.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/savevm.c b/savevm.c
index 5ab256c97d..aaaf39a678 100644
--- a/savevm.c
+++ b/savevm.c
@@ -114,6 +114,7 @@ void qemu_announce_self(void)
 /* savevm/loadvm support */
 
 #define IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN(IOV_MAX, 64)
 
 struct QEMUFile {
     const QEMUFileOps *ops;
@@ -129,6 +130,9 @@ struct QEMUFile {
     int buf_size; /* 0 when writing */
     uint8_t buf[IO_BUF_SIZE];
 
+    struct iovec iov[MAX_IOV_SIZE];
+    unsigned int iovcnt;
+
     int last_error;
 };
 
@@ -528,6 +532,7 @@ static void qemu_fflush(QEMUFile *f)
             f->pos += f->buf_index;
         }
         f->buf_index = 0;
+        f->iovcnt = 0;
     }
     if (ret < 0) {
         qemu_file_set_error(f, ret);
@@ -601,6 +606,18 @@ int qemu_fclose(QEMUFile *f)
     return ret;
 }
 
+static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size)
+{
+    /* check for adjacent buffer and coalesce them */
+    if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
+        f->iov[f->iovcnt - 1].iov_len) {
+        f->iov[f->iovcnt - 1].iov_len += size;
+    } else {
+        f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
+        f->iov[f->iovcnt++].iov_len = size;
+    }
+}
+
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
 {
     int l;
@@ -620,12 +637,13 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
         if (l > size)
             l = size;
         memcpy(f->buf + f->buf_index, buf, l);
+        add_to_iovec(f, f->buf + f->buf_index, l);
         f->is_write = 1;
         f->buf_index += l;
         f->bytes_xfer += l;
         buf += l;
         size -= l;
-        if (f->buf_index >= IO_BUF_SIZE) {
+        if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
             qemu_fflush(f);
             if (qemu_file_get_error(f)) {
                 break;
@@ -650,7 +668,9 @@ void qemu_put_byte(QEMUFile *f, int v)
     f->is_write = 1;
     f->bytes_xfer++;
 
-    if (f->buf_index >= IO_BUF_SIZE) {
+    add_to_iovec(f, f->buf + (f->buf_index - 1), 1);
+
+    if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
         qemu_fflush(f);
     }
 }
-- 
cgit v1.2.1


From cb88aa88d7e96cd12328915b33bf4a1bc054aa3f Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Fri, 22 Mar 2013 16:48:01 +0200
Subject: Use writev ops if available

Update qemu_fflush and stdio_fclose to use writev ops if they are available.
Use the buffers stored in the iovec.

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 savevm.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/savevm.c b/savevm.c
index aaaf39a678..1d929878ae 100644
--- a/savevm.c
+++ b/savevm.c
@@ -293,7 +293,7 @@ static int stdio_fclose(void *opaque)
     QEMUFileStdio *s = opaque;
     int ret = 0;
 
-    if (s->file->ops->put_buffer) {
+    if (s->file->ops->put_buffer || s->file->ops->writev_buffer) {
         int fd = fileno(s->stdio_file);
         struct stat st;
 
@@ -516,20 +516,35 @@ static void qemu_file_set_error(QEMUFile *f, int ret)
     }
 }
 
-/** Flushes QEMUFile buffer
+/**
+ * Flushes QEMUFile buffer
  *
+ * If there is writev_buffer QEMUFileOps it uses it otherwise uses
+ * put_buffer ops.
  */
 static void qemu_fflush(QEMUFile *f)
 {
-    int ret = 0;
+    ssize_t ret = 0;
+    int i = 0;
 
-    if (!f->ops->put_buffer) {
+    if (!f->ops->writev_buffer && !f->ops->put_buffer) {
         return;
     }
-    if (f->is_write && f->buf_index > 0) {
-        ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index);
-        if (ret >= 0) {
-            f->pos += f->buf_index;
+
+    if (f->is_write && f->iovcnt > 0) {
+        if (f->ops->writev_buffer) {
+            ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt);
+            if (ret >= 0) {
+                f->pos += ret;
+            }
+        } else {
+            for (i = 0; i < f->iovcnt && ret >= 0; i++) {
+                ret = f->ops->put_buffer(f->opaque, f->iov[i].iov_base, f->pos,
+                                         f->iov[i].iov_len);
+                if (ret >= 0) {
+                    f->pos += ret;
+                }
+            }
         }
         f->buf_index = 0;
         f->iovcnt = 0;
-- 
cgit v1.2.1


From 6181ec245529e0d40ac669fe3044eef3a9e19610 Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Fri, 22 Mar 2013 16:48:02 +0200
Subject: Add qemu_put_buffer_async

This allows us to add a buffer to the iovec to send without copying it
into the static buffer. The buffer will be sent later, when qemu_fflush is called.

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/qemu-file.h |  5 +++++
 savevm.c                      | 34 ++++++++++++++++++++++++++--------
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 8b8070fbbf..623c434b15 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -75,6 +75,11 @@ int qemu_fclose(QEMUFile *f);
 int64_t qemu_ftell(QEMUFile *f);
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
 void qemu_put_byte(QEMUFile *f, int v);
+/*
+ * put_buffer without copying the buffer.
+ * The buffer should be available till it is sent asynchronously.
+ */
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
 
 static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
 {
diff --git a/savevm.c b/savevm.c
index 1d929878ae..406caa90e5 100644
--- a/savevm.c
+++ b/savevm.c
@@ -633,6 +633,28 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size)
     }
 }
 
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size)
+{
+    if (f->last_error) {
+        return;
+    }
+
+    if (f->is_write == 0 && f->buf_index > 0) {
+        fprintf(stderr,
+                "Attempted to write to buffer while read buffer is not empty\n");
+        abort();
+    }
+
+    add_to_iovec(f, buf, size);
+
+    f->is_write = 1;
+    f->bytes_xfer += size;
+
+    if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
+        qemu_fflush(f);
+    }
+}
+
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
 {
     int l;
@@ -652,18 +674,14 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
         if (l > size)
             l = size;
         memcpy(f->buf + f->buf_index, buf, l);
-        add_to_iovec(f, f->buf + f->buf_index, l);
         f->is_write = 1;
         f->buf_index += l;
-        f->bytes_xfer += l;
+        qemu_put_buffer_async(f, f->buf + (f->buf_index - l), l);
+        if (qemu_file_get_error(f)) {
+            break;
+        }
         buf += l;
         size -= l;
-        if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
-            qemu_fflush(f);
-            if (qemu_file_get_error(f)) {
-                break;
-            }
-        }
     }
 }
 
-- 
cgit v1.2.1


From 500f0061d628b52220038939728f0d7aee634468 Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Fri, 22 Mar 2013 16:48:03 +0200
Subject: Use qemu_put_buffer_async for guest memory pages

This will remove an unneeded copy of guest memory pages.
For the page header and device state we still copy the data to the
static buffer; the other option is to allocate the memory on demand,
which is more expensive.

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 arch_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch_init.c b/arch_init.c
index e1af89875e..4ef5a15a6e 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -478,7 +478,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             /* XBZRLE overflow or normal page */
             if (bytes_sent == -1) {
                 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
                 bytes_sent += TARGET_PAGE_SIZE;
                 acct_info.norm_pages++;
             }
-- 
cgit v1.2.1