From ff7a1eb0a1262f7d451cc1e70c65dd23771ce2a2 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Sat, 16 Feb 2013 12:47:00 -0800
Subject: host-utils: Improve mulu64 and muls64

The new formulation makes better use of add-with-carry type insns
that the host may have.  Use gcc's sign adjustment trick to avoid
having to perform a 128-bit negation.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 util/host-utils.c | 92 +++++++++++++++++++++++--------------------------------
 1 file changed, 38 insertions(+), 54 deletions(-)

(limited to 'util/host-utils.c')

diff --git a/util/host-utils.c b/util/host-utils.c
index 2d06a2cb78..f0784d6335 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -27,79 +27,63 @@
 #include <stdint.h>
 #include "qemu/host-utils.h"
 
-//#define DEBUG_MULDIV
-
 /* Long integer helpers */
 #ifndef CONFIG_INT128
-static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
-{
-    *plow += a;
-    /* carry test */
-    if (*plow < a)
-        (*phigh)++;
-    *phigh += b;
-}
-
-static void neg128 (uint64_t *plow, uint64_t *phigh)
+static inline void mul64(uint64_t *plow, uint64_t *phigh,
+                         uint64_t a, uint64_t b)
 {
-    *plow = ~*plow;
-    *phigh = ~*phigh;
-    add128(plow, phigh, 1, 0);
-}
-
-static void mul64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
-{
-    uint32_t a0, a1, b0, b1;
-    uint64_t v;
-
-    a0 = a;
-    a1 = a >> 32;
-
-    b0 = b;
-    b1 = b >> 32;
+    typedef union {
+        uint64_t ll;
+        struct {
+#ifdef HOST_WORDS_BIGENDIAN
+            uint32_t high, low;
+#else
+            uint32_t low, high;
+#endif
+        } l;
+    } LL;
+    LL rl, rm, rn, rh, a0, b0;
+    uint64_t c;
 
-    v = (uint64_t)a0 * (uint64_t)b0;
-    *plow = v;
-    *phigh = 0;
+    a0.ll = a;
+    b0.ll = b;
 
-    v = (uint64_t)a0 * (uint64_t)b1;
-    add128(plow, phigh, v << 32, v >> 32);
+    rl.ll = (uint64_t)a0.l.low * b0.l.low;
+    rm.ll = (uint64_t)a0.l.low * b0.l.high;
+    rn.ll = (uint64_t)a0.l.high * b0.l.low;
+    rh.ll = (uint64_t)a0.l.high * b0.l.high;
 
-    v = (uint64_t)a1 * (uint64_t)b0;
-    add128(plow, phigh, v << 32, v >> 32);
+    c = (uint64_t)rl.l.high + rm.l.low + rn.l.low;
+    rl.l.high = c;
+    c >>= 32;
+    c = c + rm.l.high + rn.l.high + rh.l.low;
+    rh.l.low = c;
+    rh.l.high += (uint32_t)(c >> 32);
 
-    v = (uint64_t)a1 * (uint64_t)b1;
-    *phigh += v;
+    *plow = rl.ll;
+    *phigh = rh.ll;
 }
 
 /* Unsigned 64x64 -> 128 multiplication */
 void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
 {
     mul64(plow, phigh, a, b);
-#if defined(DEBUG_MULDIV)
-    printf("mulu64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n",
-           a, b, *phigh, *plow);
-#endif
 }
 
 /* Signed 64x64 -> 128 multiplication */
 void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
 {
-    int sa, sb;
+    uint64_t rh;
 
-    sa = (a < 0);
-    if (sa)
-        a = -a;
-    sb = (b < 0);
-    if (sb)
-        b = -b;
-    mul64(plow, phigh, a, b);
-    if (sa ^ sb) {
-        neg128(plow, phigh);
+    mul64(plow, &rh, a, b);
+
+    /* Adjust for signs.  */
+    if (b < 0) {
+        rh -= a;
     }
-#if defined(DEBUG_MULDIV)
-    printf("muls64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n",
-           a, b, *phigh, *plow);
-#endif
+    if (a < 0) {
+        rh -= b;
+    }
+    *phigh = rh;
 }
 #endif /* !CONFIG_INT128 */
-- 
cgit v1.2.1