From 100b5e0170e86661aaf830869be930a1a201ed08 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Sat, 14 Sep 2013 15:57:22 -0700
Subject: tcg: Put target helper data into an array.

One call inside of a loop to tcg_register_helper instead of hundreds
of sequential calls.

Presumably more icache and branch prediction friendly; resulting binary
size mostly unchanged on x86_64, as we're trading 32-bit rip-relative
references in .text for full 64-bit pointers in .rodata.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/exec/def-helper.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/exec/def-helper.h b/include/exec/def-helper.h
index 022a9ceb6a..73d51f9cf5 100644
--- a/include/exec/def-helper.h
+++ b/include/exec/def-helper.h
@@ -240,8 +240,7 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
 #elif GEN_HELPER == 2
 /* Register helpers.  */
 
-#define DEF_HELPER_FLAGS_0(name, flags, ret) \
-tcg_register_helper(HELPER(name), #name);
+#define DEF_HELPER_FLAGS_0(name, flags, ret)  { HELPER(name), #name },
 
 #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
 DEF_HELPER_FLAGS_0(name, flags, ret)
-- 
cgit v1.2.1


From 023261ef851b22a04f6c5d76da870051031757a6 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 1 Oct 2013 13:47:38 -0700
Subject: tcg-aarch64: Update to helper_ret_*_mmu routines

A minimal update to use the new helpers with the return address argument.

Tested-by: Claudio Fontana <claudio.fontana@linaro.org>
Reviewed-by: Claudio Fontana <claudio.fontana@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/exec/exec-all.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 8dd15948d8..3ce80d1587 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -320,24 +320,6 @@ extern uintptr_t tci_tb_ptr;
 
 #define GETPC()  (GETRA() - GETPC_ADJ)
 
-/* The LDST optimizations splits code generation into fast and slow path.
-   In some implementations, we pass the "logical" return address manually;
-   in others, we must infer the logical return from the true return.  */
-#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
-# if defined(__aarch64__)
-#  define GETRA_LDST(RA)  tcg_getra_ldst(RA)
-static inline uintptr_t tcg_getra_ldst(uintptr_t ra)
-{
-    int32_t b;
-    ra += 4;                    /* skip one instruction */
-    b = *(int32_t *)ra;         /* load the branch insn */
-    b = (b << 6) >> (6 - 2);    /* extract the displacement */
-    ra += b;                    /* apply the displacement  */
-    return ra;
-}
-# endif
-#endif /* CONFIG_QEMU_LDST_OPTIMIZATION */
-
 /* ??? Delete these once they are no longer used.  */
 bool is_tcg_gen_code(uintptr_t pc_ptr);
 #ifdef GETRA_LDST
-- 
cgit v1.2.1


From dbdbe0cd3124a3e9afa2d1c11da7c6476097bb9d Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 3 Sep 2013 14:24:58 -0700
Subject: exec: Delete is_tcg_gen_code and GETRA_EXT

All implementations now boil down to GETRA.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/exec/exec-all.h         | 12 ------------
 include/exec/softmmu_template.h |  4 ++--
 2 files changed, 2 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 3ce80d1587..6ad05cacf5 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -320,18 +320,6 @@ extern uintptr_t tci_tb_ptr;
 
 #define GETPC()  (GETRA() - GETPC_ADJ)
 
-/* ??? Delete these once they are no longer used.  */
-bool is_tcg_gen_code(uintptr_t pc_ptr);
-#ifdef GETRA_LDST
-# define GETRA_EXT()  tcg_getra_ext(GETRA())
-static inline uintptr_t tcg_getra_ext(uintptr_t ra)
-{
-    return is_tcg_gen_code(ra) ? GETRA_LDST(ra) : ra;
-}
-#else
-# define GETRA_EXT()  GETRA()
-#endif
-
 #if !defined(CONFIG_USER_ONLY)
 
 void phys_mem_set_alloc(void *(*alloc)(ram_addr_t));
diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
index 5bbc56afd5..5edac51709 100644
--- a/include/exec/softmmu_template.h
+++ b/include/exec/softmmu_template.h
@@ -172,7 +172,7 @@ glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
                                          int mmu_idx)
 {
     return glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(env, addr, mmu_idx,
-                                                        GETRA_EXT());
+                                                         GETRA());
 }
 
 #ifndef SOFTMMU_CODE_ACCESS
@@ -285,7 +285,7 @@ glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
                                          DATA_TYPE val, int mmu_idx)
 {
     glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, val, mmu_idx,
-                                                 GETRA_EXT());
+                                                 GETRA());
 }
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
-- 
cgit v1.2.1


From 867b3201a333e35a91bea9febc66cce689a765c4 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Wed, 4 Sep 2013 11:45:20 -0700
Subject: exec: Add both big- and little-endian memory helpers

Step three in the transition: helpers not tied to the target
"default" endianness.  To be used when the guest uses a memory
operation with non-default endianness.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/exec/softmmu_template.h | 286 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 251 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
index 5edac51709..c6a544069c 100644
--- a/include/exec/softmmu_template.h
+++ b/include/exec/softmmu_template.h
@@ -70,6 +70,48 @@
 #define ADDR_READ addr_read
 #endif
 
+#if DATA_SIZE == 8
+# define BSWAP(X)  bswap64(X)
+#elif DATA_SIZE == 4
+# define BSWAP(X)  bswap32(X)
+#elif DATA_SIZE == 2
+# define BSWAP(X)  bswap16(X)
+#else
+# define BSWAP(X)  (X)
+#endif
+
+#ifdef TARGET_WORDS_BIGENDIAN
+# define TGT_BE(X)  (X)
+# define TGT_LE(X)  BSWAP(X)
+#else
+# define TGT_BE(X)  BSWAP(X)
+# define TGT_LE(X)  (X)
+#endif
+
+#if DATA_SIZE == 1
+# define helper_le_ld_name  glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
+# define helper_be_ld_name  helper_le_ld_name
+# define helper_le_lds_name glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX)
+# define helper_be_lds_name helper_le_lds_name
+# define helper_le_st_name  glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
+# define helper_be_st_name  helper_le_st_name
+#else
+# define helper_le_ld_name  glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
+# define helper_be_ld_name  glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
+# define helper_le_lds_name glue(glue(helper_le_ld, SSUFFIX), MMUSUFFIX)
+# define helper_be_lds_name glue(glue(helper_be_ld, SSUFFIX), MMUSUFFIX)
+# define helper_le_st_name  glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
+# define helper_be_st_name  glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
+#endif
+
+#ifdef TARGET_WORDS_BIGENDIAN
+# define helper_te_ld_name  helper_be_ld_name
+# define helper_te_st_name  helper_be_st_name
+#else
+# define helper_te_ld_name  helper_le_ld_name
+# define helper_te_st_name  helper_le_st_name
+#endif
+
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               hwaddr physaddr,
                                               target_ulong addr,
@@ -89,18 +131,16 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
     return val;
 }
 
-/* handle all cases except unaligned access which span two pages */
 #ifdef SOFTMMU_CODE_ACCESS
-static
+static __attribute__((unused))
 #endif
-WORD_TYPE
-glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env,
-                                              target_ulong addr, int mmu_idx,
-                                              uintptr_t retaddr)
+WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
+                            uintptr_t retaddr)
 {
     int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
     uintptr_t haddr;
+    DATA_TYPE res;
 
     /* Adjust the given return address.  */
     retaddr -= GETPC_ADJ;
@@ -124,7 +164,12 @@ glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env,
             goto do_unaligned_access;
         }
         ioaddr = env->iotlb[mmu_idx][index];
-        return glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
+        res = TGT_LE(res);
+        return res;
     }
 
     /* Handle slow unaligned access (it spans two pages or IO).  */
@@ -132,7 +177,7 @@ glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env,
         && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
                     >= TARGET_PAGE_SIZE)) {
         target_ulong addr1, addr2;
-        DATA_TYPE res1, res2, res;
+        DATA_TYPE res1, res2;
         unsigned shift;
     do_unaligned_access:
 #ifdef ALIGNED_ONLY
@@ -142,16 +187,94 @@ glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env,
         addr2 = addr1 + DATA_SIZE;
         /* Note the adjustment at the beginning of the function.
            Undo that for the recursion.  */
-        res1 = glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
-            (env, addr1, mmu_idx, retaddr + GETPC_ADJ);
-        res2 = glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
-            (env, addr2, mmu_idx, retaddr + GETPC_ADJ);
+        res1 = helper_le_ld_name(env, addr1, mmu_idx, retaddr + GETPC_ADJ);
+        res2 = helper_le_ld_name(env, addr2, mmu_idx, retaddr + GETPC_ADJ);
         shift = (addr & (DATA_SIZE - 1)) * 8;
-#ifdef TARGET_WORDS_BIGENDIAN
-        res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
-#else
+
+        /* Little-endian combine.  */
         res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
+        return res;
+    }
+
+    /* Handle aligned access or unaligned access in the same page.  */
+#ifdef ALIGNED_ONLY
+    if ((addr & (DATA_SIZE - 1)) != 0) {
+        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+    }
+#endif
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+#if DATA_SIZE == 1
+    res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr);
+#else
+    res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr);
+#endif
+    return res;
+}
+
+#if DATA_SIZE > 1
+#ifdef SOFTMMU_CODE_ACCESS
+static __attribute__((unused))
+#endif
+WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
+                            uintptr_t retaddr)
+{
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    uintptr_t haddr;
+    DATA_TYPE res;
+
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+#ifdef ALIGNED_ONLY
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+        }
+#endif
+        tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+        tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        hwaddr ioaddr;
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+        ioaddr = env->iotlb[mmu_idx][index];
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
+        res = TGT_BE(res);
+        return res;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                    >= TARGET_PAGE_SIZE)) {
+        target_ulong addr1, addr2;
+        DATA_TYPE res1, res2;
+        unsigned shift;
+    do_unaligned_access:
+#ifdef ALIGNED_ONLY
+        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
+        addr1 = addr & ~(DATA_SIZE - 1);
+        addr2 = addr1 + DATA_SIZE;
+        /* Note the adjustment at the beginning of the function.
+           Undo that for the recursion.  */
+        res1 = helper_be_ld_name(env, addr1, mmu_idx, retaddr + GETPC_ADJ);
+        res2 = helper_be_ld_name(env, addr2, mmu_idx, retaddr + GETPC_ADJ);
+        shift = (addr & (DATA_SIZE - 1)) * 8;
+
+        /* Big-endian combine.  */
+        res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
         return res;
     }
 
@@ -163,16 +286,16 @@ glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env,
 #endif
 
     haddr = addr + env->tlb_table[mmu_idx][index].addend;
-    /* Note that ldl_raw is defined with type "int".  */
-    return (DATA_TYPE) glue(glue(ld, LSUFFIX), _raw)((uint8_t *)haddr);
+    res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr);
+    return res;
 }
+#endif /* DATA_SIZE > 1 */
 
 DATA_TYPE
 glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
                                          int mmu_idx)
 {
-    return glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(env, addr, mmu_idx,
-                                                         GETRA());
+    return helper_te_ld_name (env, addr, mmu_idx, GETRA());
 }
 
 #ifndef SOFTMMU_CODE_ACCESS
@@ -180,14 +303,19 @@ glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
 /* Provide signed versions of the load routines as well.  We can of course
    avoid this for 64-bit data, or for 32-bit data on 32-bit host.  */
 #if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
-WORD_TYPE
-glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX)(CPUArchState *env,
-                                              target_ulong addr, int mmu_idx,
-                                              uintptr_t retaddr)
+WORD_TYPE helper_le_lds_name(CPUArchState *env, target_ulong addr,
+                             int mmu_idx, uintptr_t retaddr)
+{
+    return (SDATA_TYPE)helper_le_ld_name(env, addr, mmu_idx, retaddr);
+}
+
+# if DATA_SIZE > 1
+WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr,
+                             int mmu_idx, uintptr_t retaddr)
 {
-    return (SDATA_TYPE) glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
-        (env, addr, mmu_idx, retaddr);
+    return (SDATA_TYPE)helper_be_ld_name(env, addr, mmu_idx, retaddr);
 }
+# endif
 #endif
 
 static inline void glue(io_write, SUFFIX)(CPUArchState *env,
@@ -208,10 +336,8 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
     io_mem_write(mr, physaddr, val, 1 << SHIFT);
 }
 
-void
-glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
-                                             target_ulong addr, DATA_TYPE val,
-                                             int mmu_idx, uintptr_t retaddr)
+void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
+                       int mmu_idx, uintptr_t retaddr)
 {
     int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
@@ -239,6 +365,10 @@ glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
             goto do_unaligned_access;
         }
         ioaddr = env->iotlb[mmu_idx][index];
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        val = TGT_LE(val);
         glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
         return;
     }
@@ -256,11 +386,84 @@ glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
         /* Note: relies on the fact that tlb_fill() does not remove the
          * previous page from the TLB cache.  */
         for (i = DATA_SIZE - 1; i >= 0; i--) {
-#ifdef TARGET_WORDS_BIGENDIAN
-            uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
-#else
+            /* Little-endian extract.  */
             uint8_t val8 = val >> (i * 8);
+            /* Note the adjustment at the beginning of the function.
+               Undo that for the recursion.  */
+            glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+                                            mmu_idx, retaddr + GETPC_ADJ);
+        }
+        return;
+    }
+
+    /* Handle aligned access or unaligned access in the same page.  */
+#ifdef ALIGNED_ONLY
+    if ((addr & (DATA_SIZE - 1)) != 0) {
+        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+    }
+#endif
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+#if DATA_SIZE == 1
+    glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
+#else
+    glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
 #endif
+}
+
+#if DATA_SIZE > 1
+void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
+                       int mmu_idx, uintptr_t retaddr)
+{
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    uintptr_t haddr;
+
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+#ifdef ALIGNED_ONLY
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+        }
+#endif
+        tlb_fill(env, addr, 1, mmu_idx, retaddr);
+        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    }
+
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        hwaddr ioaddr;
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+        ioaddr = env->iotlb[mmu_idx][index];
+
+        /* ??? Note that the io helpers always read data in the target
+           byte ordering.  We should push the LE/BE request down into io.  */
+        val = TGT_BE(val);
+        glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
+        return;
+    }
+
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                     >= TARGET_PAGE_SIZE)) {
+        int i;
+    do_unaligned_access:
+#ifdef ALIGNED_ONLY
+        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+#endif
+        /* XXX: not efficient, but simple */
+        /* Note: relies on the fact that tlb_fill() does not remove the
+         * previous page from the TLB cache.  */
+        for (i = DATA_SIZE - 1; i >= 0; i--) {
+            /* Big-endian extract.  */
+            uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
             /* Note the adjustment at the beginning of the function.
                Undo that for the recursion.  */
             glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
@@ -277,15 +480,15 @@ glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
 #endif
 
     haddr = addr + env->tlb_table[mmu_idx][index].addend;
-    glue(glue(st, SUFFIX), _raw)((uint8_t *)haddr, val);
+    glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
 }
+#endif /* DATA_SIZE > 1 */
 
 void
 glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
                                          DATA_TYPE val, int mmu_idx)
 {
-    glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, val, mmu_idx,
-                                                 GETRA());
+    helper_te_st_name(env, addr, val, mmu_idx, GETRA());
 }
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
@@ -301,3 +504,16 @@ glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
 #undef SDATA_TYPE
 #undef USUFFIX
 #undef SSUFFIX
+#undef BSWAP
+#undef TGT_BE
+#undef TGT_LE
+#undef CPU_BE
+#undef CPU_LE
+#undef helper_le_ld_name
+#undef helper_be_ld_name
+#undef helper_le_lds_name
+#undef helper_be_lds_name
+#undef helper_le_st_name
+#undef helper_be_st_name
+#undef helper_te_ld_name
+#undef helper_te_st_name
-- 
cgit v1.2.1