From 100b5e0170e86661aaf830869be930a1a201ed08 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 14 Sep 2013 15:57:22 -0700 Subject: tcg: Put target helper data into an array. One call inside of a loop to tcg_register_helper instead of hundreds of sequential calls. Presumably more icache and branch prediction friendly; resulting binary size mostly unchanged on x86_64, as we're trading 32-bit rip-relative references in .text for full 64-bit pointers in .rodata. Signed-off-by: Richard Henderson --- include/exec/def-helper.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/exec/def-helper.h b/include/exec/def-helper.h index 022a9ceb6a..73d51f9cf5 100644 --- a/include/exec/def-helper.h +++ b/include/exec/def-helper.h @@ -240,8 +240,7 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ #elif GEN_HELPER == 2 /* Register helpers. */ -#define DEF_HELPER_FLAGS_0(name, flags, ret) \ -tcg_register_helper(HELPER(name), #name); +#define DEF_HELPER_FLAGS_0(name, flags, ret) { HELPER(name), #name }, #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \ DEF_HELPER_FLAGS_0(name, flags, ret) -- cgit v1.2.1 From 023261ef851b22a04f6c5d76da870051031757a6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 1 Oct 2013 13:47:38 -0700 Subject: tcg-aarch64: Update to helper_ret_*_mmu routines A minimal update to use the new helpers with the return address argument. Tested-by: Claudio Fontana Reviewed-by: Claudio Fontana Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 8dd15948d8..3ce80d1587 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -320,24 +320,6 @@ extern uintptr_t tci_tb_ptr; #define GETPC() (GETRA() - GETPC_ADJ) -/* The LDST optimizations splits code generation into fast and slow path. - In some implementations, we pass the "logical" return address manually; - in others, we must infer the logical return from the true return. */ -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -# if defined(__aarch64__) -# define GETRA_LDST(RA) tcg_getra_ldst(RA) -static inline uintptr_t tcg_getra_ldst(uintptr_t ra) -{ - int32_t b; - ra += 4; /* skip one instruction */ - b = *(int32_t *)ra; /* load the branch insn */ - b = (b << 6) >> (6 - 2); /* extract the displacement */ - ra += b; /* apply the displacement */ - return ra; -} -# endif -#endif /* CONFIG_QEMU_LDST_OPTIMIZATION */ - /* ??? Delete these once they are no longer used. */ bool is_tcg_gen_code(uintptr_t pc_ptr); #ifdef GETRA_LDST -- cgit v1.2.1 From dbdbe0cd3124a3e9afa2d1c11da7c6476097bb9d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Sep 2013 14:24:58 -0700 Subject: exec: Delete is_tcg_gen_code and GETRA_EXT All implementations now boil down to GETRA. Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 12 ------------ include/exec/softmmu_template.h | 4 ++-- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 3ce80d1587..6ad05cacf5 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -320,18 +320,6 @@ extern uintptr_t tci_tb_ptr; #define GETPC() (GETRA() - GETPC_ADJ) -/* ??? Delete these once they are no longer used. */ -bool is_tcg_gen_code(uintptr_t pc_ptr); -#ifdef GETRA_LDST -# define GETRA_EXT() tcg_getra_ext(GETRA()) -static inline uintptr_t tcg_getra_ext(uintptr_t ra) -{ - return is_tcg_gen_code(ra) ? GETRA_LDST(ra) : ra; -} -#else -# define GETRA_EXT() GETRA() -#endif - #if !defined(CONFIG_USER_ONLY) void phys_mem_set_alloc(void *(*alloc)(ram_addr_t)); diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h index 5bbc56afd5..5edac51709 100644 --- a/include/exec/softmmu_template.h +++ b/include/exec/softmmu_template.h @@ -172,7 +172,7 @@ glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr, int mmu_idx) { return glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(env, addr, mmu_idx, - GETRA_EXT()); + GETRA()); } #ifndef SOFTMMU_CODE_ACCESS @@ -285,7 +285,7 @@ glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr, DATA_TYPE val, int mmu_idx) { glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, val, mmu_idx, - GETRA_EXT()); + GETRA()); } #endif /* !defined(SOFTMMU_CODE_ACCESS) */ -- cgit v1.2.1 From 867b3201a333e35a91bea9febc66cce689a765c4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 4 Sep 2013 11:45:20 -0700 Subject: exec: Add both big- and little-endian memory helpers Step three in the transition: helpers not tied to the target "default" endianness. To be used when the guest uses a memory operation with non-default endianness. Signed-off-by: Richard Henderson --- include/exec/softmmu_template.h | 286 +++++++++++++++++++++++++++++++++++----- 1 file changed, 251 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h index 5edac51709..c6a544069c 100644 --- a/include/exec/softmmu_template.h +++ b/include/exec/softmmu_template.h @@ -70,6 +70,48 @@ #define ADDR_READ addr_read #endif +#if DATA_SIZE == 8 +# define BSWAP(X) bswap64(X) +#elif DATA_SIZE == 4 +# define BSWAP(X) bswap32(X) +#elif DATA_SIZE == 2 +# define BSWAP(X) bswap16(X) +#else +# define BSWAP(X) (X) +#endif + +#ifdef TARGET_WORDS_BIGENDIAN +# define TGT_BE(X) (X) +# define TGT_LE(X) BSWAP(X) +#else +# define TGT_BE(X) BSWAP(X) +# define TGT_LE(X) (X) +#endif + +#if DATA_SIZE == 1 +# define helper_le_ld_name glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) +# define helper_be_ld_name helper_le_ld_name +# define helper_le_lds_name glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX) +# define helper_be_lds_name helper_le_lds_name +# define helper_le_st_name glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX) +# define helper_be_st_name helper_le_st_name +#else +# define helper_le_ld_name glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX) +# define helper_be_ld_name glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX) +# define helper_le_lds_name glue(glue(helper_le_ld, SSUFFIX), MMUSUFFIX) +# define helper_be_lds_name glue(glue(helper_be_ld, SSUFFIX), MMUSUFFIX) +# define helper_le_st_name glue(glue(helper_le_st, SUFFIX), MMUSUFFIX) +# define helper_be_st_name glue(glue(helper_be_st, SUFFIX), MMUSUFFIX) +#endif + +#ifdef TARGET_WORDS_BIGENDIAN +# define helper_te_ld_name helper_be_ld_name +# define helper_te_st_name helper_be_st_name +#else +# define helper_te_ld_name helper_le_ld_name +# define helper_te_st_name helper_le_st_name +#endif + static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, hwaddr physaddr, target_ulong addr, @@ -89,18 +131,16 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, return val; } -/* handle all cases except unaligned access which span two pages */ #ifdef SOFTMMU_CODE_ACCESS -static +static __attribute__((unused)) #endif -WORD_TYPE -glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env, - target_ulong addr, int mmu_idx, - uintptr_t retaddr) +WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx, + uintptr_t retaddr) { int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; uintptr_t haddr; + DATA_TYPE res; /* Adjust the given return address. */ retaddr -= GETPC_ADJ; @@ -124,7 +164,12 @@ glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env, goto do_unaligned_access; } ioaddr = env->iotlb[mmu_idx][index]; - return glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr); + + /* ??? Note that the io helpers always read data in the target + byte ordering. We should push the LE/BE request down into io. */ + res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr); + res = TGT_LE(res); + return res; } /* Handle slow unaligned access (it spans two pages or IO). */ @@ -132,7 +177,7 @@ glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env, && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 >= TARGET_PAGE_SIZE)) { target_ulong addr1, addr2; - DATA_TYPE res1, res2, res; + DATA_TYPE res1, res2; unsigned shift; do_unaligned_access: #ifdef ALIGNED_ONLY @@ -142,16 +187,94 @@ glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env, addr2 = addr1 + DATA_SIZE; /* Note the adjustment at the beginning of the function. Undo that for the recursion. */ - res1 = glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) - (env, addr1, mmu_idx, retaddr + GETPC_ADJ); - res2 = glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) - (env, addr2, mmu_idx, retaddr + GETPC_ADJ); + res1 = helper_le_ld_name(env, addr1, mmu_idx, retaddr + GETPC_ADJ); + res2 = helper_le_ld_name(env, addr2, mmu_idx, retaddr + GETPC_ADJ); shift = (addr & (DATA_SIZE - 1)) * 8; -#ifdef TARGET_WORDS_BIGENDIAN - res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift)); -#else + + /* Little-endian combine. */ res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift)); + return res; + } + + /* Handle aligned access or unaligned access in the same page. */ +#ifdef ALIGNED_ONLY + if ((addr & (DATA_SIZE - 1)) != 0) { + do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); + } +#endif + + haddr = addr + env->tlb_table[mmu_idx][index].addend; +#if DATA_SIZE == 1 + res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); +#else + res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr); +#endif + return res; +} + +#if DATA_SIZE > 1 +#ifdef SOFTMMU_CODE_ACCESS +static __attribute__((unused)) +#endif +WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx, + uintptr_t retaddr) +{ + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + uintptr_t haddr; + DATA_TYPE res; + + /* Adjust the given return address. */ + retaddr -= GETPC_ADJ; + + /* If the TLB entry is for a different page, reload and try again. */ + if ((addr & TARGET_PAGE_MASK) + != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { +#ifdef ALIGNED_ONLY + if ((addr & (DATA_SIZE - 1)) != 0) { + do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); + } +#endif + tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); + tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + hwaddr ioaddr; + if ((addr & (DATA_SIZE - 1)) != 0) { + goto do_unaligned_access; + } + ioaddr = env->iotlb[mmu_idx][index]; + + /* ??? Note that the io helpers always read data in the target + byte ordering. We should push the LE/BE request down into io. */ + res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr); + res = TGT_BE(res); + return res; + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (DATA_SIZE > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 + >= TARGET_PAGE_SIZE)) { + target_ulong addr1, addr2; + DATA_TYPE res1, res2; + unsigned shift; + do_unaligned_access: +#ifdef ALIGNED_ONLY + do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); #endif + addr1 = addr & ~(DATA_SIZE - 1); + addr2 = addr1 + DATA_SIZE; + /* Note the adjustment at the beginning of the function. + Undo that for the recursion. */ + res1 = helper_be_ld_name(env, addr1, mmu_idx, retaddr + GETPC_ADJ); + res2 = helper_be_ld_name(env, addr2, mmu_idx, retaddr + GETPC_ADJ); + shift = (addr & (DATA_SIZE - 1)) * 8; + + /* Big-endian combine. */ + res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift)); return res; } @@ -163,16 +286,16 @@ glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env, #endif haddr = addr + env->tlb_table[mmu_idx][index].addend; - /* Note that ldl_raw is defined with type "int". */ - return (DATA_TYPE) glue(glue(ld, LSUFFIX), _raw)((uint8_t *)haddr); + res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); + return res; } +#endif /* DATA_SIZE > 1 */ DATA_TYPE glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr, int mmu_idx) { - return glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(env, addr, mmu_idx, - GETRA()); + return helper_te_ld_name (env, addr, mmu_idx, GETRA()); } #ifndef SOFTMMU_CODE_ACCESS @@ -180,14 +303,19 @@ glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr, /* Provide signed versions of the load routines as well. We can of course avoid this for 64-bit data, or for 32-bit data on 32-bit host. */ #if DATA_SIZE * 8 < TCG_TARGET_REG_BITS -WORD_TYPE -glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX)(CPUArchState *env, - target_ulong addr, int mmu_idx, - uintptr_t retaddr) +WORD_TYPE helper_le_lds_name(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr) +{ + return (SDATA_TYPE)helper_le_ld_name(env, addr, mmu_idx, retaddr); +} + +# if DATA_SIZE > 1 +WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr, + int mmu_idx, uintptr_t retaddr) { - return (SDATA_TYPE) glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) - (env, addr, mmu_idx, retaddr); + return (SDATA_TYPE)helper_be_ld_name(env, addr, mmu_idx, retaddr); } +# endif #endif static inline void glue(io_write, SUFFIX)(CPUArchState *env, @@ -208,10 +336,8 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, io_mem_write(mr, physaddr, val, 1 << SHIFT); } -void -glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, - target_ulong addr, DATA_TYPE val, - int mmu_idx, uintptr_t retaddr) +void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, + int mmu_idx, uintptr_t retaddr) { int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; @@ -239,6 +365,10 @@ glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, goto do_unaligned_access; } ioaddr = env->iotlb[mmu_idx][index]; + + /* ??? Note that the io helpers always read data in the target + byte ordering. We should push the LE/BE request down into io. */ + val = TGT_LE(val); glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr); return; } @@ -256,11 +386,84 @@ glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, /* Note: relies on the fact that tlb_fill() does not remove the * previous page from the TLB cache. */ for (i = DATA_SIZE - 1; i >= 0; i--) { -#ifdef TARGET_WORDS_BIGENDIAN - uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8)); -#else + /* Little-endian extract. */ uint8_t val8 = val >> (i * 8); + /* Note the adjustment at the beginning of the function. + Undo that for the recursion. */ + glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, + mmu_idx, retaddr + GETPC_ADJ); + } + return; + } + + /* Handle aligned access or unaligned access in the same page. */ +#ifdef ALIGNED_ONLY + if ((addr & (DATA_SIZE - 1)) != 0) { + do_unaligned_access(env, addr, 1, mmu_idx, retaddr); + } +#endif + + haddr = addr + env->tlb_table[mmu_idx][index].addend; +#if DATA_SIZE == 1 + glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); +#else + glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val); #endif +} + +#if DATA_SIZE > 1 +void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, + int mmu_idx, uintptr_t retaddr) +{ + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t haddr; + + /* Adjust the given return address. */ + retaddr -= GETPC_ADJ; + + /* If the TLB entry is for a different page, reload and try again. */ + if ((addr & TARGET_PAGE_MASK) + != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { +#ifdef ALIGNED_ONLY + if ((addr & (DATA_SIZE - 1)) != 0) { + do_unaligned_access(env, addr, 1, mmu_idx, retaddr); + } +#endif + tlb_fill(env, addr, 1, mmu_idx, retaddr); + tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + hwaddr ioaddr; + if ((addr & (DATA_SIZE - 1)) != 0) { + goto do_unaligned_access; + } + ioaddr = env->iotlb[mmu_idx][index]; + + /* ??? Note that the io helpers always read data in the target + byte ordering. We should push the LE/BE request down into io. */ + val = TGT_BE(val); + glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr); + return; + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (DATA_SIZE > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 + >= TARGET_PAGE_SIZE)) { + int i; + do_unaligned_access: +#ifdef ALIGNED_ONLY + do_unaligned_access(env, addr, 1, mmu_idx, retaddr); +#endif + /* XXX: not efficient, but simple */ + /* Note: relies on the fact that tlb_fill() does not remove the + * previous page from the TLB cache. */ + for (i = DATA_SIZE - 1; i >= 0; i--) { + /* Big-endian extract. */ + uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8)); /* Note the adjustment at the beginning of the function. Undo that for the recursion. */ glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, @@ -277,15 +480,15 @@ glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, #endif haddr = addr + env->tlb_table[mmu_idx][index].addend; - glue(glue(st, SUFFIX), _raw)((uint8_t *)haddr, val); + glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); } +#endif /* DATA_SIZE > 1 */ void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr, DATA_TYPE val, int mmu_idx) { - glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, val, mmu_idx, - GETRA()); + helper_te_st_name(env, addr, val, mmu_idx, GETRA()); } #endif /* !defined(SOFTMMU_CODE_ACCESS) */ @@ -301,3 +504,16 @@ glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr, #undef SDATA_TYPE #undef USUFFIX #undef SSUFFIX +#undef BSWAP +#undef TGT_BE +#undef TGT_LE +#undef CPU_BE +#undef CPU_LE +#undef helper_le_ld_name +#undef helper_be_ld_name +#undef helper_le_lds_name +#undef helper_be_lds_name +#undef helper_le_st_name +#undef helper_be_st_name +#undef helper_te_ld_name +#undef helper_te_st_name -- cgit v1.2.1