From 619f90ba62e27c674b1a9af8c0ae68eef8d64a92 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Sep 2013 10:22:09 +0200 Subject: tcg-ppc: use new return-argument ld/st helpers These use a 32-bit load-of-immediate to save a mflr+addi+mtlr sequence. Tested with a Windows 98 guest (pretty much the most recent thing I could run on my PPC machine) and kvm-unit-tests's sieve.flat. The speedup for sieve.flat is as high as 10% for qemu-system-i386, 25% (no kidding) for qemu-system-x86_64 on my PowerBook G4. Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'tcg') diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 204ffbe5fb..24a8621fe1 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -550,22 +550,24 @@ static void add_qemu_ldst_label (TCGContext *s, label->label_ptr[0] = label_ptr; } -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, + helper_ret_ldub_mmu, + helper_ret_lduw_mmu, + helper_ret_ldul_mmu, + helper_ret_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, + helper_ret_stb_mmu, + helper_ret_stw_mmu, + helper_ret_stl_mmu, + helper_ret_stq_mmu, }; static void *ld_trampolines[4]; @@ -860,9 +862,9 @@ static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label) tcg_out_mov (s, TCG_TYPE_I32, ir++, 
label->addrhi_reg); tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); #endif - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); + tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index); + tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr); tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]); - tcg_out32 (s, (tcg_target_long) raddr); switch (opc) { case 0|4: tcg_out32 (s, EXTSB | RA (data_reg) | RS (3)); @@ -954,10 +956,10 @@ static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label) } ir++; - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); - tcg_out_b (s, LK, (tcg_target_long) st_trampolines[opc]); - tcg_out32 (s, (tcg_target_long) raddr); - tcg_out_b (s, 0, (tcg_target_long) raddr); + tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index); + tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr); + tcg_out32 (s, MTSPR | RS (ir) | LR); + tcg_out_b (s, 0, (tcg_target_long) st_trampolines[opc]); } void tcg_out_tb_finalize(TCGContext *s) @@ -981,9 +983,6 @@ void tcg_out_tb_finalize(TCGContext *s) #ifdef CONFIG_SOFTMMU static void emit_ldst_trampoline (TCGContext *s, const void *ptr) { - tcg_out32 (s, MFSPR | RT (3) | LR); - tcg_out32 (s, ADDI | RT (3) | RA (3) | 4); - tcg_out32 (s, MTSPR | RS (3) | LR); tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0); tcg_out_call (s, (tcg_target_long) ptr, 1, 0); } -- cgit v1.2.1