From 7f6bdc431a5c567fca0130d79c8b14f531a0eb14 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Wed, 17 Jun 2015 12:53:13 +0200 Subject: target-sh4: add flags markups for FP helpers Most floating point helpers can trigger an exception, but don't change the globals. Mark these helpers as TCG_CALL_NO_WG. Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/helper.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'target-sh4') diff --git a/target-sh4/helper.h b/target-sh4/helper.h index c9bc407042..dce859caea 100644 --- a/target-sh4/helper.h +++ b/target-sh4/helper.h @@ -18,28 +18,28 @@ DEF_HELPER_2(ld_fpscr, void, env, i32) DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_NO_RWG_SE, f32, f32) DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_NO_RWG_SE, f64, f64) -DEF_HELPER_3(fadd_FT, f32, env, f32, f32) -DEF_HELPER_3(fadd_DT, f64, env, f64, f64) -DEF_HELPER_2(fcnvsd_FT_DT, f64, env, f32) -DEF_HELPER_2(fcnvds_DT_FT, f32, env, f64) +DEF_HELPER_FLAGS_3(fadd_FT, TCG_CALL_NO_WG, f32, env, f32, f32) +DEF_HELPER_FLAGS_3(fadd_DT, TCG_CALL_NO_WG, f64, env, f64, f64) +DEF_HELPER_FLAGS_2(fcnvsd_FT_DT, TCG_CALL_NO_WG, f64, env, f32) +DEF_HELPER_FLAGS_2(fcnvds_DT_FT, TCG_CALL_NO_WG, f32, env, f64) DEF_HELPER_3(fcmp_eq_FT, void, env, f32, f32) DEF_HELPER_3(fcmp_eq_DT, void, env, f64, f64) DEF_HELPER_3(fcmp_gt_FT, void, env, f32, f32) DEF_HELPER_3(fcmp_gt_DT, void, env, f64, f64) -DEF_HELPER_3(fdiv_FT, f32, env, f32, f32) -DEF_HELPER_3(fdiv_DT, f64, env, f64, f64) -DEF_HELPER_2(float_FT, f32, env, i32) -DEF_HELPER_2(float_DT, f64, env, i32) -DEF_HELPER_4(fmac_FT, f32, env, f32, f32, f32) -DEF_HELPER_3(fmul_FT, f32, env, f32, f32) -DEF_HELPER_3(fmul_DT, f64, env, f64, f64) +DEF_HELPER_FLAGS_3(fdiv_FT, TCG_CALL_NO_WG, f32, env, f32, f32) +DEF_HELPER_FLAGS_3(fdiv_DT, TCG_CALL_NO_WG, f64, env, f64, f64) +DEF_HELPER_FLAGS_2(float_FT, TCG_CALL_NO_WG, f32, env, i32) +DEF_HELPER_FLAGS_2(float_DT, TCG_CALL_NO_WG, f64, env, i32) 
+DEF_HELPER_FLAGS_4(fmac_FT, TCG_CALL_NO_WG, f32, env, f32, f32, f32) +DEF_HELPER_FLAGS_3(fmul_FT, TCG_CALL_NO_WG, f32, env, f32, f32) +DEF_HELPER_FLAGS_3(fmul_DT, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_NO_RWG_SE, f32, f32) -DEF_HELPER_3(fsub_FT, f32, env, f32, f32) -DEF_HELPER_3(fsub_DT, f64, env, f64, f64) -DEF_HELPER_2(fsqrt_FT, f32, env, f32) -DEF_HELPER_2(fsqrt_DT, f64, env, f64) -DEF_HELPER_2(ftrc_FT, i32, env, f32) -DEF_HELPER_2(ftrc_DT, i32, env, f64) +DEF_HELPER_FLAGS_3(fsub_FT, TCG_CALL_NO_WG, f32, env, f32, f32) +DEF_HELPER_FLAGS_3(fsub_DT, TCG_CALL_NO_WG, f64, env, f64, f64) +DEF_HELPER_FLAGS_2(fsqrt_FT, TCG_CALL_NO_WG, f32, env, f32) +DEF_HELPER_FLAGS_2(fsqrt_DT, TCG_CALL_NO_WG, f64, env, f64) +DEF_HELPER_FLAGS_2(ftrc_FT, TCG_CALL_NO_WG, i32, env, f32) +DEF_HELPER_FLAGS_2(ftrc_DT, TCG_CALL_NO_WG, i32, env, f64) DEF_HELPER_3(fipr, void, env, i32, i32) DEF_HELPER_2(ftrv, void, env, i32) -- cgit v1.2.1 From 218fd7301f88df440da3e16b9cfca000cd2fe111 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 5 Jul 2015 17:05:08 +0200 Subject: target-sh4: use deposit in swap.b instruction Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'target-sh4') diff --git a/target-sh4/translate.c b/target-sh4/translate.c index be0cb321cf..50043cf5b5 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -612,15 +612,11 @@ static void _decode_opc(DisasContext * ctx) return; case 0x6008: /* swap.b Rm,Rn */ { - TCGv high, low; - high = tcg_temp_new(); - tcg_gen_andi_i32(high, REG(B7_4), 0xffff0000); - low = tcg_temp_new(); + TCGv low = tcg_temp_new();; tcg_gen_ext16u_i32(low, REG(B7_4)); tcg_gen_bswap16_i32(low, low); - tcg_gen_or_i32(REG(B11_8), high, low); + tcg_gen_deposit_i32(REG(B11_8), REG(B7_4), low, 0, 16); tcg_temp_free(low); - tcg_temp_free(high); } return; case 0x6009: /* swap.w Rm,Rn */ -- cgit v1.2.1 From 
eb6ca2b4a69325e95526bc0f2897791df04e44dc Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 5 Jul 2015 18:50:09 +0200 Subject: target-sh4: improve cmp/str instruction Instead of testing bytes one by one, we can use the following trick from https://graphics.stanford.edu/~seander/bithacks.html: haszero(v) = (v - 0x01010101) & ~v & 0x80808080 The subexpression v - 0x01010101, evaluates to a high bit set in any byte whenever the corresponding byte in v is zero or greater than 0x80. The sub-expression ~v & 0x80808080 evaluates to high bits set in bytes where the byte of v doesn't have its high bit set (so the byte was less than 0x80). Finally, by ANDing these two sub-expressions the result is the high bits set where the bytes in v were zero, since the high bits set due to a value greater than 0x80 in the first sub-expression are masked off by the second. Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'target-sh4') diff --git a/target-sh4/translate.c b/target-sh4/translate.c index 50043cf5b5..ca6ef5aca7 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -688,18 +688,11 @@ static void _decode_opc(DisasContext * ctx) { TCGv cmp1 = tcg_temp_new(); TCGv cmp2 = tcg_temp_new(); - tcg_gen_xor_i32(cmp1, REG(B7_4), REG(B11_8)); - tcg_gen_andi_i32(cmp2, cmp1, 0xff000000); - tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, cmp2, 0); - tcg_gen_andi_i32(cmp2, cmp1, 0x00ff0000); - tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2); - tcg_gen_andi_i32(cmp2, cmp1, 0x0000ff00); - tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2); - tcg_gen_andi_i32(cmp2, cmp1, 0x000000ff); - tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2); + tcg_gen_xor_i32(cmp2, REG(B7_4), REG(B11_8)); + tcg_gen_subi_i32(cmp1, cmp2, 0x01010101); + 
tcg_gen_andc_i32(cmp1, cmp1, cmp2); + tcg_gen_andi_i32(cmp1, cmp1, 0x80808080); + tcg_gen_setcondi_i32(TCG_COND_NE, cpu_sr_t, cmp1, 0); tcg_temp_free(cmp2); tcg_temp_free(cmp1); } -- cgit v1.2.1 From 577601616dea10db10a716de1be448f8564076f4 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 5 Jul 2015 22:37:18 +0200 Subject: target-sh4: improve shld instruction The SH4 shld instruction can shift in both directions, depending on the sign of the shift. This is currently implemented using branches, which is not really efficient and prevents the optimizer from doing its job. In practice it is often used with a constant loaded in a register just before. Simplify the implementation by computing both the value shifted to the left and to the right, and then selecting the correct one with a movcond. Because a negative value can give a shift amount of up to 32, which is undefined, we shift the value in two steps. Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 48 ++++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) (limited to 'target-sh4') diff --git a/target-sh4/translate.c b/target-sh4/translate.c index ca6ef5aca7..c8dd3a71a0 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -867,32 +867,28 @@ static void _decode_opc(DisasContext * ctx) return; case 0x400d: /* shld Rm,Rn */ { - TCGLabel *label1 = gen_new_label(); - TCGLabel *label2 = gen_new_label(); - TCGLabel *label3 = gen_new_label(); - TCGv shift; - tcg_gen_brcondi_i32(TCG_COND_LT, REG(B7_4), 0, label1); - /* Rm positive, shift to the left */ - shift = tcg_temp_new(); - tcg_gen_andi_i32(shift, REG(B7_4), 0x1f); - tcg_gen_shl_i32(REG(B11_8), REG(B11_8), shift); - tcg_temp_free(shift); - tcg_gen_br(label3); - /* Rm negative, shift to the right */ - gen_set_label(label1); - shift = tcg_temp_new(); - tcg_gen_andi_i32(shift, REG(B7_4), 0x1f); - tcg_gen_brcondi_i32(TCG_COND_EQ, shift, 0, label2); - tcg_gen_not_i32(shift, 
REG(B7_4)); - tcg_gen_andi_i32(shift, shift, 0x1f); - tcg_gen_addi_i32(shift, shift, 1); - tcg_gen_shr_i32(REG(B11_8), REG(B11_8), shift); - tcg_temp_free(shift); - tcg_gen_br(label3); - /* Rm = -32 */ - gen_set_label(label2); - tcg_gen_movi_i32(REG(B11_8), 0); - gen_set_label(label3); + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + + tcg_gen_andi_i32(t0, REG(B7_4), 0x1f); + + /* positive case: shift to the left */ + tcg_gen_shl_i32(t1, REG(B11_8), t0); + + /* negative case: shift to the right in two steps to + correctly handle the -32 case */ + tcg_gen_xori_i32(t0, t0, 0x1f); + tcg_gen_shr_i32(t2, REG(B11_8), t0); + tcg_gen_shri_i32(t2, t2, 1); + + /* select between the two cases */ + tcg_gen_movi_i32(t0, 0); + tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); } return; case 0x3008: /* sub Rm,Rn */ -- cgit v1.2.1 From be654c83608eaba199ed45444debf2dd46a88fe6 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 5 Jul 2015 22:39:03 +0200 Subject: target-sh4: improve shad instruction The SH4 shad instruction can shift in both directions, depending on the sign of the shift. This is currently implemented using branches, which is not really efficient and prevents the optimizer from doing its job. In practice it is often used with a constant loaded in a register just before. Simplify the implementation by computing both the value shifted to the left and to the right, and then selecting the correct one with a movcond. Because a negative value can give a shift amount of up to 32, which is undefined, we shift the value in two steps. 
Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 53 +++++++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 31 deletions(-) (limited to 'target-sh4') diff --git a/target-sh4/translate.c b/target-sh4/translate.c index c8dd3a71a0..724c0e7106 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -832,37 +832,28 @@ static void _decode_opc(DisasContext * ctx) return; case 0x400c: /* shad Rm,Rn */ { - TCGLabel *label1 = gen_new_label(); - TCGLabel *label2 = gen_new_label(); - TCGLabel *label3 = gen_new_label(); - TCGLabel *label4 = gen_new_label(); - TCGv shift; - tcg_gen_brcondi_i32(TCG_COND_LT, REG(B7_4), 0, label1); - /* Rm positive, shift to the left */ - shift = tcg_temp_new(); - tcg_gen_andi_i32(shift, REG(B7_4), 0x1f); - tcg_gen_shl_i32(REG(B11_8), REG(B11_8), shift); - tcg_temp_free(shift); - tcg_gen_br(label4); - /* Rm negative, shift to the right */ - gen_set_label(label1); - shift = tcg_temp_new(); - tcg_gen_andi_i32(shift, REG(B7_4), 0x1f); - tcg_gen_brcondi_i32(TCG_COND_EQ, shift, 0, label2); - tcg_gen_not_i32(shift, REG(B7_4)); - tcg_gen_andi_i32(shift, shift, 0x1f); - tcg_gen_addi_i32(shift, shift, 1); - tcg_gen_sar_i32(REG(B11_8), REG(B11_8), shift); - tcg_temp_free(shift); - tcg_gen_br(label4); - /* Rm = -32 */ - gen_set_label(label2); - tcg_gen_brcondi_i32(TCG_COND_LT, REG(B11_8), 0, label3); - tcg_gen_movi_i32(REG(B11_8), 0); - tcg_gen_br(label4); - gen_set_label(label3); - tcg_gen_movi_i32(REG(B11_8), 0xffffffff); - gen_set_label(label4); + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + + tcg_gen_andi_i32(t0, REG(B7_4), 0x1f); + + /* positive case: shift to the left */ + tcg_gen_shl_i32(t1, REG(B11_8), t0); + + /* negative case: shift to the right in two steps to + correctly handle the -32 case */ + tcg_gen_xori_i32(t0, t0, 0x1f); + tcg_gen_sar_i32(t2, REG(B11_8), t0); + tcg_gen_sari_i32(t2, t2, 1); + + /* select between the two 
cases */ + tcg_gen_movi_i32(t0, 0); + tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); } return; case 0x400d: /* shld Rm,Rn */ -- cgit v1.2.1