From a7d00d4effb58889ac6df64f98ac50c9d1594149 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Thu, 14 Jul 2016 16:20:14 -0400 Subject: tcg/i386: Add support for fence Generate a 'lock orl $0,0(%esp)' instruction for ordering instead of mfence which has similar ordering semantics. Signed-off-by: Pranith Kumar Message-Id: <20160714202026.9727-3-bobby.prani@gmail.com> Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.inc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tcg') diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index e30a122f19..cf7536bd52 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -686,6 +686,18 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) } } +static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + /* Given the strength of x86 memory ordering, we only need care for + store-load ordering. Experimentally, "lock orl $0,0(%esp)" is + faster than "mfence", so don't bother with the sse insn. */ + if (a0 & TCG_MO_ST_LD) { + tcg_out8(s, 0xf0); + tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0); + tcg_out8(s, 0); + } +} + static inline void tcg_out_push(TCGContext *s, int reg) { tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); @@ -2131,6 +2143,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_mb: + tcg_out_mb(s, args[0]); + break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ @@ -2196,6 +2211,8 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_mb, { } }, + #if TCG_TARGET_REG_BITS == 32 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, -- cgit v1.2.1