summary | refs | log | tree | commit | diff
path: root/tcg
diff options
context:
space:
mode:
Diffstat (limited to 'tcg')
-rw-r--r-- tcg/arm/tcg-target.c 2
-rw-r--r-- tcg/i386/tcg-target.c 156
-rw-r--r-- tcg/i386/tcg-target.h 9
-rw-r--r-- tcg/optimize.c 165
-rw-r--r-- tcg/tcg.c 2
5 files changed, 282 insertions, 52 deletions
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 82658a170c..c8884b31f4 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1866,7 +1866,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
SHIFT_IMM_LSL(0));
} else {
- tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[1], 0x20);
+ tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
SHIFT_REG_ROR(TCG_REG_TMP));
}
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 5d4cf9386e..fef1717418 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -88,6 +88,11 @@ static const int tcg_target_call_oarg_regs[] = {
#endif
};
+/* Constants we accept. */
+#define TCG_CT_CONST_S32 0x100
+#define TCG_CT_CONST_U32 0x200
+#define TCG_CT_CONST_I32 0x400
+
/* Registers used with L constraint, which are the first argument
registers on x86_64, and two random call clobbered registers on
i386. */
@@ -124,6 +129,16 @@ static bool have_movbe;
# define have_movbe 0
#endif
+/* We need this symbol in tcg-target.h, and we can't properly conditionalize
+ it there. Therefore we always define the variable. */
+bool have_bmi1;
+
+#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
+static bool have_bmi2;
+#else
+# define have_bmi2 0
+#endif
+
static uint8_t *tb_ret_addr;
static void patch_reloc(uint8_t *code_ptr, int type,
@@ -166,6 +181,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
break;
case 'c':
+ case_c:
ct->ct |= TCG_CT_REG;
tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
break;
@@ -194,6 +210,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set32(ct->u.regs, 0, 0xf);
break;
case 'r':
+ case_r:
ct->ct |= TCG_CT_REG;
if (TCG_TARGET_REG_BITS == 64) {
tcg_regset_set32(ct->u.regs, 0, 0xffff);
@@ -201,6 +218,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set32(ct->u.regs, 0, 0xff);
}
break;
+ case 'C':
+ /* With SHRX et al, we need not use ECX as shift count register. */
+ if (have_bmi2) {
+ goto case_r;
+ } else {
+ goto case_c;
+ }
/* qemu_ld/st address constraint */
case 'L':
@@ -220,6 +244,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
case 'Z':
ct->ct |= TCG_CT_CONST_U32;
break;
+ case 'I':
+ ct->ct |= TCG_CT_CONST_I32;
+ break;
default:
return -1;
@@ -243,6 +270,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
return 1;
}
+ if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
+ return 1;
+ }
return 0;
}
@@ -268,10 +298,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
# define P_REXB_RM 0
# define P_GS 0
#endif
+#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
+#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
#define OPC_ARITH_EvIz (0x81)
#define OPC_ARITH_EvIb (0x83)
#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
+#define OPC_ANDN (0xf2 | P_EXT38)
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP (0xc8 | P_EXT)
#define OPC_CALL_Jz (0xe8)
@@ -309,6 +342,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define OPC_SHIFT_1 (0xd1)
#define OPC_SHIFT_Ib (0xc1)
#define OPC_SHIFT_cl (0xd3)
+#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
+#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
+#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL (0x85)
#define OPC_XCHG_ax_r32 (0x90)
@@ -398,9 +434,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
rex = 0;
rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
- rex |= (r & 8) >> 1; /* REX.R */
- rex |= (x & 8) >> 2; /* REX.X */
- rex |= (rm & 8) >> 3; /* REX.B */
+ rex |= (r & 8) >> 1; /* REX.R */
+ rex |= (x & 8) >> 2; /* REX.X */
+ rex |= (rm & 8) >> 3; /* REX.B */
/* P_REXB_{R,RM} indicates that the given register is the low byte.
For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
@@ -449,6 +485,48 @@ static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
+static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
+{ /* Emit a VEX-prefixed instruction: prefix bytes, one opcode byte, then a ModRM byte built from r and rm; v is placed in the VEX.vvvv field. */
+ int tmp; /* prefix byte currently being assembled */
+
+ if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
+ /* Three byte VEX prefix: taken when opc carries P_REXW, P_EXT or P_EXT38, or when (rm & 8) is set. */
+ tcg_out8(s, 0xc4);
+
+ /* VEX.m-mmmm: 2 for P_EXT38, 1 for P_EXT; anything else aborts. */
+ if (opc & P_EXT38) {
+ tmp = 2;
+ } else if (opc & P_EXT) {
+ tmp = 1;
+ } else {
+ tcg_abort(); /* reached via P_REXW or (rm & 8) with neither P_EXT38 nor P_EXT set */
+ }
+ tmp |= 0x40; /* VEX.X: always set here; this function takes no index register */
+ tmp |= (r & 8 ? 0 : 0x80); /* VEX.R: set when bit 3 of r is clear */
+ tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B: set when bit 3 of rm is clear */
+ tcg_out8(s, tmp);
+
+ tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W; starts the final prefix byte */
+ } else {
+ /* Two byte VEX prefix. NOTE(review): only reached when P_REXW, P_EXT and P_EXT38 are all clear and (rm & 8) is zero; the two-byte form presumably implies the 0F opcode map -- confirm against the Intel SDM that an opcode without P_EXT is intended here. */
+ tcg_out8(s, 0xc5);
+
+ tmp = (r & 8 ? 0 : 0x80); /* VEX.R: set when bit 3 of r is clear */
+ }
+ /* VEX.pp: left as 0 when none of the three prefix flags below is set. */
+ if (opc & P_DATA16) {
+ tmp |= 1; /* 0x66 */
+ } else if (opc & P_SIMDF3) {
+ tmp |= 2; /* 0xf3 */
+ } else if (opc & P_SIMDF2) {
+ tmp |= 3; /* 0xf2 */
+ }
+ tmp |= (~v & 15) << 3; /* VEX.vvvv: low four bits of v, complemented */
+ tcg_out8(s, tmp);
+ tcg_out8(s, opc); /* opcode byte; presumably tcg_out8 keeps only the low 8 bits, dropping the P_* flags -- TODO confirm */
+ tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); /* ModRM byte: 0xc0, LOWREGMASK(r) shifted left by 3, LOWREGMASK(rm) in the low bits */
+}
+
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
We handle either RM and INDEX missing with a negative value. In 64-bit
mode for absolute addresses, ~RM is the size of the immediate operand
@@ -1638,7 +1716,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
- int c, rexw = 0;
+ int c, vexop, rexw = 0;
#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
@@ -1774,6 +1852,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
+ OP_32_64(andc):
+ if (const_args[2]) {
+ tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
+ args[0], args[1]);
+ tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
+ } else {
+ tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
+ }
+ break;
+
OP_32_64(mul):
if (const_args[2]) {
int32_t val;
@@ -1799,19 +1887,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
OP_32_64(shl):
c = SHIFT_SHL;
- goto gen_shift;
+ vexop = OPC_SHLX;
+ goto gen_shift_maybe_vex;
OP_32_64(shr):
c = SHIFT_SHR;
- goto gen_shift;
+ vexop = OPC_SHRX;
+ goto gen_shift_maybe_vex;
OP_32_64(sar):
c = SHIFT_SAR;
- goto gen_shift;
+ vexop = OPC_SARX;
+ goto gen_shift_maybe_vex;
OP_32_64(rotl):
c = SHIFT_ROL;
goto gen_shift;
OP_32_64(rotr):
c = SHIFT_ROR;
goto gen_shift;
+ gen_shift_maybe_vex:
+ if (have_bmi2 && !const_args[2]) {
+ tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
+ break;
+ }
+ /* FALLTHRU */
gen_shift:
if (const_args[2]) {
tcg_out_shifti(s, c + rexw, args[0], args[2]);
@@ -2002,10 +2099,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_and_i32, { "r", "0", "ri" } },
{ INDEX_op_or_i32, { "r", "0", "ri" } },
{ INDEX_op_xor_i32, { "r", "0", "ri" } },
+ { INDEX_op_andc_i32, { "r", "r", "ri" } },
- { INDEX_op_shl_i32, { "r", "0", "ci" } },
- { INDEX_op_shr_i32, { "r", "0", "ci" } },
- { INDEX_op_sar_i32, { "r", "0", "ci" } },
+ { INDEX_op_shl_i32, { "r", "0", "Ci" } },
+ { INDEX_op_shr_i32, { "r", "0", "Ci" } },
+ { INDEX_op_sar_i32, { "r", "0", "Ci" } },
{ INDEX_op_rotl_i32, { "r", "0", "ci" } },
{ INDEX_op_rotr_i32, { "r", "0", "ci" } },
@@ -2059,10 +2157,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_and_i64, { "r", "0", "reZ" } },
{ INDEX_op_or_i64, { "r", "0", "re" } },
{ INDEX_op_xor_i64, { "r", "0", "re" } },
+ { INDEX_op_andc_i64, { "r", "r", "rI" } },
- { INDEX_op_shl_i64, { "r", "0", "ci" } },
- { INDEX_op_shr_i64, { "r", "0", "ci" } },
- { INDEX_op_sar_i64, { "r", "0", "ci" } },
+ { INDEX_op_shl_i64, { "r", "0", "Ci" } },
+ { INDEX_op_shr_i64, { "r", "0", "Ci" } },
+ { INDEX_op_sar_i64, { "r", "0", "Ci" } },
{ INDEX_op_rotl_i64, { "r", "0", "ci" } },
{ INDEX_op_rotr_i64, { "r", "0", "ci" } },
@@ -2196,25 +2295,34 @@ static void tcg_target_qemu_prologue(TCGContext *s)
static void tcg_target_init(TCGContext *s)
{
-#if !(defined(have_cmov) && defined(have_movbe))
- {
- unsigned a, b, c, d;
- int ret = __get_cpuid(1, &a, &b, &c, &d);
+ unsigned a, b, c, d;
+ int max = __get_cpuid_max(0, 0);
-# ifndef have_cmov
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+#ifndef have_cmov
/* For 32-bit, 99% certainty that we're running on hardware that
supports cmov, but we still need to check. In case cmov is not
available, we'll use a small forward branch. */
- have_cmov = ret && (d & bit_CMOV);
-# endif
-
-# ifndef have_movbe
+ have_cmov = (d & bit_CMOV) != 0;
+#endif
+#ifndef have_movbe
/* MOVBE is only available on Intel Atom and Haswell CPUs, so we
need to probe for it. */
- have_movbe = ret && (c & bit_MOVBE);
-# endif
+ have_movbe = (c & bit_MOVBE) != 0;
+#endif
}
+
+ if (max >= 7) {
+ /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
+ __cpuid_count(7, 0, a, b, c, d);
+#ifdef bit_BMI
+ have_bmi1 = (b & bit_BMI) != 0;
#endif
+#ifndef have_bmi2
+ have_bmi2 = (b & bit_BMI2) != 0;
+#endif
+ }
if (TCG_TARGET_REG_BITS == 64) {
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 92c0fcd36d..bdf2222452 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -64,9 +64,6 @@ typedef enum {
TCG_REG_RDI = TCG_REG_EDI,
} TCGReg;
-#define TCG_CT_CONST_S32 0x100
-#define TCG_CT_CONST_U32 0x200
-
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_ESP
#define TCG_TARGET_STACK_ALIGN 16
@@ -76,6 +73,8 @@ typedef enum {
#define TCG_TARGET_CALL_STACK_OFFSET 0
#endif
+extern bool have_bmi1;
+
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
#define TCG_TARGET_HAS_rot_i32 1
@@ -87,7 +86,7 @@ typedef enum {
#define TCG_TARGET_HAS_bswap32_i32 1
#define TCG_TARGET_HAS_neg_i32 1
#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_andc_i32 have_bmi1
#define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
@@ -115,7 +114,7 @@ typedef enum {
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_neg_i64 1
#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_andc_i64 have_bmi1
#define TCG_TARGET_HAS_orc_i64 0
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 89e2d6a3b3..7777743e88 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -655,11 +655,68 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
}
break;
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(nand):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == -1) {
+ i = 1;
+ goto try_not;
+ }
+ break;
+ CASE_OP_32_64(nor):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0) {
+ i = 1;
+ goto try_not;
+ }
+ break;
+ CASE_OP_32_64(andc):
+ if (temps[args[2]].state != TCG_TEMP_CONST
+ && temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == -1) {
+ i = 2;
+ goto try_not;
+ }
+ break;
+ CASE_OP_32_64(orc):
+ CASE_OP_32_64(eqv):
+ if (temps[args[2]].state != TCG_TEMP_CONST
+ && temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == 0) {
+ i = 2;
+ goto try_not;
+ }
+ break;
+ try_not:
+ {
+ TCGOpcode not_op;
+ bool have_not;
+
+ if (def->flags & TCG_OPF_64BIT) {
+ not_op = INDEX_op_not_i64;
+ have_not = TCG_TARGET_HAS_not_i64;
+ } else {
+ not_op = INDEX_op_not_i32;
+ have_not = TCG_TARGET_HAS_not_i32;
+ }
+ if (!have_not) {
+ break;
+ }
+ s->gen_opc_buf[op_index] = not_op;
+ reset_temp(args[0]);
+ gen_args[0] = args[0];
+ gen_args[1] = args[i];
+ args += 3;
+ gen_args += 2;
+ continue;
+ }
default:
break;
}
- /* Simplify expression for "op r, a, 0 => mov r, a" cases */
+ /* Simplify expression for "op r, a, const => mov r, a" cases */
switch (op) {
CASE_OP_32_64(add):
CASE_OP_32_64(sub):
@@ -670,28 +727,38 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(rotr):
CASE_OP_32_64(or):
CASE_OP_32_64(xor):
- if (temps[args[1]].state == TCG_TEMP_CONST) {
- /* Proceed with possible constant folding. */
- break;
- }
- if (temps[args[2]].state == TCG_TEMP_CONST
+ CASE_OP_32_64(andc):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0) {
- if (temps_are_copies(args[0], args[1])) {
- s->gen_opc_buf[op_index] = INDEX_op_nop;
- } else {
- s->gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
- gen_args += 2;
- }
- args += 3;
- continue;
+ goto do_mov3;
+ }
+ break;
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(orc):
+ CASE_OP_32_64(eqv):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == -1) {
+ goto do_mov3;
}
break;
+ do_mov3:
+ if (temps_are_copies(args[0], args[1])) {
+ s->gen_opc_buf[op_index] = INDEX_op_nop;
+ } else {
+ s->gen_opc_buf[op_index] = op_to_mov(op);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+ gen_args += 2;
+ }
+ args += 3;
+ continue;
default:
break;
}
- /* Simplify using known-zero bits */
+ /* Simplify using known-zero bits. Currently only ops with a single
+ output argument are supported. */
mask = -1;
affected = -1;
switch (op) {
@@ -726,16 +793,36 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
mask = temps[args[1]].mask & mask;
break;
- CASE_OP_32_64(sar):
+ CASE_OP_32_64(andc):
+ /* Known-zeros does not imply known-ones. Therefore unless
+ args[2] is constant, we can't infer anything from it. */
if (temps[args[2]].state == TCG_TEMP_CONST) {
- mask = ((tcg_target_long)temps[args[1]].mask
- >> temps[args[2]].val);
+ mask = ~temps[args[2]].mask;
+ goto and_const;
}
+ /* But we certainly know nothing outside args[1] may be set. */
+ mask = temps[args[1]].mask;
break;
- CASE_OP_32_64(shr):
+ case INDEX_op_sar_i32:
if (temps[args[2]].state == TCG_TEMP_CONST) {
- mask = temps[args[1]].mask >> temps[args[2]].val;
+ mask = (int32_t)temps[args[1]].mask >> temps[args[2]].val;
+ }
+ break;
+ case INDEX_op_sar_i64:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = (int64_t)temps[args[1]].mask >> temps[args[2]].val;
+ }
+ break;
+
+ case INDEX_op_shr_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = (uint32_t)temps[args[1]].mask >> temps[args[2]].val;
+ }
+ break;
+ case INDEX_op_shr_i64:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = (uint64_t)temps[args[1]].mask >> temps[args[2]].val;
}
break;
@@ -769,10 +856,40 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
mask = temps[args[3]].mask | temps[args[4]].mask;
break;
+ CASE_OP_32_64(ld8u):
+ case INDEX_op_qemu_ld8u:
+ mask = 0xff;
+ break;
+ CASE_OP_32_64(ld16u):
+ case INDEX_op_qemu_ld16u:
+ mask = 0xffff;
+ break;
+ case INDEX_op_ld32u_i64:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_qemu_ld32u:
+#endif
+ mask = 0xffffffffu;
+ break;
+
+ CASE_OP_32_64(qemu_ld):
+ {
+ TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
+ if (!(mop & MO_SIGN)) {
+ mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
+ }
+ }
+ break;
+
default:
break;
}
+ /* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit
+ results */
+ if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
+ mask &= 0xffffffffu;
+ }
+
if (mask == 0) {
assert(def->nb_oargs == 1);
s->gen_opc_buf[op_index] = op_to_movi(op);
@@ -839,6 +956,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Simplify expression for "op r, a, a => movi r, 0" cases */
switch (op) {
+ CASE_OP_32_64(andc):
CASE_OP_32_64(sub):
CASE_OP_32_64(xor):
if (temps_are_copies(args[1], args[2])) {
@@ -1140,6 +1258,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
} else {
for (i = 0; i < def->nb_oargs; i++) {
reset_temp(args[i]);
+ /* Save the corresponding known-zero bits mask for the
+ first output argument (only one supported so far). */
+ if (i == 0) {
+ temps[args[i]].mask = mask;
+ }
}
}
for (i = 0; i < def->nb_args; i++) {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index acd02b99b6..ffc851e0c6 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -526,7 +526,7 @@ static inline int tcg_temp_new_internal(TCGType type, int temp_local)
ts->temp_local = temp_local;
ts->name = NULL;
ts++;
- ts->base_type = TCG_TYPE_I32;
+ ts->base_type = type;
ts->type = TCG_TYPE_I32;
ts->temp_allocated = 1;
ts->temp_local = temp_local;