 hw/ide/macio.c            |   1
 hw/ppc/spapr.c            |  24
 hw/ppc/spapr_cpu_core.c   |   7
 hw/ppc/spapr_iommu.c      |   4
 include/exec/gen-icount.h |   2
 include/hw/ppc/spapr.h    |   1
 include/qemu/log.h        |   3
 tcg/optimize.c            |  37
 tcg/tcg-op.c              |   2
 tcg/tcg.c                 | 588
 tcg/tcg.h                 |  52
 util/log.c                |  24
 12 files changed, 456 insertions(+), 289 deletions(-)
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 5a326afd96..76f97c2539 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -273,6 +273,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
s->io_buffer_size = MIN(s->io_buffer_size, io->len);
dma_memory_write(&address_space_memory, io->addr, s->io_buffer,
s->io_buffer_size);
+ io->len = 0;
ide_atapi_cmd_ok(s);
m->dma_active = false;
goto done;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index bce237189d..57564e5b8e 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -938,6 +938,7 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
hwaddr rtas_size)
{
MachineState *machine = MACHINE(qdev_get_machine());
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
const char *boot_device = machine->boot_order;
int ret, i;
@@ -1020,7 +1021,7 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
_FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB));
}
- if (smc->dr_cpu_enabled) {
+ if (mc->query_hotpluggable_cpus) {
int offset = fdt_path_offset(fdt, "/cpus");
ret = spapr_drc_populate_dt(fdt, offset, NULL,
SPAPR_DR_CONNECTOR_TYPE_CPU);
@@ -1712,6 +1713,7 @@ static void spapr_validate_node_memory(MachineState *machine, Error **errp)
static void ppc_spapr_init(MachineState *machine)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
const char *kernel_filename = machine->kernel_filename;
const char *kernel_cmdline = machine->kernel_cmdline;
@@ -1733,7 +1735,7 @@ static void ppc_spapr_init(MachineState *machine)
int spapr_cores = smp_cpus / smp_threads;
int spapr_max_cores = max_cpus / smp_threads;
- if (smc->dr_cpu_enabled) {
+ if (mc->query_hotpluggable_cpus) {
if (smp_cpus % smp_threads) {
error_report("smp_cpus (%u) must be multiple of threads (%u)",
smp_cpus, smp_threads);
@@ -1810,7 +1812,7 @@ static void ppc_spapr_init(MachineState *machine)
machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
}
- if (smc->dr_cpu_enabled) {
+ if (mc->query_hotpluggable_cpus) {
char *type = spapr_get_cpu_core_type(machine->cpu_model);
spapr->cores = g_new0(Object *, spapr_max_cores);
@@ -2333,12 +2335,12 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
error_setg(errp, "Memory hot unplug not supported by sPAPR");
} else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
- if (!smc->dr_cpu_enabled) {
+ if (!mc->query_hotpluggable_cpus) {
error_setg(errp, "CPU hot unplug not supported on this machine");
return;
}
@@ -2376,11 +2378,8 @@ static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine)
int i;
HotpluggableCPUList *head = NULL;
sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
int spapr_max_cores = max_cpus / smp_threads;
- g_assert(smc->dr_cpu_enabled);
-
for (i = 0; i < spapr_max_cores; i++) {
HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1);
@@ -2435,12 +2434,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
hc->plug = spapr_machine_device_plug;
hc->unplug = spapr_machine_device_unplug;
mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
- if (smc->dr_cpu_enabled) {
- mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
- }
smc->dr_lmb_enabled = true;
- smc->dr_cpu_enabled = true;
+ mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
fwc->get_dev_path = spapr_get_fw_dev_path;
nc->nmi_monitor_handler = spapr_nmi;
}
@@ -2521,10 +2517,8 @@ static void spapr_machine_2_6_instance_options(MachineState *machine)
static void spapr_machine_2_6_class_options(MachineClass *mc)
{
- sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
-
spapr_machine_2_7_class_options(mc);
- smc->dr_cpu_enabled = false;
+ mc->query_hotpluggable_cpus = NULL;
SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6);
}
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 170ed154a6..704803ac23 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -153,7 +153,6 @@ void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(OBJECT(hotplug_dev));
sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
CPUCore *cc = CPU_CORE(dev);
@@ -166,8 +165,6 @@ void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
int index = cc->core_id / smp_threads;
int smt = kvmppc_smt_threads();
- g_assert(smc->dr_cpu_enabled);
-
drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt);
spapr->cores[index] = OBJECT(dev);
@@ -209,7 +206,7 @@ void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
MachineState *machine = MACHINE(OBJECT(hotplug_dev));
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(OBJECT(hotplug_dev));
+ MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
int spapr_max_cores = max_cpus / smp_threads;
int index;
@@ -218,7 +215,7 @@ void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model);
const char *type = object_get_typename(OBJECT(dev));
- if (!smc->dr_cpu_enabled) {
+ if (!mc->query_hotpluggable_cpus) {
error_setg(&local_err, "CPU hotplug not supported for this machine");
goto out;
}
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index d57b05d5c0..6bc4d4db33 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -385,7 +385,9 @@ static void spapr_tce_reset(DeviceState *dev)
sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
size_t table_size = tcet->nb_table * sizeof(uint64_t);
- memset(tcet->table, 0, table_size);
+ if (tcet->nb_table) {
+ memset(tcet->table, 0, table_size);
+ }
}
static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 1af03d8f23..050de59b38 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -59,7 +59,7 @@ static void gen_tb_end(TranslationBlock *tb, int num_insns)
}
/* Terminate the linked list. */
- tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
+ tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0;
}
static inline void gen_io_start(void)
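Note: the gen-icount.h hunk reflects the new TCGOp list layout used throughout this series: gen_op_buf[0] serves as a sentinel, so index 0 terminates the list in both directions and the old -1 end markers go away. Below is a minimal standalone sketch of such a sentinel-based doubly-linked list over an array; it is illustrative only, not the QEMU data structures.

    #include <stdio.h>

    struct node {
        unsigned prev, next; /* array indices; element 0 is the sentinel */
        int payload;
    };

    static struct node buf[16];

    int main(void)
    {
        /* Empty list: the sentinel points at itself. */
        buf[0].next = buf[0].prev = 0;

        /* Append elements 1..3 at the tail, the way ops are emitted. */
        for (unsigned i = 1; i <= 3; i++) {
            unsigned last = buf[0].prev;

            buf[i].prev = last;
            buf[i].next = 0;
            buf[last].next = i;
            buf[0].prev = i;
            buf[i].payload = (int)i * 10;
        }

        /* Forward walk, mirroring "for (oi = buf[0].next; oi != 0; ...)". */
        for (unsigned oi = buf[0].next; oi != 0; oi = buf[oi].next) {
            printf("%d\n", buf[oi].payload);
        }
        return 0;
    }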
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index bd8ac28160..caf7be919f 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -38,7 +38,6 @@ struct sPAPRMachineClass {
/*< public >*/
bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */
- bool dr_cpu_enabled; /* enable dynamic-reconfig/hotplug of CPUs */
bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */
};
diff --git a/include/qemu/log.h b/include/qemu/log.h
index 8bec6b4039..00bf37fc0f 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -42,6 +42,7 @@ static inline bool qemu_log_separate(void)
#define CPU_LOG_TB_NOCHAIN (1 << 13)
#define CPU_LOG_PAGE (1 << 14)
#define LOG_TRACE (1 << 15)
+#define CPU_LOG_TB_OP_IND (1 << 16)
/* Returns true if a bit is set in the current loglevel mask
*/
@@ -54,7 +55,7 @@ static inline bool qemu_loglevel_mask(int mask)
/* main logging function
*/
-void GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
+int GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
/* vfprintf-like logging function
*/
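Note: qemu_log() now returns the number of characters written, which tcg_dump_ops() uses further down to track the output column and right-align the new liveness annotations. Below is a minimal standalone sketch of that idiom with a hypothetical logger, not the QEMU one.

    #include <stdarg.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the new int-returning qemu_log(). */
    static int sketch_log(const char *fmt, ...)
    {
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = vfprintf(stdout, fmt, ap);
        va_end(ap);
        return ret < 0 ? 0 : ret; /* don't pass back error results */
    }

    int main(void)
    {
        int col = 0;

        col += sketch_log(" mov_i32 tmp0,tmp1");
        while (col < 48) { /* pad to a fixed column... */
            putchar(' ');
            col++;
        }
        sketch_log(" dead: 1\n"); /* ...then append the annotation */
        return 0;
    }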
diff --git a/tcg/optimize.c b/tcg/optimize.c
index c0d975b3d9..cffe89b525 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -82,37 +82,6 @@ static void init_temp_info(TCGArg temp)
}
}
-static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
- TCGOpcode opc, int nargs)
-{
- int oi = s->gen_next_op_idx;
- int pi = s->gen_next_parm_idx;
- int prev = old_op->prev;
- int next = old_op - s->gen_op_buf;
- TCGOp *new_op;
-
- tcg_debug_assert(oi < OPC_BUF_SIZE);
- tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
- s->gen_next_op_idx = oi + 1;
- s->gen_next_parm_idx = pi + nargs;
-
- new_op = &s->gen_op_buf[oi];
- *new_op = (TCGOp){
- .opc = opc,
- .args = pi,
- .prev = prev,
- .next = next
- };
- if (prev >= 0) {
- s->gen_op_buf[prev].next = oi;
- } else {
- s->gen_first_op_idx = oi;
- }
- old_op->prev = oi;
-
- return new_op;
-}
-
static int op_bits(TCGOpcode op)
{
const TCGOpDef *def = &tcg_op_defs[op];
@@ -583,7 +552,7 @@ void tcg_optimize(TCGContext *s)
nb_globals = s->nb_globals;
reset_all_temps(nb_temps);
- for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+ for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
tcg_target_ulong mask, partmask, affected;
int nb_oargs, nb_iargs, i;
TCGArg tmp;
@@ -1120,7 +1089,7 @@ void tcg_optimize(TCGContext *s)
uint64_t a = ((uint64_t)ah << 32) | al;
uint64_t b = ((uint64_t)bh << 32) | bl;
TCGArg rl, rh;
- TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
TCGArg *args2 = &s->gen_opparam_buf[op2->args];
if (opc == INDEX_op_add2_i32) {
@@ -1146,7 +1115,7 @@ void tcg_optimize(TCGContext *s)
uint32_t b = temps[args[3]].val;
uint64_t r = (uint64_t)a * b;
TCGArg rl, rh;
- TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
TCGArg *args2 = &s->gen_opparam_buf[op2->args];
rl = args[0];
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 293b854370..0243c99094 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -52,7 +52,7 @@ static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
int pi = oi - 1;
tcg_debug_assert(oi < OPC_BUF_SIZE);
- ctx->gen_last_op_idx = oi;
+ ctx->gen_op_buf[0].prev = oi;
ctx->gen_next_op_idx = ni;
ctx->gen_op_buf[oi] = (TCGOp){
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0c46c43cfa..42417bdc92 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -23,7 +23,6 @@
*/
/* define it to use liveness analysis (better code) */
-#define USE_LIVENESS_ANALYSIS
#define USE_TCG_OPTIMIZATIONS
#include "qemu/osdep.h"
@@ -333,7 +332,7 @@ void tcg_context_init(TCGContext *s)
memset(s, 0, sizeof(*s));
s->nb_globals = 0;
-
+
/* Count total number of arguments and allocate the corresponding
space */
total_args = 0;
@@ -438,9 +437,9 @@ void tcg_func_start(TCGContext *s)
s->goto_tb_issue_mask = 0;
#endif
- s->gen_first_op_idx = 0;
- s->gen_last_op_idx = -1;
- s->gen_next_op_idx = 0;
+ s->gen_op_buf[0].next = 1;
+ s->gen_op_buf[0].prev = 0;
+ s->gen_next_op_idx = 1;
s->gen_next_parm_idx = 0;
s->be = tcg_malloc(sizeof(TCGBackendData));
@@ -532,8 +531,12 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
#endif
if (!base_ts->fixed_reg) {
- indirect_reg = 1;
+ /* We do not support double-indirect registers. */
+ tcg_debug_assert(!base_ts->indirect_reg);
base_ts->indirect_base = 1;
+ s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
+ ? 2 : 1);
+ indirect_reg = 1;
}
if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
@@ -825,16 +828,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
real_args++;
}
#endif
- /* If stack grows up, then we will be placing successive
- arguments at lower addresses, which means we need to
- reverse the order compared to how we would normally
- treat either big or little-endian. For those arguments
- that will wind up in registers, this still works for
- HPPA (the only current STACK_GROWSUP target) since the
- argument registers are *also* allocated in decreasing
- order. If another such target is added, this logic may
- have to get more complicated to differentiate between
- stack arguments and register arguments. */
+ /* If stack grows up, then we will be placing successive
+ arguments at lower addresses, which means we need to
+ reverse the order compared to how we would normally
+ treat either big or little-endian. For those arguments
+ that will wind up in registers, this still works for
+ HPPA (the only current STACK_GROWSUP target) since the
+ argument registers are *also* allocated in decreasing
+ order. If another such target is added, this logic may
+ have to get more complicated to differentiate between
+ stack arguments and register arguments. */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
s->gen_opparam_buf[pi++] = args[i] + 1;
s->gen_opparam_buf[pi++] = args[i];
@@ -869,7 +872,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
/* Make sure the calli field didn't overflow. */
tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
- s->gen_last_op_idx = i;
+ s->gen_op_buf[0].prev = i;
s->gen_next_op_idx = i + 1;
s->gen_next_parm_idx = pi;
@@ -1021,11 +1024,12 @@ void tcg_dump_ops(TCGContext *s)
TCGOp *op;
int oi;
- for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
+ for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
int i, k, nb_oargs, nb_iargs, nb_cargs;
const TCGOpDef *def;
const TCGArg *args;
TCGOpcode c;
+ int col = 0;
op = &s->gen_op_buf[oi];
c = op->opc;
@@ -1033,7 +1037,7 @@ void tcg_dump_ops(TCGContext *s)
args = &s->gen_opparam_buf[op->args];
if (c == INDEX_op_insn_start) {
- qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
+ col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
target_ulong a;
@@ -1042,7 +1046,7 @@ void tcg_dump_ops(TCGContext *s)
#else
a = args[i];
#endif
- qemu_log(" " TARGET_FMT_lx, a);
+ col += qemu_log(" " TARGET_FMT_lx, a);
}
} else if (c == INDEX_op_call) {
/* variable number of arguments */
@@ -1051,12 +1055,12 @@ void tcg_dump_ops(TCGContext *s)
nb_cargs = def->nb_cargs;
/* function name, flags, out args */
- qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
- tcg_find_helper(s, args[nb_oargs + nb_iargs]),
- args[nb_oargs + nb_iargs + 1], nb_oargs);
+ col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
+ tcg_find_helper(s, args[nb_oargs + nb_iargs]),
+ args[nb_oargs + nb_iargs + 1], nb_oargs);
for (i = 0; i < nb_oargs; i++) {
- qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
- args[i]));
+ col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[i]));
}
for (i = 0; i < nb_iargs; i++) {
TCGArg arg = args[nb_oargs + i];
@@ -1064,10 +1068,10 @@ void tcg_dump_ops(TCGContext *s)
if (arg != TCG_CALL_DUMMY_ARG) {
t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
}
- qemu_log(",%s", t);
+ col += qemu_log(",%s", t);
}
} else {
- qemu_log(" %s ", def->name);
+ col += qemu_log(" %s ", def->name);
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
@@ -1076,17 +1080,17 @@ void tcg_dump_ops(TCGContext *s)
k = 0;
for (i = 0; i < nb_oargs; i++) {
if (k != 0) {
- qemu_log(",");
+ col += qemu_log(",");
}
- qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
- args[k++]));
+ col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[k++]));
}
for (i = 0; i < nb_iargs; i++) {
if (k != 0) {
- qemu_log(",");
+ col += qemu_log(",");
}
- qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
- args[k++]));
+ col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[k++]));
}
switch (c) {
case INDEX_op_brcond_i32:
@@ -1098,9 +1102,9 @@ void tcg_dump_ops(TCGContext *s)
case INDEX_op_setcond_i64:
case INDEX_op_movcond_i64:
if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
- qemu_log(",%s", cond_name[args[k++]]);
+ col += qemu_log(",%s", cond_name[args[k++]]);
} else {
- qemu_log(",$0x%" TCG_PRIlx, args[k++]);
+ col += qemu_log(",$0x%" TCG_PRIlx, args[k++]);
}
i = 1;
break;
@@ -1114,12 +1118,12 @@ void tcg_dump_ops(TCGContext *s)
unsigned ix = get_mmuidx(oi);
if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
- qemu_log(",$0x%x,%u", op, ix);
+ col += qemu_log(",$0x%x,%u", op, ix);
} else {
const char *s_al, *s_op;
s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
- qemu_log(",%s%s,%u", s_al, s_op, ix);
+ col += qemu_log(",%s%s,%u", s_al, s_op, ix);
}
i = 1;
}
@@ -1134,14 +1138,39 @@ void tcg_dump_ops(TCGContext *s)
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
case INDEX_op_brcond2_i32:
- qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
+ col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
i++, k++;
break;
default:
break;
}
for (; i < nb_cargs; i++, k++) {
- qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
+ col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
+ }
+ }
+ if (op->life) {
+ unsigned life = op->life;
+
+ for (; col < 48; ++col) {
+ putc(' ', qemu_logfile);
+ }
+
+ if (life & (SYNC_ARG * 3)) {
+ qemu_log(" sync:");
+ for (i = 0; i < 2; ++i) {
+ if (life & (SYNC_ARG << i)) {
+ qemu_log(" %d", i);
+ }
+ }
+ }
+ life /= DEAD_ARG;
+ if (life) {
+ qemu_log(" dead:");
+ for (i = 0; life; ++i, life >>= 1) {
+ if (life & 1) {
+ qemu_log(" %d", i);
+ }
+ }
}
}
qemu_log("\n");
@@ -1298,71 +1327,116 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
int next = op->next;
int prev = op->prev;
- if (next >= 0) {
- s->gen_op_buf[next].prev = prev;
- } else {
- s->gen_last_op_idx = prev;
- }
- if (prev >= 0) {
- s->gen_op_buf[prev].next = next;
- } else {
- s->gen_first_op_idx = next;
- }
+ /* We should never attempt to remove the list terminator. */
+ tcg_debug_assert(op != &s->gen_op_buf[0]);
+
+ s->gen_op_buf[next].prev = prev;
+ s->gen_op_buf[prev].next = next;
- memset(op, -1, sizeof(*op));
+ memset(op, 0, sizeof(*op));
#ifdef CONFIG_PROFILER
s->del_op_count++;
#endif
}
-#ifdef USE_LIVENESS_ANALYSIS
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
+ TCGOpcode opc, int nargs)
+{
+ int oi = s->gen_next_op_idx;
+ int pi = s->gen_next_parm_idx;
+ int prev = old_op->prev;
+ int next = old_op - s->gen_op_buf;
+ TCGOp *new_op;
+
+ tcg_debug_assert(oi < OPC_BUF_SIZE);
+ tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
+ s->gen_next_op_idx = oi + 1;
+ s->gen_next_parm_idx = pi + nargs;
+
+ new_op = &s->gen_op_buf[oi];
+ *new_op = (TCGOp){
+ .opc = opc,
+ .args = pi,
+ .prev = prev,
+ .next = next
+ };
+ s->gen_op_buf[prev].next = oi;
+ old_op->prev = oi;
+
+ return new_op;
+}
+
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
+ TCGOpcode opc, int nargs)
+{
+ int oi = s->gen_next_op_idx;
+ int pi = s->gen_next_parm_idx;
+ int prev = old_op - s->gen_op_buf;
+ int next = old_op->next;
+ TCGOp *new_op;
+
+ tcg_debug_assert(oi < OPC_BUF_SIZE);
+ tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
+ s->gen_next_op_idx = oi + 1;
+ s->gen_next_parm_idx = pi + nargs;
+
+ new_op = &s->gen_op_buf[oi];
+ *new_op = (TCGOp){
+ .opc = opc,
+ .args = pi,
+ .prev = prev,
+ .next = next
+ };
+ s->gen_op_buf[next].prev = oi;
+ old_op->next = oi;
+
+ return new_op;
+}
+
+#define TS_DEAD 1
+#define TS_MEM 2
+
+#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
+#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
+
/* liveness analysis: end of function: all temps are dead, and globals
should be in memory. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
- uint8_t *mem_temps)
+static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
{
- memset(dead_temps, 1, s->nb_temps);
- memset(mem_temps, 1, s->nb_globals);
- memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
+ memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
+ memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
}
/* liveness analysis: end of basic block: all temps are dead, globals
and local temps should be in memory. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
- uint8_t *mem_temps)
+static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
{
- int i;
+ int i, n;
- memset(dead_temps, 1, s->nb_temps);
- memset(mem_temps, 1, s->nb_globals);
- for(i = s->nb_globals; i < s->nb_temps; i++) {
- mem_temps[i] = s->temps[i].temp_local;
+ tcg_la_func_end(s, temp_state);
+ for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
+ if (s->temps[i].temp_local) {
+ temp_state[i] |= TS_MEM;
+ }
}
}
-/* Liveness analysis : update the opc_dead_args array to tell if a
+/* Liveness analysis : update the opc_arg_life array to tell if a
given input arguments is dead. Instructions updating dead
temporaries are removed. */
-static void tcg_liveness_analysis(TCGContext *s)
+static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
{
- uint8_t *dead_temps, *mem_temps;
- int oi, oi_prev, nb_ops;
+ int nb_globals = s->nb_globals;
+ int oi, oi_prev;
- nb_ops = s->gen_next_op_idx;
- s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
- s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
-
- dead_temps = tcg_malloc(s->nb_temps);
- mem_temps = tcg_malloc(s->nb_temps);
- tcg_la_func_end(s, dead_temps, mem_temps);
+ tcg_la_func_end(s, temp_state);
- for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
+ for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
int i, nb_iargs, nb_oargs;
TCGOpcode opc_new, opc_new2;
bool have_opc_new2;
- uint16_t dead_args;
- uint8_t sync_args;
+ TCGLifeData arg_life = 0;
TCGArg arg;
TCGOp * const op = &s->gen_op_buf[oi];
@@ -1385,7 +1459,7 @@ static void tcg_liveness_analysis(TCGContext *s)
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (!dead_temps[arg] || mem_temps[arg]) {
+ if (temp_state[arg] != TS_DEAD) {
goto do_not_remove_call;
}
}
@@ -1394,46 +1468,44 @@ static void tcg_liveness_analysis(TCGContext *s)
do_not_remove_call:
/* output args are dead */
- dead_args = 0;
- sync_args = 0;
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ if (temp_state[arg] & TS_DEAD) {
+ arg_life |= DEAD_ARG << i;
}
- if (mem_temps[arg]) {
- sync_args |= (1 << i);
+ if (temp_state[arg] & TS_MEM) {
+ arg_life |= SYNC_ARG << i;
}
- dead_temps[arg] = 1;
- mem_temps[arg] = 0;
+ temp_state[arg] = TS_DEAD;
}
- if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
- /* globals should be synced to memory */
- memset(mem_temps, 1, s->nb_globals);
- }
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
TCG_CALL_NO_READ_GLOBALS))) {
/* globals should go back to memory */
- memset(dead_temps, 1, s->nb_globals);
+ memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
+ } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+ /* globals should be synced to memory */
+ for (i = 0; i < nb_globals; i++) {
+ temp_state[i] |= TS_MEM;
+ }
}
/* record arguments that die in this helper */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
if (arg != TCG_CALL_DUMMY_ARG) {
- if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ if (temp_state[arg] & TS_DEAD) {
+ arg_life |= DEAD_ARG << i;
}
}
}
/* input arguments are live for preceding opcodes */
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
- dead_temps[arg] = 0;
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ temp_state[arg] &= ~TS_DEAD;
+ }
}
- s->op_dead_args[oi] = dead_args;
- s->op_sync_args[oi] = sync_args;
}
}
break;
@@ -1441,8 +1513,7 @@ static void tcg_liveness_analysis(TCGContext *s)
break;
case INDEX_op_discard:
/* mark the temporary as dead */
- dead_temps[args[0]] = 1;
- mem_temps[args[0]] = 0;
+ temp_state[args[0]] = TS_DEAD;
break;
case INDEX_op_add2_i32:
@@ -1463,8 +1534,8 @@ static void tcg_liveness_analysis(TCGContext *s)
the low part. The result can be optimized to a simple
add or sub. This happens often for x86_64 guest when the
cpu mode is set to 32 bit. */
- if (dead_temps[args[1]] && !mem_temps[args[1]]) {
- if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ if (temp_state[args[1]] == TS_DEAD) {
+ if (temp_state[args[0]] == TS_DEAD) {
goto do_remove;
}
/* Replace the opcode and adjust the args in place,
@@ -1501,8 +1572,8 @@ static void tcg_liveness_analysis(TCGContext *s)
do_mul2:
nb_iargs = 2;
nb_oargs = 2;
- if (dead_temps[args[1]] && !mem_temps[args[1]]) {
- if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ if (temp_state[args[1]] == TS_DEAD) {
+ if (temp_state[args[0]] == TS_DEAD) {
/* Both parts of the operation are dead. */
goto do_remove;
}
@@ -1510,8 +1581,7 @@ static void tcg_liveness_analysis(TCGContext *s)
op->opc = opc = opc_new;
args[1] = args[2];
args[2] = args[3];
- } else if (have_opc_new2 && dead_temps[args[0]]
- && !mem_temps[args[0]]) {
+ } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
/* The low part of the operation is dead; generate the high. */
op->opc = opc = opc_new2;
args[0] = args[1];
@@ -1534,8 +1604,7 @@ static void tcg_liveness_analysis(TCGContext *s)
implies side effects */
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
for (i = 0; i < nb_oargs; i++) {
- arg = args[i];
- if (!dead_temps[arg] || mem_temps[arg]) {
+ if (temp_state[args[i]] != TS_DEAD) {
goto do_not_remove;
}
}
@@ -1544,59 +1613,203 @@ static void tcg_liveness_analysis(TCGContext *s)
} else {
do_not_remove:
/* output args are dead */
- dead_args = 0;
- sync_args = 0;
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ if (temp_state[arg] & TS_DEAD) {
+ arg_life |= DEAD_ARG << i;
}
- if (mem_temps[arg]) {
- sync_args |= (1 << i);
+ if (temp_state[arg] & TS_MEM) {
+ arg_life |= SYNC_ARG << i;
}
- dead_temps[arg] = 1;
- mem_temps[arg] = 0;
+ temp_state[arg] = TS_DEAD;
}
/* if end of basic block, update */
if (def->flags & TCG_OPF_BB_END) {
- tcg_la_bb_end(s, dead_temps, mem_temps);
+ tcg_la_bb_end(s, temp_state);
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
/* globals should be synced to memory */
- memset(mem_temps, 1, s->nb_globals);
+ for (i = 0; i < nb_globals; i++) {
+ temp_state[i] |= TS_MEM;
+ }
}
/* record arguments that die in this opcode */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i];
- if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ if (temp_state[arg] & TS_DEAD) {
+ arg_life |= DEAD_ARG << i;
}
}
/* input arguments are live for preceding opcodes */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
- arg = args[i];
- dead_temps[arg] = 0;
+ temp_state[args[i]] &= ~TS_DEAD;
}
- s->op_dead_args[oi] = dead_args;
- s->op_sync_args[oi] = sync_args;
}
break;
}
+ op->life = arg_life;
}
}
-#else
-/* dummy liveness analysis */
-static void tcg_liveness_analysis(TCGContext *s)
+
+/* Liveness analysis: Convert indirect regs to direct temporaries. */
+static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
{
- int nb_ops = s->gen_next_op_idx;
+ int nb_globals = s->nb_globals;
+ int16_t *dir_temps;
+ int i, oi, oi_next;
+ bool changes = false;
+
+ dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
+ memset(dir_temps, 0, nb_globals * sizeof(int16_t));
+
+ /* Create a temporary for each indirect global. */
+ for (i = 0; i < nb_globals; ++i) {
+ TCGTemp *its = &s->temps[i];
+ if (its->indirect_reg) {
+ TCGTemp *dts = tcg_temp_alloc(s);
+ dts->type = its->type;
+ dts->base_type = its->base_type;
+ dir_temps[i] = temp_idx(s, dts);
+ }
+ }
+
+ memset(temp_state, TS_DEAD, nb_globals);
+
+ for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
+ TCGOp *op = &s->gen_op_buf[oi];
+ TCGArg *args = &s->gen_opparam_buf[op->args];
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+ TCGLifeData arg_life = op->life;
+ int nb_iargs, nb_oargs, call_flags;
+ TCGArg arg, dir;
+
+ oi_next = op->next;
+
+ if (opc == INDEX_op_call) {
+ nb_oargs = op->callo;
+ nb_iargs = op->calli;
+ call_flags = args[nb_oargs + nb_iargs + 1];
+ } else {
+ nb_iargs = def->nb_iargs;
+ nb_oargs = def->nb_oargs;
+
+ /* Set flags similar to how calls require. */
+ if (def->flags & TCG_OPF_BB_END) {
+ /* Like writing globals: save_globals */
+ call_flags = 0;
+ } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* Like reading globals: sync_globals */
+ call_flags = TCG_CALL_NO_WRITE_GLOBALS;
+ } else {
+ /* No effect on globals. */
+ call_flags = (TCG_CALL_NO_READ_GLOBALS |
+ TCG_CALL_NO_WRITE_GLOBALS);
+ }
+ }
+
+ /* Make sure that input arguments are available. */
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ arg = args[i];
+ /* Note this unsigned test catches TCG_CALL_DUMMY_ARG too. */
+ if (arg < nb_globals) {
+ dir = dir_temps[arg];
+ if (dir != 0 && temp_state[arg] == TS_DEAD) {
+ TCGTemp *its = &s->temps[arg];
+ TCGOpcode lopc = (its->type == TCG_TYPE_I32
+ ? INDEX_op_ld_i32
+ : INDEX_op_ld_i64);
+ TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
+ TCGArg *largs = &s->gen_opparam_buf[lop->args];
+
+ largs[0] = dir;
+ largs[1] = temp_idx(s, its->mem_base);
+ largs[2] = its->mem_offset;
+
+ /* Loaded, but synced with memory. */
+ temp_state[arg] = TS_MEM;
+ }
+ }
+ }
+
+ /* Perform input replacement, and mark inputs that became dead.
+ No action is required except keeping temp_state up to date
+ so that we reload when needed. */
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ arg = args[i];
+ if (arg < nb_globals) {
+ dir = dir_temps[arg];
+ if (dir != 0) {
+ args[i] = dir;
+ changes = true;
+ if (IS_DEAD_ARG(i)) {
+ temp_state[arg] = TS_DEAD;
+ }
+ }
+ }
+ }
+
+ /* Liveness analysis should ensure that the following are
+ all correct, for call sites and basic block end points. */
+ if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
+ /* Nothing to do */
+ } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
+ for (i = 0; i < nb_globals; ++i) {
+ /* Liveness should see that globals are synced back,
+ that is, either TS_DEAD or TS_MEM. */
+ tcg_debug_assert(dir_temps[i] == 0
+ || temp_state[i] != 0);
+ }
+ } else {
+ for (i = 0; i < nb_globals; ++i) {
+ /* Liveness should see that globals are saved back,
+ that is, TS_DEAD, waiting to be reloaded. */
+ tcg_debug_assert(dir_temps[i] == 0
+ || temp_state[i] == TS_DEAD);
+ }
+ }
+
+ /* Outputs become available. */
+ for (i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ if (arg >= nb_globals) {
+ continue;
+ }
+ dir = dir_temps[arg];
+ if (dir == 0) {
+ continue;
+ }
+ args[i] = dir;
+ changes = true;
+
+ /* The output is now live and modified. */
+ temp_state[arg] = 0;
+
+ /* Sync outputs upon their last write. */
+ if (NEED_SYNC_ARG(i)) {
+ TCGTemp *its = &s->temps[arg];
+ TCGOpcode sopc = (its->type == TCG_TYPE_I32
+ ? INDEX_op_st_i32
+ : INDEX_op_st_i64);
+ TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
+ TCGArg *sargs = &s->gen_opparam_buf[sop->args];
+
+ sargs[0] = dir;
+ sargs[1] = temp_idx(s, its->mem_base);
+ sargs[2] = its->mem_offset;
+
+ temp_state[arg] = TS_MEM;
+ }
+ /* Drop outputs that are dead. */
+ if (IS_DEAD_ARG(i)) {
+ temp_state[arg] = TS_DEAD;
+ }
+ }
+ }
- s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
- memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
- s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
- memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
+ return changes;
}
-#endif
#ifdef CONFIG_DEBUG_TCG
static void dump_regs(TCGContext *s)
@@ -1728,14 +1941,6 @@ static void temp_sync(TCGContext *s, TCGTemp *ts,
if (!ts->mem_allocated) {
temp_allocate_frame(s, temp_idx(s, ts));
}
- if (ts->indirect_reg) {
- if (ts->val_type == TEMP_VAL_REG) {
- tcg_regset_set_reg(allocated_regs, ts->reg);
- }
- temp_load(s, ts->mem_base,
- tcg_target_available_regs[TCG_TYPE_PTR],
- allocated_regs);
- }
switch (ts->val_type) {
case TEMP_VAL_CONST:
/* If we're going to free the temp immediately, then we won't
@@ -1826,12 +2031,6 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
break;
case TEMP_VAL_MEM:
reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
- if (ts->indirect_reg) {
- tcg_regset_set_reg(allocated_regs, reg);
- temp_load(s, ts->mem_base,
- tcg_target_available_regs[TCG_TYPE_PTR],
- allocated_regs);
- }
tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
ts->mem_coherent = 1;
break;
@@ -1848,16 +2047,9 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
temporary registers needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
-#ifdef USE_LIVENESS_ANALYSIS
- /* ??? Liveness does not yet incorporate indirect bases. */
- if (!ts->indirect_base) {
- /* The liveness analysis already ensures that globals are back
- in memory. Keep an tcg_debug_assert for safety. */
- tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
- return;
- }
-#endif
- temp_sync(s, ts, allocated_regs, 1);
+ /* The liveness analysis already ensures that globals are back
+ in memory. Keep a tcg_debug_assert for safety. */
+ tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}
/* save globals to their canonical location and assume they can be
@@ -1881,16 +2073,9 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
for (i = 0; i < s->nb_globals; i++) {
TCGTemp *ts = &s->temps[i];
-#ifdef USE_LIVENESS_ANALYSIS
- /* ??? Liveness does not yet incorporate indirect bases. */
- if (!ts->indirect_base) {
- tcg_debug_assert(ts->val_type != TEMP_VAL_REG
- || ts->fixed_reg
- || ts->mem_coherent);
- continue;
- }
-#endif
- temp_sync(s, ts, allocated_regs, 0);
+ tcg_debug_assert(ts->val_type != TEMP_VAL_REG
+ || ts->fixed_reg
+ || ts->mem_coherent);
}
}
@@ -1905,27 +2090,17 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
if (ts->temp_local) {
temp_save(s, ts, allocated_regs);
} else {
-#ifdef USE_LIVENESS_ANALYSIS
- /* ??? Liveness does not yet incorporate indirect bases. */
- if (!ts->indirect_base) {
- /* The liveness analysis already ensures that temps are dead.
- Keep an tcg_debug_assert for safety. */
- tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
- continue;
- }
-#endif
- temp_dead(s, ts);
+ /* The liveness analysis already ensures that temps are dead.
+ Keep an tcg_debug_assert for safety. */
+ tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
}
}
save_globals(s, allocated_regs);
}
-#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
-#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
-
static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
- uint16_t dead_args, uint8_t sync_args)
+ TCGLifeData arg_life)
{
TCGTemp *ots;
tcg_target_ulong val;
@@ -1954,8 +2129,7 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
}
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
- const TCGArg *args, uint16_t dead_args,
- uint8_t sync_args)
+ const TCGArg *args, TCGLifeData arg_life)
{
TCGRegSet allocated_regs;
TCGTemp *ts, *ots;
@@ -1987,12 +2161,6 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
if (!ots->mem_allocated) {
temp_allocate_frame(s, args[0]);
}
- if (ots->indirect_reg) {
- tcg_regset_set_reg(allocated_regs, ts->reg);
- temp_load(s, ots->mem_base,
- tcg_target_available_regs[TCG_TYPE_PTR],
- allocated_regs);
- }
tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
if (IS_DEAD_ARG(1)) {
temp_dead(s, ts);
@@ -2040,8 +2208,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
static void tcg_reg_alloc_op(TCGContext *s,
const TCGOpDef *def, TCGOpcode opc,
- const TCGArg *args, uint16_t dead_args,
- uint8_t sync_args)
+ const TCGArg *args, TCGLifeData arg_life)
{
TCGRegSet allocated_regs;
int i, k, nb_iargs, nb_oargs;
@@ -2206,8 +2373,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
#endif
static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
- const TCGArg * const args, uint16_t dead_args,
- uint8_t sync_args)
+ const TCGArg * const args, TCGLifeData arg_life)
{
int flags, nb_regs, i;
TCGReg reg;
@@ -2363,7 +2529,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
int n;
- n = s->gen_last_op_idx + 1;
+ n = s->gen_op_buf[0].prev + 1;
s->op_count += n;
if (n > s->op_count_max) {
s->op_count_max = n;
@@ -2399,7 +2565,27 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
s->la_time -= profile_getclock();
#endif
- tcg_liveness_analysis(s);
+ {
+ uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
+
+ liveness_pass_1(s, temp_state);
+
+ if (s->nb_indirects > 0) {
+#ifdef DEBUG_DISAS
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
+ && qemu_log_in_addr_range(tb->pc))) {
+ qemu_log("OP before indirect lowering:\n");
+ tcg_dump_ops(s);
+ qemu_log("\n");
+ }
+#endif
+ /* Replace indirect temps with direct temps. */
+ if (liveness_pass_2(s, temp_state)) {
+ /* If changes were made, re-run liveness. */
+ liveness_pass_1(s, temp_state);
+ }
+ }
+ }
#ifdef CONFIG_PROFILER
s->la_time += profile_getclock();
@@ -2422,13 +2608,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
tcg_out_tb_init(s);
num_insns = -1;
- for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+ for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
TCGOp * const op = &s->gen_op_buf[oi];
TCGArg * const args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
const TCGOpDef *def = &tcg_op_defs[opc];
- uint16_t dead_args = s->op_dead_args[oi];
- uint8_t sync_args = s->op_sync_args[oi];
+ TCGLifeData arg_life = op->life;
oi_next = op->next;
#ifdef CONFIG_PROFILER
@@ -2438,11 +2623,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
switch (opc) {
case INDEX_op_mov_i32:
case INDEX_op_mov_i64:
- tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
+ tcg_reg_alloc_mov(s, def, args, arg_life);
break;
case INDEX_op_movi_i32:
case INDEX_op_movi_i64:
- tcg_reg_alloc_movi(s, args, dead_args, sync_args);
+ tcg_reg_alloc_movi(s, args, arg_life);
break;
case INDEX_op_insn_start:
if (num_insns >= 0) {
@@ -2467,8 +2652,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
tcg_out_label(s, arg_label(args[0]), s->code_ptr);
break;
case INDEX_op_call:
- tcg_reg_alloc_call(s, op->callo, op->calli, args,
- dead_args, sync_args);
+ tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
break;
default:
/* Sanity check that we've not introduced any unhandled opcodes. */
@@ -2478,7 +2662,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
/* Note: in order to speed up the code, it would be much
faster to have specialized register allocator functions for
some common argument patterns */
- tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
+ tcg_reg_alloc_op(s, def, opc, args, arg_life);
break;
}
#ifdef CONFIG_DEBUG_TCG
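Note: throughout tcg.c the two parallel byte arrays dead_temps[]/mem_temps[] are folded into a single temp_state[] array whose entries combine the TS_DEAD and TS_MEM flags. Below is a minimal standalone sketch of that encoding; the flag values are taken from the patch, everything else is illustrative.

    #include <stdio.h>
    #include <string.h>

    #define TS_DEAD 1 /* the temp's register value is dead */
    #define TS_MEM  2 /* the temp's canonical memory slot is up to date */

    int main(void)
    {
        unsigned char temp_state[4];
        int nb_globals = 2, nb_temps = 4;

        /* End of function: globals dead and in memory, temps just dead. */
        memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
        memset(temp_state + nb_globals, TS_DEAD, nb_temps - nb_globals);

        /* A write makes global 0 live and dirty; syncing sets TS_MEM again. */
        temp_state[0] = 0;
        temp_state[0] |= TS_MEM;

        for (int i = 0; i < nb_temps; i++) {
            printf("temp %d: dead=%d in-mem=%d\n", i,
                   (temp_state[i] & TS_DEAD) != 0,
                   (temp_state[i] & TS_MEM) != 0);
        }
        return 0;
    }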
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6046dcdc89..1bcabcad9d 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -575,24 +575,41 @@ typedef struct TCGTempSet {
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
} TCGTempSet;
+/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
+ this implies a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
+ There are never more than 2 outputs, which means that we can store all
+ dead + sync data within 16 bits. */
+#define DEAD_ARG 4
+#define SYNC_ARG 1
+typedef uint16_t TCGLifeData;
+
+/* The layout here is designed to avoid crossing of a 32-bit boundary.
+ If we do so, gcc adds padding, expanding the size to 12. */
typedef struct TCGOp {
- TCGOpcode opc : 8;
+ TCGOpcode opc : 8; /* 8 */
+
+ /* Index of the prev/next op, or 0 for the end of the list. */
+ unsigned prev : 10; /* 18 */
+ unsigned next : 10; /* 28 */
/* The number of out and in parameter for a call. */
- unsigned callo : 2;
- unsigned calli : 6;
+ unsigned calli : 4; /* 32 */
+ unsigned callo : 2; /* 34 */
- /* Index of the arguments for this op, or -1 for zero-operand ops. */
- signed args : 16;
+ /* Index of the arguments for this op, or 0 for zero-operand ops. */
+ unsigned args : 14; /* 48 */
- /* Index of the prex/next op, or -1 for the end of the list. */
- signed prev : 16;
- signed next : 16;
+ /* Lifetime data of the operands. */
+ unsigned life : 16; /* 64 */
} TCGOp;
-QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
-QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
-QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
+/* Make sure operands fit in the bitfields above. */
+QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
+QEMU_BUILD_BUG_ON(OPC_BUF_SIZE > (1 << 10));
+QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE > (1 << 14));
+
+/* Make sure that we don't overflow 64 bits without noticing. */
+QEMU_BUILD_BUG_ON(sizeof(TCGOp) > 8);
struct TCGContext {
uint8_t *pool_cur, *pool_end;
@@ -600,6 +617,7 @@ struct TCGContext {
int nb_labels;
int nb_globals;
int nb_temps;
+ int nb_indirects;
/* goto_tb support */
tcg_insn_unit *code_buf;
@@ -607,13 +625,6 @@ struct TCGContext {
uint16_t *tb_jmp_insn_offset; /* tb->jmp_insn_offset if USE_DIRECT_JUMP */
uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
- /* liveness analysis */
- uint16_t *op_dead_args; /* for each operation, each bit tells if the
- corresponding argument is dead */
- uint8_t *op_sync_args; /* for each operation, each bit tells if the
- corresponding output argument needs to be
- sync to memory. */
-
TCGRegSet reserved_regs;
intptr_t current_frame_offset;
intptr_t frame_start;
@@ -649,8 +660,6 @@ struct TCGContext {
int goto_tb_issue_mask;
#endif
- int gen_first_op_idx;
- int gen_last_op_idx;
int gen_next_op_idx;
int gen_next_parm_idx;
@@ -890,6 +899,9 @@ void tcg_gen_callN(TCGContext *s, void *func,
TCGArg ret, int nargs, TCGArg *args);
void tcg_op_remove(TCGContext *s, TCGOp *op);
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
+
void tcg_optimize(TCGContext *s);
/* only used for debugging purposes */
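Note: the tcg.h hunks pack per-argument liveness into the 16-bit TCGOp.life field: the low bits (SYNC_ARG << n) mark outputs that must be synced to memory, and the bits from DEAD_ARG upwards (DEAD_ARG << n) mark arguments that die at the op. Below is a minimal standalone sketch of encoding and decoding such a word; the constants are copied from the header, the macros take the life word explicitly for illustration.

    #include <stdint.h>
    #include <stdio.h>

    #define SYNC_ARG 1
    #define DEAD_ARG 4
    typedef uint16_t TCGLifeData;

    /* Illustrative variants of the header's IS_DEAD_ARG/NEED_SYNC_ARG. */
    #define IS_DEAD_ARG(life, n)   (((life) & (DEAD_ARG << (n))) != 0)
    #define NEED_SYNC_ARG(life, n) (((life) & (SYNC_ARG << (n))) != 0)

    int main(void)
    {
        /* Output 0 must be synced to memory; argument 2 dies at this op. */
        TCGLifeData life = (SYNC_ARG << 0) | (DEAD_ARG << 2);

        printf("sync arg0: %d\n", NEED_SYNC_ARG(life, 0));
        printf("dead arg2: %d\n", IS_DEAD_ARG(life, 2));
        return 0;
    }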
diff --git a/util/log.c b/util/log.c
index b6c75b1102..54b54e868a 100644
--- a/util/log.c
+++ b/util/log.c
@@ -32,15 +32,22 @@ int qemu_loglevel;
static int log_append = 0;
static GArray *debug_regions;
-void qemu_log(const char *fmt, ...)
+/* Return the number of characters emitted. */
+int qemu_log(const char *fmt, ...)
{
- va_list ap;
-
- va_start(ap, fmt);
+ int ret = 0;
if (qemu_logfile) {
- vfprintf(qemu_logfile, fmt, ap);
+ va_list ap;
+ va_start(ap, fmt);
+ ret = vfprintf(qemu_logfile, fmt, ap);
+ va_end(ap);
+
+ /* Don't pass back error results. */
+ if (ret < 0) {
+ ret = 0;
+ }
}
- va_end(ap);
+ return ret;
}
static bool log_uses_own_buffers;
@@ -240,8 +247,9 @@ const QEMULogItem qemu_log_items[] = {
{ CPU_LOG_TB_OP, "op",
"show micro ops for each compiled TB" },
{ CPU_LOG_TB_OP_OPT, "op_opt",
- "show micro ops (x86 only: before eflags optimization) and\n"
- "after liveness analysis" },
+ "show micro ops after optimization" },
+ { CPU_LOG_TB_OP_IND, "op_ind",
+ "show micro ops before indirect lowering" },
{ CPU_LOG_INT, "int",
"show interrupts/exceptions in short format" },
{ CPU_LOG_EXEC, "exec",