summaryrefslogtreecommitdiff
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/intel-bts.txt86
-rw-r--r--tools/perf/arch/x86/util/Build1
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c49
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c458
-rw-r--r--tools/perf/arch/x86/util/pmu.c3
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/auxtrace.c3
-rw-r--r--tools/perf/util/auxtrace.h1
-rw-r--r--tools/perf/util/intel-bts.c933
-rw-r--r--tools/perf/util/intel-bts.h43
-rw-r--r--tools/perf/util/pmu.c4
11 files changed, 1576 insertions, 6 deletions
diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt
new file mode 100644
index 000000000000..8bdc93bd7fdb
--- /dev/null
+++ b/tools/perf/Documentation/intel-bts.txt
@@ -0,0 +1,86 @@
+Intel Branch Trace Store
+========================
+
+Overview
+========
+
+Intel BTS could be regarded as a predecessor to Intel PT and has some
+similarities because it can also identify every branch a program takes. A
+notable difference is that Intel BTS has no timing information and as a
+consequence the present implementation is limited to per-thread recording.
+
+While decoding Intel BTS does not require walking the object code, the object
+code is still needed to pair up calls and returns correctly, consequently much
+of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT
+documentation and consider that the PMU 'intel_bts' can usually be used in
+place of 'intel_pt' in the examples provided, with the proviso that per-thread
+recording must also be stipulated i.e. the --per-thread option for
+'perf record'.
+
+
+perf record
+===========
+
+new event
+---------
+
+The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record
+option is:
+
+ -e intel_bts//
+
+Currently Intel BTS is limited to per-thread tracing so the --per-thread option
+is also needed.
+
+
+snapshot option
+---------------
+
+The snapshot option is the same as Intel PT (refer to the Intel PT documentation).
+
+
+auxtrace mmap size option
+-------------------------
+
+The mmap size option is the same as Intel PT (refer to the Intel PT documentation).
+
+
+perf script
+===========
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by option --itrace. The --itrace option is
+the same as Intel PT (refer to the Intel PT documentation) except that neither
+"instructions" events nor "transactions" events (and consequently call
+chains) are supported.
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+
+dump option
+-----------
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel BTS packets are displayed.
+
+To disable the display of Intel BTS packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+===========
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by the --itrace option, exactly as for
+perf script.
+
+
+perf inject
+===========
+
+perf inject also accepts the --itrace option, in which case tracing data is
+removed and replaced with the synthesized events, e.g.
+
+ perf inject --itrace -i perf.data -o perf.data.new
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index a8be9f9d0462..2c55e1b336c5 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -10,3 +10,4 @@ libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index e7654b506312..7a7805583e3f 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -13,11 +13,56 @@
*
*/
+#include <stdbool.h>
+
#include "../../util/header.h"
+#include "../../util/debug.h"
+#include "../../util/pmu.h"
#include "../../util/auxtrace.h"
#include "../../util/intel-pt.h"
+#include "../../util/intel-bts.h"
+#include "../../util/evlist.h"
+
+/*
+ * Choose the AUX area recording backend on an Intel CPU.
+ *
+ * Looks up the intel_pt and intel_bts PMUs and scans @evlist (which may be
+ * NULL) for events whose attr.type matches either PMU. Requesting both at
+ * once is rejected with *err = -EINVAL. Otherwise the matching
+ * *_recording_init() result is returned, or NULL when neither PMU is in use.
+ */
+static
+struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
+ int *err)
+{
+ struct perf_pmu *intel_pt_pmu;
+ struct perf_pmu *intel_bts_pmu;
+ struct perf_evsel *evsel;
+ bool found_pt = false;
+ bool found_bts = false;
+
+ intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
+ intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+
+ if (evlist) {
+ evlist__for_each(evlist, evsel) {
+ if (intel_pt_pmu &&
+ evsel->attr.type == intel_pt_pmu->type)
+ found_pt = true;
+ if (intel_bts_pmu &&
+ evsel->attr.type == intel_bts_pmu->type)
+ found_bts = true;
+ }
+ }
+
+ /* Only one AUX area tracer can be recorded per session */
+ if (found_pt && found_bts) {
+ pr_err("intel_pt and intel_bts may not be used together\n");
+ *err = -EINVAL;
+ return NULL;
+ }
+
+ if (found_pt)
+ return intel_pt_recording_init(err);
+
+ if (found_bts)
+ return intel_bts_recording_init(err);
-struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
+ return NULL;
+}
+
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
int *err)
{
char buffer[64];
@@ -32,7 +77,7 @@ struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe
}
if (!strncmp(buffer, "GenuineIntel,", 13))
- return intel_pt_recording_init(err);
+ return auxtrace_record__init_intel(evlist, err);
return NULL;
}
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
new file mode 100644
index 000000000000..9b94ce520917
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -0,0 +1,458 @@
+/*
+ * intel-bts.c: Intel Branch Trace Store support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "../../util/cpumap.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/session.h"
+#include "../../util/util.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/tsc.h"
+#include "../../util/auxtrace.h"
+#include "../../util/intel-bts.h"
+
+/*
+ * Byte-size helpers: KiB(x) and MiB(x) scale x by 1024 and 1024 * 1024;
+ * the *_MASK(x) variants are that size minus one.
+ */
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+#define KiB_MASK(x) (KiB(x) - 1)
+#define MiB_MASK(x) (MiB(x) - 1)
+
+/* NOTE(review): the two sample-size limits below are not referenced in the visible code */
+#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4)
+
+#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60)
+
+/*
+ * Per-mmap snapshot bookkeeping, indexed by mmap index in
+ * intel_bts_recording.snapshot_refs.
+ */
+struct intel_bts_snapshot_ref {
+ void *ref_buf; /* freed by intel_bts_free_snapshot_refs() */
+ size_t ref_offset; /* NOTE(review): not used in the visible code */
+ bool wrapped; /* set once intel_bts_first_wrap() has seen data at the buffer end */
+};
+
+/*
+ * Recording-side state for Intel BTS. The embedded 'itr' is what the generic
+ * auxtrace code sees; callbacks recover this structure with container_of().
+ */
+struct intel_bts_recording {
+ struct auxtrace_record itr; /* callback table handed to the auxtrace core */
+ struct perf_pmu *intel_bts_pmu; /* the intel_bts PMU found at init time */
+ struct perf_evlist *evlist; /* set by intel_bts_recording_options() */
+ bool snapshot_mode; /* copy of opts->auxtrace_snapshot_mode */
+ size_t snapshot_size; /* size parsed by intel_bts_parse_snapshot_options() */
+ int snapshot_ref_cnt; /* number of entries allocated in snapshot_refs */
+ struct intel_bts_snapshot_ref *snapshot_refs; /* grown on demand, indexed by mmap index */
+};
+
+/*
+ * One BTS trace record: three 64-bit words. In this file only its size is
+ * used, as the required alignment of AUX data (see itr.alignment).
+ */
+struct branch {
+ u64 from;
+ u64 to;
+ u64 misc;
+};
+
+/* Size of the private area that intel_bts_info_fill() expects to populate. */
+static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+{
+	const size_t priv_size = INTEL_BTS_AUXTRACE_PRIV_SIZE;
+
+	return priv_size;
+}
+
+/*
+ * Populate the auxtrace info event that describes this Intel BTS recording.
+ *
+ * Stores the PMU type, the TSC conversion parameters read from the first
+ * mmap'ed page (when there is one) and whether snapshot mode is in use.
+ *
+ * Returns 0 on success, -EINVAL if @priv_size is not the expected size or
+ * nothing has been mmap'ed, or the error from perf_read_tsc_conversion()
+ * other than -EOPNOTSUPP (which only triggers a warning).
+ */
+static int intel_bts_info_fill(struct auxtrace_record *itr,
+			       struct perf_session *session,
+			       struct auxtrace_info_event *auxtrace_info,
+			       size_t priv_size)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	struct perf_tsc_conversion tc = { .time_mult = 0, };
+	struct perf_event_mmap_page *pc;
+	bool have_time_zero = false;
+
+	if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
+		return -EINVAL;
+
+	if (!session->evlist->nr_mmaps)
+		return -EINVAL;
+
+	pc = session->evlist->mmap[0].base;
+	if (pc) {
+		int err = perf_read_tsc_conversion(pc, &tc);
+
+		/* "Not supported" is tolerated; anything else is fatal */
+		if (err && err != -EOPNOTSUPP)
+			return err;
+		if (!err)
+			have_time_zero = tc.time_mult != 0;
+		if (!have_time_zero)
+			ui__warning("Intel BTS: TSC not available\n");
+	}
+
+	auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
+	auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = btsr->intel_bts_pmu->type;
+	auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift;
+	auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult;
+	auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero;
+	auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = have_time_zero;
+	auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode;
+
+	return 0;
+}
+
+/*
+ * Validate and adjust the record options for an Intel BTS session.
+ *
+ * Finds the single intel_bts event in @evlist, makes it fire on every event
+ * (freq = 0, sample_period = 1) and turns on full AUX tracing. Then it picks
+ * default AUX/data mmap sizes for snapshot mode and full-trace mode, validates
+ * the AUX mmap size, moves the intel_bts event to the front of @evlist and
+ * appends a "dummy:u" event that becomes the tracking event.
+ *
+ * Returns 0 on success (including when no intel_bts event is present),
+ * -EINVAL for an unsupported or inconsistent combination of options, or the
+ * error returned by parse_events().
+ */
+static int intel_bts_recording_options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
+ struct perf_evsel *evsel, *intel_bts_evsel = NULL;
+ const struct cpu_map *cpus = evlist->cpus;
+ /* Privileged users get larger default buffers below */
+ bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+
+ btsr->evlist = evlist;
+ btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type == intel_bts_pmu->type) {
+ if (intel_bts_evsel) {
+ pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
+ return -EINVAL;
+ }
+ evsel->attr.freq = 0;
+ evsel->attr.sample_period = 1;
+ intel_bts_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
+ pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n");
+ return -EINVAL;
+ }
+
+ /* Nothing to configure when no intel_bts event was requested */
+ if (!opts->full_auxtrace)
+ return 0;
+
+ if (opts->full_auxtrace && !cpu_map__empty(cpus)) {
+ pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
+ return -EINVAL;
+ }
+
+ /* Set default sizes for snapshot mode */
+ if (opts->auxtrace_snapshot_mode) {
+ if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ } else if (!opts->auxtrace_mmap_pages && !privileged &&
+ opts->mmap_pages == UINT_MAX) {
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ /* Whichever of snapshot size / mmap pages is missing is derived from the other */
+ if (!opts->auxtrace_snapshot_size)
+ opts->auxtrace_snapshot_size =
+ opts->auxtrace_mmap_pages * (size_t)page_size;
+ if (!opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_snapshot_size;
+
+ sz = round_up(sz, page_size) / page_size;
+ opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+ }
+ if (opts->auxtrace_snapshot_size >
+ opts->auxtrace_mmap_pages * (size_t)page_size) {
+ pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+ opts->auxtrace_snapshot_size,
+ opts->auxtrace_mmap_pages * (size_t)page_size);
+ return -EINVAL;
+ }
+ if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+ pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+ return -EINVAL;
+ }
+ pr_debug2("Intel BTS snapshot size: %zu\n",
+ opts->auxtrace_snapshot_size);
+ }
+
+ /* Set default sizes for full trace mode */
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ }
+
+ /* Validate auxtrace_mmap_pages */
+ if (opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+ size_t min_sz;
+
+ if (opts->auxtrace_snapshot_mode)
+ min_sz = KiB(4);
+ else
+ min_sz = KiB(8);
+
+ if (sz < min_sz || !is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n",
+ min_sz / 1024);
+ return -EINVAL;
+ }
+ }
+
+ if (intel_bts_evsel) {
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace event
+ * must come first.
+ */
+ perf_evlist__to_front(evlist, intel_bts_evsel);
+ /*
+ * In the case of per-cpu mmaps, we need the CPU on the
+ * AUX event.
+ * NOTE(review): a non-empty cpu map was already rejected above,
+ * so this branch looks unreachable at present.
+ */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
+ }
+
+ /* Add dummy event to keep tracking */
+ if (opts->full_auxtrace) {
+ struct perf_evsel *tracking_evsel;
+ int err;
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ /* The event just parsed is the last one in the list */
+ tracking_evsel = perf_evlist__last(evlist);
+
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Parse the optional snapshot size argument and switch on snapshot mode.
+ *
+ * @str may be NULL, in which case the size is left at 0. Otherwise it must be
+ * a complete number (any base strtoull() accepts with base 0) that fits in a
+ * size_t. Returns 0 on success and -1 on a malformed or oversized value.
+ */
+static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
+					    struct record_opts *opts,
+					    const char *str)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	unsigned long long size = 0;
+
+	if (str) {
+		char *end;
+
+		size = strtoull(str, &end, 0);
+		if (*end != '\0' || size > SIZE_MAX)
+			return -1;
+	}
+
+	btsr->snapshot_size = size;
+	opts->auxtrace_snapshot_size = size;
+	opts->auxtrace_snapshot_mode = true;
+
+	return 0;
+}
+
+/* Reference value for an AUX buffer: whatever rdtsc() returns right now. */
+static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
+{
+	u64 tsc = rdtsc();
+
+	return tsc;
+}
+
+/*
+ * Grow btsr->snapshot_refs so that index @idx is valid.
+ *
+ * The capacity starts at 16 entries and doubles until it exceeds @idx. New
+ * entries are zeroed by calloc(); existing entries are copied across and the
+ * previous array is released.
+ *
+ * Returns 0 on success or -ENOMEM, in which case the old array is untouched.
+ */
+static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
+					 int idx)
+{
+	const size_t sz = sizeof(struct intel_bts_snapshot_ref);
+	int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
+	struct intel_bts_snapshot_ref *refs;
+
+	if (!new_cnt)
+		new_cnt = 16;
+
+	while (new_cnt <= idx)
+		new_cnt *= 2;
+
+	refs = calloc(new_cnt, sz);
+	if (!refs)
+		return -ENOMEM;
+
+	/* On the first allocation there is nothing to copy and the source is NULL */
+	if (cnt)
+		memcpy(refs, btsr->snapshot_refs, cnt * sz);
+
+	/* Release the array being replaced; free(NULL) is a no-op */
+	free(btsr->snapshot_refs);
+
+	btsr->snapshot_refs = refs;
+	btsr->snapshot_ref_cnt = new_cnt;
+
+	return 0;
+}
+
+/* Release every per-mmap reference buffer, then the reference array itself. */
+static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
+{
+	int idx = 0;
+
+	while (idx < btsr->snapshot_ref_cnt) {
+		zfree(&btsr->snapshot_refs[idx].ref_buf);
+		idx++;
+	}
+
+	zfree(&btsr->snapshot_refs);
+}
+
+/* Destroy the recording state that owns @itr, including its snapshot refs. */
+static void intel_bts_recording_free(struct auxtrace_record *itr)
+{
+	struct intel_bts_recording *rec;
+
+	rec = container_of(itr, struct intel_bts_recording, itr);
+
+	intel_bts_free_snapshot_refs(rec);
+	free(rec);
+}
+
+/*
+ * Start of a snapshot: disable the first intel_bts event in the evlist.
+ * Returns the result of the disable call, or -EINVAL if there is no such event.
+ */
+static int intel_bts_snapshot_start(struct auxtrace_record *itr)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each(btsr->evlist, evsel) {
+		if (evsel->attr.type != btsr->intel_bts_pmu->type)
+			continue;
+		return perf_evlist__disable_event(btsr->evlist, evsel);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * End of a snapshot: re-enable the first intel_bts event in the evlist.
+ * Returns the result of the enable call, or -EINVAL if there is no such event.
+ */
+static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each(btsr->evlist, evsel) {
+		if (evsel->attr.type != btsr->intel_bts_pmu->type)
+			continue;
+		return perf_evlist__enable_event(btsr->evlist, evsel);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Report whether any of the last 512 64-bit words of the buffer (or all of
+ * them, for a smaller buffer) is non-zero. The caller takes a non-zero word
+ * there as evidence that the buffer has wrapped at least once.
+ */
+static bool intel_bts_first_wrap(u64 *data, size_t buf_size)
+{
+	int end = buf_size >> 3;	/* buffer length in 64-bit words */
+	int start = end - 512;
+	int pos;
+
+	if (start < 0)
+		start = 0;
+
+	for (pos = start; pos < end; pos++)
+		if (data[pos])
+			return true;
+
+	return false;
+}
+
+/*
+ * Work out which part of the AUX buffer for mmap @idx belongs to the current
+ * snapshot, by rewriting *@old and *@head into the "old < head" form used in
+ * full trace mode.
+ *
+ * Grows the per-mmap reference array if @idx is beyond it, and records the
+ * first time the buffer is seen to have wrapped.
+ *
+ * Returns 0 on success or the error from intel_bts_alloc_snapshot_refs().
+ */
+static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 *head, u64 *old)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ bool wrapped;
+ int err;
+
+ pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
+ __func__, idx, (size_t)*old, (size_t)*head);
+
+ if (idx >= btsr->snapshot_ref_cnt) {
+ err = intel_bts_alloc_snapshot_refs(btsr, idx);
+ if (err)
+ goto out_err;
+ }
+
+ /* Once a wrap has been seen for this mmap it is remembered */
+ wrapped = btsr->snapshot_refs[idx].wrapped;
+ if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) {
+ btsr->snapshot_refs[idx].wrapped = true;
+ wrapped = true;
+ }
+
+ /*
+ * In full trace mode 'head' continually increases. However in snapshot
+ * mode 'head' is an offset within the buffer. Here 'old' and 'head'
+ * are adjusted to match the full trace case which expects that 'old' is
+ * always less than 'head'.
+ */
+ if (wrapped) {
+ /* Whole buffer is valid: take one full buffer length ending at head */
+ *old = *head;
+ *head += mm->len;
+ } else {
+ /* Reduce 'old' to an offset within the buffer */
+ if (mm->mask)
+ *old &= mm->mask;
+ else
+ *old %= mm->len;
+ if (*old > *head)
+ *head += mm->len;
+ }
+
+ pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
+ __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
+
+ return 0;
+
+out_err:
+ pr_err("%s: failed, error %d\n", __func__, err);
+ return err;
+}
+
+/*
+ * Called after AUX data for mmap @idx has been read: re-enable the first
+ * intel_bts event for that index. Returns the result of the enable call, or
+ * -EINVAL if the evlist holds no intel_bts event.
+ */
+static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each(btsr->evlist, evsel) {
+		if (evsel->attr.type != btsr->intel_bts_pmu->type)
+			continue;
+		return perf_evlist__enable_event_idx(btsr->evlist, evsel, idx);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Create the Intel BTS recording object and wire up its callbacks.
+ *
+ * Returns the embedded auxtrace_record on success. Returns NULL without
+ * touching *@err when the intel_bts PMU does not exist, and NULL with
+ * *@err = -ENOMEM when allocation fails.
+ */
+struct auxtrace_record *intel_bts_recording_init(int *err)
+{
+	struct perf_pmu *pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+	struct intel_bts_recording *rec;
+	struct auxtrace_record *itr;
+
+	if (!pmu)
+		return NULL;
+
+	rec = zalloc(sizeof(struct intel_bts_recording));
+	if (!rec) {
+		*err = -ENOMEM;
+		return NULL;
+	}
+
+	rec->intel_bts_pmu = pmu;
+
+	itr = &rec->itr;
+	itr->recording_options = intel_bts_recording_options;
+	itr->info_priv_size = intel_bts_info_priv_size;
+	itr->info_fill = intel_bts_info_fill;
+	itr->free = intel_bts_recording_free;
+	itr->snapshot_start = intel_bts_snapshot_start;
+	itr->snapshot_finish = intel_bts_snapshot_finish;
+	itr->find_snapshot = intel_bts_find_snapshot;
+	itr->parse_snapshot_options = intel_bts_parse_snapshot_options;
+	itr->reference = intel_bts_reference;
+	itr->read_finish = intel_bts_read_finish;
+	/* AUX data is handled in whole BTS records */
+	itr->alignment = sizeof(struct branch);
+
+	return itr;
+}
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index fd11cc3ce780..79fe07158d00 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -3,6 +3,7 @@
#include <linux/perf_event.h>
#include "../../util/intel-pt.h"
+#include "../../util/intel-bts.h"
#include "../../util/pmu.h"
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
@@ -10,6 +11,8 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
return intel_pt_pmu_default_config(pmu);
+ if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME))
+ pmu->selectable = true;
#endif
return NULL;
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index c20473d1369e..e912856cc4e5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -80,6 +80,7 @@ libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-y += parse-branch-options.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 0f0b7e11e2d9..a980e7c50ee0 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -48,6 +48,7 @@
#include "parse-options.h"
#include "intel-pt.h"
+#include "intel-bts.h"
int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
struct auxtrace_mmap_params *mp,
@@ -888,6 +889,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
switch (type) {
case PERF_AUXTRACE_INTEL_PT:
return intel_pt_process_auxtrace_info(event, session);
+ case PERF_AUXTRACE_INTEL_BTS:
+ return intel_bts_process_auxtrace_info(event, session);
case PERF_AUXTRACE_UNKNOWN:
default:
return -EINVAL;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 7d12f33a3a06..bf72b77a588a 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -40,6 +40,7 @@ struct events_stats;
enum auxtrace_type {
PERF_AUXTRACE_UNKNOWN,
PERF_AUXTRACE_INTEL_PT,
+ PERF_AUXTRACE_INTEL_BTS,
};
enum itrace_period_type {
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
new file mode 100644
index 000000000000..ea768625ab5b
--- /dev/null
+++ b/tools/perf/util/intel-bts.c
@@ -0,0 +1,933 @@
+/*
+ * intel-bts.c: Intel Branch Trace Store support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "cpumap.h"
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "util.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "debug.h"
+#include "tsc.h"
+#include "auxtrace.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-bts.h"
+
+/* NOTE(review): not referenced in the visible part of this file */
+#define MAX_TIMESTAMP (~0ULL)
+
+/* Error codes passed to auxtrace_synth_error() for synthesized error events */
+#define INTEL_BTS_ERR_NOINSN 5
+#define INTEL_BTS_ERR_LOST 9
+
+/*
+ * le64_to_cpu(x): byte-swap on big-endian hosts. Elsewhere the macro expands
+ * to nothing, so le64_to_cpu(x) is just the parenthesised argument.
+ */
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le64_to_cpu bswap_64
+#else
+#define le64_to_cpu
+#endif
+
+/*
+ * Decoder-side state for one perf session containing Intel BTS data.
+ * Comments below describe only the uses visible in this part of the file.
+ */
+struct intel_bts {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues; /* one queue per traced context; priv is an intel_bts_queue */
+ struct auxtrace_heap heap; /* orders queues by the reference of their next buffer */
+ u32 auxtrace_type;
+ struct perf_session *session; /* destination for synthesized events */
+ struct machine *machine; /* used for thread and kernel-address lookups */
+ bool sampling_mode; /* queues are neither put on the heap nor marked done */
+ bool snapshot_mode; /* consecutive buffers are checked for overlap */
+ bool data_queued;
+ u32 pmu_type;
+ struct perf_tsc_conversion tc;
+ bool cap_user_time_zero;
+ struct itrace_synth_opts synth_opts; /* --itrace options: inject, errors, callchain */
+ bool sample_branches; /* synthesize branch samples at all? */
+ u32 branches_filter; /* if non-zero, only samples with one of these flags */
+ u64 branches_sample_type; /* sample_type used when synthesizing for inject */
+ u64 branches_id; /* id / stream_id stamped on branch samples */
+ size_t branches_event_size; /* header.size used when synthesizing for inject */
+ bool synth_needs_swap; /* passed through to perf_event__synthesize_sample() */
+};
+
+/* Per-queue decode state, stored in auxtrace_queue.priv. */
+struct intel_bts_queue {
+ struct intel_bts *bts; /* owning decoder */
+ unsigned int queue_nr; /* index into bts->queues.queue_array */
+ struct auxtrace_buffer *buffer; /* next buffer to process, or NULL */
+ bool on_heap; /* queue has been added to bts->heap */
+ bool done; /* no more buffers (never set in sampling mode) */
+ pid_t pid; /* -1 until resolved from the thread */
+ pid_t tid; /* copied from the auxtrace queue */
+ int cpu; /* -1 unless the auxtrace queue has a cpu */
+ u64 time;
+ struct intel_pt_insn intel_pt_insn; /* decoded instruction at the last branch source */
+ u32 sample_flags; /* PERF_IP_FLAG_* for the last classified branch */
+};
+
+/*
+ * One BTS trace record as read from the AUX buffer; fields are converted
+ * with le64_to_cpu() before use.
+ */
+struct branch {
+ u64 from; /* branch source address; 0 together with a non-zero 'to' is flagged as trace begin */
+ u64 to; /* branch target address; 0 together with a non-zero 'from' is flagged as trace end */
+ u64 misc; /* bit 0x10 is printed as "pred" when set, "miss" otherwise */
+};
+
+/*
+ * Hex-dump raw Intel BTS trace data to stdout, one record per line.
+ *
+ * @buf/@len describe the raw data. Each line shows the offset, the bytes of
+ * the record and, for a complete record, "from -> to" plus "pred" or "miss"
+ * depending on bit 0x10 of the misc word. A trailing fragment shorter than
+ * one record is dumped and labelled "Bad record!".
+ */
+static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
+			   unsigned char *buf, size_t len)
+{
+	struct branch *branch;
+	size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
+	const char *color = PERF_COLOR_BLUE;
+
+	color_fprintf(stdout, color,
+		      ". ... Intel BTS data: size %zu bytes\n",
+		      len);
+
+	while (len) {
+		/* Consume one whole record, or whatever fragment is left */
+		if (len >= br_sz)
+			sz = br_sz;
+		else
+			sz = len;
+		printf(".");
+		/* pos is a size_t, so the conversion needs the 'z' length modifier */
+		color_fprintf(stdout, color, " %08zx: ", pos);
+		for (i = 0; i < sz; i++)
+			color_fprintf(stdout, color, " %02x", buf[i]);
+		/* Pad a short fragment so the text column stays aligned */
+		for (; i < br_sz; i++)
+			color_fprintf(stdout, color, " ");
+		if (len >= br_sz) {
+			branch = (struct branch *)buf;
+			color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
+				      le64_to_cpu(branch->from),
+				      le64_to_cpu(branch->to),
+				      le64_to_cpu(branch->misc) & 0x10 ?
+				      "pred" : "miss");
+		} else {
+			color_fprintf(stdout, color, " Bad record!\n");
+		}
+		pos += sz;
+		buf += sz;
+		len -= sz;
+	}
+}
+
+/* Print a "." separator line, then dump @len bytes of BTS data from @buf. */
+static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
+				 size_t len)
+{
+	printf(".\n");
+
+	intel_bts_dump(bts, buf, len);
+}
+
+/*
+ * Synthesize and deliver a "Lost trace data" error event carrying the cpu,
+ * pid and tid of @sample. Returns 0 or the delivery error, which is also
+ * logged.
+ */
+static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
+{
+	union perf_event event;
+	int ret;
+
+	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+			     INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
+			     sample->tid, 0, "Lost trace data");
+
+	ret = perf_session__deliver_synth_event(bts->session, &event, NULL);
+	if (!ret)
+		return 0;
+
+	pr_err("Intel BTS: failed to deliver error event, error %d\n",
+	       ret);
+	return ret;
+}
+
+/*
+ * Allocate zeroed decode state for queue @queue_nr of @bts, with pid, tid
+ * and cpu set to -1 ("unknown"). Returns NULL if allocation fails.
+ */
+static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
+						     unsigned int queue_nr)
+{
+	struct intel_bts_queue *q = zalloc(sizeof(struct intel_bts_queue));
+
+	if (q) {
+		q->bts = bts;
+		q->queue_nr = queue_nr;
+		q->pid = -1;
+		q->tid = -1;
+		q->cpu = -1;
+	}
+
+	return q;
+}
+
+/*
+ * Prepare one auxtrace queue for decoding.
+ *
+ * Queues with no buffers are ignored. Otherwise the private decode state is
+ * created on first use (inheriting cpu and tid from the queue) and, outside
+ * sampling mode, the queue is put on the heap keyed by the reference of its
+ * first buffer.
+ *
+ * Returns 0 on success, -ENOMEM if the state cannot be allocated, or the
+ * error from auxtrace_heap__add().
+ */
+static int intel_bts_setup_queue(struct intel_bts *bts,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct intel_bts_queue *btsq = queue->priv;
+
+ if (list_empty(&queue->head))
+ return 0;
+
+ if (!btsq) {
+ btsq = intel_bts_alloc_queue(bts, queue_nr);
+ if (!btsq)
+ return -ENOMEM;
+ queue->priv = btsq;
+
+ if (queue->cpu != -1)
+ btsq->cpu = queue->cpu;
+ btsq->tid = queue->tid;
+ }
+
+ /* In sampling mode the heap is not used */
+ if (bts->sampling_mode)
+ return 0;
+
+ if (!btsq->on_heap && !btsq->buffer) {
+ int ret;
+
+ btsq->buffer = auxtrace_buffer__next(queue, NULL);
+ if (!btsq->buffer)
+ return 0;
+
+ ret = auxtrace_heap__add(&bts->heap, queue_nr,
+ btsq->buffer->reference);
+ if (ret)
+ return ret;
+ btsq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int intel_bts_setup_queues(struct intel_bts *bts)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < bts->queues.nr_queues; i++) {
+ ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
+ i);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * If new data has been queued since the last call, clear the flag and set
+ * up the queues again. Returns 0 when there is nothing to do.
+ */
+static inline int intel_bts_update_queues(struct intel_bts *bts)
+{
+	if (!bts->queues.new_data)
+		return 0;
+
+	bts->queues.new_data = false;
+	return intel_bts_setup_queues(bts);
+}
+
+/*
+ * Find where new data starts in buffer B, given that its beginning may
+ * repeat the end of buffer A.
+ *
+ * Tries successively shorter tails of A, stepping one record at a time, and
+ * compares each against the start of B. Returns a pointer into @buf_b just
+ * past the longest matching tail, or @buf_b itself when nothing matches.
+ */
+static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
+					     unsigned char *buf_b, size_t len_b)
+{
+	/* A tail of A longer than B cannot match, so skip those offsets */
+	size_t offs = len_a > len_b ? len_a - len_b : 0;
+
+	while (offs < len_a) {
+		size_t tail = len_a - offs;
+
+		if (memcmp(buf_a + offs, buf_b, tail) == 0)
+			return buf_b + tail;
+
+		offs += sizeof(struct branch);
+	}
+
+	return buf_b;
+}
+
+/*
+ * Trim from buffer @b whatever it repeats from the buffer queued before it,
+ * by pointing use_data/use_size at the first new byte. The first buffer in a
+ * queue is left alone. Returns 0, or -EINVAL if no start could be determined.
+ */
+static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
+				    struct auxtrace_buffer *b)
+{
+	struct auxtrace_buffer *prev;
+	void *start;
+
+	/* Nothing precedes the first buffer in the queue */
+	if (b->list.prev == &queue->head)
+		return 0;
+
+	prev = list_entry(b->list.prev, struct auxtrace_buffer, list);
+	start = intel_bts_find_overlap(prev->data, prev->size, b->data, b->size);
+	if (start == NULL)
+		return -EINVAL;
+
+	b->use_data = start;
+	b->use_size = b->data + b->size - start;
+
+	return 0;
+}
+
+/*
+ * Turn one BTS record into a PERF_RECORD_SAMPLE and deliver it to the session.
+ *
+ * The sample's ip/addr are the record's from/to addresses; pid, tid, cpu,
+ * flags and instruction length come from the queue state. When injecting,
+ * the event payload is also synthesized so that it can be written out.
+ *
+ * Returns 0 on success or the error from synthesis or delivery.
+ */
+static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
+ struct branch *branch)
+{
+ int ret;
+ struct intel_bts *bts = btsq->bts;
+ union perf_event event;
+ struct perf_sample sample = { .ip = 0, };
+
+ event.sample.header.type = PERF_RECORD_SAMPLE;
+ event.sample.header.misc = PERF_RECORD_MISC_USER;
+ event.sample.header.size = sizeof(struct perf_event_header);
+
+ sample.ip = le64_to_cpu(branch->from);
+ sample.pid = btsq->pid;
+ sample.tid = btsq->tid;
+ sample.addr = le64_to_cpu(branch->to);
+ sample.id = btsq->bts->branches_id;
+ sample.stream_id = btsq->bts->branches_id;
+ sample.period = 1;
+ sample.cpu = btsq->cpu;
+ sample.flags = btsq->sample_flags;
+ sample.insn_len = btsq->intel_pt_insn.length;
+
+ /* Only the inject path needs a fully formed event body */
+ if (bts->synth_opts.inject) {
+ event.sample.header.size = bts->branches_event_size;
+ ret = perf_event__synthesize_sample(&event,
+ bts->branches_sample_type,
+ 0, &sample,
+ bts->synth_needs_swap);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
+ if (ret)
+ pr_err("Intel BTS: failed to deliver branch event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+/*
+ * Decode the instruction at address @ip in the queue's thread and store the
+ * result in btsq->intel_pt_insn.
+ *
+ * Returns 0 on success and -1 if the thread, map or dso cannot be found, the
+ * object code cannot be read, or the bytes do not decode.
+ */
+static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
+{
+ struct machine *machine = btsq->bts->machine;
+ struct thread *thread;
+ struct addr_location al;
+ unsigned char buf[1024];
+ size_t bufsz;
+ ssize_t len;
+ int x86_64;
+ uint8_t cpumode;
+ int err = -1;
+
+ /* Read no more than the longest possible instruction */
+ bufsz = intel_pt_insn_max_size();
+
+ if (machine__kernel_ip(machine, ip))
+ cpumode = PERF_RECORD_MISC_KERNEL;
+ else
+ cpumode = PERF_RECORD_MISC_USER;
+
+ thread = machine__find_thread(machine, -1, btsq->tid);
+ if (!thread)
+ return -1;
+
+ thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
+ if (!al.map || !al.map->dso)
+ goto out_put;
+
+ len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz);
+ if (len <= 0)
+ goto out_put;
+
+ /* Load maps to ensure dso->is_64_bit has been updated */
+ map__load(al.map, machine->symbol_filter);
+
+ x86_64 = al.map->dso->is_64_bit;
+
+ if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
+ goto out_put;
+
+ err = 0;
+out_put:
+ /* Drop the reference taken by machine__find_thread() */
+ thread__put(thread);
+ return err;
+}
+
+/*
+ * Synthesize and deliver a "Failed to get instruction" error event for the
+ * given cpu/pid/tid and address @ip. Returns 0 or the delivery error, which
+ * is also logged.
+ */
+static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
+				 pid_t tid, u64 ip)
+{
+	union perf_event event;
+	int ret;
+
+	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+			     INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
+			     "Failed to get instruction");
+
+	ret = perf_session__deliver_synth_event(bts->session, &event, NULL);
+	if (!ret)
+		return 0;
+
+	pr_err("Intel BTS: failed to deliver error event, error %d\n",
+	       ret);
+	return ret;
+}
+
+/*
+ * Classify one branch record, setting btsq->sample_flags and
+ * btsq->intel_pt_insn for the sample that will be synthesized from it.
+ *
+ * A zero 'from' with non-zero 'to' is a trace begin; a zero 'to' is a trace
+ * end. Otherwise the instruction at 'from' is decoded to obtain the branch
+ * type. If decoding fails the flags are cleared and, when error reporting is
+ * enabled, an error event is synthesized.
+ *
+ * Returns 0, or the error from delivering the synthesized error event.
+ */
+static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
+ struct branch *branch)
+{
+ int err;
+
+ if (!branch->from) {
+ if (branch->to)
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_BEGIN;
+ else
+ btsq->sample_flags = 0;
+ btsq->intel_pt_insn.length = 0;
+ } else if (!branch->to) {
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_END;
+ btsq->intel_pt_insn.length = 0;
+ } else {
+ err = intel_bts_get_next_insn(btsq, branch->from);
+ if (err) {
+ btsq->sample_flags = 0;
+ btsq->intel_pt_insn.length = 0;
+ /* Failing to decode is only reported when errors were asked for */
+ if (!btsq->bts->synth_opts.errors)
+ return 0;
+ err = intel_bts_synth_error(btsq->bts, btsq->cpu,
+ btsq->pid, btsq->tid,
+ branch->from);
+ return err;
+ }
+ btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
+ /* Check for an async branch into the kernel */
+ if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
+ machine__kernel_ip(btsq->bts->machine, branch->to) &&
+ btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET))
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
+ }
+
+ return 0;
+}
+
+/*
+ * Walk every complete branch record in @buffer and synthesize a branch
+ * sample for each one that passes the branch filter.
+ *
+ * If overlap trimming set use_data/use_size, only that part of the buffer is
+ * walked. Records with both addresses zero are skipped. Nothing is done when
+ * branch samples were not requested.
+ *
+ * Returns 0 on success or the first error from
+ * intel_bts_synth_branch_sample().
+ */
+static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
+				    struct auxtrace_buffer *buffer)
+{
+	struct branch *branch;
+	size_t sz, bsz = sizeof(struct branch);
+	u32 filter = btsq->bts->branches_filter;
+	int err = 0;
+
+	if (buffer->use_data) {
+		sz = buffer->use_size;
+		branch = buffer->use_data;
+	} else {
+		sz = buffer->size;
+		branch = buffer->data;
+	}
+
+	if (!btsq->bts->sample_branches)
+		return 0;
+
+	/*
+	 * '>=' so that the final complete record is processed as well; a
+	 * trailing fragment shorter than one record is still ignored.
+	 */
+	for (; sz >= bsz; branch += 1, sz -= bsz) {
+		if (!branch->from && !branch->to)
+			continue;
+		/*
+		 * The return value only reports a failure to deliver an
+		 * error event; the flags are valid either way, so it is
+		 * deliberately not checked here.
+		 */
+		intel_bts_get_branch_type(btsq, branch);
+		if (filter && !(filter & btsq->sample_flags))
+			continue;
+		err = intel_bts_synth_branch_sample(btsq, branch);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+/*
+ * Decode the next buffer of a queue.
+ *
+ * Resolves the queue's thread (caching the pid the first time), loads the
+ * buffer data if needed, trims overlap with the previous buffer in snapshot
+ * mode, synthesizes samples and then advances to the following buffer. When
+ * another buffer follows, its reference is stored in *@timestamp.
+ *
+ * Returns 1 when the queue has nothing (more) to process, a negative error
+ * code on failure, and otherwise the result of intel_bts_process_buffer().
+ */
+static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
+{
+ struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
+ struct auxtrace_queue *queue;
+ struct thread *thread;
+ int err;
+
+ if (btsq->done)
+ return 1;
+
+ if (btsq->pid == -1) {
+ /* pid not known yet: look the thread up by tid alone */
+ thread = machine__find_thread(btsq->bts->machine, -1,
+ btsq->tid);
+ if (thread)
+ btsq->pid = thread->pid_;
+ } else {
+ thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
+ btsq->tid);
+ }
+
+ queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
+
+ if (!buffer)
+ buffer = auxtrace_buffer__next(queue, NULL);
+
+ if (!buffer) {
+ if (!btsq->bts->sampling_mode)
+ btsq->done = 1;
+ err = 1;
+ goto out_put;
+ }
+
+ /* Currently there is no support for split buffers */
+ if (buffer->consecutive) {
+ err = -EINVAL;
+ goto out_put;
+ }
+
+ if (!buffer->data) {
+ int fd = perf_data_file__fd(btsq->bts->session->file);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+ }
+
+ /* NOTE(review): an overlap failure (-EINVAL from the helper) is reported as -ENOMEM */
+ if (btsq->bts->snapshot_mode && !buffer->consecutive &&
+ intel_bts_do_fix_overlap(queue, buffer)) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+
+ if (!btsq->bts->synth_opts.callchain && thread &&
+ (!old_buffer || btsq->bts->sampling_mode ||
+ (btsq->bts->snapshot_mode && !buffer->consecutive)))
+ thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+
+ err = intel_bts_process_buffer(btsq, buffer);
+
+ auxtrace_buffer__drop_data(buffer);
+
+ /* Advance to the next buffer even if processing reported an error */
+ btsq->buffer = auxtrace_buffer__next(queue, buffer);
+ if (btsq->buffer) {
+ if (timestamp)
+ *timestamp = btsq->buffer->reference;
+ } else {
+ if (!btsq->bts->sampling_mode)
+ btsq->done = 1;
+ }
+out_put:
+ /* thread may be NULL here if the lookup by tid found nothing */
+ thread__put(thread);
+ return err;
+}
+
+/*
+ * Process every remaining buffer of one queue.
+ *
+ * Keeps calling intel_bts_process_queue() until it reports something other
+ * than 0. Returns the negative error if there was one, otherwise 0.
+ */
+static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
+{
+	u64 next_ts = 0;	/* required by the callee, value not used here */
+	int rc;
+
+	do {
+		rc = intel_bts_process_queue(btsq, &next_ts);
+	} while (!rc);
+
+	/* A positive result only means "nothing left to process" */
+	return rc < 0 ? rc : 0;
+}
+
+/*
+ * Flush the queue that belongs to an exiting thread.
+ *
+ * Only the first queue whose tid matches is flushed. Returns the result of
+ * intel_bts_flush_queue() for that queue, or 0 when no queue matches.
+ */
+static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
+{
+	struct auxtrace_queues *queues = &bts->queues;
+	unsigned int nr;
+
+	for (nr = 0; nr < queues->nr_queues; nr++) {
+		struct intel_bts_queue *btsq = queues->queue_array[nr].priv;
+
+		/* Skip queues without private data or for other threads */
+		if (!btsq || btsq->tid != tid)
+			continue;
+
+		return intel_bts_flush_queue(btsq);
+	}
+
+	return 0;
+}
+
+/*
+ * Process queues from the heap in ordinal order for as long as the ordinal at
+ * the top of the heap does not exceed @timestamp.
+ *
+ * Returns 0 once the heap is empty or its top entry lies beyond @timestamp,
+ * or a negative error from intel_bts_process_queue() / auxtrace_heap__add().
+ */
+static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
+{
+	while (bts->heap.heap_cnt &&
+	       bts->heap.heap_array[0].ordinal <= timestamp) {
+		unsigned int nr = bts->heap.heap_array[0].queue_nr;
+		struct intel_bts_queue *btsq = bts->queues.queue_array[nr].priv;
+		u64 next_ts = 0;
+		int rc;
+
+		auxtrace_heap__pop(&bts->heap);
+
+		rc = intel_bts_process_queue(btsq, &next_ts);
+		if (rc < 0) {
+			/* Put the queue back before reporting the failure */
+			auxtrace_heap__add(&bts->heap, nr, next_ts);
+			return rc;
+		}
+
+		if (rc) {
+			/* Nothing more to process for now: keep it off the heap */
+			btsq->on_heap = false;
+			continue;
+		}
+
+		/* More data pending: requeue using the next buffer's ordinal */
+		rc = auxtrace_heap__add(&bts->heap, nr, next_ts);
+		if (rc < 0)
+			return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Per-event hook: decode queued Intel BTS data up to the time of @sample.
+ *
+ * The sample time is converted with perf_time_to_tsc() and used as the limit
+ * for intel_bts_process_queues(). A PERF_RECORD_EXIT additionally flushes the
+ * queue of the exiting thread, and a truncated PERF_RECORD_AUX is reported
+ * through intel_bts_lost() when error synthesis is enabled.
+ *
+ * Returns 0 on success (always 0 when dump_trace is set), -EINVAL if the tool
+ * does not use ordered events, otherwise the error from the failing step.
+ */
+static int intel_bts_process_event(struct perf_session *session,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct perf_tool *tool)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	u64 timestamp;
+	int err;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("Intel BTS requires ordered events\n");
+		return -EINVAL;
+	}
+
+	/* A time of 0 or (u64)-1 is treated as "no timestamp" */
+	if (sample->time && sample->time != (u64)-1)
+		timestamp = perf_time_to_tsc(sample->time, &bts->tc);
+	else
+		timestamp = 0;
+
+	err = intel_bts_update_queues(bts);
+	if (err)
+		return err;
+
+	err = intel_bts_process_queues(bts, timestamp);
+	if (err)
+		return err;
+	if (event->header.type == PERF_RECORD_EXIT) {
+		/*
+		 * PERF_RECORD_EXIT uses the fork/exit layout (pid, ppid, tid,
+		 * ptid); reading it as a comm event would return the ppid
+		 * slot instead of the tid.
+		 */
+		err = intel_bts_process_tid_exit(bts, event->fork.tid);
+		if (err)
+			return err;
+	}
+
+	if (event->header.type == PERF_RECORD_AUX &&
+	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+	    bts->synth_opts.errors)
+		err = intel_bts_lost(bts, sample);
+
+	return err;
+}
+
+/*
+ * Queue the AUX data that follows a PERF_RECORD_AUXTRACE event.
+ *
+ * Does nothing in sampling mode or when data_queued is already set.
+ * Otherwise the event is added to the queues at the current file offset
+ * (0 for a pipe) and, when dump_trace is set, the buffer is dumped.
+ *
+ * Returns 0 on success, -errno if lseek() fails, or the error from
+ * auxtrace_queues__add_event().
+ */
+static int intel_bts_process_auxtrace_event(struct perf_session *session,
+					    union perf_event *event,
+					    struct perf_tool *tool __maybe_unused)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	struct auxtrace_buffer *buffer;
+	off_t data_offset = 0;
+	int fd, err;
+
+	if (bts->sampling_mode || bts->data_queued)
+		return 0;
+
+	fd = perf_data_file__fd(session->file);
+
+	/* A pipe has no file position to query, so its offset stays 0 */
+	if (!perf_data_file__is_pipe(session->file)) {
+		data_offset = lseek(fd, 0, SEEK_CUR);
+		if (data_offset == -1)
+			return -errno;
+	}
+
+	err = auxtrace_queues__add_event(&bts->queues, session, event,
+					 data_offset, &buffer);
+	if (err)
+		return err;
+
+	/* Dump here now we have copied a piped trace out of the pipe */
+	if (dump_trace && auxtrace_buffer__get_data(buffer, fd)) {
+		intel_bts_dump_event(bts, buffer->data, buffer->size);
+		auxtrace_buffer__put_data(buffer);
+	}
+
+	return 0;
+}
+
+/*
+ * Flush hook: decode everything that is still queued.
+ *
+ * Returns 0 without doing anything when dump_trace is set or in sampling
+ * mode, -EINVAL if the tool does not use ordered events, otherwise the result
+ * of updating and then fully processing the queues.
+ */
+static int intel_bts_flush(struct perf_session *session __maybe_unused,
+			   struct perf_tool *tool __maybe_unused)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	int err;
+
+	if (dump_trace)
+		return 0;
+
+	if (bts->sampling_mode)
+		return 0;
+
+	if (!tool->ordered_events)
+		return -EINVAL;
+
+	err = intel_bts_update_queues(bts);
+	if (err < 0)
+		return err;
+
+	/* MAX_TIMESTAMP as the limit means no queue is held back */
+	return intel_bts_process_queues(bts, MAX_TIMESTAMP);
+}
+
+/* Release the private data of one queue; a NULL @priv is accepted. */
+static void intel_bts_free_queue(void *priv)
+{
+	/* free(NULL) is a no-op, so no explicit NULL check is needed */
+	free(priv);
+}
+
+/*
+ * Free the per-queue private data of every queue, clear the priv pointers
+ * and then free the queues themselves.
+ */
+static void intel_bts_free_events(struct perf_session *session)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	struct auxtrace_queues *queues = &bts->queues;
+	unsigned int nr;
+
+	for (nr = 0; nr < queues->nr_queues; nr++) {
+		struct auxtrace_queue *queue = &queues->queue_array[nr];
+
+		intel_bts_free_queue(queue->priv);
+		queue->priv = NULL;
+	}
+
+	auxtrace_queues__free(queues);
+}
+
+/*
+ * Tear down the Intel BTS decoder attached to @session: free the heap, the
+ * queued events and the decoder itself, and detach it from the session.
+ */
+static void intel_bts_free(struct perf_session *session)
+{
+	struct intel_bts *bts;
+
+	bts = container_of(session->auxtrace, struct intel_bts, auxtrace);
+
+	auxtrace_heap__free(&bts->heap);
+	/* Still reads session->auxtrace, so it must run before the reset */
+	intel_bts_free_events(session);
+	session->auxtrace = NULL;
+	free(bts);
+}
+
+/*
+ * Pairs a tool with a session so that intel_bts_event_synth() can get from
+ * the tool pointer it is given back to the session via container_of().
+ */
+struct intel_bts_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+/*
+ * Callback handed to perf_event__synthesize_attr(): deliver the synthesized
+ * event to the session that embeds @tool in a struct intel_bts_synth.
+ */
+static int intel_bts_event_synth(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_sample *sample __maybe_unused,
+				 struct machine *machine __maybe_unused)
+{
+	struct intel_bts_synth *synth;
+
+	synth = container_of(tool, struct intel_bts_synth, dummy_tool);
+
+	return perf_session__deliver_synth_event(synth->session, event, NULL);
+}
+
+/*
+ * Synthesize an attr event for @attr with the single sample id @id and
+ * deliver it to @session.
+ *
+ * Returns the result of perf_event__synthesize_attr().
+ */
+static int intel_bts_synth_event(struct perf_session *session,
+				 struct perf_event_attr *attr, u64 id)
+{
+	struct intel_bts_synth synth;
+
+	/* Only the session member is needed; everything else stays zeroed */
+	memset(&synth, 0, sizeof(synth));
+	synth.session = session;
+
+	return perf_event__synthesize_attr(&synth.dummy_tool, attr, 1, &id,
+					   intel_bts_event_synth);
+}
+
+/*
+ * Set up the synthesized 'branches' event for decoded Intel BTS data.
+ *
+ * The first evsel whose attr.type equals bts->pmu_type and which has ids is
+ * used as the template for the synthesized attribute.
+ *
+ * Returns 0 on success or when there is no such evsel, otherwise the error
+ * from intel_bts_synth_event().
+ */
+static int intel_bts_synth_events(struct intel_bts *bts,
+ struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ /* On a match, evsel is left pointing at the matching entry */
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type == bts->pmu_type && evsel->ids) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("There are no selected events with Intel BTS data\n");
+ return 0;
+ }
+
+ /* Start from a zeroed attr and copy selected settings from the evsel */
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ /* Synthesized samples carry neither a time nor a cpu field */
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+ attr.exclude_user = evsel->attr.exclude_user;
+ attr.exclude_kernel = evsel->attr.exclude_kernel;
+ attr.exclude_hv = evsel->attr.exclude_hv;
+ attr.exclude_host = evsel->attr.exclude_host;
+ attr.exclude_guest = evsel->attr.exclude_guest;
+ attr.sample_id_all = evsel->attr.sample_id_all;
+ attr.read_format = evsel->attr.read_format;
+
+ /* Derive the id from the evsel's first id; 0 is avoided */
+ id = evsel->id[0] + 1000000000;
+ if (!id)
+ id = 1;
+
+ if (bts->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ id, (u64)attr.sample_type);
+ err = intel_bts_synth_event(session, &attr, id);
+ if (err) {
+ pr_err("%s: failed to synthesize 'branches' event type\n",
+ __func__);
+ return err;
+ }
+ /* sample_branches is what enables decoding in intel_bts_process_buffer() */
+ bts->sample_branches = true;
+ bts->branches_sample_type = attr.sample_type;
+ bts->branches_id = id;
+ /*
+ * We only use sample types from PERF_SAMPLE_MASK so we can use
+ * __perf_evsel__sample_size() here.
+ */
+ bts->branches_event_size = sizeof(struct sample_event) +
+ __perf_evsel__sample_size(attr.sample_type);
+ }
+
+ bts->synth_needs_swap = evsel->needs_swap;
+
+ return 0;
+}
+
+/*
+ * printf() formats for dumping the auxtrace info private data, indexed by
+ * the INTEL_BTS_* enumerators from intel-bts.h.
+ */
+static const char * const intel_bts_info_fmts[] = {
+	[INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
+	[INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
+	[INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
+	[INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
+	[INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
+	[INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
+};
+
+/*
+ * When dump_trace is set, print entries @start..@finish (inclusive) of @arr
+ * to stdout using the matching intel_bts_info_fmts[] format.
+ */
+static void intel_bts_print_info(u64 *arr, int start, int finish)
+{
+	int idx = start;
+
+	if (!dump_trace)
+		return;
+
+	while (idx <= finish) {
+		fprintf(stdout, intel_bts_info_fmts[idx], arr[idx]);
+		idx++;
+	}
+}
+
+/*
+ * NOTE(review): INTEL_BTS_AUXTRACE_PRIV_SIZE is a byte count
+ * (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64)), so using it as the element
+ * count of a u64 array makes this array sizeof(u64) times larger than the
+ * number of priv entries - confirm whether that is intended.
+ */
+u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
+
+/*
+ * Set up Intel BTS decoding for @session from a PERF_RECORD_AUXTRACE_INFO
+ * event.
+ *
+ * Allocates the decoder state, copies the parameters from the event's priv[]
+ * array, installs the auxtrace callbacks on the session and, unless
+ * dump_trace is set, prepares the synthesized events and queues any indexed
+ * AUX data.
+ *
+ * @event:   the auxtrace info event
+ * @session: session to attach the decoder to
+ *
+ * Returns 0 on success, -EINVAL if the event is too small to hold all priv
+ * entries that are read, -ENOMEM on allocation failure, or the error from a
+ * failing set-up step.
+ */
+int intel_bts_process_auxtrace_info(union perf_event *event,
+				    struct perf_session *session)
+{
+	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+	/*
+	 * priv[] is read up to and including index INTEL_BTS_SNAPSHOT_MODE,
+	 * so that many entries plus one must be present.
+	 */
+	size_t min_sz = sizeof(u64) * (INTEL_BTS_SNAPSHOT_MODE + 1);
+	struct intel_bts *bts;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+					 min_sz)
+		return -EINVAL;
+
+	bts = zalloc(sizeof(struct intel_bts));
+	if (!bts)
+		return -ENOMEM;
+
+	err = auxtrace_queues__init(&bts->queues);
+	if (err)
+		goto err_free;
+
+	bts->session = session;
+	bts->machine = &session->machines.host; /* No kvm support */
+	bts->auxtrace_type = auxtrace_info->type;
+	bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
+	bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
+	bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
+	bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
+	bts->cap_user_time_zero =
+			auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
+	bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
+
+	bts->sampling_mode = false;
+
+	bts->auxtrace.process_event = intel_bts_process_event;
+	bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
+	bts->auxtrace.flush_events = intel_bts_flush;
+	bts->auxtrace.free_events = intel_bts_free_events;
+	bts->auxtrace.free = intel_bts_free;
+	session->auxtrace = &bts->auxtrace;
+
+	intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
+			     INTEL_BTS_SNAPSHOT_MODE);
+
+	/* When only dumping, no events are synthesized and nothing is queued */
+	if (dump_trace)
+		return 0;
+
+	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+		bts->synth_opts = *session->itrace_synth_opts;
+	else
+		itrace_synth_opts__set_default(&bts->synth_opts);
+
+	/* Build the flag mask used to filter branches in intel_bts_process_buffer() */
+	if (bts->synth_opts.calls)
+		bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+					PERF_IP_FLAG_TRACE_END;
+	if (bts->synth_opts.returns)
+		bts->branches_filter |= PERF_IP_FLAG_RETURN |
+					PERF_IP_FLAG_TRACE_BEGIN;
+
+	err = intel_bts_synth_events(bts, session);
+	if (err)
+		goto err_free_queues;
+
+	err = auxtrace_queues__process_index(&bts->queues, session);
+	if (err)
+		goto err_free_queues;
+
+	/* Data queued from the index is not queued again from AUXTRACE events */
+	if (bts->queues.populated)
+		bts->data_queued = true;
+
+	return 0;
+
+err_free_queues:
+	auxtrace_queues__free(&bts->queues);
+	session->auxtrace = NULL;
+err_free:
+	free(bts);
+	return err;
+}
diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h
new file mode 100644
index 000000000000..ca65e21b3e83
--- /dev/null
+++ b/tools/perf/util/intel-bts.h
@@ -0,0 +1,43 @@
+/*
+ * intel-bts.h: Intel Branch Trace Store support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__PERF_INTEL_BTS_H__
+#define INCLUDE__PERF_INTEL_BTS_H__
+
+/* Name of the Intel BTS PMU */
+#define INTEL_BTS_PMU_NAME "intel_bts"
+
+/* Indices into the priv[] array of the Intel BTS auxtrace info event */
+enum {
+ INTEL_BTS_PMU_TYPE,
+ INTEL_BTS_TIME_SHIFT,
+ INTEL_BTS_TIME_MULT,
+ INTEL_BTS_TIME_ZERO,
+ INTEL_BTS_CAP_USER_TIME_ZERO,
+ INTEL_BTS_SNAPSHOT_MODE,
+ INTEL_BTS_AUXTRACE_PRIV_MAX, /* number of priv[] entries, not an index */
+};
+
+/* Size in bytes of the priv[] entries listed above */
+#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+
+/* Record-side entry point; *err presumably receives an error code on failure - TODO confirm */
+struct auxtrace_record *intel_bts_recording_init(int *err);
+
+/* Sets up Intel BTS decoding for the session from an auxtrace info event */
+int intel_bts_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+#endif
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 3c71138e7672..89c91a1a67e7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -462,10 +462,6 @@ static struct perf_pmu *pmu_lookup(const char *name)
LIST_HEAD(aliases);
__u32 type;
- /* No support for intel_bts so disallow it */
- if (!strcmp(name, "intel_bts"))
- return NULL;
-
/*
* The pmu data we store & need consists of the pmu
* type value and format definitions. Load both right