summaryrefslogtreecommitdiff
path: root/tools/perf/util/intel-pt.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util/intel-pt.c')
-rw-r--r--tools/perf/util/intel-pt.c354
1 files changed, 331 insertions, 23 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 9276506d07f8e..8ed51f4e9e304 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -33,6 +33,7 @@
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"
+#include "time-utils.h"
#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
@@ -41,6 +42,11 @@
#define MAX_TIMESTAMP (~0ULL)
+struct range {
+ u64 start;
+ u64 end;
+};
+
struct intel_pt {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -109,6 +115,9 @@ struct intel_pt {
char *filter;
struct addr_filters filts;
+
+ struct range *time_ranges;
+ unsigned int range_cnt;
};
enum switch_state {
@@ -145,9 +154,18 @@ struct intel_pt_queue {
bool have_sample;
u64 time;
u64 timestamp;
+ u64 sel_timestamp;
+ bool sel_start;
+ unsigned int sel_idx;
u32 flags;
u16 insn_len;
u64 last_insn_cnt;
+ u64 ipc_insn_cnt;
+ u64 ipc_cyc_cnt;
+ u64 last_in_insn_cnt;
+ u64 last_in_cyc_cnt;
+ u64 last_br_insn_cnt;
+ u64 last_br_cyc_cnt;
char insn[INTEL_PT_INSN_BUF_SZ];
};
@@ -224,32 +242,13 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *
return 0;
}
-/* This function assumes data is processed sequentially only */
-static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
+ struct auxtrace_buffer *buffer,
+ struct auxtrace_buffer *old_buffer,
+ struct intel_pt_buffer *b)
{
- struct intel_pt_queue *ptq = data;
- struct auxtrace_buffer *buffer = ptq->buffer;
- struct auxtrace_buffer *old_buffer = ptq->old_buffer;
- struct auxtrace_queue *queue;
bool might_overlap;
- if (ptq->stop) {
- b->len = 0;
- return 0;
- }
-
- queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
-
- buffer = auxtrace_buffer__next(queue, buffer);
- if (!buffer) {
- if (old_buffer)
- auxtrace_buffer__drop_data(old_buffer);
- b->len = 0;
- return 0;
- }
-
- ptq->buffer = buffer;
-
if (!buffer->data) {
int fd = perf_data__fd(ptq->pt->session->data);
@@ -279,6 +278,95 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
b->consecutive = true;
}
+ return 0;
+}
+
+/* Do not drop buffers with references - refer intel_pt_get_trace() */
+static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
+ struct auxtrace_buffer *buffer)
+{
+ if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
+ return;
+
+ auxtrace_buffer__drop_data(buffer);
+}
+
+/* Must be serialized with respect to intel_pt_get_trace() */
+static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
+ void *cb_data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer;
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+ struct auxtrace_queue *queue;
+ int err = 0;
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ while (1) {
+ struct intel_pt_buffer b = { .len = 0 };
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer)
+ break;
+
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
+ if (err)
+ break;
+
+ if (b.len) {
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer);
+ old_buffer = buffer;
+ } else {
+ intel_pt_lookahead_drop_buffer(ptq, buffer);
+ continue;
+ }
+
+ err = cb(&b, cb_data);
+ if (err)
+ break;
+ }
+
+ if (buffer != old_buffer)
+ intel_pt_lookahead_drop_buffer(ptq, buffer);
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer);
+
+ return err;
+}
+
+/*
+ * This function assumes data is processed sequentially only.
+ * Must be serialized with respect to intel_pt_lookahead()
+ */
+static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer;
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+ struct auxtrace_queue *queue;
+ int err;
+
+ if (ptq->stop) {
+ b->len = 0;
+ return 0;
+ }
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ b->len = 0;
+ return 0;
+ }
+
+ ptq->buffer = buffer;
+
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
+ if (err)
+ return err;
+
if (ptq->step_through_buffers)
ptq->stop = true;
@@ -798,6 +886,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.get_trace = intel_pt_get_trace;
params.walk_insn = intel_pt_walk_next_insn;
+ params.lookahead = intel_pt_lookahead;
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
params.branch_enable = intel_pt_branch_enable(pt);
@@ -921,6 +1010,23 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
ptq->flags |= PERF_IP_FLAG_TRACE_END;
}
+static void intel_pt_setup_time_range(struct intel_pt *pt,
+ struct intel_pt_queue *ptq)
+{
+ if (!pt->range_cnt)
+ return;
+
+ ptq->sel_timestamp = pt->time_ranges[0].start;
+ ptq->sel_idx = 0;
+
+ if (ptq->sel_timestamp) {
+ ptq->sel_start = true;
+ } else {
+ ptq->sel_timestamp = pt->time_ranges[0].end;
+ ptq->sel_start = false;
+ }
+}
+
static int intel_pt_setup_queue(struct intel_pt *pt,
struct auxtrace_queue *queue,
unsigned int queue_nr)
@@ -945,6 +1051,8 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
ptq->step_through_buffers = true;
ptq->sync_switch = pt->sync_switch;
+
+ intel_pt_setup_time_range(pt, ptq);
}
if (!ptq->on_heap &&
@@ -959,6 +1067,14 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
intel_pt_log("queue %u getting timestamp\n", queue_nr);
intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+ if (ptq->sel_start && ptq->sel_timestamp) {
+ ret = intel_pt_fast_forward(ptq->decoder,
+ ptq->sel_timestamp);
+ if (ret)
+ return ret;
+ }
+
while (1) {
state = intel_pt_decode(ptq->decoder);
if (state->err) {
@@ -978,6 +1094,9 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
queue_nr, ptq->timestamp);
ptq->state = state;
ptq->have_sample = true;
+ if (ptq->sel_start && ptq->sel_timestamp &&
+ ptq->timestamp < ptq->sel_timestamp)
+ ptq->have_sample = false;
intel_pt_sample_flags(ptq);
ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
if (ret)
@@ -1153,6 +1272,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.branch_stack = (struct branch_stack *)&dummy_bs;
}
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
+ if (sample.cyc_cnt) {
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
+ ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
+ }
+
return intel_pt_deliver_synth_b_event(pt, event, &sample,
pt->branches_sample_type);
}
@@ -1208,6 +1334,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.stream_id = ptq->pt->instructions_id;
sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
+ if (sample.cyc_cnt) {
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
+ ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
+ }
+
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
@@ -1479,6 +1612,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false;
+ if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
+ /*
+ * Cycle count and instruction count only go together to create
+ * a valid IPC ratio when the cycle count changes.
+ */
+ ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+ ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
+ }
+
if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
if (state->type & INTEL_PT_CBR_CHG) {
err = intel_pt_synth_cbr_sample(ptq);
@@ -1641,10 +1783,83 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt)
}
}
+/*
+ * To filter against time ranges, it is only necessary to look at the next start
+ * or end time.
+ */
+static bool intel_pt_next_time(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+
+ if (ptq->sel_start) {
+ /* Next time is an end time */
+ ptq->sel_start = false;
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
+ return true;
+ } else if (ptq->sel_idx + 1 < pt->range_cnt) {
+ /* Next time is a start time */
+ ptq->sel_start = true;
+ ptq->sel_idx += 1;
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
+ return true;
+ }
+
+ /* No next time */
+ return false;
+}
+
+static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
+{
+ int err;
+
+ while (1) {
+ if (ptq->sel_start) {
+ if (ptq->timestamp >= ptq->sel_timestamp) {
+ /* After start time, so consider next time */
+ intel_pt_next_time(ptq);
+ if (!ptq->sel_timestamp) {
+ /* No end time */
+ return 0;
+ }
+ /* Check against end time */
+ continue;
+ }
+ /* Before start time, so fast forward */
+ ptq->have_sample = false;
+ if (ptq->sel_timestamp > *ff_timestamp) {
+ if (ptq->sync_switch) {
+ intel_pt_next_tid(ptq->pt, ptq);
+ ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+ }
+ *ff_timestamp = ptq->sel_timestamp;
+ err = intel_pt_fast_forward(ptq->decoder,
+ ptq->sel_timestamp);
+ if (err)
+ return err;
+ }
+ return 0;
+ } else if (ptq->timestamp > ptq->sel_timestamp) {
+ /* After end time, so consider next time */
+ if (!intel_pt_next_time(ptq)) {
+ /* No next time range, so stop decoding */
+ ptq->have_sample = false;
+ ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+ return 1;
+ }
+ /* Check against next start time */
+ continue;
+ } else {
+ /* Before end time */
+ return 0;
+ }
+ }
+}
+
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
const struct intel_pt_state *state = ptq->state;
struct intel_pt *pt = ptq->pt;
+ u64 ff_timestamp = 0;
int err;
if (!pt->kernel_start) {
@@ -1709,6 +1924,12 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
ptq->timestamp = state->timestamp;
}
+ if (ptq->sel_timestamp) {
+ err = intel_pt_time_filter(ptq, &ff_timestamp);
+ if (err)
+ return err;
+ }
+
if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
*timestamp = ptq->timestamp;
return 0;
@@ -2114,6 +2335,7 @@ static void intel_pt_free(struct perf_session *session)
thread__put(pt->unknown_thread);
addr_filters__exit(&pt->filts);
zfree(&pt->filter);
+ zfree(&pt->time_ranges);
free(pt);
}
@@ -2411,6 +2633,85 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data)
return 0;
}
+/* Find least TSC which converts to ns or later */
+static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
+{
+ u64 tsc, tm;
+
+ tsc = perf_time_to_tsc(ns, &pt->tc);
+
+ while (1) {
+ tm = tsc_to_perf_time(tsc, &pt->tc);
+ if (tm < ns)
+ break;
+ tsc -= 1;
+ }
+
+ while (tm < ns)
+ tm = tsc_to_perf_time(++tsc, &pt->tc);
+
+ return tsc;
+}
+
+/* Find greatest TSC which converts to ns or earlier */
+static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
+{
+ u64 tsc, tm;
+
+ tsc = perf_time_to_tsc(ns, &pt->tc);
+
+ while (1) {
+ tm = tsc_to_perf_time(tsc, &pt->tc);
+ if (tm > ns)
+ break;
+ tsc += 1;
+ }
+
+ while (tm > ns)
+ tm = tsc_to_perf_time(--tsc, &pt->tc);
+
+ return tsc;
+}
+
+static int intel_pt_setup_time_ranges(struct intel_pt *pt,
+ struct itrace_synth_opts *opts)
+{
+ struct perf_time_interval *p = opts->ptime_range;
+ int n = opts->range_num;
+ int i;
+
+ if (!n || !p || pt->timeless_decoding)
+ return 0;
+
+ pt->time_ranges = calloc(n, sizeof(struct range));
+ if (!pt->time_ranges)
+ return -ENOMEM;
+
+ pt->range_cnt = n;
+
+ intel_pt_log("%s: %u range(s)\n", __func__, n);
+
+ for (i = 0; i < n; i++) {
+ struct range *r = &pt->time_ranges[i];
+ u64 ts = p[i].start;
+ u64 te = p[i].end;
+
+ /*
+ * Take care to ensure the TSC range matches the perf-time range
+ * when converted back to perf-time.
+ */
+ r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
+ r->end = te ? intel_pt_tsc_end(te, pt) : 0;
+
+ intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
+ i, ts, te);
+ intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
+ i, r->start, r->end);
+ }
+
+ return 0;
+}
+
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
@@ -2643,6 +2944,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
}
+ if (session->itrace_synth_opts) {
+ err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
+ if (err)
+ goto err_delete_thread;
+ }
+
if (pt->synth_opts.calls)
pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_TRACE_END;
@@ -2683,6 +2990,7 @@ err_free_queues:
err_free:
addr_filters__exit(&pt->filts);
zfree(&pt->filter);
+ zfree(&pt->time_ranges);
free(pt);
return err;
}