1*f6aab3d8Srobert //===-- PerfContextSwitchDecoder.cpp --======------------------------------===//
2*f6aab3d8Srobert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3*f6aab3d8Srobert // See https://llvm.org/LICENSE.txt for license information.
4*f6aab3d8Srobert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5*f6aab3d8Srobert //
6*f6aab3d8Srobert //===----------------------------------------------------------------------===//
7*f6aab3d8Srobert
#include "PerfContextSwitchDecoder.h"

#include "llvm/Support/ErrorHandling.h"

#include <cstring>
#include <optional>
10*f6aab3d8Srobert
11*f6aab3d8Srobert using namespace lldb;
12*f6aab3d8Srobert using namespace lldb_private;
13*f6aab3d8Srobert using namespace lldb_private::trace_intel_pt;
14*f6aab3d8Srobert using namespace llvm;
15*f6aab3d8Srobert
16*f6aab3d8Srobert /// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on
17*f6aab3d8Srobert /// non-linux platforms.
18*f6aab3d8Srobert /// \{
19*f6aab3d8Srobert #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
20*f6aab3d8Srobert
21*f6aab3d8Srobert #define PERF_RECORD_LOST 2
22*f6aab3d8Srobert #define PERF_RECORD_THROTTLE 5
23*f6aab3d8Srobert #define PERF_RECORD_UNTHROTTLE 6
24*f6aab3d8Srobert #define PERF_RECORD_LOST_SAMPLES 13
25*f6aab3d8Srobert #define PERF_RECORD_SWITCH_CPU_WIDE 15
26*f6aab3d8Srobert #define PERF_RECORD_MAX 19
27*f6aab3d8Srobert
28*f6aab3d8Srobert struct perf_event_header {
29*f6aab3d8Srobert uint32_t type;
30*f6aab3d8Srobert uint16_t misc;
31*f6aab3d8Srobert uint16_t size;
32*f6aab3d8Srobert
33*f6aab3d8Srobert /// \return
34*f6aab3d8Srobert /// An \a llvm::Error if the record looks obviously wrong, or \a
35*f6aab3d8Srobert /// llvm::Error::success() otherwise.
SanityCheckperf_event_header36*f6aab3d8Srobert Error SanityCheck() const {
37*f6aab3d8Srobert // The following checks are based on visual inspection of the records and
38*f6aab3d8Srobert // enums in
39*f6aab3d8Srobert // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h
40*f6aab3d8Srobert // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records
41*f6aab3d8Srobert // hold.
42*f6aab3d8Srobert
43*f6aab3d8Srobert // A record of too many uint64_t's or more should mean that the data is
44*f6aab3d8Srobert // wrong
45*f6aab3d8Srobert const uint64_t max_valid_size_bytes = 8000;
46*f6aab3d8Srobert if (size == 0 || size > max_valid_size_bytes)
47*f6aab3d8Srobert return createStringError(
48*f6aab3d8Srobert inconvertibleErrorCode(),
49*f6aab3d8Srobert formatv("A record of {0} bytes was found.", size));
50*f6aab3d8Srobert
51*f6aab3d8Srobert // We add some numbers to PERF_RECORD_MAX because some systems might have
52*f6aab3d8Srobert // custom records. In any case, we are looking only for abnormal data.
53*f6aab3d8Srobert if (type >= PERF_RECORD_MAX + 100)
54*f6aab3d8Srobert return createStringError(
55*f6aab3d8Srobert inconvertibleErrorCode(),
56*f6aab3d8Srobert formatv("Invalid record type {0} was found.", type));
57*f6aab3d8Srobert return Error::success();
58*f6aab3d8Srobert }
59*f6aab3d8Srobert
IsContextSwitchRecordperf_event_header60*f6aab3d8Srobert bool IsContextSwitchRecord() const {
61*f6aab3d8Srobert return type == PERF_RECORD_SWITCH_CPU_WIDE;
62*f6aab3d8Srobert }
63*f6aab3d8Srobert
IsErrorRecordperf_event_header64*f6aab3d8Srobert bool IsErrorRecord() const {
65*f6aab3d8Srobert return type == PERF_RECORD_LOST || type == PERF_RECORD_THROTTLE ||
66*f6aab3d8Srobert type == PERF_RECORD_UNTHROTTLE || type == PERF_RECORD_LOST_SAMPLES;
67*f6aab3d8Srobert }
68*f6aab3d8Srobert };
69*f6aab3d8Srobert /// \}
70*f6aab3d8Srobert
71*f6aab3d8Srobert /// Record found in the perf_event context switch traces. It might contain
72*f6aab3d8Srobert /// additional fields in memory, but header.size should have the actual size
73*f6aab3d8Srobert /// of the record.
74*f6aab3d8Srobert struct PerfContextSwitchRecord {
75*f6aab3d8Srobert struct perf_event_header header;
76*f6aab3d8Srobert uint32_t next_prev_pid;
77*f6aab3d8Srobert uint32_t next_prev_tid;
78*f6aab3d8Srobert uint32_t pid, tid;
79*f6aab3d8Srobert uint64_t time_in_nanos;
80*f6aab3d8Srobert
IsOutPerfContextSwitchRecord81*f6aab3d8Srobert bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; }
82*f6aab3d8Srobert };
83*f6aab3d8Srobert
84*f6aab3d8Srobert /// Record produced after parsing the raw context switch trace produce by
85*f6aab3d8Srobert /// perf_event. A major difference between this struct and
86*f6aab3d8Srobert /// PerfContextSwitchRecord is that this one uses tsc instead of nanos.
87*f6aab3d8Srobert struct ContextSwitchRecord {
88*f6aab3d8Srobert uint64_t tsc;
89*f6aab3d8Srobert /// Whether the switch is in or out
90*f6aab3d8Srobert bool is_out;
91*f6aab3d8Srobert /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally
92*f6aab3d8Srobert /// runs after a context switch out of a normal user thread.
93*f6aab3d8Srobert lldb::pid_t pid;
94*f6aab3d8Srobert lldb::tid_t tid;
95*f6aab3d8Srobert
IsOutContextSwitchRecord96*f6aab3d8Srobert bool IsOut() const { return is_out; }
97*f6aab3d8Srobert
IsInContextSwitchRecord98*f6aab3d8Srobert bool IsIn() const { return !is_out; }
99*f6aab3d8Srobert };
100*f6aab3d8Srobert
GetLowestKnownTSC() const101*f6aab3d8Srobert uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const {
102*f6aab3d8Srobert switch (variant) {
103*f6aab3d8Srobert case Variant::Complete:
104*f6aab3d8Srobert return tscs.complete.start;
105*f6aab3d8Srobert case Variant::OnlyStart:
106*f6aab3d8Srobert return tscs.only_start.start;
107*f6aab3d8Srobert case Variant::OnlyEnd:
108*f6aab3d8Srobert return tscs.only_end.end;
109*f6aab3d8Srobert case Variant::HintedEnd:
110*f6aab3d8Srobert return tscs.hinted_end.start;
111*f6aab3d8Srobert case Variant::HintedStart:
112*f6aab3d8Srobert return tscs.hinted_start.end;
113*f6aab3d8Srobert }
114*f6aab3d8Srobert }
115*f6aab3d8Srobert
GetStartTSC() const116*f6aab3d8Srobert uint64_t ThreadContinuousExecution::GetStartTSC() const {
117*f6aab3d8Srobert switch (variant) {
118*f6aab3d8Srobert case Variant::Complete:
119*f6aab3d8Srobert return tscs.complete.start;
120*f6aab3d8Srobert case Variant::OnlyStart:
121*f6aab3d8Srobert return tscs.only_start.start;
122*f6aab3d8Srobert case Variant::OnlyEnd:
123*f6aab3d8Srobert return 0;
124*f6aab3d8Srobert case Variant::HintedEnd:
125*f6aab3d8Srobert return tscs.hinted_end.start;
126*f6aab3d8Srobert case Variant::HintedStart:
127*f6aab3d8Srobert return tscs.hinted_start.hinted_start;
128*f6aab3d8Srobert }
129*f6aab3d8Srobert }
130*f6aab3d8Srobert
GetEndTSC() const131*f6aab3d8Srobert uint64_t ThreadContinuousExecution::GetEndTSC() const {
132*f6aab3d8Srobert switch (variant) {
133*f6aab3d8Srobert case Variant::Complete:
134*f6aab3d8Srobert return tscs.complete.end;
135*f6aab3d8Srobert case Variant::OnlyStart:
136*f6aab3d8Srobert return std::numeric_limits<uint64_t>::max();
137*f6aab3d8Srobert case Variant::OnlyEnd:
138*f6aab3d8Srobert return tscs.only_end.end;
139*f6aab3d8Srobert case Variant::HintedEnd:
140*f6aab3d8Srobert return tscs.hinted_end.hinted_end;
141*f6aab3d8Srobert case Variant::HintedStart:
142*f6aab3d8Srobert return tscs.hinted_start.end;
143*f6aab3d8Srobert }
144*f6aab3d8Srobert }
145*f6aab3d8Srobert
CreateCompleteExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t start,uint64_t end)146*f6aab3d8Srobert ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution(
147*f6aab3d8Srobert lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start,
148*f6aab3d8Srobert uint64_t end) {
149*f6aab3d8Srobert ThreadContinuousExecution o(cpu_id, tid, pid);
150*f6aab3d8Srobert o.variant = Variant::Complete;
151*f6aab3d8Srobert o.tscs.complete.start = start;
152*f6aab3d8Srobert o.tscs.complete.end = end;
153*f6aab3d8Srobert return o;
154*f6aab3d8Srobert }
155*f6aab3d8Srobert
CreateHintedStartExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t hinted_start,uint64_t end)156*f6aab3d8Srobert ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution(
157*f6aab3d8Srobert lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid,
158*f6aab3d8Srobert uint64_t hinted_start, uint64_t end) {
159*f6aab3d8Srobert ThreadContinuousExecution o(cpu_id, tid, pid);
160*f6aab3d8Srobert o.variant = Variant::HintedStart;
161*f6aab3d8Srobert o.tscs.hinted_start.hinted_start = hinted_start;
162*f6aab3d8Srobert o.tscs.hinted_start.end = end;
163*f6aab3d8Srobert return o;
164*f6aab3d8Srobert }
165*f6aab3d8Srobert
CreateHintedEndExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t start,uint64_t hinted_end)166*f6aab3d8Srobert ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution(
167*f6aab3d8Srobert lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start,
168*f6aab3d8Srobert uint64_t hinted_end) {
169*f6aab3d8Srobert ThreadContinuousExecution o(cpu_id, tid, pid);
170*f6aab3d8Srobert o.variant = Variant::HintedEnd;
171*f6aab3d8Srobert o.tscs.hinted_end.start = start;
172*f6aab3d8Srobert o.tscs.hinted_end.hinted_end = hinted_end;
173*f6aab3d8Srobert return o;
174*f6aab3d8Srobert }
175*f6aab3d8Srobert
CreateOnlyEndExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t end)176*f6aab3d8Srobert ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution(
177*f6aab3d8Srobert lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) {
178*f6aab3d8Srobert ThreadContinuousExecution o(cpu_id, tid, pid);
179*f6aab3d8Srobert o.variant = Variant::OnlyEnd;
180*f6aab3d8Srobert o.tscs.only_end.end = end;
181*f6aab3d8Srobert return o;
182*f6aab3d8Srobert }
183*f6aab3d8Srobert
CreateOnlyStartExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t start)184*f6aab3d8Srobert ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution(
185*f6aab3d8Srobert lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) {
186*f6aab3d8Srobert ThreadContinuousExecution o(cpu_id, tid, pid);
187*f6aab3d8Srobert o.variant = Variant::OnlyStart;
188*f6aab3d8Srobert o.tscs.only_start.start = start;
189*f6aab3d8Srobert return o;
190*f6aab3d8Srobert }
191*f6aab3d8Srobert
RecoverExecutionsFromConsecutiveRecords(cpu_id_t cpu_id,const LinuxPerfZeroTscConversion & tsc_conversion,const ContextSwitchRecord & current_record,const std::optional<ContextSwitchRecord> & prev_record,std::function<void (const ThreadContinuousExecution & execution)> on_new_execution)192*f6aab3d8Srobert static Error RecoverExecutionsFromConsecutiveRecords(
193*f6aab3d8Srobert cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion,
194*f6aab3d8Srobert const ContextSwitchRecord ¤t_record,
195*f6aab3d8Srobert const std::optional<ContextSwitchRecord> &prev_record,
196*f6aab3d8Srobert std::function<void(const ThreadContinuousExecution &execution)>
197*f6aab3d8Srobert on_new_execution) {
198*f6aab3d8Srobert if (!prev_record) {
199*f6aab3d8Srobert if (current_record.IsOut()) {
200*f6aab3d8Srobert on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution(
201*f6aab3d8Srobert cpu_id, current_record.tid, current_record.pid, current_record.tsc));
202*f6aab3d8Srobert }
203*f6aab3d8Srobert // The 'in' case will be handled later when we try to look for its end
204*f6aab3d8Srobert return Error::success();
205*f6aab3d8Srobert }
206*f6aab3d8Srobert
207*f6aab3d8Srobert const ContextSwitchRecord &prev = *prev_record;
208*f6aab3d8Srobert if (prev.tsc >= current_record.tsc)
209*f6aab3d8Srobert return createStringError(
210*f6aab3d8Srobert inconvertibleErrorCode(),
211*f6aab3d8Srobert formatv("A context switch record doesn't happen after the previous "
212*f6aab3d8Srobert "record. Previous TSC= {0}, current TSC = {1}.",
213*f6aab3d8Srobert prev.tsc, current_record.tsc));
214*f6aab3d8Srobert
215*f6aab3d8Srobert if (current_record.IsIn() && prev.IsIn()) {
216*f6aab3d8Srobert // We found two consecutive ins, which means that we didn't capture
217*f6aab3d8Srobert // the end of the previous execution.
218*f6aab3d8Srobert on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution(
219*f6aab3d8Srobert cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1));
220*f6aab3d8Srobert } else if (current_record.IsOut() && prev.IsOut()) {
221*f6aab3d8Srobert // We found two consecutive outs, that means that we didn't capture
222*f6aab3d8Srobert // the beginning of the current execution.
223*f6aab3d8Srobert on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution(
224*f6aab3d8Srobert cpu_id, current_record.tid, current_record.pid, prev.tsc + 1,
225*f6aab3d8Srobert current_record.tsc));
226*f6aab3d8Srobert } else if (current_record.IsOut() && prev.IsIn()) {
227*f6aab3d8Srobert if (current_record.pid == prev.pid && current_record.tid == prev.tid) {
228*f6aab3d8Srobert /// A complete execution
229*f6aab3d8Srobert on_new_execution(ThreadContinuousExecution::CreateCompleteExecution(
230*f6aab3d8Srobert cpu_id, current_record.tid, current_record.pid, prev.tsc,
231*f6aab3d8Srobert current_record.tsc));
232*f6aab3d8Srobert } else {
233*f6aab3d8Srobert // An out after the in of a different thread. The first one doesn't
234*f6aab3d8Srobert // have an end, and the second one doesn't have a start.
235*f6aab3d8Srobert on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution(
236*f6aab3d8Srobert cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1));
237*f6aab3d8Srobert on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution(
238*f6aab3d8Srobert cpu_id, current_record.tid, current_record.pid, prev.tsc + 1,
239*f6aab3d8Srobert current_record.tsc));
240*f6aab3d8Srobert }
241*f6aab3d8Srobert }
242*f6aab3d8Srobert return Error::success();
243*f6aab3d8Srobert }
244*f6aab3d8Srobert
245*f6aab3d8Srobert Expected<std::vector<ThreadContinuousExecution>>
DecodePerfContextSwitchTrace(ArrayRef<uint8_t> data,cpu_id_t cpu_id,const LinuxPerfZeroTscConversion & tsc_conversion)246*f6aab3d8Srobert lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace(
247*f6aab3d8Srobert ArrayRef<uint8_t> data, cpu_id_t cpu_id,
248*f6aab3d8Srobert const LinuxPerfZeroTscConversion &tsc_conversion) {
249*f6aab3d8Srobert
250*f6aab3d8Srobert std::vector<ThreadContinuousExecution> executions;
251*f6aab3d8Srobert
252*f6aab3d8Srobert // This offset is used to create the error message in case of failures.
253*f6aab3d8Srobert size_t offset = 0;
254*f6aab3d8Srobert
255*f6aab3d8Srobert auto do_decode = [&]() -> Error {
256*f6aab3d8Srobert std::optional<ContextSwitchRecord> prev_record;
257*f6aab3d8Srobert while (offset < data.size()) {
258*f6aab3d8Srobert const perf_event_header &perf_record =
259*f6aab3d8Srobert *reinterpret_cast<const perf_event_header *>(data.data() + offset);
260*f6aab3d8Srobert if (Error err = perf_record.SanityCheck())
261*f6aab3d8Srobert return err;
262*f6aab3d8Srobert
263*f6aab3d8Srobert if (perf_record.IsContextSwitchRecord()) {
264*f6aab3d8Srobert const PerfContextSwitchRecord &context_switch_record =
265*f6aab3d8Srobert *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() +
266*f6aab3d8Srobert offset);
267*f6aab3d8Srobert ContextSwitchRecord record{
268*f6aab3d8Srobert tsc_conversion.ToTSC(context_switch_record.time_in_nanos),
269*f6aab3d8Srobert context_switch_record.IsOut(),
270*f6aab3d8Srobert static_cast<lldb::pid_t>(context_switch_record.pid),
271*f6aab3d8Srobert static_cast<lldb::tid_t>(context_switch_record.tid)};
272*f6aab3d8Srobert
273*f6aab3d8Srobert if (Error err = RecoverExecutionsFromConsecutiveRecords(
274*f6aab3d8Srobert cpu_id, tsc_conversion, record, prev_record,
275*f6aab3d8Srobert [&](const ThreadContinuousExecution &execution) {
276*f6aab3d8Srobert executions.push_back(execution);
277*f6aab3d8Srobert }))
278*f6aab3d8Srobert return err;
279*f6aab3d8Srobert
280*f6aab3d8Srobert prev_record = record;
281*f6aab3d8Srobert }
282*f6aab3d8Srobert offset += perf_record.size;
283*f6aab3d8Srobert }
284*f6aab3d8Srobert
285*f6aab3d8Srobert // We might have an incomplete last record
286*f6aab3d8Srobert if (prev_record && prev_record->IsIn())
287*f6aab3d8Srobert executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution(
288*f6aab3d8Srobert cpu_id, prev_record->tid, prev_record->pid, prev_record->tsc));
289*f6aab3d8Srobert return Error::success();
290*f6aab3d8Srobert };
291*f6aab3d8Srobert
292*f6aab3d8Srobert if (Error err = do_decode())
293*f6aab3d8Srobert return createStringError(inconvertibleErrorCode(),
294*f6aab3d8Srobert formatv("Malformed perf context switch trace for "
295*f6aab3d8Srobert "cpu {0} at offset {1}. {2}",
296*f6aab3d8Srobert cpu_id, offset, toString(std::move(err))));
297*f6aab3d8Srobert
298*f6aab3d8Srobert return executions;
299*f6aab3d8Srobert }
300*f6aab3d8Srobert
301*f6aab3d8Srobert Expected<std::vector<uint8_t>>
FilterProcessesFromContextSwitchTrace(llvm::ArrayRef<uint8_t> data,const std::set<lldb::pid_t> & pids)302*f6aab3d8Srobert lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace(
303*f6aab3d8Srobert llvm::ArrayRef<uint8_t> data, const std::set<lldb::pid_t> &pids) {
304*f6aab3d8Srobert size_t offset = 0;
305*f6aab3d8Srobert std::vector<uint8_t> out_data;
306*f6aab3d8Srobert
307*f6aab3d8Srobert while (offset < data.size()) {
308*f6aab3d8Srobert const perf_event_header &perf_record =
309*f6aab3d8Srobert *reinterpret_cast<const perf_event_header *>(data.data() + offset);
310*f6aab3d8Srobert if (Error err = perf_record.SanityCheck())
311*f6aab3d8Srobert return std::move(err);
312*f6aab3d8Srobert bool should_copy = false;
313*f6aab3d8Srobert if (perf_record.IsContextSwitchRecord()) {
314*f6aab3d8Srobert const PerfContextSwitchRecord &context_switch_record =
315*f6aab3d8Srobert *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() +
316*f6aab3d8Srobert offset);
317*f6aab3d8Srobert if (pids.count(context_switch_record.pid))
318*f6aab3d8Srobert should_copy = true;
319*f6aab3d8Srobert } else if (perf_record.IsErrorRecord()) {
320*f6aab3d8Srobert should_copy = true;
321*f6aab3d8Srobert }
322*f6aab3d8Srobert
323*f6aab3d8Srobert if (should_copy) {
324*f6aab3d8Srobert for (size_t i = 0; i < perf_record.size; i++) {
325*f6aab3d8Srobert out_data.push_back(data[offset + i]);
326*f6aab3d8Srobert }
327*f6aab3d8Srobert }
328*f6aab3d8Srobert
329*f6aab3d8Srobert offset += perf_record.size;
330*f6aab3d8Srobert }
331*f6aab3d8Srobert return out_data;
332*f6aab3d8Srobert }
333