1*81ad6265SDimitry Andric //===-- PerfContextSwitchDecoder.cpp --======------------------------------===// 2*81ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 3*81ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 4*81ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 5*81ad6265SDimitry Andric // 6*81ad6265SDimitry Andric //===----------------------------------------------------------------------===// 7*81ad6265SDimitry Andric 8*81ad6265SDimitry Andric #include "PerfContextSwitchDecoder.h" 9*81ad6265SDimitry Andric 10*81ad6265SDimitry Andric using namespace lldb; 11*81ad6265SDimitry Andric using namespace lldb_private; 12*81ad6265SDimitry Andric using namespace lldb_private::trace_intel_pt; 13*81ad6265SDimitry Andric using namespace llvm; 14*81ad6265SDimitry Andric 15*81ad6265SDimitry Andric /// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on 16*81ad6265SDimitry Andric /// non-linux platforms. 17*81ad6265SDimitry Andric /// \{ 18*81ad6265SDimitry Andric #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) 19*81ad6265SDimitry Andric #define PERF_RECORD_MAX 19 20*81ad6265SDimitry Andric #define PERF_RECORD_SWITCH_CPU_WIDE 15 21*81ad6265SDimitry Andric 22*81ad6265SDimitry Andric struct perf_event_header { 23*81ad6265SDimitry Andric uint32_t type; 24*81ad6265SDimitry Andric uint16_t misc; 25*81ad6265SDimitry Andric uint16_t size; 26*81ad6265SDimitry Andric 27*81ad6265SDimitry Andric /// \return 28*81ad6265SDimitry Andric /// An \a llvm::Error if the record looks obviously wrong, or \a 29*81ad6265SDimitry Andric /// llvm::Error::success() otherwise. 30*81ad6265SDimitry Andric Error SanityCheck() const { 31*81ad6265SDimitry Andric // The following checks are based on visual inspection of the records and 32*81ad6265SDimitry Andric // enums in 33*81ad6265SDimitry Andric // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h 34*81ad6265SDimitry Andric // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records 35*81ad6265SDimitry Andric // hold. 36*81ad6265SDimitry Andric 37*81ad6265SDimitry Andric // A record of too many uint64_t's or more should mean that the data is 38*81ad6265SDimitry Andric // wrong 39*81ad6265SDimitry Andric const uint64_t max_valid_size_bytes = 8000; 40*81ad6265SDimitry Andric if (size == 0 || size > max_valid_size_bytes) 41*81ad6265SDimitry Andric return createStringError( 42*81ad6265SDimitry Andric inconvertibleErrorCode(), 43*81ad6265SDimitry Andric formatv("A record of {0} bytes was found.", size)); 44*81ad6265SDimitry Andric 45*81ad6265SDimitry Andric // We add some numbers to PERF_RECORD_MAX because some systems might have 46*81ad6265SDimitry Andric // custom records. In any case, we are looking only for abnormal data. 47*81ad6265SDimitry Andric if (type >= PERF_RECORD_MAX + 100) 48*81ad6265SDimitry Andric return createStringError( 49*81ad6265SDimitry Andric inconvertibleErrorCode(), 50*81ad6265SDimitry Andric formatv("Invalid record type {0} was found.", type)); 51*81ad6265SDimitry Andric return Error::success(); 52*81ad6265SDimitry Andric } 53*81ad6265SDimitry Andric 54*81ad6265SDimitry Andric bool IsContextSwitchRecord() const { 55*81ad6265SDimitry Andric return type == PERF_RECORD_SWITCH_CPU_WIDE; 56*81ad6265SDimitry Andric } 57*81ad6265SDimitry Andric }; 58*81ad6265SDimitry Andric /// \} 59*81ad6265SDimitry Andric 60*81ad6265SDimitry Andric /// Record found in the perf_event context switch traces. It might contain 61*81ad6265SDimitry Andric /// additional fields in memory, but header.size should have the actual size 62*81ad6265SDimitry Andric /// of the record. 63*81ad6265SDimitry Andric struct PerfContextSwitchRecord { 64*81ad6265SDimitry Andric struct perf_event_header header; 65*81ad6265SDimitry Andric uint32_t next_prev_pid; 66*81ad6265SDimitry Andric uint32_t next_prev_tid; 67*81ad6265SDimitry Andric uint32_t pid, tid; 68*81ad6265SDimitry Andric uint64_t time_in_nanos; 69*81ad6265SDimitry Andric 70*81ad6265SDimitry Andric bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } 71*81ad6265SDimitry Andric }; 72*81ad6265SDimitry Andric 73*81ad6265SDimitry Andric /// Record produced after parsing the raw context switch trace produce by 74*81ad6265SDimitry Andric /// perf_event. A major difference between this struct and 75*81ad6265SDimitry Andric /// PerfContextSwitchRecord is that this one uses tsc instead of nanos. 76*81ad6265SDimitry Andric struct ContextSwitchRecord { 77*81ad6265SDimitry Andric uint64_t tsc; 78*81ad6265SDimitry Andric /// Whether the switch is in or out 79*81ad6265SDimitry Andric bool is_out; 80*81ad6265SDimitry Andric /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally 81*81ad6265SDimitry Andric /// runs after a context switch out of a normal user thread. 82*81ad6265SDimitry Andric lldb::pid_t pid; 83*81ad6265SDimitry Andric lldb::tid_t tid; 84*81ad6265SDimitry Andric 85*81ad6265SDimitry Andric bool IsOut() const { return is_out; } 86*81ad6265SDimitry Andric 87*81ad6265SDimitry Andric bool IsIn() const { return !is_out; } 88*81ad6265SDimitry Andric }; 89*81ad6265SDimitry Andric 90*81ad6265SDimitry Andric uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const { 91*81ad6265SDimitry Andric switch (variant) { 92*81ad6265SDimitry Andric case Variant::Complete: 93*81ad6265SDimitry Andric return tscs.complete.start; 94*81ad6265SDimitry Andric case Variant::OnlyStart: 95*81ad6265SDimitry Andric return tscs.only_start.start; 96*81ad6265SDimitry Andric case Variant::OnlyEnd: 97*81ad6265SDimitry Andric return tscs.only_end.end; 98*81ad6265SDimitry Andric case Variant::HintedEnd: 99*81ad6265SDimitry Andric return tscs.hinted_end.start; 100*81ad6265SDimitry Andric case Variant::HintedStart: 101*81ad6265SDimitry Andric return tscs.hinted_start.end; 102*81ad6265SDimitry Andric } 103*81ad6265SDimitry Andric } 104*81ad6265SDimitry Andric 105*81ad6265SDimitry Andric uint64_t ThreadContinuousExecution::GetStartTSC() const { 106*81ad6265SDimitry Andric switch (variant) { 107*81ad6265SDimitry Andric case Variant::Complete: 108*81ad6265SDimitry Andric return tscs.complete.start; 109*81ad6265SDimitry Andric case Variant::OnlyStart: 110*81ad6265SDimitry Andric return tscs.only_start.start; 111*81ad6265SDimitry Andric case Variant::OnlyEnd: 112*81ad6265SDimitry Andric return 0; 113*81ad6265SDimitry Andric case Variant::HintedEnd: 114*81ad6265SDimitry Andric return tscs.hinted_end.start; 115*81ad6265SDimitry Andric case Variant::HintedStart: 116*81ad6265SDimitry Andric return tscs.hinted_start.hinted_start; 117*81ad6265SDimitry Andric } 118*81ad6265SDimitry Andric } 119*81ad6265SDimitry Andric 120*81ad6265SDimitry Andric uint64_t ThreadContinuousExecution::GetEndTSC() const { 121*81ad6265SDimitry Andric switch (variant) { 122*81ad6265SDimitry Andric case Variant::Complete: 123*81ad6265SDimitry Andric return tscs.complete.end; 124*81ad6265SDimitry Andric case Variant::OnlyStart: 125*81ad6265SDimitry Andric return std::numeric_limits<uint64_t>::max(); 126*81ad6265SDimitry Andric case Variant::OnlyEnd: 127*81ad6265SDimitry Andric return tscs.only_end.end; 128*81ad6265SDimitry Andric case Variant::HintedEnd: 129*81ad6265SDimitry Andric return tscs.hinted_end.hinted_end; 130*81ad6265SDimitry Andric case Variant::HintedStart: 131*81ad6265SDimitry Andric return tscs.hinted_start.end; 132*81ad6265SDimitry Andric } 133*81ad6265SDimitry Andric } 134*81ad6265SDimitry Andric 135*81ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( 136*81ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 137*81ad6265SDimitry Andric uint64_t end) { 138*81ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 139*81ad6265SDimitry Andric o.variant = Variant::Complete; 140*81ad6265SDimitry Andric o.tscs.complete.start = start; 141*81ad6265SDimitry Andric o.tscs.complete.end = end; 142*81ad6265SDimitry Andric return o; 143*81ad6265SDimitry Andric } 144*81ad6265SDimitry Andric 145*81ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( 146*81ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, 147*81ad6265SDimitry Andric uint64_t hinted_start, uint64_t end) { 148*81ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 149*81ad6265SDimitry Andric o.variant = Variant::HintedStart; 150*81ad6265SDimitry Andric o.tscs.hinted_start.hinted_start = hinted_start; 151*81ad6265SDimitry Andric o.tscs.hinted_start.end = end; 152*81ad6265SDimitry Andric return o; 153*81ad6265SDimitry Andric } 154*81ad6265SDimitry Andric 155*81ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( 156*81ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 157*81ad6265SDimitry Andric uint64_t hinted_end) { 158*81ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 159*81ad6265SDimitry Andric o.variant = Variant::HintedEnd; 160*81ad6265SDimitry Andric o.tscs.hinted_end.start = start; 161*81ad6265SDimitry Andric o.tscs.hinted_end.hinted_end = hinted_end; 162*81ad6265SDimitry Andric return o; 163*81ad6265SDimitry Andric } 164*81ad6265SDimitry Andric 165*81ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( 166*81ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) { 167*81ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 168*81ad6265SDimitry Andric o.variant = Variant::OnlyEnd; 169*81ad6265SDimitry Andric o.tscs.only_end.end = end; 170*81ad6265SDimitry Andric return o; 171*81ad6265SDimitry Andric } 172*81ad6265SDimitry Andric 173*81ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( 174*81ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) { 175*81ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 176*81ad6265SDimitry Andric o.variant = Variant::OnlyStart; 177*81ad6265SDimitry Andric o.tscs.only_start.start = start; 178*81ad6265SDimitry Andric return o; 179*81ad6265SDimitry Andric } 180*81ad6265SDimitry Andric 181*81ad6265SDimitry Andric static Error RecoverExecutionsFromConsecutiveRecords( 182*81ad6265SDimitry Andric cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion, 183*81ad6265SDimitry Andric const ContextSwitchRecord ¤t_record, 184*81ad6265SDimitry Andric const Optional<ContextSwitchRecord> &prev_record, 185*81ad6265SDimitry Andric std::function<void(const ThreadContinuousExecution &execution)> 186*81ad6265SDimitry Andric on_new_execution) { 187*81ad6265SDimitry Andric if (!prev_record) { 188*81ad6265SDimitry Andric if (current_record.IsOut()) { 189*81ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution( 190*81ad6265SDimitry Andric cpu_id, current_record.tid, current_record.pid, current_record.tsc)); 191*81ad6265SDimitry Andric } 192*81ad6265SDimitry Andric // The 'in' case will be handled later when we try to look for its end 193*81ad6265SDimitry Andric return Error::success(); 194*81ad6265SDimitry Andric } 195*81ad6265SDimitry Andric 196*81ad6265SDimitry Andric const ContextSwitchRecord &prev = *prev_record; 197*81ad6265SDimitry Andric if (prev.tsc >= current_record.tsc) 198*81ad6265SDimitry Andric return createStringError( 199*81ad6265SDimitry Andric inconvertibleErrorCode(), 200*81ad6265SDimitry Andric formatv("A context switch record doesn't happen after the previous " 201*81ad6265SDimitry Andric "record. Previous TSC= {0}, current TSC = {1}.", 202*81ad6265SDimitry Andric prev.tsc, current_record.tsc)); 203*81ad6265SDimitry Andric 204*81ad6265SDimitry Andric if (current_record.IsIn() && prev.IsIn()) { 205*81ad6265SDimitry Andric // We found two consecutive ins, which means that we didn't capture 206*81ad6265SDimitry Andric // the end of the previous execution. 207*81ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 208*81ad6265SDimitry Andric cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 209*81ad6265SDimitry Andric } else if (current_record.IsOut() && prev.IsOut()) { 210*81ad6265SDimitry Andric // We found two consecutive outs, that means that we didn't capture 211*81ad6265SDimitry Andric // the beginning of the current execution. 212*81ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 213*81ad6265SDimitry Andric cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 214*81ad6265SDimitry Andric current_record.tsc)); 215*81ad6265SDimitry Andric } else if (current_record.IsOut() && prev.IsIn()) { 216*81ad6265SDimitry Andric if (current_record.pid == prev.pid && current_record.tid == prev.tid) { 217*81ad6265SDimitry Andric /// A complete execution 218*81ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateCompleteExecution( 219*81ad6265SDimitry Andric cpu_id, current_record.tid, current_record.pid, prev.tsc, 220*81ad6265SDimitry Andric current_record.tsc)); 221*81ad6265SDimitry Andric } else { 222*81ad6265SDimitry Andric // An out after the in of a different thread. The first one doesn't 223*81ad6265SDimitry Andric // have an end, and the second one doesn't have a start. 224*81ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 225*81ad6265SDimitry Andric cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 226*81ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 227*81ad6265SDimitry Andric cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 228*81ad6265SDimitry Andric current_record.tsc)); 229*81ad6265SDimitry Andric } 230*81ad6265SDimitry Andric } 231*81ad6265SDimitry Andric return Error::success(); 232*81ad6265SDimitry Andric } 233*81ad6265SDimitry Andric 234*81ad6265SDimitry Andric Expected<std::vector<ThreadContinuousExecution>> 235*81ad6265SDimitry Andric lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( 236*81ad6265SDimitry Andric ArrayRef<uint8_t> data, cpu_id_t cpu_id, 237*81ad6265SDimitry Andric const LinuxPerfZeroTscConversion &tsc_conversion) { 238*81ad6265SDimitry Andric 239*81ad6265SDimitry Andric std::vector<ThreadContinuousExecution> executions; 240*81ad6265SDimitry Andric 241*81ad6265SDimitry Andric // This offset is used to create the error message in case of failures. 242*81ad6265SDimitry Andric size_t offset = 0; 243*81ad6265SDimitry Andric 244*81ad6265SDimitry Andric auto do_decode = [&]() -> Error { 245*81ad6265SDimitry Andric Optional<ContextSwitchRecord> prev_record; 246*81ad6265SDimitry Andric while (offset < data.size()) { 247*81ad6265SDimitry Andric const perf_event_header &perf_record = 248*81ad6265SDimitry Andric *reinterpret_cast<const perf_event_header *>(data.data() + offset); 249*81ad6265SDimitry Andric if (Error err = perf_record.SanityCheck()) 250*81ad6265SDimitry Andric return err; 251*81ad6265SDimitry Andric 252*81ad6265SDimitry Andric if (perf_record.IsContextSwitchRecord()) { 253*81ad6265SDimitry Andric const PerfContextSwitchRecord &context_switch_record = 254*81ad6265SDimitry Andric *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + 255*81ad6265SDimitry Andric offset); 256*81ad6265SDimitry Andric ContextSwitchRecord record{ 257*81ad6265SDimitry Andric tsc_conversion.ToTSC(context_switch_record.time_in_nanos), 258*81ad6265SDimitry Andric context_switch_record.IsOut(), 259*81ad6265SDimitry Andric static_cast<lldb::pid_t>(context_switch_record.pid), 260*81ad6265SDimitry Andric static_cast<lldb::tid_t>(context_switch_record.tid)}; 261*81ad6265SDimitry Andric 262*81ad6265SDimitry Andric if (Error err = RecoverExecutionsFromConsecutiveRecords( 263*81ad6265SDimitry Andric cpu_id, tsc_conversion, record, prev_record, 264*81ad6265SDimitry Andric [&](const ThreadContinuousExecution &execution) { 265*81ad6265SDimitry Andric executions.push_back(execution); 266*81ad6265SDimitry Andric })) 267*81ad6265SDimitry Andric return err; 268*81ad6265SDimitry Andric 269*81ad6265SDimitry Andric prev_record = record; 270*81ad6265SDimitry Andric } 271*81ad6265SDimitry Andric offset += perf_record.size; 272*81ad6265SDimitry Andric } 273*81ad6265SDimitry Andric 274*81ad6265SDimitry Andric // We might have an incomplete last record 275*81ad6265SDimitry Andric if (prev_record && prev_record->IsIn()) 276*81ad6265SDimitry Andric executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution( 277*81ad6265SDimitry Andric cpu_id, prev_record->tid, prev_record->pid, prev_record->tsc)); 278*81ad6265SDimitry Andric return Error::success(); 279*81ad6265SDimitry Andric }; 280*81ad6265SDimitry Andric 281*81ad6265SDimitry Andric if (Error err = do_decode()) 282*81ad6265SDimitry Andric return createStringError(inconvertibleErrorCode(), 283*81ad6265SDimitry Andric formatv("Malformed perf context switch trace for " 284*81ad6265SDimitry Andric "cpu {0} at offset {1}. {2}", 285*81ad6265SDimitry Andric cpu_id, offset, toString(std::move(err)))); 286*81ad6265SDimitry Andric 287*81ad6265SDimitry Andric return executions; 288*81ad6265SDimitry Andric } 289