181ad6265SDimitry Andric //===-- PerfContextSwitchDecoder.cpp --======------------------------------===// 281ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 381ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 481ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 581ad6265SDimitry Andric // 681ad6265SDimitry Andric //===----------------------------------------------------------------------===// 781ad6265SDimitry Andric 881ad6265SDimitry Andric #include "PerfContextSwitchDecoder.h" 981ad6265SDimitry Andric 1081ad6265SDimitry Andric using namespace lldb; 1181ad6265SDimitry Andric using namespace lldb_private; 1281ad6265SDimitry Andric using namespace lldb_private::trace_intel_pt; 1381ad6265SDimitry Andric using namespace llvm; 1481ad6265SDimitry Andric 1581ad6265SDimitry Andric /// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on 1681ad6265SDimitry Andric /// non-linux platforms. 1781ad6265SDimitry Andric /// \{ 1881ad6265SDimitry Andric #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) 19*753f127fSDimitry Andric 20*753f127fSDimitry Andric #define PERF_RECORD_LOST 2 21*753f127fSDimitry Andric #define PERF_RECORD_THROTTLE 5 22*753f127fSDimitry Andric #define PERF_RECORD_UNTHROTTLE 6 23*753f127fSDimitry Andric #define PERF_RECORD_LOST_SAMPLES 13 2481ad6265SDimitry Andric #define PERF_RECORD_SWITCH_CPU_WIDE 15 25*753f127fSDimitry Andric #define PERF_RECORD_MAX 19 2681ad6265SDimitry Andric 2781ad6265SDimitry Andric struct perf_event_header { 2881ad6265SDimitry Andric uint32_t type; 2981ad6265SDimitry Andric uint16_t misc; 3081ad6265SDimitry Andric uint16_t size; 3181ad6265SDimitry Andric 3281ad6265SDimitry Andric /// \return 3381ad6265SDimitry Andric /// An \a llvm::Error if the record looks obviously wrong, or \a 3481ad6265SDimitry Andric /// llvm::Error::success() otherwise. 3581ad6265SDimitry Andric Error SanityCheck() const { 3681ad6265SDimitry Andric // The following checks are based on visual inspection of the records and 3781ad6265SDimitry Andric // enums in 3881ad6265SDimitry Andric // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h 3981ad6265SDimitry Andric // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records 4081ad6265SDimitry Andric // hold. 4181ad6265SDimitry Andric 4281ad6265SDimitry Andric // A record of too many uint64_t's or more should mean that the data is 4381ad6265SDimitry Andric // wrong 4481ad6265SDimitry Andric const uint64_t max_valid_size_bytes = 8000; 4581ad6265SDimitry Andric if (size == 0 || size > max_valid_size_bytes) 4681ad6265SDimitry Andric return createStringError( 4781ad6265SDimitry Andric inconvertibleErrorCode(), 4881ad6265SDimitry Andric formatv("A record of {0} bytes was found.", size)); 4981ad6265SDimitry Andric 5081ad6265SDimitry Andric // We add some numbers to PERF_RECORD_MAX because some systems might have 5181ad6265SDimitry Andric // custom records. In any case, we are looking only for abnormal data. 5281ad6265SDimitry Andric if (type >= PERF_RECORD_MAX + 100) 5381ad6265SDimitry Andric return createStringError( 5481ad6265SDimitry Andric inconvertibleErrorCode(), 5581ad6265SDimitry Andric formatv("Invalid record type {0} was found.", type)); 5681ad6265SDimitry Andric return Error::success(); 5781ad6265SDimitry Andric } 5881ad6265SDimitry Andric 5981ad6265SDimitry Andric bool IsContextSwitchRecord() const { 6081ad6265SDimitry Andric return type == PERF_RECORD_SWITCH_CPU_WIDE; 6181ad6265SDimitry Andric } 62*753f127fSDimitry Andric 63*753f127fSDimitry Andric bool IsErrorRecord() const { 64*753f127fSDimitry Andric return type == PERF_RECORD_LOST || type == PERF_RECORD_THROTTLE || 65*753f127fSDimitry Andric type == PERF_RECORD_UNTHROTTLE || type == PERF_RECORD_LOST_SAMPLES; 66*753f127fSDimitry Andric } 6781ad6265SDimitry Andric }; 6881ad6265SDimitry Andric /// \} 6981ad6265SDimitry Andric 7081ad6265SDimitry Andric /// Record found in the perf_event context switch traces. It might contain 7181ad6265SDimitry Andric /// additional fields in memory, but header.size should have the actual size 7281ad6265SDimitry Andric /// of the record. 7381ad6265SDimitry Andric struct PerfContextSwitchRecord { 7481ad6265SDimitry Andric struct perf_event_header header; 7581ad6265SDimitry Andric uint32_t next_prev_pid; 7681ad6265SDimitry Andric uint32_t next_prev_tid; 7781ad6265SDimitry Andric uint32_t pid, tid; 7881ad6265SDimitry Andric uint64_t time_in_nanos; 7981ad6265SDimitry Andric 8081ad6265SDimitry Andric bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } 8181ad6265SDimitry Andric }; 8281ad6265SDimitry Andric 8381ad6265SDimitry Andric /// Record produced after parsing the raw context switch trace produce by 8481ad6265SDimitry Andric /// perf_event. A major difference between this struct and 8581ad6265SDimitry Andric /// PerfContextSwitchRecord is that this one uses tsc instead of nanos. 8681ad6265SDimitry Andric struct ContextSwitchRecord { 8781ad6265SDimitry Andric uint64_t tsc; 8881ad6265SDimitry Andric /// Whether the switch is in or out 8981ad6265SDimitry Andric bool is_out; 9081ad6265SDimitry Andric /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally 9181ad6265SDimitry Andric /// runs after a context switch out of a normal user thread. 9281ad6265SDimitry Andric lldb::pid_t pid; 9381ad6265SDimitry Andric lldb::tid_t tid; 9481ad6265SDimitry Andric 9581ad6265SDimitry Andric bool IsOut() const { return is_out; } 9681ad6265SDimitry Andric 9781ad6265SDimitry Andric bool IsIn() const { return !is_out; } 9881ad6265SDimitry Andric }; 9981ad6265SDimitry Andric 10081ad6265SDimitry Andric uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const { 10181ad6265SDimitry Andric switch (variant) { 10281ad6265SDimitry Andric case Variant::Complete: 10381ad6265SDimitry Andric return tscs.complete.start; 10481ad6265SDimitry Andric case Variant::OnlyStart: 10581ad6265SDimitry Andric return tscs.only_start.start; 10681ad6265SDimitry Andric case Variant::OnlyEnd: 10781ad6265SDimitry Andric return tscs.only_end.end; 10881ad6265SDimitry Andric case Variant::HintedEnd: 10981ad6265SDimitry Andric return tscs.hinted_end.start; 11081ad6265SDimitry Andric case Variant::HintedStart: 11181ad6265SDimitry Andric return tscs.hinted_start.end; 11281ad6265SDimitry Andric } 11381ad6265SDimitry Andric } 11481ad6265SDimitry Andric 11581ad6265SDimitry Andric uint64_t ThreadContinuousExecution::GetStartTSC() const { 11681ad6265SDimitry Andric switch (variant) { 11781ad6265SDimitry Andric case Variant::Complete: 11881ad6265SDimitry Andric return tscs.complete.start; 11981ad6265SDimitry Andric case Variant::OnlyStart: 12081ad6265SDimitry Andric return tscs.only_start.start; 12181ad6265SDimitry Andric case Variant::OnlyEnd: 12281ad6265SDimitry Andric return 0; 12381ad6265SDimitry Andric case Variant::HintedEnd: 12481ad6265SDimitry Andric return tscs.hinted_end.start; 12581ad6265SDimitry Andric case Variant::HintedStart: 12681ad6265SDimitry Andric return tscs.hinted_start.hinted_start; 12781ad6265SDimitry Andric } 12881ad6265SDimitry Andric } 12981ad6265SDimitry Andric 13081ad6265SDimitry Andric uint64_t ThreadContinuousExecution::GetEndTSC() const { 13181ad6265SDimitry Andric switch (variant) { 13281ad6265SDimitry Andric case Variant::Complete: 13381ad6265SDimitry Andric return tscs.complete.end; 13481ad6265SDimitry Andric case Variant::OnlyStart: 13581ad6265SDimitry Andric return std::numeric_limits<uint64_t>::max(); 13681ad6265SDimitry Andric case Variant::OnlyEnd: 13781ad6265SDimitry Andric return tscs.only_end.end; 13881ad6265SDimitry Andric case Variant::HintedEnd: 13981ad6265SDimitry Andric return tscs.hinted_end.hinted_end; 14081ad6265SDimitry Andric case Variant::HintedStart: 14181ad6265SDimitry Andric return tscs.hinted_start.end; 14281ad6265SDimitry Andric } 14381ad6265SDimitry Andric } 14481ad6265SDimitry Andric 14581ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( 14681ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 14781ad6265SDimitry Andric uint64_t end) { 14881ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 14981ad6265SDimitry Andric o.variant = Variant::Complete; 15081ad6265SDimitry Andric o.tscs.complete.start = start; 15181ad6265SDimitry Andric o.tscs.complete.end = end; 15281ad6265SDimitry Andric return o; 15381ad6265SDimitry Andric } 15481ad6265SDimitry Andric 15581ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( 15681ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, 15781ad6265SDimitry Andric uint64_t hinted_start, uint64_t end) { 15881ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 15981ad6265SDimitry Andric o.variant = Variant::HintedStart; 16081ad6265SDimitry Andric o.tscs.hinted_start.hinted_start = hinted_start; 16181ad6265SDimitry Andric o.tscs.hinted_start.end = end; 16281ad6265SDimitry Andric return o; 16381ad6265SDimitry Andric } 16481ad6265SDimitry Andric 16581ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( 16681ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 16781ad6265SDimitry Andric uint64_t hinted_end) { 16881ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 16981ad6265SDimitry Andric o.variant = Variant::HintedEnd; 17081ad6265SDimitry Andric o.tscs.hinted_end.start = start; 17181ad6265SDimitry Andric o.tscs.hinted_end.hinted_end = hinted_end; 17281ad6265SDimitry Andric return o; 17381ad6265SDimitry Andric } 17481ad6265SDimitry Andric 17581ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( 17681ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) { 17781ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 17881ad6265SDimitry Andric o.variant = Variant::OnlyEnd; 17981ad6265SDimitry Andric o.tscs.only_end.end = end; 18081ad6265SDimitry Andric return o; 18181ad6265SDimitry Andric } 18281ad6265SDimitry Andric 18381ad6265SDimitry Andric ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( 18481ad6265SDimitry Andric lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) { 18581ad6265SDimitry Andric ThreadContinuousExecution o(cpu_id, tid, pid); 18681ad6265SDimitry Andric o.variant = Variant::OnlyStart; 18781ad6265SDimitry Andric o.tscs.only_start.start = start; 18881ad6265SDimitry Andric return o; 18981ad6265SDimitry Andric } 19081ad6265SDimitry Andric 19181ad6265SDimitry Andric static Error RecoverExecutionsFromConsecutiveRecords( 19281ad6265SDimitry Andric cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion, 19381ad6265SDimitry Andric const ContextSwitchRecord ¤t_record, 19481ad6265SDimitry Andric const Optional<ContextSwitchRecord> &prev_record, 19581ad6265SDimitry Andric std::function<void(const ThreadContinuousExecution &execution)> 19681ad6265SDimitry Andric on_new_execution) { 19781ad6265SDimitry Andric if (!prev_record) { 19881ad6265SDimitry Andric if (current_record.IsOut()) { 19981ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution( 20081ad6265SDimitry Andric cpu_id, current_record.tid, current_record.pid, current_record.tsc)); 20181ad6265SDimitry Andric } 20281ad6265SDimitry Andric // The 'in' case will be handled later when we try to look for its end 20381ad6265SDimitry Andric return Error::success(); 20481ad6265SDimitry Andric } 20581ad6265SDimitry Andric 20681ad6265SDimitry Andric const ContextSwitchRecord &prev = *prev_record; 20781ad6265SDimitry Andric if (prev.tsc >= current_record.tsc) 20881ad6265SDimitry Andric return createStringError( 20981ad6265SDimitry Andric inconvertibleErrorCode(), 21081ad6265SDimitry Andric formatv("A context switch record doesn't happen after the previous " 21181ad6265SDimitry Andric "record. Previous TSC= {0}, current TSC = {1}.", 21281ad6265SDimitry Andric prev.tsc, current_record.tsc)); 21381ad6265SDimitry Andric 21481ad6265SDimitry Andric if (current_record.IsIn() && prev.IsIn()) { 21581ad6265SDimitry Andric // We found two consecutive ins, which means that we didn't capture 21681ad6265SDimitry Andric // the end of the previous execution. 21781ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 21881ad6265SDimitry Andric cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 21981ad6265SDimitry Andric } else if (current_record.IsOut() && prev.IsOut()) { 22081ad6265SDimitry Andric // We found two consecutive outs, that means that we didn't capture 22181ad6265SDimitry Andric // the beginning of the current execution. 22281ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 22381ad6265SDimitry Andric cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 22481ad6265SDimitry Andric current_record.tsc)); 22581ad6265SDimitry Andric } else if (current_record.IsOut() && prev.IsIn()) { 22681ad6265SDimitry Andric if (current_record.pid == prev.pid && current_record.tid == prev.tid) { 22781ad6265SDimitry Andric /// A complete execution 22881ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateCompleteExecution( 22981ad6265SDimitry Andric cpu_id, current_record.tid, current_record.pid, prev.tsc, 23081ad6265SDimitry Andric current_record.tsc)); 23181ad6265SDimitry Andric } else { 23281ad6265SDimitry Andric // An out after the in of a different thread. The first one doesn't 23381ad6265SDimitry Andric // have an end, and the second one doesn't have a start. 23481ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 23581ad6265SDimitry Andric cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 23681ad6265SDimitry Andric on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 23781ad6265SDimitry Andric cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 23881ad6265SDimitry Andric current_record.tsc)); 23981ad6265SDimitry Andric } 24081ad6265SDimitry Andric } 24181ad6265SDimitry Andric return Error::success(); 24281ad6265SDimitry Andric } 24381ad6265SDimitry Andric 24481ad6265SDimitry Andric Expected<std::vector<ThreadContinuousExecution>> 24581ad6265SDimitry Andric lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( 24681ad6265SDimitry Andric ArrayRef<uint8_t> data, cpu_id_t cpu_id, 24781ad6265SDimitry Andric const LinuxPerfZeroTscConversion &tsc_conversion) { 24881ad6265SDimitry Andric 24981ad6265SDimitry Andric std::vector<ThreadContinuousExecution> executions; 25081ad6265SDimitry Andric 25181ad6265SDimitry Andric // This offset is used to create the error message in case of failures. 25281ad6265SDimitry Andric size_t offset = 0; 25381ad6265SDimitry Andric 25481ad6265SDimitry Andric auto do_decode = [&]() -> Error { 25581ad6265SDimitry Andric Optional<ContextSwitchRecord> prev_record; 25681ad6265SDimitry Andric while (offset < data.size()) { 25781ad6265SDimitry Andric const perf_event_header &perf_record = 25881ad6265SDimitry Andric *reinterpret_cast<const perf_event_header *>(data.data() + offset); 25981ad6265SDimitry Andric if (Error err = perf_record.SanityCheck()) 26081ad6265SDimitry Andric return err; 26181ad6265SDimitry Andric 26281ad6265SDimitry Andric if (perf_record.IsContextSwitchRecord()) { 26381ad6265SDimitry Andric const PerfContextSwitchRecord &context_switch_record = 26481ad6265SDimitry Andric *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + 26581ad6265SDimitry Andric offset); 26681ad6265SDimitry Andric ContextSwitchRecord record{ 26781ad6265SDimitry Andric tsc_conversion.ToTSC(context_switch_record.time_in_nanos), 26881ad6265SDimitry Andric context_switch_record.IsOut(), 26981ad6265SDimitry Andric static_cast<lldb::pid_t>(context_switch_record.pid), 27081ad6265SDimitry Andric static_cast<lldb::tid_t>(context_switch_record.tid)}; 27181ad6265SDimitry Andric 27281ad6265SDimitry Andric if (Error err = RecoverExecutionsFromConsecutiveRecords( 27381ad6265SDimitry Andric cpu_id, tsc_conversion, record, prev_record, 27481ad6265SDimitry Andric [&](const ThreadContinuousExecution &execution) { 27581ad6265SDimitry Andric executions.push_back(execution); 27681ad6265SDimitry Andric })) 27781ad6265SDimitry Andric return err; 27881ad6265SDimitry Andric 27981ad6265SDimitry Andric prev_record = record; 28081ad6265SDimitry Andric } 28181ad6265SDimitry Andric offset += perf_record.size; 28281ad6265SDimitry Andric } 28381ad6265SDimitry Andric 28481ad6265SDimitry Andric // We might have an incomplete last record 28581ad6265SDimitry Andric if (prev_record && prev_record->IsIn()) 28681ad6265SDimitry Andric executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution( 28781ad6265SDimitry Andric cpu_id, prev_record->tid, prev_record->pid, prev_record->tsc)); 28881ad6265SDimitry Andric return Error::success(); 28981ad6265SDimitry Andric }; 29081ad6265SDimitry Andric 29181ad6265SDimitry Andric if (Error err = do_decode()) 29281ad6265SDimitry Andric return createStringError(inconvertibleErrorCode(), 29381ad6265SDimitry Andric formatv("Malformed perf context switch trace for " 29481ad6265SDimitry Andric "cpu {0} at offset {1}. {2}", 29581ad6265SDimitry Andric cpu_id, offset, toString(std::move(err)))); 29681ad6265SDimitry Andric 29781ad6265SDimitry Andric return executions; 29881ad6265SDimitry Andric } 299*753f127fSDimitry Andric 300*753f127fSDimitry Andric Expected<std::vector<uint8_t>> 301*753f127fSDimitry Andric lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace( 302*753f127fSDimitry Andric llvm::ArrayRef<uint8_t> data, const std::set<lldb::pid_t> &pids) { 303*753f127fSDimitry Andric size_t offset = 0; 304*753f127fSDimitry Andric std::vector<uint8_t> out_data; 305*753f127fSDimitry Andric 306*753f127fSDimitry Andric while (offset < data.size()) { 307*753f127fSDimitry Andric const perf_event_header &perf_record = 308*753f127fSDimitry Andric *reinterpret_cast<const perf_event_header *>(data.data() + offset); 309*753f127fSDimitry Andric if (Error err = perf_record.SanityCheck()) 310*753f127fSDimitry Andric return std::move(err); 311*753f127fSDimitry Andric bool should_copy = false; 312*753f127fSDimitry Andric if (perf_record.IsContextSwitchRecord()) { 313*753f127fSDimitry Andric const PerfContextSwitchRecord &context_switch_record = 314*753f127fSDimitry Andric *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + 315*753f127fSDimitry Andric offset); 316*753f127fSDimitry Andric if (pids.count(context_switch_record.pid)) 317*753f127fSDimitry Andric should_copy = true; 318*753f127fSDimitry Andric } else if (perf_record.IsErrorRecord()) { 319*753f127fSDimitry Andric should_copy = true; 320*753f127fSDimitry Andric } 321*753f127fSDimitry Andric 322*753f127fSDimitry Andric if (should_copy) { 323*753f127fSDimitry Andric for (size_t i = 0; i < perf_record.size; i++) { 324*753f127fSDimitry Andric out_data.push_back(data[offset + i]); 325*753f127fSDimitry Andric } 326*753f127fSDimitry Andric } 327*753f127fSDimitry Andric 328*753f127fSDimitry Andric offset += perf_record.size; 329*753f127fSDimitry Andric } 330*753f127fSDimitry Andric return out_data; 331*753f127fSDimitry Andric } 332