//===-- TraceIntelPTMultiCpuDecoder.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8*f6aab3d8Srobert
9*f6aab3d8Srobert #include "TraceIntelPTMultiCpuDecoder.h"
10*f6aab3d8Srobert #include "TraceIntelPT.h"
11*f6aab3d8Srobert #include "llvm/Support/Error.h"
12*f6aab3d8Srobert #include <optional>
13*f6aab3d8Srobert
14*f6aab3d8Srobert using namespace lldb;
15*f6aab3d8Srobert using namespace lldb_private;
16*f6aab3d8Srobert using namespace lldb_private::trace_intel_pt;
17*f6aab3d8Srobert using namespace llvm;
18*f6aab3d8Srobert
TraceIntelPTMultiCpuDecoder(TraceIntelPTSP trace_sp)19*f6aab3d8Srobert TraceIntelPTMultiCpuDecoder::TraceIntelPTMultiCpuDecoder(
20*f6aab3d8Srobert TraceIntelPTSP trace_sp)
21*f6aab3d8Srobert : m_trace_wp(trace_sp) {
22*f6aab3d8Srobert for (Process *proc : trace_sp->GetAllProcesses()) {
23*f6aab3d8Srobert for (ThreadSP thread_sp : proc->GetThreadList().Threads()) {
24*f6aab3d8Srobert m_tids.insert(thread_sp->GetID());
25*f6aab3d8Srobert }
26*f6aab3d8Srobert }
27*f6aab3d8Srobert }
28*f6aab3d8Srobert
GetTrace()29*f6aab3d8Srobert TraceIntelPTSP TraceIntelPTMultiCpuDecoder::GetTrace() {
30*f6aab3d8Srobert return m_trace_wp.lock();
31*f6aab3d8Srobert }
32*f6aab3d8Srobert
TracesThread(lldb::tid_t tid) const33*f6aab3d8Srobert bool TraceIntelPTMultiCpuDecoder::TracesThread(lldb::tid_t tid) const {
34*f6aab3d8Srobert return m_tids.count(tid);
35*f6aab3d8Srobert }
36*f6aab3d8Srobert
FindLowestTSC()37*f6aab3d8Srobert Expected<std::optional<uint64_t>> TraceIntelPTMultiCpuDecoder::FindLowestTSC() {
38*f6aab3d8Srobert std::optional<uint64_t> lowest_tsc;
39*f6aab3d8Srobert TraceIntelPTSP trace_sp = GetTrace();
40*f6aab3d8Srobert
41*f6aab3d8Srobert Error err = GetTrace()->OnAllCpusBinaryDataRead(
42*f6aab3d8Srobert IntelPTDataKinds::kIptTrace,
43*f6aab3d8Srobert [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error {
44*f6aab3d8Srobert for (auto &cpu_id_to_buffer : buffers) {
45*f6aab3d8Srobert Expected<std::optional<uint64_t>> tsc =
46*f6aab3d8Srobert FindLowestTSCInTrace(*trace_sp, cpu_id_to_buffer.second);
47*f6aab3d8Srobert if (!tsc)
48*f6aab3d8Srobert return tsc.takeError();
49*f6aab3d8Srobert if (*tsc && (!lowest_tsc || *lowest_tsc > **tsc))
50*f6aab3d8Srobert lowest_tsc = **tsc;
51*f6aab3d8Srobert }
52*f6aab3d8Srobert return Error::success();
53*f6aab3d8Srobert });
54*f6aab3d8Srobert if (err)
55*f6aab3d8Srobert return std::move(err);
56*f6aab3d8Srobert return lowest_tsc;
57*f6aab3d8Srobert }
58*f6aab3d8Srobert
Decode(Thread & thread)59*f6aab3d8Srobert Expected<DecodedThreadSP> TraceIntelPTMultiCpuDecoder::Decode(Thread &thread) {
60*f6aab3d8Srobert if (Error err = CorrelateContextSwitchesAndIntelPtTraces())
61*f6aab3d8Srobert return std::move(err);
62*f6aab3d8Srobert
63*f6aab3d8Srobert TraceIntelPTSP trace_sp = GetTrace();
64*f6aab3d8Srobert
65*f6aab3d8Srobert return trace_sp->GetThreadTimer(thread.GetID())
66*f6aab3d8Srobert .TimeTask("Decoding instructions", [&]() -> Expected<DecodedThreadSP> {
67*f6aab3d8Srobert auto it = m_decoded_threads.find(thread.GetID());
68*f6aab3d8Srobert if (it != m_decoded_threads.end())
69*f6aab3d8Srobert return it->second;
70*f6aab3d8Srobert
71*f6aab3d8Srobert DecodedThreadSP decoded_thread_sp = std::make_shared<DecodedThread>(
72*f6aab3d8Srobert thread.shared_from_this(), trace_sp->GetPerfZeroTscConversion());
73*f6aab3d8Srobert
74*f6aab3d8Srobert Error err = trace_sp->OnAllCpusBinaryDataRead(
75*f6aab3d8Srobert IntelPTDataKinds::kIptTrace,
76*f6aab3d8Srobert [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error {
77*f6aab3d8Srobert auto it =
78*f6aab3d8Srobert m_continuous_executions_per_thread->find(thread.GetID());
79*f6aab3d8Srobert if (it != m_continuous_executions_per_thread->end())
80*f6aab3d8Srobert return DecodeSystemWideTraceForThread(
81*f6aab3d8Srobert *decoded_thread_sp, *trace_sp, buffers, it->second);
82*f6aab3d8Srobert
83*f6aab3d8Srobert return Error::success();
84*f6aab3d8Srobert });
85*f6aab3d8Srobert if (err)
86*f6aab3d8Srobert return std::move(err);
87*f6aab3d8Srobert
88*f6aab3d8Srobert m_decoded_threads.try_emplace(thread.GetID(), decoded_thread_sp);
89*f6aab3d8Srobert return decoded_thread_sp;
90*f6aab3d8Srobert });
91*f6aab3d8Srobert }
92*f6aab3d8Srobert
GetPSBBlocksForCPU(TraceIntelPT & trace,cpu_id_t cpu_id)93*f6aab3d8Srobert static Expected<std::vector<PSBBlock>> GetPSBBlocksForCPU(TraceIntelPT &trace,
94*f6aab3d8Srobert cpu_id_t cpu_id) {
95*f6aab3d8Srobert std::vector<PSBBlock> psb_blocks;
96*f6aab3d8Srobert Error err = trace.OnCpuBinaryDataRead(
97*f6aab3d8Srobert cpu_id, IntelPTDataKinds::kIptTrace,
98*f6aab3d8Srobert [&](ArrayRef<uint8_t> data) -> Error {
99*f6aab3d8Srobert Expected<std::vector<PSBBlock>> split_trace =
100*f6aab3d8Srobert SplitTraceIntoPSBBlock(trace, data, /*expect_tscs=*/true);
101*f6aab3d8Srobert if (!split_trace)
102*f6aab3d8Srobert return split_trace.takeError();
103*f6aab3d8Srobert
104*f6aab3d8Srobert psb_blocks = std::move(*split_trace);
105*f6aab3d8Srobert return Error::success();
106*f6aab3d8Srobert });
107*f6aab3d8Srobert if (err)
108*f6aab3d8Srobert return std::move(err);
109*f6aab3d8Srobert return psb_blocks;
110*f6aab3d8Srobert }
111*f6aab3d8Srobert
112*f6aab3d8Srobert Expected<DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>>
DoCorrelateContextSwitchesAndIntelPtTraces()113*f6aab3d8Srobert TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() {
114*f6aab3d8Srobert DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>
115*f6aab3d8Srobert continuous_executions_per_thread;
116*f6aab3d8Srobert TraceIntelPTSP trace_sp = GetTrace();
117*f6aab3d8Srobert
118*f6aab3d8Srobert std::optional<LinuxPerfZeroTscConversion> conv_opt =
119*f6aab3d8Srobert trace_sp->GetPerfZeroTscConversion();
120*f6aab3d8Srobert if (!conv_opt)
121*f6aab3d8Srobert return createStringError(
122*f6aab3d8Srobert inconvertibleErrorCode(),
123*f6aab3d8Srobert "TSC to nanoseconds conversion values were not found");
124*f6aab3d8Srobert
125*f6aab3d8Srobert LinuxPerfZeroTscConversion tsc_conversion = *conv_opt;
126*f6aab3d8Srobert
127*f6aab3d8Srobert for (cpu_id_t cpu_id : trace_sp->GetTracedCpus()) {
128*f6aab3d8Srobert Expected<std::vector<PSBBlock>> psb_blocks =
129*f6aab3d8Srobert GetPSBBlocksForCPU(*trace_sp, cpu_id);
130*f6aab3d8Srobert if (!psb_blocks)
131*f6aab3d8Srobert return psb_blocks.takeError();
132*f6aab3d8Srobert
133*f6aab3d8Srobert m_total_psb_blocks += psb_blocks->size();
134*f6aab3d8Srobert // We'll be iterating through the thread continuous executions and the intel
135*f6aab3d8Srobert // pt subtraces sorted by time.
136*f6aab3d8Srobert auto it = psb_blocks->begin();
137*f6aab3d8Srobert auto on_new_thread_execution =
138*f6aab3d8Srobert [&](const ThreadContinuousExecution &thread_execution) {
139*f6aab3d8Srobert IntelPTThreadContinousExecution execution(thread_execution);
140*f6aab3d8Srobert
141*f6aab3d8Srobert for (; it != psb_blocks->end() &&
142*f6aab3d8Srobert *it->tsc < thread_execution.GetEndTSC();
143*f6aab3d8Srobert it++) {
144*f6aab3d8Srobert if (*it->tsc > thread_execution.GetStartTSC()) {
145*f6aab3d8Srobert execution.psb_blocks.push_back(*it);
146*f6aab3d8Srobert } else {
147*f6aab3d8Srobert m_unattributed_psb_blocks++;
148*f6aab3d8Srobert }
149*f6aab3d8Srobert }
150*f6aab3d8Srobert continuous_executions_per_thread[thread_execution.tid].push_back(
151*f6aab3d8Srobert execution);
152*f6aab3d8Srobert };
153*f6aab3d8Srobert Error err = trace_sp->OnCpuBinaryDataRead(
154*f6aab3d8Srobert cpu_id, IntelPTDataKinds::kPerfContextSwitchTrace,
155*f6aab3d8Srobert [&](ArrayRef<uint8_t> data) -> Error {
156*f6aab3d8Srobert Expected<std::vector<ThreadContinuousExecution>> executions =
157*f6aab3d8Srobert DecodePerfContextSwitchTrace(data, cpu_id, tsc_conversion);
158*f6aab3d8Srobert if (!executions)
159*f6aab3d8Srobert return executions.takeError();
160*f6aab3d8Srobert for (const ThreadContinuousExecution &exec : *executions)
161*f6aab3d8Srobert on_new_thread_execution(exec);
162*f6aab3d8Srobert return Error::success();
163*f6aab3d8Srobert });
164*f6aab3d8Srobert if (err)
165*f6aab3d8Srobert return std::move(err);
166*f6aab3d8Srobert
167*f6aab3d8Srobert m_unattributed_psb_blocks += psb_blocks->end() - it;
168*f6aab3d8Srobert }
169*f6aab3d8Srobert // We now sort the executions of each thread to have them ready for
170*f6aab3d8Srobert // instruction decoding
171*f6aab3d8Srobert for (auto &tid_executions : continuous_executions_per_thread)
172*f6aab3d8Srobert std::sort(tid_executions.second.begin(), tid_executions.second.end());
173*f6aab3d8Srobert
174*f6aab3d8Srobert return continuous_executions_per_thread;
175*f6aab3d8Srobert }
176*f6aab3d8Srobert
CorrelateContextSwitchesAndIntelPtTraces()177*f6aab3d8Srobert Error TraceIntelPTMultiCpuDecoder::CorrelateContextSwitchesAndIntelPtTraces() {
178*f6aab3d8Srobert if (m_setup_error)
179*f6aab3d8Srobert return createStringError(inconvertibleErrorCode(), m_setup_error->c_str());
180*f6aab3d8Srobert
181*f6aab3d8Srobert if (m_continuous_executions_per_thread)
182*f6aab3d8Srobert return Error::success();
183*f6aab3d8Srobert
184*f6aab3d8Srobert Error err = GetTrace()->GetGlobalTimer().TimeTask(
185*f6aab3d8Srobert "Context switch and Intel PT traces correlation", [&]() -> Error {
186*f6aab3d8Srobert if (auto correlation = DoCorrelateContextSwitchesAndIntelPtTraces()) {
187*f6aab3d8Srobert m_continuous_executions_per_thread.emplace(std::move(*correlation));
188*f6aab3d8Srobert return Error::success();
189*f6aab3d8Srobert } else {
190*f6aab3d8Srobert return correlation.takeError();
191*f6aab3d8Srobert }
192*f6aab3d8Srobert });
193*f6aab3d8Srobert if (err) {
194*f6aab3d8Srobert m_setup_error = toString(std::move(err));
195*f6aab3d8Srobert return createStringError(inconvertibleErrorCode(), m_setup_error->c_str());
196*f6aab3d8Srobert }
197*f6aab3d8Srobert return Error::success();
198*f6aab3d8Srobert }
199*f6aab3d8Srobert
GetNumContinuousExecutionsForThread(lldb::tid_t tid) const200*f6aab3d8Srobert size_t TraceIntelPTMultiCpuDecoder::GetNumContinuousExecutionsForThread(
201*f6aab3d8Srobert lldb::tid_t tid) const {
202*f6aab3d8Srobert if (!m_continuous_executions_per_thread)
203*f6aab3d8Srobert return 0;
204*f6aab3d8Srobert auto it = m_continuous_executions_per_thread->find(tid);
205*f6aab3d8Srobert if (it == m_continuous_executions_per_thread->end())
206*f6aab3d8Srobert return 0;
207*f6aab3d8Srobert return it->second.size();
208*f6aab3d8Srobert }
209*f6aab3d8Srobert
GetTotalContinuousExecutionsCount() const210*f6aab3d8Srobert size_t TraceIntelPTMultiCpuDecoder::GetTotalContinuousExecutionsCount() const {
211*f6aab3d8Srobert if (!m_continuous_executions_per_thread)
212*f6aab3d8Srobert return 0;
213*f6aab3d8Srobert size_t count = 0;
214*f6aab3d8Srobert for (const auto &kv : *m_continuous_executions_per_thread)
215*f6aab3d8Srobert count += kv.second.size();
216*f6aab3d8Srobert return count;
217*f6aab3d8Srobert }
218*f6aab3d8Srobert
219*f6aab3d8Srobert size_t
GePSBBlocksCountForThread(lldb::tid_t tid) const220*f6aab3d8Srobert TraceIntelPTMultiCpuDecoder::GePSBBlocksCountForThread(lldb::tid_t tid) const {
221*f6aab3d8Srobert if (!m_continuous_executions_per_thread)
222*f6aab3d8Srobert return 0;
223*f6aab3d8Srobert size_t count = 0;
224*f6aab3d8Srobert auto it = m_continuous_executions_per_thread->find(tid);
225*f6aab3d8Srobert if (it == m_continuous_executions_per_thread->end())
226*f6aab3d8Srobert return 0;
227*f6aab3d8Srobert for (const IntelPTThreadContinousExecution &execution : it->second)
228*f6aab3d8Srobert count += execution.psb_blocks.size();
229*f6aab3d8Srobert return count;
230*f6aab3d8Srobert }
231*f6aab3d8Srobert
GetUnattributedPSBBlocksCount() const232*f6aab3d8Srobert size_t TraceIntelPTMultiCpuDecoder::GetUnattributedPSBBlocksCount() const {
233*f6aab3d8Srobert return m_unattributed_psb_blocks;
234*f6aab3d8Srobert }
235*f6aab3d8Srobert
GetTotalPSBBlocksCount() const236*f6aab3d8Srobert size_t TraceIntelPTMultiCpuDecoder::GetTotalPSBBlocksCount() const {
237*f6aab3d8Srobert return m_total_psb_blocks;
238*f6aab3d8Srobert }
239