xref: /llvm-project/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp (revision c4fb631ceeeff2a292cc9cf5232b491afe09744d)
1 //===-- DecodedThread.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DecodedThread.h"
10 #include "TraceCursorIntelPT.h"
11 #include <intel-pt.h>
12 #include <memory>
13 
14 using namespace lldb;
15 using namespace lldb_private;
16 using namespace lldb_private::trace_intel_pt;
17 using namespace llvm;
18 
19 bool lldb_private::trace_intel_pt::IsLibiptError(int libipt_status) {
20   return libipt_status < 0;
21 }
22 
23 bool lldb_private::trace_intel_pt::IsEndOfStream(int libipt_status) {
24   return libipt_status == -pte_eos;
25 }
26 
27 bool lldb_private::trace_intel_pt::IsTscUnavailable(int libipt_status) {
28   return libipt_status == -pte_no_time;
29 }
30 
// Anchor used by llvm::ErrorInfo for dynamic type identification of this
// error class.
char IntelPTError::ID;

// \param libipt_error_code a libipt status; must be negative (i.e. an actual
//        error), which is asserted below.
// \param address the load address associated with the error, or
//        LLDB_INVALID_ADDRESS when there is none.
IntelPTError::IntelPTError(int libipt_error_code, lldb::addr_t address)
    : m_libipt_error_code(libipt_error_code), m_address(address) {
  assert(libipt_error_code < 0);
}
37 
38 void IntelPTError::log(llvm::raw_ostream &OS) const {
39   OS << pt_errstr(pt_errcode(m_libipt_error_code));
40   if (m_address != LLDB_INVALID_ADDRESS && m_address > 0)
41     OS << formatv(": {0:x+16}", m_address);
42 }
43 
44 bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
45   return item_index >= first_item_index &&
46          item_index < first_item_index + items_count;
47 }
48 
49 bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
50   return item_index >= first_item_index &&
51          item_index < first_item_index + items_count;
52 }
53 
54 double DecodedThread::NanosecondsRange::GetInterpolatedTime(
55     uint64_t item_index, uint64_t begin_of_time_nanos,
56     const LinuxPerfZeroTscConversion &tsc_conversion) const {
57   uint64_t items_since_last_tsc = item_index - first_item_index;
58 
59   auto interpolate = [&](uint64_t next_range_start_ns) {
60     if (next_range_start_ns == nanos) {
61       // If the resolution of the conversion formula is bad enough to consider
62       // these two timestamps as equal, then we just increase the next one by 1
63       // for correction
64       next_range_start_ns++;
65     }
66     long double item_duration =
67         static_cast<long double>(items_count) / (next_range_start_ns - nanos);
68     return (nanos - begin_of_time_nanos) + items_since_last_tsc * item_duration;
69   };
70 
71   if (!next_range) {
72     // If this is the last TSC range, so we have to extrapolate. In this case,
73     // we assume that each instruction took one TSC, which is what an
74     // instruction would take if no parallelism is achieved and the frequency
75     // multiplier is 1.
76     return interpolate(tsc_conversion.ToNanos(tsc + items_count));
77   }
78   if (items_count < (next_range->tsc - tsc)) {
79     // If the numbers of items in this range is less than the total TSC duration
80     // of this range, i.e. each instruction taking longer than 1 TSC, then we
81     // can assume that something else happened between these TSCs (e.g. a
82     // context switch, change to kernel, decoding errors, etc). In this case, we
83     // also assume that each instruction took 1 TSC. A proper way to improve
84     // this would be to analize the next events in the trace looking for context
85     // switches or trace disablement events, but for now, as we only want an
86     // approximation, we keep it simple. We are also guaranteed that the time in
87     // nanos of the next range is different to the current one, just because of
88     // the definition of a NanosecondsRange.
89     return interpolate(
90         std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos));
91   }
92 
93   // In this case, each item took less than 1 TSC, so some parallelism was
94   // achieved, which is an indication that we didn't suffered of any kind of
95   // interruption.
96   return interpolate(next_range->nanos);
97 }
98 
99 uint64_t DecodedThread::GetItemsCount() const { return m_item_kinds.size(); }
100 
101 lldb::addr_t
102 DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
103   return m_item_data[item_index].load_address;
104 }
105 
106 ThreadSP DecodedThread::GetThread() { return m_thread_sp; }
107 
// Append a new trace item of the given kind and return its (uninitialized)
// storage for the caller to fill in. Keeps m_item_kinds and m_item_data in
// lockstep, and extends the currently open TSC/nanoseconds ranges (if any) to
// cover the new item.
DecodedThread::TraceItemStorage &
DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind) {
  m_item_kinds.push_back(kind);
  m_item_data.emplace_back();
  // The new item belongs to whatever timing ranges are currently open.
  if (m_last_tsc)
    (*m_last_tsc)->second.items_count++;
  if (m_last_nanoseconds)
    (*m_last_nanoseconds)->second.items_count++;
  return m_item_data.back();
}
118 
// Record that the decoder observed timestamp counter value `tsc` at the
// current point of the item stream: open a new TSCRange keyed by the index of
// the next item, optionally open a matching NanosecondsRange when a
// TSC-to-wallclock conversion is available, and append an HW clock tick
// event.
void DecodedThread::NotifyTsc(TSC tsc) {
  // A repeated TSC just extends the range that is already open.
  if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
    return;

  m_last_tsc =
      m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;

  if (m_tsc_conversion) {
    uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
    // Only open a new nanoseconds range if the converted time actually
    // changed; a low-resolution conversion may map distinct TSCs to the same
    // nanosecond value.
    if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
      m_last_nanoseconds =
          m_nanoseconds
              .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0,
                                                         GetItemsCount()})
              .first;
      // Link the previous range to this one so interpolation can look ahead
      // (see NanosecondsRange::GetInterpolatedTime).
      if (*m_last_nanoseconds != m_nanoseconds.begin()) {
        auto prev_range = prev(*m_last_nanoseconds);
        prev_range->second.next_range = &(*m_last_nanoseconds)->second;
      }
    }
  }
  AppendEvent(lldb::eTraceEventHWClockTick);
}
142 
143 void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
144   if (!m_last_cpu || *m_last_cpu != cpu_id) {
145     m_cpus.emplace(GetItemsCount(), cpu_id);
146     m_last_cpu = cpu_id;
147     AppendEvent(lldb::eTraceEventCPUChanged);
148   }
149 }
150 
151 lldb::cpu_id_t DecodedThread::GetCPUByIndex(uint64_t item_index) const {
152   auto it = m_cpus.upper_bound(item_index);
153   return it == m_cpus.begin() ? LLDB_INVALID_CPU_ID : prev(it)->second;
154 }
155 
156 Optional<DecodedThread::TSCRange>
157 DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
158   auto next_it = m_tscs.upper_bound(item_index);
159   if (next_it == m_tscs.begin())
160     return None;
161   return prev(next_it)->second;
162 }
163 
164 Optional<DecodedThread::NanosecondsRange>
165 DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
166   auto next_it = m_nanoseconds.upper_bound(item_index);
167   if (next_it == m_nanoseconds.begin())
168     return None;
169   return prev(next_it)->second;
170 }
171 
172 void DecodedThread::AppendEvent(lldb::TraceEvent event) {
173   CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event;
174   m_events_stats.RecordEvent(event);
175 }
176 
177 void DecodedThread::AppendInstruction(const pt_insn &insn) {
178   CreateNewTraceItem(lldb::eTraceItemKindInstruction).load_address = insn.ip;
179 }
180 
181 void DecodedThread::AppendError(const IntelPTError &error) {
182   // End of stream shouldn't be a public error
183   if (IsEndOfStream(error.GetLibiptErrorCode()))
184     return;
185   CreateNewTraceItem(lldb::eTraceItemKindError).error =
186       ConstString(error.message()).AsCString();
187 }
188 
189 void DecodedThread::AppendCustomError(StringRef err) {
190   CreateNewTraceItem(lldb::eTraceItemKindError).error =
191       ConstString(err).AsCString();
192 }
193 
194 lldb::TraceEvent DecodedThread::GetEventByIndex(int item_index) const {
195   return m_item_data[item_index].event;
196 }
197 
198 void DecodedThread::LibiptErrorsStats::RecordError(int libipt_error_code) {
199   libipt_errors_counts[pt_errstr(pt_errcode(libipt_error_code))]++;
200   total_count++;
201 }
202 
203 void DecodedThread::RecordTscError(int libipt_error_code) {
204   m_tsc_errors_stats.RecordError(libipt_error_code);
205 }
206 
207 const DecodedThread::LibiptErrorsStats &
208 DecodedThread::GetTscErrorsStats() const {
209   return m_tsc_errors_stats;
210 }
211 
212 const DecodedThread::EventsStats &DecodedThread::GetEventsStats() const {
213   return m_events_stats;
214 }
215 
216 void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
217   events_counts[event]++;
218   total_count++;
219 }
220 
221 lldb::TraceItemKind
222 DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
223   return static_cast<lldb::TraceItemKind>(m_item_kinds[item_index]);
224 }
225 
226 const char *DecodedThread::GetErrorByIndex(uint64_t item_index) const {
227   return m_item_data[item_index].error;
228 }
229 
// \param thread_sp the thread whose trace this object will hold.
// \param tsc_conversion optional TSC-to-wallclock conversion; when present,
//        NotifyTsc also tracks nanosecond ranges for timestamping items.
DecodedThread::DecodedThread(
    ThreadSP thread_sp,
    const llvm::Optional<LinuxPerfZeroTscConversion> &tsc_conversion)
    : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}
234 
// Rough estimate of the memory used by the decoded trace: the two parallel
// per-item vectors plus the key/value payload of each entry in the timing and
// CPU maps. Map node and bookkeeping overhead is deliberately ignored, hence
// "approximate".
size_t DecodedThread::CalculateApproximateMemoryUsage() const {
  return sizeof(TraceItemStorage) * m_item_data.size() +
         sizeof(uint8_t) * m_item_kinds.size() +
         (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
         (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
         (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
}
242