xref: /llvm-project/llvm/lib/Support/TimeProfiler.cpp (revision abaf13ad589d72b356a8788a5095f869d9d038d0)
1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/STLFunctionalExtras.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/Support/JSON.h"
18 #include "llvm/Support/Path.h"
19 #include "llvm/Support/Process.h"
20 #include "llvm/Support/Threading.h"
21 #include <algorithm>
22 #include <cassert>
23 #include <chrono>
24 #include <memory>
25 #include <mutex>
26 #include <string>
27 #include <vector>
28 
29 using namespace llvm;
30 
31 namespace {
32 
33 using std::chrono::duration;
34 using std::chrono::duration_cast;
35 using std::chrono::microseconds;
36 using std::chrono::steady_clock;
37 using std::chrono::system_clock;
38 using std::chrono::time_point;
39 using std::chrono::time_point_cast;
40 
41 struct TimeTraceProfilerInstances {
42   std::mutex Lock;
43   std::vector<TimeTraceProfiler *> List;
44 };
45 
46 TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
47   static TimeTraceProfilerInstances Instances;
48   return Instances;
49 }
50 
51 } // anonymous namespace
52 
53 // Per Thread instance
54 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
55 
56 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
57   return TimeTraceProfilerInstance;
58 }
59 
60 namespace {
61 
62 using ClockType = steady_clock;
63 using TimePointType = time_point<ClockType>;
64 using DurationType = duration<ClockType::rep, ClockType::period>;
65 using CountAndDurationType = std::pair<size_t, DurationType>;
66 using NameAndCountAndDurationType =
67     std::pair<std::string, CountAndDurationType>;
68 
69 } // anonymous namespace
70 
71 /// Represents an open or completed time section entry to be captured.
72 struct llvm::TimeTraceProfilerEntry {
73   const TimePointType Start;
74   TimePointType End;
75   const std::string Name;
76   const std::string Detail;
77   const bool AsyncEvent = false;
78   TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
79                          std::string &&Dt, bool Ae)
80       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
81         Detail(std::move(Dt)), AsyncEvent(Ae) {}
82 
83   // Calculate timings for FlameGraph. Cast time points to microsecond precision
84   // rather than casting duration. This avoids truncation issues causing inner
85   // scopes overruning outer scopes.
86   ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const {
87     return (time_point_cast<microseconds>(Start) -
88             time_point_cast<microseconds>(StartTime))
89         .count();
90   }
91 
92   ClockType::rep getFlameGraphDurUs() const {
93     return (time_point_cast<microseconds>(End) -
94             time_point_cast<microseconds>(Start))
95         .count();
96   }
97 };
98 
99 struct llvm::TimeTraceProfiler {
100   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
101       : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
102         ProcName(ProcName), Pid(sys::Process::getProcessId()),
103         Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
104     llvm::get_thread_name(ThreadName);
105   }
106 
107   TimeTraceProfilerEntry *begin(std::string Name,
108                                 llvm::function_ref<std::string()> Detail,
109                                 bool AsyncEvent = false) {
110     Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>(
111         ClockType::now(), TimePointType(), std::move(Name), Detail(),
112         AsyncEvent));
113     return Stack.back().get();
114   }
115 
116   void end() {
117     assert(!Stack.empty() && "Must call begin() first");
118     end(*Stack.back());
119   }
120 
121   void end(TimeTraceProfilerEntry &E) {
122     assert(!Stack.empty() && "Must call begin() first");
123     E.End = ClockType::now();
124 
125     // Calculate duration at full precision for overall counts.
126     DurationType Duration = E.End - E.Start;
127 
128     // Only include sections longer or equal to TimeTraceGranularity msec.
129     if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
130       Entries.emplace_back(E);
131 
132     // Track total time taken by each "name", but only the topmost levels of
133     // them; e.g. if there's a template instantiation that instantiates other
134     // templates from within, we only want to add the topmost one. "topmost"
135     // happens to be the ones that don't have any currently open entries above
136     // itself.
137     if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
138                       [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) {
139                         return Val->Name == E.Name;
140                       })) {
141       auto &CountAndTotal = CountAndTotalPerName[E.Name];
142       CountAndTotal.first++;
143       CountAndTotal.second += Duration;
144     };
145 
146     llvm::erase_if(Stack,
147                    [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) {
148                      return Val.get() == &E;
149                    });
150   }
151 
152   // Write events from this TimeTraceProfilerInstance and
153   // ThreadTimeTraceProfilerInstances.
154   void write(raw_pwrite_stream &OS) {
155     // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
156     auto &Instances = getTimeTraceProfilerInstances();
157     std::lock_guard<std::mutex> Lock(Instances.Lock);
158     assert(Stack.empty() &&
159            "All profiler sections should be ended when calling write");
160     assert(llvm::all_of(Instances.List,
161                         [](const auto &TTP) { return TTP->Stack.empty(); }) &&
162            "All profiler sections should be ended when calling write");
163 
164     json::OStream J(OS);
165     J.objectBegin();
166     J.attributeBegin("traceEvents");
167     J.arrayBegin();
168 
169     // Emit all events for the main flame graph.
170     auto writeEvent = [&](const auto &E, uint64_t Tid) {
171       auto StartUs = E.getFlameGraphStartUs(StartTime);
172       auto DurUs = E.getFlameGraphDurUs();
173 
174       J.object([&] {
175         J.attribute("pid", Pid);
176         J.attribute("tid", int64_t(Tid));
177         J.attribute("ts", StartUs);
178         if (E.AsyncEvent) {
179           J.attribute("cat", E.Name);
180           J.attribute("ph", "b");
181           J.attribute("id", 0);
182         } else {
183           J.attribute("ph", "X");
184           J.attribute("dur", DurUs);
185         }
186         J.attribute("name", E.Name);
187         if (!E.Detail.empty()) {
188           J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
189         }
190       });
191 
192       if (E.AsyncEvent) {
193         J.object([&] {
194           J.attribute("pid", Pid);
195           J.attribute("tid", int64_t(Tid));
196           J.attribute("ts", StartUs + DurUs);
197           J.attribute("cat", E.Name);
198           J.attribute("ph", "e");
199           J.attribute("id", 0);
200           J.attribute("name", E.Name);
201         });
202       }
203     };
204     for (const TimeTraceProfilerEntry &E : Entries)
205       writeEvent(E, this->Tid);
206     for (const TimeTraceProfiler *TTP : Instances.List)
207       for (const TimeTraceProfilerEntry &E : TTP->Entries)
208         writeEvent(E, TTP->Tid);
209 
210     // Emit totals by section name as additional "thread" events, sorted from
211     // longest one.
212     // Find highest used thread id.
213     uint64_t MaxTid = this->Tid;
214     for (const TimeTraceProfiler *TTP : Instances.List)
215       MaxTid = std::max(MaxTid, TTP->Tid);
216 
217     // Combine all CountAndTotalPerName from threads into one.
218     StringMap<CountAndDurationType> AllCountAndTotalPerName;
219     auto combineStat = [&](const auto &Stat) {
220       StringRef Key = Stat.getKey();
221       auto Value = Stat.getValue();
222       auto &CountAndTotal = AllCountAndTotalPerName[Key];
223       CountAndTotal.first += Value.first;
224       CountAndTotal.second += Value.second;
225     };
226     for (const auto &Stat : CountAndTotalPerName)
227       combineStat(Stat);
228     for (const TimeTraceProfiler *TTP : Instances.List)
229       for (const auto &Stat : TTP->CountAndTotalPerName)
230         combineStat(Stat);
231 
232     std::vector<NameAndCountAndDurationType> SortedTotals;
233     SortedTotals.reserve(AllCountAndTotalPerName.size());
234     for (const auto &Total : AllCountAndTotalPerName)
235       SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
236 
237     llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
238                                 const NameAndCountAndDurationType &B) {
239       return A.second.second > B.second.second;
240     });
241 
242     // Report totals on separate threads of tracing file.
243     uint64_t TotalTid = MaxTid + 1;
244     for (const NameAndCountAndDurationType &Total : SortedTotals) {
245       auto DurUs = duration_cast<microseconds>(Total.second.second).count();
246       auto Count = AllCountAndTotalPerName[Total.first].first;
247 
248       J.object([&] {
249         J.attribute("pid", Pid);
250         J.attribute("tid", int64_t(TotalTid));
251         J.attribute("ph", "X");
252         J.attribute("ts", 0);
253         J.attribute("dur", DurUs);
254         J.attribute("name", "Total " + Total.first);
255         J.attributeObject("args", [&] {
256           J.attribute("count", int64_t(Count));
257           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
258         });
259       });
260 
261       ++TotalTid;
262     }
263 
264     auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
265                                   StringRef arg) {
266       J.object([&] {
267         J.attribute("cat", "");
268         J.attribute("pid", Pid);
269         J.attribute("tid", int64_t(Tid));
270         J.attribute("ts", 0);
271         J.attribute("ph", "M");
272         J.attribute("name", Name);
273         J.attributeObject("args", [&] { J.attribute("name", arg); });
274       });
275     };
276 
277     writeMetadataEvent("process_name", Tid, ProcName);
278     writeMetadataEvent("thread_name", Tid, ThreadName);
279     for (const TimeTraceProfiler *TTP : Instances.List)
280       writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
281 
282     J.arrayEnd();
283     J.attributeEnd();
284 
285     // Emit the absolute time when this TimeProfiler started.
286     // This can be used to combine the profiling data from
287     // multiple processes and preserve actual time intervals.
288     J.attribute("beginningOfTime",
289                 time_point_cast<microseconds>(BeginningOfTime)
290                     .time_since_epoch()
291                     .count());
292 
293     J.objectEnd();
294   }
295 
296   SmallVector<std::unique_ptr<TimeTraceProfilerEntry>, 16> Stack;
297   SmallVector<TimeTraceProfilerEntry, 128> Entries;
298   StringMap<CountAndDurationType> CountAndTotalPerName;
299   // System clock time when the session was begun.
300   const time_point<system_clock> BeginningOfTime;
301   // Profiling clock time when the session was begun.
302   const TimePointType StartTime;
303   const std::string ProcName;
304   const sys::Process::Pid Pid;
305   SmallString<0> ThreadName;
306   const uint64_t Tid;
307 
308   // Minimum time granularity (in microseconds)
309   const unsigned TimeTraceGranularity;
310 };
311 
312 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
313                                        StringRef ProcName) {
314   assert(TimeTraceProfilerInstance == nullptr &&
315          "Profiler should not be initialized");
316   TimeTraceProfilerInstance = new TimeTraceProfiler(
317       TimeTraceGranularity, llvm::sys::path::filename(ProcName));
318 }
319 
320 // Removes all TimeTraceProfilerInstances.
321 // Called from main thread.
322 void llvm::timeTraceProfilerCleanup() {
323   delete TimeTraceProfilerInstance;
324   TimeTraceProfilerInstance = nullptr;
325 
326   auto &Instances = getTimeTraceProfilerInstances();
327   std::lock_guard<std::mutex> Lock(Instances.Lock);
328   for (auto *TTP : Instances.List)
329     delete TTP;
330   Instances.List.clear();
331 }
332 
333 // Finish TimeTraceProfilerInstance on a worker thread.
334 // This doesn't remove the instance, just moves the pointer to global vector.
335 void llvm::timeTraceProfilerFinishThread() {
336   auto &Instances = getTimeTraceProfilerInstances();
337   std::lock_guard<std::mutex> Lock(Instances.Lock);
338   Instances.List.push_back(TimeTraceProfilerInstance);
339   TimeTraceProfilerInstance = nullptr;
340 }
341 
342 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
343   assert(TimeTraceProfilerInstance != nullptr &&
344          "Profiler object can't be null");
345   TimeTraceProfilerInstance->write(OS);
346 }
347 
348 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
349                                    StringRef FallbackFileName) {
350   assert(TimeTraceProfilerInstance != nullptr &&
351          "Profiler object can't be null");
352 
353   std::string Path = PreferredFileName.str();
354   if (Path.empty()) {
355     Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
356     Path += ".time-trace";
357   }
358 
359   std::error_code EC;
360   raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
361   if (EC)
362     return createStringError(EC, "Could not open " + Path);
363 
364   timeTraceProfilerWrite(OS);
365   return Error::success();
366 }
367 
368 TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name,
369                                                      StringRef Detail) {
370   if (TimeTraceProfilerInstance != nullptr)
371     return TimeTraceProfilerInstance->begin(
372         std::string(Name), [&]() { return std::string(Detail); }, false);
373   return nullptr;
374 }
375 
376 TimeTraceProfilerEntry *
377 llvm::timeTraceProfilerBegin(StringRef Name,
378                              llvm::function_ref<std::string()> Detail) {
379   if (TimeTraceProfilerInstance != nullptr)
380     return TimeTraceProfilerInstance->begin(std::string(Name), Detail, false);
381   return nullptr;
382 }
383 
384 TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name,
385                                                           StringRef Detail) {
386   if (TimeTraceProfilerInstance != nullptr)
387     return TimeTraceProfilerInstance->begin(
388         std::string(Name), [&]() { return std::string(Detail); }, true);
389   return nullptr;
390 }
391 
392 void llvm::timeTraceProfilerEnd() {
393   if (TimeTraceProfilerInstance != nullptr)
394     TimeTraceProfilerInstance->end();
395 }
396 
397 void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) {
398   if (TimeTraceProfilerInstance != nullptr)
399     TimeTraceProfilerInstance->end(*E);
400 }
401