xref: /llvm-project/llvm/lib/Support/TimeProfiler.cpp (revision 141574bacb2c10b795490d0fa5ea31acbc5d8c6e)
1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/STLFunctionalExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/Support/JSON.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/Process.h"
21 #include "llvm/Support/Threading.h"
22 #include <algorithm>
23 #include <cassert>
24 #include <chrono>
25 #include <memory>
26 #include <mutex>
27 #include <string>
28 #include <vector>
29 
30 using namespace llvm;
31 
32 namespace {
33 
34 using std::chrono::duration;
35 using std::chrono::duration_cast;
36 using std::chrono::microseconds;
37 using std::chrono::steady_clock;
38 using std::chrono::system_clock;
39 using std::chrono::time_point;
40 using std::chrono::time_point_cast;
41 
42 struct TimeTraceProfilerInstances {
43   std::mutex Lock;
44   std::vector<TimeTraceProfiler *> List;
45 };
46 
47 TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
48   static TimeTraceProfilerInstances Instances;
49   return Instances;
50 }
51 
52 } // anonymous namespace
53 
54 // Per Thread instance
55 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
56 
57 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
58   return TimeTraceProfilerInstance;
59 }
60 
61 namespace {
62 
63 using ClockType = steady_clock;
64 using TimePointType = time_point<ClockType>;
65 using DurationType = duration<ClockType::rep, ClockType::period>;
66 using CountAndDurationType = std::pair<size_t, DurationType>;
67 using NameAndCountAndDurationType =
68     std::pair<std::string, CountAndDurationType>;
69 
70 } // anonymous namespace
71 
72 /// Represents an open or completed time section entry to be captured.
73 struct llvm::TimeTraceProfilerEntry {
74   const TimePointType Start;
75   TimePointType End;
76   const std::string Name;
77   TimeTraceMetadata Metadata;
78 
79   const TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent;
80   TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
81                          std::string &&Dt, TimeTraceEventType Et)
82       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(),
83         EventType(Et) {
84     Metadata.Detail = std::move(Dt);
85   }
86 
87   TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
88                          TimeTraceMetadata &&Mt, TimeTraceEventType Et)
89       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
90         Metadata(std::move(Mt)), EventType(Et) {}
91 
92   // Calculate timings for FlameGraph. Cast time points to microsecond precision
93   // rather than casting duration. This avoids truncation issues causing inner
94   // scopes overruning outer scopes.
95   ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const {
96     return (time_point_cast<microseconds>(Start) -
97             time_point_cast<microseconds>(StartTime))
98         .count();
99   }
100 
101   ClockType::rep getFlameGraphDurUs() const {
102     return (time_point_cast<microseconds>(End) -
103             time_point_cast<microseconds>(Start))
104         .count();
105   }
106 };
107 
108 // Represents a currently open (in-progress) time trace entry. InstantEvents
109 // that happen during an open event are associated with the duration of this
110 // parent event and they are dropped if parent duration is shorter than
111 // the granularity.
112 struct InProgressEntry {
113   TimeTraceProfilerEntry Event;
114   std::vector<TimeTraceProfilerEntry> InstantEvents;
115 
116   InProgressEntry(TimePointType S, TimePointType E, std::string N,
117                   std::string Dt, TimeTraceEventType Et)
118       : Event(std::move(S), std::move(E), std::move(N), std::move(Dt), Et),
119         InstantEvents() {}
120 
121   InProgressEntry(TimePointType S, TimePointType E, std::string N,
122                   TimeTraceMetadata Mt, TimeTraceEventType Et)
123       : Event(std::move(S), std::move(E), std::move(N), std::move(Mt), Et),
124         InstantEvents() {}
125 };
126 
127 struct llvm::TimeTraceProfiler {
128   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "",
129                     bool TimeTraceVerbose = false)
130       : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
131         ProcName(ProcName), Pid(sys::Process::getProcessId()),
132         Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity),
133         TimeTraceVerbose(TimeTraceVerbose) {
134     llvm::get_thread_name(ThreadName);
135   }
136 
137   TimeTraceProfilerEntry *
138   begin(std::string Name, llvm::function_ref<std::string()> Detail,
139         TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) {
140     assert(EventType != TimeTraceEventType::InstantEvent &&
141            "Instant Events don't have begin and end.");
142     Stack.emplace_back(std::make_unique<InProgressEntry>(
143         ClockType::now(), TimePointType(), std::move(Name), Detail(),
144         EventType));
145     return &Stack.back()->Event;
146   }
147 
148   TimeTraceProfilerEntry *
149   begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata,
150         TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) {
151     assert(EventType != TimeTraceEventType::InstantEvent &&
152            "Instant Events don't have begin and end.");
153     Stack.emplace_back(std::make_unique<InProgressEntry>(
154         ClockType::now(), TimePointType(), std::move(Name), Metadata(),
155         EventType));
156     return &Stack.back()->Event;
157   }
158 
159   void insert(std::string Name, llvm::function_ref<std::string()> Detail) {
160     if (Stack.empty())
161       return;
162 
163     Stack.back()->InstantEvents.emplace_back(TimeTraceProfilerEntry(
164         ClockType::now(), TimePointType(), std::move(Name), Detail(),
165         TimeTraceEventType::InstantEvent));
166   }
167 
168   void end() {
169     assert(!Stack.empty() && "Must call begin() first");
170     end(Stack.back()->Event);
171   }
172 
173   void end(TimeTraceProfilerEntry &E) {
174     assert(!Stack.empty() && "Must call begin() first");
175     E.End = ClockType::now();
176 
177     // Calculate duration at full precision for overall counts.
178     DurationType Duration = E.End - E.Start;
179 
180     const auto *Iter =
181         llvm::find_if(Stack, [&](const std::unique_ptr<InProgressEntry> &Val) {
182           return &Val->Event == &E;
183         });
184     assert(Iter != Stack.end() && "Event not in the Stack");
185 
186     // Only include sections longer or equal to TimeTraceGranularity msec.
187     if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity) {
188       Entries.emplace_back(E);
189       for (auto &IE : Iter->get()->InstantEvents) {
190         Entries.emplace_back(IE);
191       }
192     }
193 
194     // Track total time taken by each "name", but only the topmost levels of
195     // them; e.g. if there's a template instantiation that instantiates other
196     // templates from within, we only want to add the topmost one. "topmost"
197     // happens to be the ones that don't have any currently open entries above
198     // itself.
199     if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
200                       [&](const std::unique_ptr<InProgressEntry> &Val) {
201                         return Val->Event.Name == E.Name;
202                       })) {
203       auto &CountAndTotal = CountAndTotalPerName[E.Name];
204       CountAndTotal.first++;
205       CountAndTotal.second += Duration;
206     };
207 
208     Stack.erase(Iter);
209   }
210 
211   // Write events from this TimeTraceProfilerInstance and
212   // ThreadTimeTraceProfilerInstances.
213   void write(raw_pwrite_stream &OS) {
214     // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
215     auto &Instances = getTimeTraceProfilerInstances();
216     std::lock_guard<std::mutex> Lock(Instances.Lock);
217     assert(Stack.empty() &&
218            "All profiler sections should be ended when calling write");
219     assert(llvm::all_of(Instances.List,
220                         [](const auto &TTP) { return TTP->Stack.empty(); }) &&
221            "All profiler sections should be ended when calling write");
222 
223     json::OStream J(OS);
224     J.objectBegin();
225     J.attributeBegin("traceEvents");
226     J.arrayBegin();
227 
228     // Emit all events for the main flame graph.
229     auto writeEvent = [&](const auto &E, uint64_t Tid) {
230       auto StartUs = E.getFlameGraphStartUs(StartTime);
231       auto DurUs = E.getFlameGraphDurUs();
232 
233       J.object([&] {
234         J.attribute("pid", Pid);
235         J.attribute("tid", int64_t(Tid));
236         J.attribute("ts", StartUs);
237         if (E.EventType == TimeTraceEventType::AsyncEvent) {
238           J.attribute("cat", E.Name);
239           J.attribute("ph", "b");
240           J.attribute("id", 0);
241         } else if (E.EventType == TimeTraceEventType::CompleteEvent) {
242           J.attribute("ph", "X");
243           J.attribute("dur", DurUs);
244         } else { // instant event
245           assert(E.EventType == TimeTraceEventType::InstantEvent &&
246                  "InstantEvent expected");
247           J.attribute("ph", "i");
248         }
249         J.attribute("name", E.Name);
250         if (!E.Metadata.isEmpty()) {
251           J.attributeObject("args", [&] {
252             if (!E.Metadata.Detail.empty())
253               J.attribute("detail", E.Metadata.Detail);
254             if (!E.Metadata.File.empty())
255               J.attribute("file", E.Metadata.File);
256             if (E.Metadata.Line > 0)
257               J.attribute("line", E.Metadata.Line);
258           });
259         }
260       });
261 
262       if (E.EventType == TimeTraceEventType::AsyncEvent) {
263         J.object([&] {
264           J.attribute("pid", Pid);
265           J.attribute("tid", int64_t(Tid));
266           J.attribute("ts", StartUs + DurUs);
267           J.attribute("cat", E.Name);
268           J.attribute("ph", "e");
269           J.attribute("id", 0);
270           J.attribute("name", E.Name);
271         });
272       }
273     };
274     for (const TimeTraceProfilerEntry &E : Entries)
275       writeEvent(E, this->Tid);
276     for (const TimeTraceProfiler *TTP : Instances.List)
277       for (const TimeTraceProfilerEntry &E : TTP->Entries)
278         writeEvent(E, TTP->Tid);
279 
280     // Emit totals by section name as additional "thread" events, sorted from
281     // longest one.
282     // Find highest used thread id.
283     uint64_t MaxTid = this->Tid;
284     for (const TimeTraceProfiler *TTP : Instances.List)
285       MaxTid = std::max(MaxTid, TTP->Tid);
286 
287     // Combine all CountAndTotalPerName from threads into one.
288     StringMap<CountAndDurationType> AllCountAndTotalPerName;
289     auto combineStat = [&](const auto &Stat) {
290       StringRef Key = Stat.getKey();
291       auto Value = Stat.getValue();
292       auto &CountAndTotal = AllCountAndTotalPerName[Key];
293       CountAndTotal.first += Value.first;
294       CountAndTotal.second += Value.second;
295     };
296     for (const auto &Stat : CountAndTotalPerName)
297       combineStat(Stat);
298     for (const TimeTraceProfiler *TTP : Instances.List)
299       for (const auto &Stat : TTP->CountAndTotalPerName)
300         combineStat(Stat);
301 
302     std::vector<NameAndCountAndDurationType> SortedTotals;
303     SortedTotals.reserve(AllCountAndTotalPerName.size());
304     for (const auto &Total : AllCountAndTotalPerName)
305       SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
306 
307     llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
308                                 const NameAndCountAndDurationType &B) {
309       return A.second.second > B.second.second;
310     });
311 
312     // Report totals on separate threads of tracing file.
313     uint64_t TotalTid = MaxTid + 1;
314     for (const NameAndCountAndDurationType &Total : SortedTotals) {
315       auto DurUs = duration_cast<microseconds>(Total.second.second).count();
316       auto Count = AllCountAndTotalPerName[Total.first].first;
317 
318       J.object([&] {
319         J.attribute("pid", Pid);
320         J.attribute("tid", int64_t(TotalTid));
321         J.attribute("ph", "X");
322         J.attribute("ts", 0);
323         J.attribute("dur", DurUs);
324         J.attribute("name", "Total " + Total.first);
325         J.attributeObject("args", [&] {
326           J.attribute("count", int64_t(Count));
327           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
328         });
329       });
330 
331       ++TotalTid;
332     }
333 
334     auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
335                                   StringRef arg) {
336       J.object([&] {
337         J.attribute("cat", "");
338         J.attribute("pid", Pid);
339         J.attribute("tid", int64_t(Tid));
340         J.attribute("ts", 0);
341         J.attribute("ph", "M");
342         J.attribute("name", Name);
343         J.attributeObject("args", [&] { J.attribute("name", arg); });
344       });
345     };
346 
347     writeMetadataEvent("process_name", Tid, ProcName);
348     writeMetadataEvent("thread_name", Tid, ThreadName);
349     for (const TimeTraceProfiler *TTP : Instances.List)
350       writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
351 
352     J.arrayEnd();
353     J.attributeEnd();
354 
355     // Emit the absolute time when this TimeProfiler started.
356     // This can be used to combine the profiling data from
357     // multiple processes and preserve actual time intervals.
358     J.attribute("beginningOfTime",
359                 time_point_cast<microseconds>(BeginningOfTime)
360                     .time_since_epoch()
361                     .count());
362 
363     J.objectEnd();
364   }
365 
366   SmallVector<std::unique_ptr<InProgressEntry>, 16> Stack;
367   SmallVector<TimeTraceProfilerEntry, 128> Entries;
368   StringMap<CountAndDurationType> CountAndTotalPerName;
369   // System clock time when the session was begun.
370   const time_point<system_clock> BeginningOfTime;
371   // Profiling clock time when the session was begun.
372   const TimePointType StartTime;
373   const std::string ProcName;
374   const sys::Process::Pid Pid;
375   SmallString<0> ThreadName;
376   const uint64_t Tid;
377 
378   // Minimum time granularity (in microseconds)
379   const unsigned TimeTraceGranularity;
380 
381   // Make time trace capture verbose event details (e.g. source filenames). This
382   // can increase the size of the output by 2-3 times.
383   const bool TimeTraceVerbose;
384 };
385 
386 bool llvm::isTimeTraceVerbose() {
387   return getTimeTraceProfilerInstance() &&
388          getTimeTraceProfilerInstance()->TimeTraceVerbose;
389 }
390 
391 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
392                                        StringRef ProcName,
393                                        bool TimeTraceVerbose) {
394   assert(TimeTraceProfilerInstance == nullptr &&
395          "Profiler should not be initialized");
396   TimeTraceProfilerInstance = new TimeTraceProfiler(
397       TimeTraceGranularity, llvm::sys::path::filename(ProcName),
398       TimeTraceVerbose);
399 }
400 
401 // Removes all TimeTraceProfilerInstances.
402 // Called from main thread.
403 void llvm::timeTraceProfilerCleanup() {
404   delete TimeTraceProfilerInstance;
405   TimeTraceProfilerInstance = nullptr;
406 
407   auto &Instances = getTimeTraceProfilerInstances();
408   std::lock_guard<std::mutex> Lock(Instances.Lock);
409   for (auto *TTP : Instances.List)
410     delete TTP;
411   Instances.List.clear();
412 }
413 
414 // Finish TimeTraceProfilerInstance on a worker thread.
415 // This doesn't remove the instance, just moves the pointer to global vector.
416 void llvm::timeTraceProfilerFinishThread() {
417   auto &Instances = getTimeTraceProfilerInstances();
418   std::lock_guard<std::mutex> Lock(Instances.Lock);
419   Instances.List.push_back(TimeTraceProfilerInstance);
420   TimeTraceProfilerInstance = nullptr;
421 }
422 
423 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
424   assert(TimeTraceProfilerInstance != nullptr &&
425          "Profiler object can't be null");
426   TimeTraceProfilerInstance->write(OS);
427 }
428 
429 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
430                                    StringRef FallbackFileName) {
431   assert(TimeTraceProfilerInstance != nullptr &&
432          "Profiler object can't be null");
433 
434   std::string Path = PreferredFileName.str();
435   if (Path.empty()) {
436     Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
437     Path += ".time-trace";
438   }
439 
440   std::error_code EC;
441   raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
442   if (EC)
443     return createStringError(EC, "Could not open " + Path);
444 
445   timeTraceProfilerWrite(OS);
446   return Error::success();
447 }
448 
449 TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name,
450                                                      StringRef Detail) {
451   if (TimeTraceProfilerInstance != nullptr)
452     return TimeTraceProfilerInstance->begin(
453         std::string(Name), [&]() { return std::string(Detail); },
454         TimeTraceEventType::CompleteEvent);
455   return nullptr;
456 }
457 
458 TimeTraceProfilerEntry *
459 llvm::timeTraceProfilerBegin(StringRef Name,
460                              llvm::function_ref<std::string()> Detail) {
461   if (TimeTraceProfilerInstance != nullptr)
462     return TimeTraceProfilerInstance->begin(std::string(Name), Detail,
463                                             TimeTraceEventType::CompleteEvent);
464   return nullptr;
465 }
466 
467 TimeTraceProfilerEntry *
468 llvm::timeTraceProfilerBegin(StringRef Name,
469                              llvm::function_ref<TimeTraceMetadata()> Metadata) {
470   if (TimeTraceProfilerInstance != nullptr)
471     return TimeTraceProfilerInstance->begin(std::string(Name), Metadata,
472                                             TimeTraceEventType::CompleteEvent);
473   return nullptr;
474 }
475 
476 TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name,
477                                                           StringRef Detail) {
478   if (TimeTraceProfilerInstance != nullptr)
479     return TimeTraceProfilerInstance->begin(
480         std::string(Name), [&]() { return std::string(Detail); },
481         TimeTraceEventType::AsyncEvent);
482   return nullptr;
483 }
484 
485 void llvm::timeTraceAddInstantEvent(StringRef Name,
486                                     llvm::function_ref<std::string()> Detail) {
487   if (TimeTraceProfilerInstance != nullptr)
488     TimeTraceProfilerInstance->insert(std::string(Name), Detail);
489 }
490 
491 void llvm::timeTraceProfilerEnd() {
492   if (TimeTraceProfilerInstance != nullptr)
493     TimeTraceProfilerInstance->end();
494 }
495 
496 void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) {
497   if (TimeTraceProfilerInstance != nullptr)
498     TimeTraceProfilerInstance->end(*E);
499 }
500