1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements hierarchical time profiler. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/TimeProfiler.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/STLFunctionalExtras.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/StringMap.h" 18 #include "llvm/Support/JSON.h" 19 #include "llvm/Support/Path.h" 20 #include "llvm/Support/Process.h" 21 #include "llvm/Support/Threading.h" 22 #include <algorithm> 23 #include <cassert> 24 #include <chrono> 25 #include <memory> 26 #include <mutex> 27 #include <string> 28 #include <vector> 29 30 using namespace llvm; 31 32 namespace { 33 34 using std::chrono::duration; 35 using std::chrono::duration_cast; 36 using std::chrono::microseconds; 37 using std::chrono::steady_clock; 38 using std::chrono::system_clock; 39 using std::chrono::time_point; 40 using std::chrono::time_point_cast; 41 42 struct TimeTraceProfilerInstances { 43 std::mutex Lock; 44 std::vector<TimeTraceProfiler *> List; 45 }; 46 47 TimeTraceProfilerInstances &getTimeTraceProfilerInstances() { 48 static TimeTraceProfilerInstances Instances; 49 return Instances; 50 } 51 52 } // anonymous namespace 53 54 // Per Thread instance 55 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; 56 57 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { 58 return TimeTraceProfilerInstance; 59 } 60 61 namespace { 62 63 using ClockType = steady_clock; 64 using TimePointType = time_point<ClockType>; 65 using DurationType = duration<ClockType::rep, ClockType::period>; 66 using CountAndDurationType = std::pair<size_t, DurationType>; 67 using NameAndCountAndDurationType = 68 std::pair<std::string, CountAndDurationType>; 69 70 } // anonymous namespace 71 72 /// Represents an open or completed time section entry to be captured. 73 struct llvm::TimeTraceProfilerEntry { 74 const TimePointType Start; 75 TimePointType End; 76 const std::string Name; 77 TimeTraceMetadata Metadata; 78 79 const TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent; 80 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, 81 std::string &&Dt, TimeTraceEventType Et) 82 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(), 83 EventType(Et) { 84 Metadata.Detail = std::move(Dt); 85 } 86 87 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, 88 TimeTraceMetadata &&Mt, TimeTraceEventType Et) 89 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), 90 Metadata(std::move(Mt)), EventType(Et) {} 91 92 // Calculate timings for FlameGraph. Cast time points to microsecond precision 93 // rather than casting duration. This avoids truncation issues causing inner 94 // scopes overruning outer scopes. 95 ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { 96 return (time_point_cast<microseconds>(Start) - 97 time_point_cast<microseconds>(StartTime)) 98 .count(); 99 } 100 101 ClockType::rep getFlameGraphDurUs() const { 102 return (time_point_cast<microseconds>(End) - 103 time_point_cast<microseconds>(Start)) 104 .count(); 105 } 106 }; 107 108 // Represents a currently open (in-progress) time trace entry. InstantEvents 109 // that happen during an open event are associated with the duration of this 110 // parent event and they are dropped if parent duration is shorter than 111 // the granularity. 112 struct InProgressEntry { 113 TimeTraceProfilerEntry Event; 114 std::vector<TimeTraceProfilerEntry> InstantEvents; 115 116 InProgressEntry(TimePointType S, TimePointType E, std::string N, 117 std::string Dt, TimeTraceEventType Et) 118 : Event(std::move(S), std::move(E), std::move(N), std::move(Dt), Et), 119 InstantEvents() {} 120 121 InProgressEntry(TimePointType S, TimePointType E, std::string N, 122 TimeTraceMetadata Mt, TimeTraceEventType Et) 123 : Event(std::move(S), std::move(E), std::move(N), std::move(Mt), Et), 124 InstantEvents() {} 125 }; 126 127 struct llvm::TimeTraceProfiler { 128 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "", 129 bool TimeTraceVerbose = false) 130 : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()), 131 ProcName(ProcName), Pid(sys::Process::getProcessId()), 132 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity), 133 TimeTraceVerbose(TimeTraceVerbose) { 134 llvm::get_thread_name(ThreadName); 135 } 136 137 TimeTraceProfilerEntry * 138 begin(std::string Name, llvm::function_ref<std::string()> Detail, 139 TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) { 140 assert(EventType != TimeTraceEventType::InstantEvent && 141 "Instant Events don't have begin and end."); 142 Stack.emplace_back(std::make_unique<InProgressEntry>( 143 ClockType::now(), TimePointType(), std::move(Name), Detail(), 144 EventType)); 145 return &Stack.back()->Event; 146 } 147 148 TimeTraceProfilerEntry * 149 begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata, 150 TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) { 151 assert(EventType != TimeTraceEventType::InstantEvent && 152 "Instant Events don't have begin and end."); 153 Stack.emplace_back(std::make_unique<InProgressEntry>( 154 ClockType::now(), TimePointType(), std::move(Name), Metadata(), 155 EventType)); 156 return &Stack.back()->Event; 157 } 158 159 void insert(std::string Name, llvm::function_ref<std::string()> Detail) { 160 if (Stack.empty()) 161 return; 162 163 Stack.back()->InstantEvents.emplace_back(TimeTraceProfilerEntry( 164 ClockType::now(), TimePointType(), std::move(Name), Detail(), 165 TimeTraceEventType::InstantEvent)); 166 } 167 168 void end() { 169 assert(!Stack.empty() && "Must call begin() first"); 170 end(Stack.back()->Event); 171 } 172 173 void end(TimeTraceProfilerEntry &E) { 174 assert(!Stack.empty() && "Must call begin() first"); 175 E.End = ClockType::now(); 176 177 // Calculate duration at full precision for overall counts. 178 DurationType Duration = E.End - E.Start; 179 180 const auto *Iter = 181 llvm::find_if(Stack, [&](const std::unique_ptr<InProgressEntry> &Val) { 182 return &Val->Event == &E; 183 }); 184 assert(Iter != Stack.end() && "Event not in the Stack"); 185 186 // Only include sections longer or equal to TimeTraceGranularity msec. 187 if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity) { 188 Entries.emplace_back(E); 189 for (auto &IE : Iter->get()->InstantEvents) { 190 Entries.emplace_back(IE); 191 } 192 } 193 194 // Track total time taken by each "name", but only the topmost levels of 195 // them; e.g. if there's a template instantiation that instantiates other 196 // templates from within, we only want to add the topmost one. "topmost" 197 // happens to be the ones that don't have any currently open entries above 198 // itself. 199 if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)), 200 [&](const std::unique_ptr<InProgressEntry> &Val) { 201 return Val->Event.Name == E.Name; 202 })) { 203 auto &CountAndTotal = CountAndTotalPerName[E.Name]; 204 CountAndTotal.first++; 205 CountAndTotal.second += Duration; 206 }; 207 208 Stack.erase(Iter); 209 } 210 211 // Write events from this TimeTraceProfilerInstance and 212 // ThreadTimeTraceProfilerInstances. 213 void write(raw_pwrite_stream &OS) { 214 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. 215 auto &Instances = getTimeTraceProfilerInstances(); 216 std::lock_guard<std::mutex> Lock(Instances.Lock); 217 assert(Stack.empty() && 218 "All profiler sections should be ended when calling write"); 219 assert(llvm::all_of(Instances.List, 220 [](const auto &TTP) { return TTP->Stack.empty(); }) && 221 "All profiler sections should be ended when calling write"); 222 223 json::OStream J(OS); 224 J.objectBegin(); 225 J.attributeBegin("traceEvents"); 226 J.arrayBegin(); 227 228 // Emit all events for the main flame graph. 229 auto writeEvent = [&](const auto &E, uint64_t Tid) { 230 auto StartUs = E.getFlameGraphStartUs(StartTime); 231 auto DurUs = E.getFlameGraphDurUs(); 232 233 J.object([&] { 234 J.attribute("pid", Pid); 235 J.attribute("tid", int64_t(Tid)); 236 J.attribute("ts", StartUs); 237 if (E.EventType == TimeTraceEventType::AsyncEvent) { 238 J.attribute("cat", E.Name); 239 J.attribute("ph", "b"); 240 J.attribute("id", 0); 241 } else if (E.EventType == TimeTraceEventType::CompleteEvent) { 242 J.attribute("ph", "X"); 243 J.attribute("dur", DurUs); 244 } else { // instant event 245 assert(E.EventType == TimeTraceEventType::InstantEvent && 246 "InstantEvent expected"); 247 J.attribute("ph", "i"); 248 } 249 J.attribute("name", E.Name); 250 if (!E.Metadata.isEmpty()) { 251 J.attributeObject("args", [&] { 252 if (!E.Metadata.Detail.empty()) 253 J.attribute("detail", E.Metadata.Detail); 254 if (!E.Metadata.File.empty()) 255 J.attribute("file", E.Metadata.File); 256 if (E.Metadata.Line > 0) 257 J.attribute("line", E.Metadata.Line); 258 }); 259 } 260 }); 261 262 if (E.EventType == TimeTraceEventType::AsyncEvent) { 263 J.object([&] { 264 J.attribute("pid", Pid); 265 J.attribute("tid", int64_t(Tid)); 266 J.attribute("ts", StartUs + DurUs); 267 J.attribute("cat", E.Name); 268 J.attribute("ph", "e"); 269 J.attribute("id", 0); 270 J.attribute("name", E.Name); 271 }); 272 } 273 }; 274 for (const TimeTraceProfilerEntry &E : Entries) 275 writeEvent(E, this->Tid); 276 for (const TimeTraceProfiler *TTP : Instances.List) 277 for (const TimeTraceProfilerEntry &E : TTP->Entries) 278 writeEvent(E, TTP->Tid); 279 280 // Emit totals by section name as additional "thread" events, sorted from 281 // longest one. 282 // Find highest used thread id. 283 uint64_t MaxTid = this->Tid; 284 for (const TimeTraceProfiler *TTP : Instances.List) 285 MaxTid = std::max(MaxTid, TTP->Tid); 286 287 // Combine all CountAndTotalPerName from threads into one. 288 StringMap<CountAndDurationType> AllCountAndTotalPerName; 289 auto combineStat = [&](const auto &Stat) { 290 StringRef Key = Stat.getKey(); 291 auto Value = Stat.getValue(); 292 auto &CountAndTotal = AllCountAndTotalPerName[Key]; 293 CountAndTotal.first += Value.first; 294 CountAndTotal.second += Value.second; 295 }; 296 for (const auto &Stat : CountAndTotalPerName) 297 combineStat(Stat); 298 for (const TimeTraceProfiler *TTP : Instances.List) 299 for (const auto &Stat : TTP->CountAndTotalPerName) 300 combineStat(Stat); 301 302 std::vector<NameAndCountAndDurationType> SortedTotals; 303 SortedTotals.reserve(AllCountAndTotalPerName.size()); 304 for (const auto &Total : AllCountAndTotalPerName) 305 SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue()); 306 307 llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A, 308 const NameAndCountAndDurationType &B) { 309 return A.second.second > B.second.second; 310 }); 311 312 // Report totals on separate threads of tracing file. 313 uint64_t TotalTid = MaxTid + 1; 314 for (const NameAndCountAndDurationType &Total : SortedTotals) { 315 auto DurUs = duration_cast<microseconds>(Total.second.second).count(); 316 auto Count = AllCountAndTotalPerName[Total.first].first; 317 318 J.object([&] { 319 J.attribute("pid", Pid); 320 J.attribute("tid", int64_t(TotalTid)); 321 J.attribute("ph", "X"); 322 J.attribute("ts", 0); 323 J.attribute("dur", DurUs); 324 J.attribute("name", "Total " + Total.first); 325 J.attributeObject("args", [&] { 326 J.attribute("count", int64_t(Count)); 327 J.attribute("avg ms", int64_t(DurUs / Count / 1000)); 328 }); 329 }); 330 331 ++TotalTid; 332 } 333 334 auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, 335 StringRef arg) { 336 J.object([&] { 337 J.attribute("cat", ""); 338 J.attribute("pid", Pid); 339 J.attribute("tid", int64_t(Tid)); 340 J.attribute("ts", 0); 341 J.attribute("ph", "M"); 342 J.attribute("name", Name); 343 J.attributeObject("args", [&] { J.attribute("name", arg); }); 344 }); 345 }; 346 347 writeMetadataEvent("process_name", Tid, ProcName); 348 writeMetadataEvent("thread_name", Tid, ThreadName); 349 for (const TimeTraceProfiler *TTP : Instances.List) 350 writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName); 351 352 J.arrayEnd(); 353 J.attributeEnd(); 354 355 // Emit the absolute time when this TimeProfiler started. 356 // This can be used to combine the profiling data from 357 // multiple processes and preserve actual time intervals. 358 J.attribute("beginningOfTime", 359 time_point_cast<microseconds>(BeginningOfTime) 360 .time_since_epoch() 361 .count()); 362 363 J.objectEnd(); 364 } 365 366 SmallVector<std::unique_ptr<InProgressEntry>, 16> Stack; 367 SmallVector<TimeTraceProfilerEntry, 128> Entries; 368 StringMap<CountAndDurationType> CountAndTotalPerName; 369 // System clock time when the session was begun. 370 const time_point<system_clock> BeginningOfTime; 371 // Profiling clock time when the session was begun. 372 const TimePointType StartTime; 373 const std::string ProcName; 374 const sys::Process::Pid Pid; 375 SmallString<0> ThreadName; 376 const uint64_t Tid; 377 378 // Minimum time granularity (in microseconds) 379 const unsigned TimeTraceGranularity; 380 381 // Make time trace capture verbose event details (e.g. source filenames). This 382 // can increase the size of the output by 2-3 times. 383 const bool TimeTraceVerbose; 384 }; 385 386 bool llvm::isTimeTraceVerbose() { 387 return getTimeTraceProfilerInstance() && 388 getTimeTraceProfilerInstance()->TimeTraceVerbose; 389 } 390 391 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, 392 StringRef ProcName, 393 bool TimeTraceVerbose) { 394 assert(TimeTraceProfilerInstance == nullptr && 395 "Profiler should not be initialized"); 396 TimeTraceProfilerInstance = new TimeTraceProfiler( 397 TimeTraceGranularity, llvm::sys::path::filename(ProcName), 398 TimeTraceVerbose); 399 } 400 401 // Removes all TimeTraceProfilerInstances. 402 // Called from main thread. 403 void llvm::timeTraceProfilerCleanup() { 404 delete TimeTraceProfilerInstance; 405 TimeTraceProfilerInstance = nullptr; 406 407 auto &Instances = getTimeTraceProfilerInstances(); 408 std::lock_guard<std::mutex> Lock(Instances.Lock); 409 for (auto *TTP : Instances.List) 410 delete TTP; 411 Instances.List.clear(); 412 } 413 414 // Finish TimeTraceProfilerInstance on a worker thread. 415 // This doesn't remove the instance, just moves the pointer to global vector. 416 void llvm::timeTraceProfilerFinishThread() { 417 auto &Instances = getTimeTraceProfilerInstances(); 418 std::lock_guard<std::mutex> Lock(Instances.Lock); 419 Instances.List.push_back(TimeTraceProfilerInstance); 420 TimeTraceProfilerInstance = nullptr; 421 } 422 423 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) { 424 assert(TimeTraceProfilerInstance != nullptr && 425 "Profiler object can't be null"); 426 TimeTraceProfilerInstance->write(OS); 427 } 428 429 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName, 430 StringRef FallbackFileName) { 431 assert(TimeTraceProfilerInstance != nullptr && 432 "Profiler object can't be null"); 433 434 std::string Path = PreferredFileName.str(); 435 if (Path.empty()) { 436 Path = FallbackFileName == "-" ? "out" : FallbackFileName.str(); 437 Path += ".time-trace"; 438 } 439 440 std::error_code EC; 441 raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF); 442 if (EC) 443 return createStringError(EC, "Could not open " + Path); 444 445 timeTraceProfilerWrite(OS); 446 return Error::success(); 447 } 448 449 TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name, 450 StringRef Detail) { 451 if (TimeTraceProfilerInstance != nullptr) 452 return TimeTraceProfilerInstance->begin( 453 std::string(Name), [&]() { return std::string(Detail); }, 454 TimeTraceEventType::CompleteEvent); 455 return nullptr; 456 } 457 458 TimeTraceProfilerEntry * 459 llvm::timeTraceProfilerBegin(StringRef Name, 460 llvm::function_ref<std::string()> Detail) { 461 if (TimeTraceProfilerInstance != nullptr) 462 return TimeTraceProfilerInstance->begin(std::string(Name), Detail, 463 TimeTraceEventType::CompleteEvent); 464 return nullptr; 465 } 466 467 TimeTraceProfilerEntry * 468 llvm::timeTraceProfilerBegin(StringRef Name, 469 llvm::function_ref<TimeTraceMetadata()> Metadata) { 470 if (TimeTraceProfilerInstance != nullptr) 471 return TimeTraceProfilerInstance->begin(std::string(Name), Metadata, 472 TimeTraceEventType::CompleteEvent); 473 return nullptr; 474 } 475 476 TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name, 477 StringRef Detail) { 478 if (TimeTraceProfilerInstance != nullptr) 479 return TimeTraceProfilerInstance->begin( 480 std::string(Name), [&]() { return std::string(Detail); }, 481 TimeTraceEventType::AsyncEvent); 482 return nullptr; 483 } 484 485 void llvm::timeTraceAddInstantEvent(StringRef Name, 486 llvm::function_ref<std::string()> Detail) { 487 if (TimeTraceProfilerInstance != nullptr) 488 TimeTraceProfilerInstance->insert(std::string(Name), Detail); 489 } 490 491 void llvm::timeTraceProfilerEnd() { 492 if (TimeTraceProfilerInstance != nullptr) 493 TimeTraceProfilerInstance->end(); 494 } 495 496 void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) { 497 if (TimeTraceProfilerInstance != nullptr) 498 TimeTraceProfilerInstance->end(*E); 499 } 500