1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements hierarchical time profiler. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/TimeProfiler.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/STLFunctionalExtras.h" 16 #include "llvm/ADT/StringMap.h" 17 #include "llvm/Support/JSON.h" 18 #include "llvm/Support/Path.h" 19 #include "llvm/Support/Process.h" 20 #include "llvm/Support/Threading.h" 21 #include <algorithm> 22 #include <cassert> 23 #include <chrono> 24 #include <memory> 25 #include <mutex> 26 #include <string> 27 #include <vector> 28 29 using namespace llvm; 30 31 namespace { 32 33 using std::chrono::duration; 34 using std::chrono::duration_cast; 35 using std::chrono::microseconds; 36 using std::chrono::steady_clock; 37 using std::chrono::system_clock; 38 using std::chrono::time_point; 39 using std::chrono::time_point_cast; 40 41 struct TimeTraceProfilerInstances { 42 std::mutex Lock; 43 std::vector<TimeTraceProfiler *> List; 44 }; 45 46 TimeTraceProfilerInstances &getTimeTraceProfilerInstances() { 47 static TimeTraceProfilerInstances Instances; 48 return Instances; 49 } 50 51 } // anonymous namespace 52 53 // Per Thread instance 54 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; 55 56 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { 57 return TimeTraceProfilerInstance; 58 } 59 60 namespace { 61 62 using ClockType = steady_clock; 63 using TimePointType = time_point<ClockType>; 64 using DurationType = duration<ClockType::rep, ClockType::period>; 65 using CountAndDurationType = std::pair<size_t, DurationType>; 66 using NameAndCountAndDurationType = 67 std::pair<std::string, CountAndDurationType>; 68 69 } // anonymous namespace 70 71 /// Represents an open or completed time section entry to be captured. 72 struct llvm::TimeTraceProfilerEntry { 73 const TimePointType Start; 74 TimePointType End; 75 const std::string Name; 76 const std::string Detail; 77 const bool AsyncEvent = false; 78 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, 79 std::string &&Dt, bool Ae) 80 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), 81 Detail(std::move(Dt)), AsyncEvent(Ae) {} 82 83 // Calculate timings for FlameGraph. Cast time points to microsecond precision 84 // rather than casting duration. This avoids truncation issues causing inner 85 // scopes overruning outer scopes. 86 ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { 87 return (time_point_cast<microseconds>(Start) - 88 time_point_cast<microseconds>(StartTime)) 89 .count(); 90 } 91 92 ClockType::rep getFlameGraphDurUs() const { 93 return (time_point_cast<microseconds>(End) - 94 time_point_cast<microseconds>(Start)) 95 .count(); 96 } 97 }; 98 99 struct llvm::TimeTraceProfiler { 100 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") 101 : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()), 102 ProcName(ProcName), Pid(sys::Process::getProcessId()), 103 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) { 104 llvm::get_thread_name(ThreadName); 105 } 106 107 TimeTraceProfilerEntry *begin(std::string Name, 108 llvm::function_ref<std::string()> Detail, 109 bool AsyncEvent = false) { 110 Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>( 111 ClockType::now(), TimePointType(), std::move(Name), Detail(), 112 AsyncEvent)); 113 return Stack.back().get(); 114 } 115 116 void end() { 117 assert(!Stack.empty() && "Must call begin() first"); 118 end(*Stack.back()); 119 } 120 121 void end(TimeTraceProfilerEntry &E) { 122 assert(!Stack.empty() && "Must call begin() first"); 123 E.End = ClockType::now(); 124 125 // Calculate duration at full precision for overall counts. 126 DurationType Duration = E.End - E.Start; 127 128 // Only include sections longer or equal to TimeTraceGranularity msec. 129 if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity) 130 Entries.emplace_back(E); 131 132 // Track total time taken by each "name", but only the topmost levels of 133 // them; e.g. if there's a template instantiation that instantiates other 134 // templates from within, we only want to add the topmost one. "topmost" 135 // happens to be the ones that don't have any currently open entries above 136 // itself. 137 if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)), 138 [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) { 139 return Val->Name == E.Name; 140 })) { 141 auto &CountAndTotal = CountAndTotalPerName[E.Name]; 142 CountAndTotal.first++; 143 CountAndTotal.second += Duration; 144 }; 145 146 llvm::erase_if(Stack, 147 [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) { 148 return Val.get() == &E; 149 }); 150 } 151 152 // Write events from this TimeTraceProfilerInstance and 153 // ThreadTimeTraceProfilerInstances. 154 void write(raw_pwrite_stream &OS) { 155 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. 156 auto &Instances = getTimeTraceProfilerInstances(); 157 std::lock_guard<std::mutex> Lock(Instances.Lock); 158 assert(Stack.empty() && 159 "All profiler sections should be ended when calling write"); 160 assert(llvm::all_of(Instances.List, 161 [](const auto &TTP) { return TTP->Stack.empty(); }) && 162 "All profiler sections should be ended when calling write"); 163 164 json::OStream J(OS); 165 J.objectBegin(); 166 J.attributeBegin("traceEvents"); 167 J.arrayBegin(); 168 169 // Emit all events for the main flame graph. 170 auto writeEvent = [&](const auto &E, uint64_t Tid) { 171 auto StartUs = E.getFlameGraphStartUs(StartTime); 172 auto DurUs = E.getFlameGraphDurUs(); 173 174 J.object([&] { 175 J.attribute("pid", Pid); 176 J.attribute("tid", int64_t(Tid)); 177 J.attribute("ts", StartUs); 178 if (E.AsyncEvent) { 179 J.attribute("cat", E.Name); 180 J.attribute("ph", "b"); 181 J.attribute("id", 0); 182 } else { 183 J.attribute("ph", "X"); 184 J.attribute("dur", DurUs); 185 } 186 J.attribute("name", E.Name); 187 if (!E.Detail.empty()) { 188 J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); 189 } 190 }); 191 192 if (E.AsyncEvent) { 193 J.object([&] { 194 J.attribute("pid", Pid); 195 J.attribute("tid", int64_t(Tid)); 196 J.attribute("ts", StartUs + DurUs); 197 J.attribute("cat", E.Name); 198 J.attribute("ph", "e"); 199 J.attribute("id", 0); 200 J.attribute("name", E.Name); 201 }); 202 } 203 }; 204 for (const TimeTraceProfilerEntry &E : Entries) 205 writeEvent(E, this->Tid); 206 for (const TimeTraceProfiler *TTP : Instances.List) 207 for (const TimeTraceProfilerEntry &E : TTP->Entries) 208 writeEvent(E, TTP->Tid); 209 210 // Emit totals by section name as additional "thread" events, sorted from 211 // longest one. 212 // Find highest used thread id. 213 uint64_t MaxTid = this->Tid; 214 for (const TimeTraceProfiler *TTP : Instances.List) 215 MaxTid = std::max(MaxTid, TTP->Tid); 216 217 // Combine all CountAndTotalPerName from threads into one. 218 StringMap<CountAndDurationType> AllCountAndTotalPerName; 219 auto combineStat = [&](const auto &Stat) { 220 StringRef Key = Stat.getKey(); 221 auto Value = Stat.getValue(); 222 auto &CountAndTotal = AllCountAndTotalPerName[Key]; 223 CountAndTotal.first += Value.first; 224 CountAndTotal.second += Value.second; 225 }; 226 for (const auto &Stat : CountAndTotalPerName) 227 combineStat(Stat); 228 for (const TimeTraceProfiler *TTP : Instances.List) 229 for (const auto &Stat : TTP->CountAndTotalPerName) 230 combineStat(Stat); 231 232 std::vector<NameAndCountAndDurationType> SortedTotals; 233 SortedTotals.reserve(AllCountAndTotalPerName.size()); 234 for (const auto &Total : AllCountAndTotalPerName) 235 SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue()); 236 237 llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A, 238 const NameAndCountAndDurationType &B) { 239 return A.second.second > B.second.second; 240 }); 241 242 // Report totals on separate threads of tracing file. 243 uint64_t TotalTid = MaxTid + 1; 244 for (const NameAndCountAndDurationType &Total : SortedTotals) { 245 auto DurUs = duration_cast<microseconds>(Total.second.second).count(); 246 auto Count = AllCountAndTotalPerName[Total.first].first; 247 248 J.object([&] { 249 J.attribute("pid", Pid); 250 J.attribute("tid", int64_t(TotalTid)); 251 J.attribute("ph", "X"); 252 J.attribute("ts", 0); 253 J.attribute("dur", DurUs); 254 J.attribute("name", "Total " + Total.first); 255 J.attributeObject("args", [&] { 256 J.attribute("count", int64_t(Count)); 257 J.attribute("avg ms", int64_t(DurUs / Count / 1000)); 258 }); 259 }); 260 261 ++TotalTid; 262 } 263 264 auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, 265 StringRef arg) { 266 J.object([&] { 267 J.attribute("cat", ""); 268 J.attribute("pid", Pid); 269 J.attribute("tid", int64_t(Tid)); 270 J.attribute("ts", 0); 271 J.attribute("ph", "M"); 272 J.attribute("name", Name); 273 J.attributeObject("args", [&] { J.attribute("name", arg); }); 274 }); 275 }; 276 277 writeMetadataEvent("process_name", Tid, ProcName); 278 writeMetadataEvent("thread_name", Tid, ThreadName); 279 for (const TimeTraceProfiler *TTP : Instances.List) 280 writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName); 281 282 J.arrayEnd(); 283 J.attributeEnd(); 284 285 // Emit the absolute time when this TimeProfiler started. 286 // This can be used to combine the profiling data from 287 // multiple processes and preserve actual time intervals. 288 J.attribute("beginningOfTime", 289 time_point_cast<microseconds>(BeginningOfTime) 290 .time_since_epoch() 291 .count()); 292 293 J.objectEnd(); 294 } 295 296 SmallVector<std::unique_ptr<TimeTraceProfilerEntry>, 16> Stack; 297 SmallVector<TimeTraceProfilerEntry, 128> Entries; 298 StringMap<CountAndDurationType> CountAndTotalPerName; 299 // System clock time when the session was begun. 300 const time_point<system_clock> BeginningOfTime; 301 // Profiling clock time when the session was begun. 302 const TimePointType StartTime; 303 const std::string ProcName; 304 const sys::Process::Pid Pid; 305 SmallString<0> ThreadName; 306 const uint64_t Tid; 307 308 // Minimum time granularity (in microseconds) 309 const unsigned TimeTraceGranularity; 310 }; 311 312 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, 313 StringRef ProcName) { 314 assert(TimeTraceProfilerInstance == nullptr && 315 "Profiler should not be initialized"); 316 TimeTraceProfilerInstance = new TimeTraceProfiler( 317 TimeTraceGranularity, llvm::sys::path::filename(ProcName)); 318 } 319 320 // Removes all TimeTraceProfilerInstances. 321 // Called from main thread. 322 void llvm::timeTraceProfilerCleanup() { 323 delete TimeTraceProfilerInstance; 324 TimeTraceProfilerInstance = nullptr; 325 326 auto &Instances = getTimeTraceProfilerInstances(); 327 std::lock_guard<std::mutex> Lock(Instances.Lock); 328 for (auto *TTP : Instances.List) 329 delete TTP; 330 Instances.List.clear(); 331 } 332 333 // Finish TimeTraceProfilerInstance on a worker thread. 334 // This doesn't remove the instance, just moves the pointer to global vector. 335 void llvm::timeTraceProfilerFinishThread() { 336 auto &Instances = getTimeTraceProfilerInstances(); 337 std::lock_guard<std::mutex> Lock(Instances.Lock); 338 Instances.List.push_back(TimeTraceProfilerInstance); 339 TimeTraceProfilerInstance = nullptr; 340 } 341 342 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) { 343 assert(TimeTraceProfilerInstance != nullptr && 344 "Profiler object can't be null"); 345 TimeTraceProfilerInstance->write(OS); 346 } 347 348 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName, 349 StringRef FallbackFileName) { 350 assert(TimeTraceProfilerInstance != nullptr && 351 "Profiler object can't be null"); 352 353 std::string Path = PreferredFileName.str(); 354 if (Path.empty()) { 355 Path = FallbackFileName == "-" ? "out" : FallbackFileName.str(); 356 Path += ".time-trace"; 357 } 358 359 std::error_code EC; 360 raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF); 361 if (EC) 362 return createStringError(EC, "Could not open " + Path); 363 364 timeTraceProfilerWrite(OS); 365 return Error::success(); 366 } 367 368 TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name, 369 StringRef Detail) { 370 if (TimeTraceProfilerInstance != nullptr) 371 return TimeTraceProfilerInstance->begin( 372 std::string(Name), [&]() { return std::string(Detail); }, false); 373 return nullptr; 374 } 375 376 TimeTraceProfilerEntry * 377 llvm::timeTraceProfilerBegin(StringRef Name, 378 llvm::function_ref<std::string()> Detail) { 379 if (TimeTraceProfilerInstance != nullptr) 380 return TimeTraceProfilerInstance->begin(std::string(Name), Detail, false); 381 return nullptr; 382 } 383 384 TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name, 385 StringRef Detail) { 386 if (TimeTraceProfilerInstance != nullptr) 387 return TimeTraceProfilerInstance->begin( 388 std::string(Name), [&]() { return std::string(Detail); }, true); 389 return nullptr; 390 } 391 392 void llvm::timeTraceProfilerEnd() { 393 if (TimeTraceProfilerInstance != nullptr) 394 TimeTraceProfilerInstance->end(); 395 } 396 397 void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) { 398 if (TimeTraceProfilerInstance != nullptr) 399 TimeTraceProfilerInstance->end(*E); 400 } 401