//===-- xray_profiling.cc ---------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This is the implementation of a profiling handler.
//
//===----------------------------------------------------------------------===//
#include <memory>
#include <pthread.h>
#include <time.h>

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"

namespace __xray {

namespace {

static atomic_sint32_t ProfilerLogFlushStatus = {
    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

static atomic_sint32_t ProfilerLogStatus = {
    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

static SpinMutex ProfilerOptionsMutex;

struct ProfilingData {
  atomic_uintptr_t Allocators;
  atomic_uintptr_t FCT;
};

static pthread_key_t ProfilingKey;

// We use a global buffer queue, which gets initialized once at initialization
// time, and gets reset when profiling is "done".
static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
    BufferQueueStorage;
static BufferQueue *BQ = nullptr;

thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                  alignof(FunctionCallTrie::Allocators)>::type
    AllocatorsStorage;
thread_local std::aligned_storage<sizeof(FunctionCallTrie),
                                  alignof(FunctionCallTrie)>::type
    FunctionCallTrieStorage;
thread_local ProfilingData TLD{{0}, {0}};
thread_local atomic_uint8_t ReentranceGuard{0};

// We use a separate guard to ensure that if this thread is already cleaning
// up, any signal handlers don't attempt to clean up or initialize the
// thread-local data.
thread_local atomic_uint8_t TLDInitGuard{0};

// We also use a separate latch to signal that the thread is exiting, and that
// non-essential work should be ignored (things like recording events, etc.).
thread_local atomic_uint8_t ThreadExitingLatch{0};

static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
    pthread_setspecific(ProfilingKey, &TLD);
    return false;
  }();
  (void)ThreadOnce;

  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return nullptr;

  if (atomic_load_relaxed(&ThreadExitingLatch))
    return nullptr;

  // The sentinel value 1 means "initialization in progress"; callers that
  // lose the race below bail out instead of re-initializing (see the == 1
  // checks further down).
  uptr Allocators = 0;
  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
                                     memory_order_acq_rel)) {
    bool Success = false;
    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        atomic_store(&TLD.Allocators, 0, memory_order_release);
    });

    // Acquire a set of buffers for this thread.
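    // Each acquisition below registers an at_scope_exit undo that releases
    // the buffer again unless every acquisition succeeds (i.e. Success gets
    // set), so a partial failure hands all acquired buffers back to the
    // queue.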
    if (BQ == nullptr)
      return nullptr;

    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;

    Success = true;
    new (&AllocatorsStorage) FunctionCallTrie::Allocators(
        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
    Allocators = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
    atomic_store(&TLD.Allocators, Allocators, memory_order_release);
  }

  if (Allocators == 1)
    return nullptr;

  uptr FCT = 0;
  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1,
                                     memory_order_acq_rel)) {
    new (&FunctionCallTrieStorage)
        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
            atomic_load_relaxed(&TLD.Allocators)));
    FCT = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
    atomic_store(&TLD.FCT, FCT, memory_order_release);
  }

  if (FCT == 1)
    return nullptr;

  return &TLD;
}

static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
                 &FunctionCallTrieStorage)))
    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();

  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
  if (Allocators ==
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
}

static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return;

  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
  if (P != reinterpret_cast<uptr>(
               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
  DCHECK_NE(FCT, nullptr);

  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
  if (A !=
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    return;

  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
  DCHECK_NE(Allocators, nullptr);

  // Always move the data into the profile collector.
  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
                                std::move(ThreadBuffers), GetTid());

  // Re-initialize the ThreadBuffers object to a known "default" state.
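  // The buffer handles were moved into the collector above, so resetting
  // them here keeps this thread from later releasing buffers it no longer
  // owns.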
  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}

} // namespace

const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
#else
  return "";
#endif
}

XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    if (Verbosity())
      Report("Not flushing profiles, profiling has not been finalized.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  RecursionGuard SignalGuard(ReentranceGuard);
  if (!SignalGuard) {
    if (Verbosity())
      Report("Cannot finalize properly inside a signal handler!\n");
    atomic_store(&ProfilerLogFlushStatus,
                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
                 memory_order_release);
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
                                 memory_order_acq_rel);
  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
    if (Verbosity())
      Report("Not flushing profiles, implementation still flushing.\n");
    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
  }

  // At this point, we'll create the file that will contain the profile, but
  // only if the options say so.
  if (!profilingFlags()->no_flush) {
    // First check whether we have data in the profile collector service
    // before we try to write anything down.
    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
    if (B.Data == nullptr) {
      if (Verbosity())
        Report("profiling: No data to flush.\n");
    } else {
      LogWriter *LW = LogWriter::Open();
      if (LW == nullptr) {
        if (Verbosity())
          Report("profiling: Failed to flush to file, dropping data.\n");
      } else {
        // Now, for each of the buffers, write out the profile data as we
        // would see it in memory, verbatim.
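        // nextBuffer() implements a simple iterator protocol: passing in the
        // previously returned XRayBuffer yields the one after it, and a null
        // buffer marks the end of the sequence (matching the {nullptr, 0}
        // seed above).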
        while (B.Data != nullptr && B.Size != 0) {
          LW->WriteAll(reinterpret_cast<const char *>(B.Data),
                       reinterpret_cast<const char *>(B.Data) + B.Size);
          B = profileCollectorService::nextBuffer(B);
        }
      }
      LogWriter::Close(LW);
    }
  }

  profileCollectorService::reset();

  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
               memory_order_release);
  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
               memory_order_release);

  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}

void profilingHandleArg0(int32_t FuncId,
                         XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
  unsigned char CPU;
  auto TSC = readTSC(CPU);
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return;

  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
    return;

  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
    postCurrentThreadFCT(TLD);
    return;
  }

  auto T = getThreadLocalData();
  if (T == nullptr)
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    FCT->enterFunction(FuncId, TSC, CPU);
    break;
  case XRayEntryType::EXIT:
  case XRayEntryType::TAIL:
    FCT->exitFunction(FuncId, TSC, CPU);
    break;
  default:
    // FIXME: Handle bugs.
    break;
  }
}

void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                         uint64_t) XRAY_NEVER_INSTRUMENT {
  return profilingHandleArg0(FuncId, Entry);
}

XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot finalize profile, profiling is not initialized.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Mark, then finalize, the current generation of buffers. This lets the
  // threads currently holding onto buffers keep using them, while the last
  // reference does the memory cleanup.
  DCHECK_NE(BQ, nullptr);
  BQ->finalize();

  // Wait a grace period to allow threads to see that we're finalizing.
  SleepForMillis(profilingFlags()->grace_period_ms);

  // If we are for some reason entering this function from an instrumented
  // handler, we bail out.
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return static_cast<XRayLogInitStatus>(CurrentStatus);

  // Post the current thread's data if we have any.
  postCurrentThreadFCT(TLD);

  // Then we force serialization of the log data.
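  // Serialization turns the posted FunctionCallTrie data into the flat
  // buffers that profilingFlush() later walks through nextBuffer().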
  profileCollectorService::serialize();

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
               memory_order_release);
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}

XRayLogInitStatus
profilingLoggingInit(size_t, size_t, void *Options,
                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      memory_order_acq_rel)) {
    if (Verbosity())
      Report("Cannot initialize an already-initialized profiling "
             "implementation.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    FlagParser ConfigParser;
    ProfilerFlags Flags;
    Flags.setDefaults();
    registerProfilerFlags(&ConfigParser, &Flags);
    ConfigParser.ParseString(profilingCompilerDefinedFlags());
    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
    if (Env == nullptr)
      Env = "";
    ConfigParser.ParseString(Env);

    // Then parse the configuration string provided.
    ConfigParser.ParseString(static_cast<const char *>(Options));
    if (Verbosity())
      ReportUnrecognizedFlags();
    *profilingFlags() = Flags;
  }

  // We need to reset the profile data collection implementation now.
  profileCollectorService::reset();

  // Then also reset the buffer queue implementation.
  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->per_thread_allocator_max,
                    profilingFlags()->buffers_max, Success);
    if (!Success) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers!\n");
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    // If we've succeeded, set the global pointer to the initialized storage.
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();
    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
                               profilingFlags()->buffers_max);

    if (InitStatus != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers; error: %s\n",
               BufferQueue::getErrorString(InitStatus));
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    DCHECK(!BQ->finalizing());
  }

  // We need to set up the exit handlers.
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(
      &Once, +[] {
        pthread_key_create(
            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
              if (atomic_exchange(&ThreadExitingLatch, 1,
                                  memory_order_acq_rel))
                return;

              if (P == nullptr)
                return;

              auto T = reinterpret_cast<ProfilingData *>(P);
              if (atomic_load_relaxed(&T->Allocators) == 0)
                return;

              {
                // If we're somehow executing this while inside a
                // non-reentrant-friendly context, we skip attempting to post
                // the current thread's data.
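                // (Thread-specific-data destructors run during thread exit,
                // which may happen while this thread is still inside an XRay
                // handler.)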
                RecursionGuard G(ReentranceGuard);
                if (!G)
                  return;

                postCurrentThreadFCT(*T);
              }
            });

        // We also need to set up an exit handler, so that we can get the
        // profile information at exit time. We use the C API to do this, to
        // not rely on C++ ABI functions for registering exit handlers.
        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
            return;

          auto Cleanup =
              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });

          // Finalize and flush.
          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
              profilingFlush() != XRAY_LOG_FLUSHED)
            return;

          if (Verbosity())
            Report("XRay Profile flushed at exit.\n");
        });
      });

  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
  __xray_set_handler(profilingHandleArg0);
  __xray_set_handler_arg1(profilingHandleArg1);

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               memory_order_release);
  if (Verbosity())
    Report("XRay Profiling init successful.\n");

  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}

bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
  // Set up the flag defaults from the static defaults and the
  // compiler-provided defaults.
  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    auto *F = profilingFlags();
    F->setDefaults();
    FlagParser ProfilingParser;
    registerProfilerFlags(&ProfilingParser, F);
    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
  }

  XRayLogImpl Impl{
      profilingLoggingInit,
      profilingFinalize,
      profilingHandleArg0,
      profilingFlush,
  };
  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    if (Verbosity())
      Report("Cannot register XRay Profiling mode to 'xray-profiling'; "
             "error = %d\n",
             RegistrationResult);
    return false;
  }

  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
    __xray_log_select_mode("xray-profiling");
  return true;
}

} // namespace __xray

static auto UNUSED Unused = __xray::profilingDynamicInitializer();
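
// Usage sketch (not part of this implementation; flag names as used above and
// in the XRay documentation): with an XRay-instrumented binary, this mode can
// be selected at startup via the environment, e.g.:
//
//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-profiling" \
//   XRAY_PROFILING_OPTIONS="no_flush=false" ./instrumented-binary
//
// The profile is then serialized and written out through LogWriter when the
// process exits, via the Atexit hook registered in profilingLoggingInit().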