//===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownloadArtifact function
/// and its per-artifact wrappers. For the server, this file defines the
/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
/// DebuginfodLog and DebuginfodCollection classes. The client functions
/// retrieve any of the three supported artifact types (executable, debuginfo,
/// source file) associated with a build-id from debuginfod servers. If a
/// source file is to be fetched, its absolute path must be passed as the
/// SourceFilePath argument to getCachedOrDownloadSource. The
/// DebuginfodLogEntry, DebuginfodLog, and DebuginfodCollection are used by the
/// DebuginfodServer to scan the local filesystem for binaries and serve the
/// debuginfod protocol.
21 /// 22 //===----------------------------------------------------------------------===// 23 24 #include "llvm/Debuginfod/Debuginfod.h" 25 #include "llvm/ADT/StringExtras.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/BinaryFormat/Magic.h" 28 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 29 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 30 #include "llvm/Debuginfod/HTTPClient.h" 31 #include "llvm/Object/BuildID.h" 32 #include "llvm/Object/ELFObjectFile.h" 33 #include "llvm/Support/CachePruning.h" 34 #include "llvm/Support/Caching.h" 35 #include "llvm/Support/Errc.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/FileUtilities.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/Path.h" 40 #include "llvm/Support/ThreadPool.h" 41 #include "llvm/Support/xxhash.h" 42 43 #include <atomic> 44 #include <optional> 45 #include <thread> 46 47 namespace llvm { 48 49 using llvm::object::BuildIDRef; 50 51 namespace { 52 std::optional<SmallVector<StringRef>> DebuginfodUrls; 53 // Many Readers/Single Writer lock protecting the global debuginfod URL list. 54 llvm::sys::RWMutex UrlsMutex; 55 } // namespace 56 57 static std::string uniqueKey(llvm::StringRef S) { 58 return utostr(xxh3_64bits(S)); 59 } 60 61 // Returns a binary BuildID as a normalized hex string. 62 // Uses lowercase for compatibility with common debuginfod servers. 
63 static std::string buildIDToString(BuildIDRef ID) { 64 return llvm::toHex(ID, /*LowerCase=*/true); 65 } 66 67 bool canUseDebuginfod() { 68 return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty(); 69 } 70 71 SmallVector<StringRef> getDefaultDebuginfodUrls() { 72 std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex); 73 if (!DebuginfodUrls) { 74 // Only read from the environment variable if the user hasn't already 75 // set the value 76 ReadGuard.unlock(); 77 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex); 78 DebuginfodUrls = SmallVector<StringRef>(); 79 if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) { 80 StringRef(DebuginfodUrlsEnv) 81 .split(DebuginfodUrls.value(), " ", -1, false); 82 } 83 WriteGuard.unlock(); 84 ReadGuard.lock(); 85 } 86 return DebuginfodUrls.value(); 87 } 88 89 // Set the default debuginfod URL list, override the environment variable 90 void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) { 91 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex); 92 DebuginfodUrls = URLs; 93 } 94 95 /// Finds a default local file caching directory for the debuginfod client, 96 /// first checking DEBUGINFOD_CACHE_PATH. 
97 Expected<std::string> getDefaultDebuginfodCacheDirectory() { 98 if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH")) 99 return CacheDirectoryEnv; 100 101 SmallString<64> CacheDirectory; 102 if (!sys::path::cache_directory(CacheDirectory)) 103 return createStringError( 104 errc::io_error, "Unable to determine appropriate cache directory."); 105 sys::path::append(CacheDirectory, "llvm-debuginfod", "client"); 106 return std::string(CacheDirectory); 107 } 108 109 std::chrono::milliseconds getDefaultDebuginfodTimeout() { 110 long Timeout; 111 const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT"); 112 if (DebuginfodTimeoutEnv && 113 to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10)) 114 return std::chrono::milliseconds(Timeout * 1000); 115 116 return std::chrono::milliseconds(90 * 1000); 117 } 118 119 /// The following functions fetch a debuginfod artifact to a file in a local 120 /// cache and return the cached file path. They first search the local cache, 121 /// followed by the debuginfod servers. 
122 123 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID, 124 StringRef SourceFilePath) { 125 SmallString<64> UrlPath; 126 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 127 buildIDToString(ID), "source", 128 sys::path::convert_to_slash(SourceFilePath)); 129 return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath); 130 } 131 132 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) { 133 SmallString<64> UrlPath; 134 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 135 buildIDToString(ID), "executable"); 136 return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath); 137 } 138 139 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) { 140 SmallString<64> UrlPath; 141 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 142 buildIDToString(ID), "debuginfo"); 143 return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath); 144 } 145 146 // General fetching function. 147 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey, 148 StringRef UrlPath) { 149 SmallString<10> CacheDir; 150 151 Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory(); 152 if (!CacheDirOrErr) 153 return CacheDirOrErr.takeError(); 154 CacheDir = *CacheDirOrErr; 155 156 return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir, 157 getDefaultDebuginfodUrls(), 158 getDefaultDebuginfodTimeout()); 159 } 160 161 namespace { 162 163 /// A simple handler which streams the returned data to a cache file. The cache 164 /// file is only created if a 200 OK status is observed. 
165 class StreamedHTTPResponseHandler : public HTTPResponseHandler { 166 using CreateStreamFn = 167 std::function<Expected<std::unique_ptr<CachedFileStream>>()>; 168 CreateStreamFn CreateStream; 169 HTTPClient &Client; 170 std::unique_ptr<CachedFileStream> FileStream; 171 172 public: 173 StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client) 174 : CreateStream(CreateStream), Client(Client) {} 175 virtual ~StreamedHTTPResponseHandler() = default; 176 177 Error handleBodyChunk(StringRef BodyChunk) override; 178 }; 179 180 } // namespace 181 182 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) { 183 if (!FileStream) { 184 unsigned Code = Client.responseCode(); 185 if (Code && Code != 200) 186 return Error::success(); 187 Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError = 188 CreateStream(); 189 if (!FileStreamOrError) 190 return FileStreamOrError.takeError(); 191 FileStream = std::move(*FileStreamOrError); 192 } 193 *FileStream->OS << BodyChunk; 194 return Error::success(); 195 } 196 197 // An over-accepting simplification of the HTTP RFC 7230 spec. 
198 static bool isHeader(StringRef S) { 199 StringRef Name; 200 StringRef Value; 201 std::tie(Name, Value) = S.split(':'); 202 if (Name.empty() || Value.empty()) 203 return false; 204 return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) && 205 all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; }); 206 } 207 208 static SmallVector<std::string, 0> getHeaders() { 209 const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE"); 210 if (!Filename) 211 return {}; 212 ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile = 213 MemoryBuffer::getFile(Filename, /*IsText=*/true); 214 if (!HeadersFile) 215 return {}; 216 217 SmallVector<std::string, 0> Headers; 218 uint64_t LineNumber = 0; 219 for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) { 220 LineNumber++; 221 if (!Line.empty() && Line.back() == '\r') 222 Line = Line.drop_back(); 223 if (!isHeader(Line)) { 224 if (!all_of(Line, llvm::isSpace)) 225 WithColor::warning() 226 << "could not parse debuginfod header: " << Filename << ':' 227 << LineNumber << '\n'; 228 continue; 229 } 230 Headers.emplace_back(Line); 231 } 232 return Headers; 233 } 234 235 Expected<std::string> getCachedOrDownloadArtifact( 236 StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath, 237 ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) { 238 SmallString<64> AbsCachedArtifactPath; 239 sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath, 240 "llvmcache-" + UniqueKey); 241 242 Expected<FileCache> CacheOrErr = 243 localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath); 244 if (!CacheOrErr) 245 return CacheOrErr.takeError(); 246 247 FileCache Cache = *CacheOrErr; 248 // We choose an arbitrary Task parameter as we do not make use of it. 
249 unsigned Task = 0; 250 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, ""); 251 if (!CacheAddStreamOrErr) 252 return CacheAddStreamOrErr.takeError(); 253 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr; 254 if (!CacheAddStream) 255 return std::string(AbsCachedArtifactPath); 256 // The artifact was not found in the local cache, query the debuginfod 257 // servers. 258 if (!HTTPClient::isAvailable()) 259 return createStringError(errc::io_error, 260 "No working HTTP client is available."); 261 262 if (!HTTPClient::IsInitialized) 263 return createStringError( 264 errc::io_error, 265 "A working HTTP client is available, but it is not initialized. To " 266 "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() " 267 "at the beginning of main."); 268 269 HTTPClient Client; 270 Client.setTimeout(Timeout); 271 for (StringRef ServerUrl : DebuginfodUrls) { 272 SmallString<64> ArtifactUrl; 273 sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath); 274 275 // Perform the HTTP request and if successful, write the response body to 276 // the cache. 277 { 278 StreamedHTTPResponseHandler Handler( 279 [&]() { return CacheAddStream(Task, ""); }, Client); 280 HTTPRequest Request(ArtifactUrl); 281 Request.Headers = getHeaders(); 282 Error Err = Client.perform(Request, Handler); 283 if (Err) 284 return std::move(Err); 285 286 unsigned Code = Client.responseCode(); 287 if (Code && Code != 200) 288 continue; 289 } 290 291 Expected<CachePruningPolicy> PruningPolicyOrErr = 292 parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY")); 293 if (!PruningPolicyOrErr) 294 return PruningPolicyOrErr.takeError(); 295 pruneCache(CacheDirectoryPath, *PruningPolicyOrErr); 296 297 // Return the path to the artifact on disk. 
298 return std::string(AbsCachedArtifactPath); 299 } 300 301 return createStringError(errc::argument_out_of_domain, "build id not found"); 302 } 303 304 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message) 305 : Message(Message.str()) {} 306 307 void DebuginfodLog::push(const Twine &Message) { 308 push(DebuginfodLogEntry(Message)); 309 } 310 311 void DebuginfodLog::push(DebuginfodLogEntry Entry) { 312 { 313 std::lock_guard<std::mutex> Guard(QueueMutex); 314 LogEntryQueue.push(Entry); 315 } 316 QueueCondition.notify_one(); 317 } 318 319 DebuginfodLogEntry DebuginfodLog::pop() { 320 { 321 std::unique_lock<std::mutex> Guard(QueueMutex); 322 // Wait for messages to be pushed into the queue. 323 QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); }); 324 } 325 std::lock_guard<std::mutex> Guard(QueueMutex); 326 if (!LogEntryQueue.size()) 327 llvm_unreachable("Expected message in the queue."); 328 329 DebuginfodLogEntry Entry = LogEntryQueue.front(); 330 LogEntryQueue.pop(); 331 return Entry; 332 } 333 334 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef, 335 DebuginfodLog &Log, ThreadPool &Pool, 336 double MinInterval) 337 : Log(Log), Pool(Pool), MinInterval(MinInterval) { 338 for (StringRef Path : PathsRef) 339 Paths.push_back(Path.str()); 340 } 341 342 Error DebuginfodCollection::update() { 343 std::lock_guard<sys::Mutex> Guard(UpdateMutex); 344 if (UpdateTimer.isRunning()) 345 UpdateTimer.stopTimer(); 346 UpdateTimer.clear(); 347 for (const std::string &Path : Paths) { 348 Log.push("Updating binaries at path " + Path); 349 if (Error Err = findBinaries(Path)) 350 return Err; 351 } 352 Log.push("Updated collection"); 353 UpdateTimer.startTimer(); 354 return Error::success(); 355 } 356 357 Expected<bool> DebuginfodCollection::updateIfStale() { 358 if (!UpdateTimer.isRunning()) 359 return false; 360 UpdateTimer.stopTimer(); 361 double Time = UpdateTimer.getTotalTime().getWallTime(); 362 UpdateTimer.startTimer(); 363 if (Time < 
MinInterval) 364 return false; 365 if (Error Err = update()) 366 return std::move(Err); 367 return true; 368 } 369 370 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) { 371 while (true) { 372 if (Error Err = update()) 373 return Err; 374 std::this_thread::sleep_for(Interval); 375 } 376 llvm_unreachable("updateForever loop should never end"); 377 } 378 379 static bool hasELFMagic(StringRef FilePath) { 380 file_magic Type; 381 std::error_code EC = identify_magic(FilePath, Type); 382 if (EC) 383 return false; 384 switch (Type) { 385 case file_magic::elf: 386 case file_magic::elf_relocatable: 387 case file_magic::elf_executable: 388 case file_magic::elf_shared_object: 389 case file_magic::elf_core: 390 return true; 391 default: 392 return false; 393 } 394 } 395 396 Error DebuginfodCollection::findBinaries(StringRef Path) { 397 std::error_code EC; 398 sys::fs::recursive_directory_iterator I(Twine(Path), EC), E; 399 std::mutex IteratorMutex; 400 ThreadPoolTaskGroup IteratorGroup(Pool); 401 for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount(); 402 WorkerIndex++) { 403 IteratorGroup.async([&, this]() -> void { 404 std::string FilePath; 405 while (true) { 406 { 407 // Check if iteration is over or there is an error during iteration 408 std::lock_guard<std::mutex> Guard(IteratorMutex); 409 if (I == E || EC) 410 return; 411 // Grab a file path from the directory iterator and advance the 412 // iterator. 413 FilePath = I->path(); 414 I.increment(EC); 415 } 416 417 // Inspect the file at this path to determine if it is debuginfo. 
418 if (!hasELFMagic(FilePath)) 419 continue; 420 421 Expected<object::OwningBinary<object::Binary>> BinOrErr = 422 object::createBinary(FilePath); 423 424 if (!BinOrErr) { 425 consumeError(BinOrErr.takeError()); 426 continue; 427 } 428 object::Binary *Bin = std::move(BinOrErr.get().getBinary()); 429 if (!Bin->isObject()) 430 continue; 431 432 // TODO: Support non-ELF binaries 433 object::ELFObjectFileBase *Object = 434 dyn_cast<object::ELFObjectFileBase>(Bin); 435 if (!Object) 436 continue; 437 438 BuildIDRef ID = getBuildID(Object); 439 if (ID.empty()) 440 continue; 441 442 std::string IDString = buildIDToString(ID); 443 if (Object->hasDebugInfo()) { 444 std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex); 445 (void)DebugBinaries.try_emplace(IDString, std::move(FilePath)); 446 } else { 447 std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex); 448 (void)Binaries.try_emplace(IDString, std::move(FilePath)); 449 } 450 } 451 }); 452 } 453 IteratorGroup.wait(); 454 std::unique_lock<std::mutex> Guard(IteratorMutex); 455 if (EC) 456 return errorCodeToError(EC); 457 return Error::success(); 458 } 459 460 Expected<std::optional<std::string>> 461 DebuginfodCollection::getBinaryPath(BuildIDRef ID) { 462 Log.push("getting binary path of ID " + buildIDToString(ID)); 463 std::shared_lock<sys::RWMutex> Guard(BinariesMutex); 464 auto Loc = Binaries.find(buildIDToString(ID)); 465 if (Loc != Binaries.end()) { 466 std::string Path = Loc->getValue(); 467 return Path; 468 } 469 return std::nullopt; 470 } 471 472 Expected<std::optional<std::string>> 473 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) { 474 Log.push("getting debug binary path of ID " + buildIDToString(ID)); 475 std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex); 476 auto Loc = DebugBinaries.find(buildIDToString(ID)); 477 if (Loc != DebugBinaries.end()) { 478 std::string Path = Loc->getValue(); 479 return Path; 480 } 481 return std::nullopt; 482 } 483 484 Expected<std::string> 
DebuginfodCollection::findBinaryPath(BuildIDRef ID) { 485 { 486 // Check collection; perform on-demand update if stale. 487 Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID); 488 if (!PathOrErr) 489 return PathOrErr.takeError(); 490 std::optional<std::string> Path = *PathOrErr; 491 if (!Path) { 492 Expected<bool> UpdatedOrErr = updateIfStale(); 493 if (!UpdatedOrErr) 494 return UpdatedOrErr.takeError(); 495 if (*UpdatedOrErr) { 496 // Try once more. 497 PathOrErr = getBinaryPath(ID); 498 if (!PathOrErr) 499 return PathOrErr.takeError(); 500 Path = *PathOrErr; 501 } 502 } 503 if (Path) 504 return *Path; 505 } 506 507 // Try federation. 508 Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID); 509 if (!PathOrErr) 510 consumeError(PathOrErr.takeError()); 511 512 // Fall back to debug binary. 513 return findDebugBinaryPath(ID); 514 } 515 516 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) { 517 // Check collection; perform on-demand update if stale. 518 Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID); 519 if (!PathOrErr) 520 return PathOrErr.takeError(); 521 std::optional<std::string> Path = *PathOrErr; 522 if (!Path) { 523 Expected<bool> UpdatedOrErr = updateIfStale(); 524 if (!UpdatedOrErr) 525 return UpdatedOrErr.takeError(); 526 if (*UpdatedOrErr) { 527 // Try once more. 528 PathOrErr = getBinaryPath(ID); 529 if (!PathOrErr) 530 return PathOrErr.takeError(); 531 Path = *PathOrErr; 532 } 533 } 534 if (Path) 535 return *Path; 536 537 // Try federation. 
538 return getCachedOrDownloadDebuginfo(ID); 539 } 540 541 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log, 542 DebuginfodCollection &Collection) 543 : Log(Log), Collection(Collection) { 544 cantFail( 545 Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) { 546 Log.push("GET " + Request.UrlPath); 547 std::string IDString; 548 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { 549 Request.setResponse( 550 {404, "text/plain", "Build ID is not a hex string\n"}); 551 return; 552 } 553 object::BuildID ID(IDString.begin(), IDString.end()); 554 Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID); 555 if (Error Err = PathOrErr.takeError()) { 556 consumeError(std::move(Err)); 557 Request.setResponse({404, "text/plain", "Build ID not found\n"}); 558 return; 559 } 560 streamFile(Request, *PathOrErr); 561 })); 562 cantFail( 563 Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) { 564 Log.push("GET " + Request.UrlPath); 565 std::string IDString; 566 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { 567 Request.setResponse( 568 {404, "text/plain", "Build ID is not a hex string\n"}); 569 return; 570 } 571 object::BuildID ID(IDString.begin(), IDString.end()); 572 Expected<std::string> PathOrErr = Collection.findBinaryPath(ID); 573 if (Error Err = PathOrErr.takeError()) { 574 consumeError(std::move(Err)); 575 Request.setResponse({404, "text/plain", "Build ID not found\n"}); 576 return; 577 } 578 streamFile(Request, *PathOrErr); 579 })); 580 } 581 582 } // namespace llvm 583