xref: /llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp (revision aadaaface2ec96ee30d92bf46faa41dd9e68b64d)
1 //===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownloadArtifact function
/// and its wrappers getCachedOrDownloadSource, getCachedOrDownloadExecutable
/// and getCachedOrDownloadDebuginfo. For the server, this file defines the
/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
/// DebuginfodLog and DebuginfodCollection classes. The client functions
/// retrieve any of the three supported artifact types (executable, debuginfo,
/// source file) associated with a build-id from debuginfod servers. If a
/// source file is to be fetched, its absolute path must be specified in the
/// SourceFilePath argument to getCachedOrDownloadSource. The
/// DebuginfodLogEntry, DebuginfodLog, and DebuginfodCollection are used by
/// the DebuginfodServer to scan the local filesystem for binaries and serve
/// the debuginfod protocol.
21 ///
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/Debuginfod/Debuginfod.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/BinaryFormat/Magic.h"
28 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
29 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
30 #include "llvm/Debuginfod/HTTPClient.h"
31 #include "llvm/Object/BuildID.h"
32 #include "llvm/Object/ELFObjectFile.h"
33 #include "llvm/Support/CachePruning.h"
34 #include "llvm/Support/Caching.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileUtilities.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/ThreadPool.h"
41 #include "llvm/Support/xxhash.h"
42 
43 #include <atomic>
44 #include <thread>
45 
46 namespace llvm {
47 
48 using llvm::object::BuildIDRef;
49 
50 static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
51 
52 // Returns a binary BuildID as a normalized hex string.
53 // Uses lowercase for compatibility with common debuginfod servers.
54 static std::string buildIDToString(BuildIDRef ID) {
55   return llvm::toHex(ID, /*LowerCase=*/true);
56 }
57 
58 Expected<SmallVector<StringRef>> getDefaultDebuginfodUrls() {
59   const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS");
60   if (DebuginfodUrlsEnv == nullptr)
61     return SmallVector<StringRef>();
62 
63   SmallVector<StringRef> DebuginfodUrls;
64   StringRef(DebuginfodUrlsEnv).split(DebuginfodUrls, " ");
65   return DebuginfodUrls;
66 }
67 
68 /// Finds a default local file caching directory for the debuginfod client,
69 /// first checking DEBUGINFOD_CACHE_PATH.
70 Expected<std::string> getDefaultDebuginfodCacheDirectory() {
71   if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
72     return CacheDirectoryEnv;
73 
74   SmallString<64> CacheDirectory;
75   if (!sys::path::cache_directory(CacheDirectory))
76     return createStringError(
77         errc::io_error, "Unable to determine appropriate cache directory.");
78   sys::path::append(CacheDirectory, "llvm-debuginfod", "client");
79   return std::string(CacheDirectory);
80 }
81 
82 std::chrono::milliseconds getDefaultDebuginfodTimeout() {
83   long Timeout;
84   const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
85   if (DebuginfodTimeoutEnv &&
86       to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
87     return std::chrono::milliseconds(Timeout * 1000);
88 
89   return std::chrono::milliseconds(90 * 1000);
90 }
91 
92 /// The following functions fetch a debuginfod artifact to a file in a local
93 /// cache and return the cached file path. They first search the local cache,
94 /// followed by the debuginfod servers.
95 
96 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
97                                                 StringRef SourceFilePath) {
98   SmallString<64> UrlPath;
99   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
100                     buildIDToString(ID), "source",
101                     sys::path::convert_to_slash(SourceFilePath));
102   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
103 }
104 
105 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
106   SmallString<64> UrlPath;
107   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
108                     buildIDToString(ID), "executable");
109   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
110 }
111 
112 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
113   SmallString<64> UrlPath;
114   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
115                     buildIDToString(ID), "debuginfo");
116   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
117 }
118 
119 // General fetching function.
120 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
121                                                   StringRef UrlPath) {
122   SmallString<10> CacheDir;
123 
124   Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
125   if (!CacheDirOrErr)
126     return CacheDirOrErr.takeError();
127   CacheDir = *CacheDirOrErr;
128 
129   Expected<SmallVector<StringRef>> DebuginfodUrlsOrErr =
130       getDefaultDebuginfodUrls();
131   if (!DebuginfodUrlsOrErr)
132     return DebuginfodUrlsOrErr.takeError();
133   SmallVector<StringRef> &DebuginfodUrls = *DebuginfodUrlsOrErr;
134   return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
135                                      DebuginfodUrls,
136                                      getDefaultDebuginfodTimeout());
137 }
138 
139 namespace {
140 
141 /// A simple handler which streams the returned data to a cache file. The cache
142 /// file is only created if a 200 OK status is observed.
143 class StreamedHTTPResponseHandler : public HTTPResponseHandler {
144   using CreateStreamFn =
145       std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
146   CreateStreamFn CreateStream;
147   HTTPClient &Client;
148   std::unique_ptr<CachedFileStream> FileStream;
149 
150 public:
151   StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
152       : CreateStream(CreateStream), Client(Client) {}
153   virtual ~StreamedHTTPResponseHandler() = default;
154 
155   Error handleBodyChunk(StringRef BodyChunk) override;
156 };
157 
158 } // namespace
159 
160 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
161   if (!FileStream) {
162     if (Client.responseCode() != 200)
163       return Error::success();
164     Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
165         CreateStream();
166     if (!FileStreamOrError)
167       return FileStreamOrError.takeError();
168     FileStream = std::move(*FileStreamOrError);
169   }
170   *FileStream->OS << BodyChunk;
171   return Error::success();
172 }
173 
174 // An over-accepting simplification of the HTTP RFC 7230 spec.
175 static bool isHeader(StringRef S) {
176   StringRef Name;
177   StringRef Value;
178   std::tie(Name, Value) = S.split(':');
179   if (Name.empty() || Value.empty())
180     return false;
181   return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) &&
182          all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; });
183 }
184 
185 static SmallVector<std::string, 0> getHeaders() {
186   const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE");
187   if (!Filename)
188     return {};
189   ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =
190       MemoryBuffer::getFile(Filename, /*IsText=*/true);
191   if (!HeadersFile)
192     return {};
193 
194   SmallVector<std::string, 0> Headers;
195   uint64_t LineNumber = 0;
196   for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) {
197     LineNumber++;
198     if (!Line.empty() && Line.back() == '\r')
199       Line = Line.drop_back();
200     if (!isHeader(Line)) {
201       if (!all_of(Line, llvm::isSpace))
202         WithColor::warning()
203             << "could not parse debuginfod header: " << Filename << ':'
204             << LineNumber << '\n';
205       continue;
206     }
207     Headers.emplace_back(Line);
208   }
209   return Headers;
210 }
211 
212 Expected<std::string> getCachedOrDownloadArtifact(
213     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
214     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
215   SmallString<64> AbsCachedArtifactPath;
216   sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
217                     "llvmcache-" + UniqueKey);
218 
219   Expected<FileCache> CacheOrErr =
220       localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
221   if (!CacheOrErr)
222     return CacheOrErr.takeError();
223 
224   FileCache Cache = *CacheOrErr;
225   // We choose an arbitrary Task parameter as we do not make use of it.
226   unsigned Task = 0;
227   Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");
228   if (!CacheAddStreamOrErr)
229     return CacheAddStreamOrErr.takeError();
230   AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
231   if (!CacheAddStream)
232     return std::string(AbsCachedArtifactPath);
233   // The artifact was not found in the local cache, query the debuginfod
234   // servers.
235   if (!HTTPClient::isAvailable())
236     return createStringError(errc::io_error,
237                              "No working HTTP client is available.");
238 
239   if (!HTTPClient::IsInitialized)
240     return createStringError(
241         errc::io_error,
242         "A working HTTP client is available, but it is not initialized. To "
243         "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
244         "at the beginning of main.");
245 
246   HTTPClient Client;
247   Client.setTimeout(Timeout);
248   for (StringRef ServerUrl : DebuginfodUrls) {
249     SmallString<64> ArtifactUrl;
250     sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
251 
252     // Perform the HTTP request and if successful, write the response body to
253     // the cache.
254     StreamedHTTPResponseHandler Handler(
255         [&]() { return CacheAddStream(Task, ""); }, Client);
256     HTTPRequest Request(ArtifactUrl);
257     Request.Headers = getHeaders();
258     Error Err = Client.perform(Request, Handler);
259     if (Err)
260       return std::move(Err);
261 
262     if (Client.responseCode() != 200)
263       continue;
264 
265     // Return the path to the artifact on disk.
266     return std::string(AbsCachedArtifactPath);
267   }
268 
269   return createStringError(errc::argument_out_of_domain, "build id not found");
270 }
271 
272 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
273     : Message(Message.str()) {}
274 
275 void DebuginfodLog::push(const Twine &Message) {
276   push(DebuginfodLogEntry(Message));
277 }
278 
279 void DebuginfodLog::push(DebuginfodLogEntry Entry) {
280   {
281     std::lock_guard<std::mutex> Guard(QueueMutex);
282     LogEntryQueue.push(Entry);
283   }
284   QueueCondition.notify_one();
285 }
286 
287 DebuginfodLogEntry DebuginfodLog::pop() {
288   {
289     std::unique_lock<std::mutex> Guard(QueueMutex);
290     // Wait for messages to be pushed into the queue.
291     QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
292   }
293   std::lock_guard<std::mutex> Guard(QueueMutex);
294   if (!LogEntryQueue.size())
295     llvm_unreachable("Expected message in the queue.");
296 
297   DebuginfodLogEntry Entry = LogEntryQueue.front();
298   LogEntryQueue.pop();
299   return Entry;
300 }
301 
302 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
303                                            DebuginfodLog &Log, ThreadPool &Pool,
304                                            double MinInterval)
305     : Log(Log), Pool(Pool), MinInterval(MinInterval) {
306   for (StringRef Path : PathsRef)
307     Paths.push_back(Path.str());
308 }
309 
310 Error DebuginfodCollection::update() {
311   std::lock_guard<sys::Mutex> Guard(UpdateMutex);
312   if (UpdateTimer.isRunning())
313     UpdateTimer.stopTimer();
314   UpdateTimer.clear();
315   for (const std::string &Path : Paths) {
316     Log.push("Updating binaries at path " + Path);
317     if (Error Err = findBinaries(Path))
318       return Err;
319   }
320   Log.push("Updated collection");
321   UpdateTimer.startTimer();
322   return Error::success();
323 }
324 
325 Expected<bool> DebuginfodCollection::updateIfStale() {
326   if (!UpdateTimer.isRunning())
327     return false;
328   UpdateTimer.stopTimer();
329   double Time = UpdateTimer.getTotalTime().getWallTime();
330   UpdateTimer.startTimer();
331   if (Time < MinInterval)
332     return false;
333   if (Error Err = update())
334     return std::move(Err);
335   return true;
336 }
337 
338 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
339   while (true) {
340     if (Error Err = update())
341       return Err;
342     std::this_thread::sleep_for(Interval);
343   }
344   llvm_unreachable("updateForever loop should never end");
345 }
346 
347 static bool hasELFMagic(StringRef FilePath) {
348   file_magic Type;
349   std::error_code EC = identify_magic(FilePath, Type);
350   if (EC)
351     return false;
352   switch (Type) {
353   case file_magic::elf:
354   case file_magic::elf_relocatable:
355   case file_magic::elf_executable:
356   case file_magic::elf_shared_object:
357   case file_magic::elf_core:
358     return true;
359   default:
360     return false;
361   }
362 }
363 
364 Error DebuginfodCollection::findBinaries(StringRef Path) {
365   std::error_code EC;
366   sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
367   std::mutex IteratorMutex;
368   ThreadPoolTaskGroup IteratorGroup(Pool);
369   for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
370        WorkerIndex++) {
371     IteratorGroup.async([&, this]() -> void {
372       std::string FilePath;
373       while (true) {
374         {
375           // Check if iteration is over or there is an error during iteration
376           std::lock_guard<std::mutex> Guard(IteratorMutex);
377           if (I == E || EC)
378             return;
379           // Grab a file path from the directory iterator and advance the
380           // iterator.
381           FilePath = I->path();
382           I.increment(EC);
383         }
384 
385         // Inspect the file at this path to determine if it is debuginfo.
386         if (!hasELFMagic(FilePath))
387           continue;
388 
389         Expected<object::OwningBinary<object::Binary>> BinOrErr =
390             object::createBinary(FilePath);
391 
392         if (!BinOrErr) {
393           consumeError(BinOrErr.takeError());
394           continue;
395         }
396         object::Binary *Bin = std::move(BinOrErr.get().getBinary());
397         if (!Bin->isObject())
398           continue;
399 
400         // TODO: Support non-ELF binaries
401         object::ELFObjectFileBase *Object =
402             dyn_cast<object::ELFObjectFileBase>(Bin);
403         if (!Object)
404           continue;
405 
406         Optional<BuildIDRef> ID = getBuildID(Object);
407         if (!ID)
408           continue;
409 
410         std::string IDString = buildIDToString(ID.value());
411         if (Object->hasDebugInfo()) {
412           std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
413           DebugBinaries[IDString] = FilePath;
414         } else {
415           std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
416           Binaries[IDString] = FilePath;
417         }
418       }
419     });
420   }
421   IteratorGroup.wait();
422   std::unique_lock<std::mutex> Guard(IteratorMutex);
423   if (EC)
424     return errorCodeToError(EC);
425   return Error::success();
426 }
427 
428 Expected<Optional<std::string>>
429 DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
430   Log.push("getting binary path of ID " + buildIDToString(ID));
431   std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
432   auto Loc = Binaries.find(buildIDToString(ID));
433   if (Loc != Binaries.end()) {
434     std::string Path = Loc->getValue();
435     return Path;
436   }
437   return std::nullopt;
438 }
439 
440 Expected<Optional<std::string>>
441 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
442   Log.push("getting debug binary path of ID " + buildIDToString(ID));
443   std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
444   auto Loc = DebugBinaries.find(buildIDToString(ID));
445   if (Loc != DebugBinaries.end()) {
446     std::string Path = Loc->getValue();
447     return Path;
448   }
449   return std::nullopt;
450 }
451 
452 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
453   {
454     // Check collection; perform on-demand update if stale.
455     Expected<Optional<std::string>> PathOrErr = getBinaryPath(ID);
456     if (!PathOrErr)
457       return PathOrErr.takeError();
458     Optional<std::string> Path = *PathOrErr;
459     if (!Path) {
460       Expected<bool> UpdatedOrErr = updateIfStale();
461       if (!UpdatedOrErr)
462         return UpdatedOrErr.takeError();
463       if (*UpdatedOrErr) {
464         // Try once more.
465         PathOrErr = getBinaryPath(ID);
466         if (!PathOrErr)
467           return PathOrErr.takeError();
468         Path = *PathOrErr;
469       }
470     }
471     if (Path)
472       return Path.value();
473   }
474 
475   // Try federation.
476   Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
477   if (!PathOrErr)
478     consumeError(PathOrErr.takeError());
479 
480   // Fall back to debug binary.
481   return findDebugBinaryPath(ID);
482 }
483 
484 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
485   // Check collection; perform on-demand update if stale.
486   Expected<Optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
487   if (!PathOrErr)
488     return PathOrErr.takeError();
489   Optional<std::string> Path = *PathOrErr;
490   if (!Path) {
491     Expected<bool> UpdatedOrErr = updateIfStale();
492     if (!UpdatedOrErr)
493       return UpdatedOrErr.takeError();
494     if (*UpdatedOrErr) {
495       // Try once more.
496       PathOrErr = getBinaryPath(ID);
497       if (!PathOrErr)
498         return PathOrErr.takeError();
499       Path = *PathOrErr;
500     }
501   }
502   if (Path)
503     return Path.value();
504 
505   // Try federation.
506   return getCachedOrDownloadDebuginfo(ID);
507 }
508 
509 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
510                                    DebuginfodCollection &Collection)
511     : Log(Log), Collection(Collection) {
512   cantFail(
513       Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
514         Log.push("GET " + Request.UrlPath);
515         std::string IDString;
516         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
517           Request.setResponse(
518               {404, "text/plain", "Build ID is not a hex string\n"});
519           return;
520         }
521         object::BuildID ID(IDString.begin(), IDString.end());
522         Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
523         if (Error Err = PathOrErr.takeError()) {
524           consumeError(std::move(Err));
525           Request.setResponse({404, "text/plain", "Build ID not found\n"});
526           return;
527         }
528         streamFile(Request, *PathOrErr);
529       }));
530   cantFail(
531       Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
532         Log.push("GET " + Request.UrlPath);
533         std::string IDString;
534         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
535           Request.setResponse(
536               {404, "text/plain", "Build ID is not a hex string\n"});
537           return;
538         }
539         object::BuildID ID(IDString.begin(), IDString.end());
540         Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
541         if (Error Err = PathOrErr.takeError()) {
542           consumeError(std::move(Err));
543           Request.setResponse({404, "text/plain", "Build ID not found\n"});
544           return;
545         }
546         streamFile(Request, *PathOrErr);
547       }));
548 }
549 
550 } // namespace llvm
551