xref: /freebsd-src/contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownloadArtifact function
/// and its per-artifact wrappers. For the server, this file defines the
/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
/// DebuginfodLog and DebuginfodCollection classes. The getCachedOrDownload*
/// functions retrieve any of the three supported artifact types (executable,
/// debuginfo, source file) associated with a build-id from debuginfod servers.
/// If a source file is to be fetched, its absolute path must be specified in
/// the SourceFilePath argument to getCachedOrDownloadSource. The
/// DebuginfodLogEntry, DebuginfodLog, and DebuginfodCollection are used by the
/// DebuginfodServer to scan the local filesystem for binaries and serve the
/// debuginfod protocol.
21 ///
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/Debuginfod/Debuginfod.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/BinaryFormat/Magic.h"
28 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
29 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
30 #include "llvm/Debuginfod/HTTPClient.h"
31 #include "llvm/Object/BuildID.h"
32 #include "llvm/Object/ELFObjectFile.h"
33 #include "llvm/Support/CachePruning.h"
34 #include "llvm/Support/Caching.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileUtilities.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/ThreadPool.h"
41 #include "llvm/Support/xxhash.h"
42 
43 #include <atomic>
44 #include <optional>
45 #include <thread>
46 
47 namespace llvm {
48 
49 using llvm::object::BuildIDRef;
50 
51 namespace {
52 std::optional<SmallVector<StringRef>> DebuginfodUrls;
53 // Many Readers/Single Writer lock protecting the global debuginfod URL list.
54 llvm::sys::RWMutex UrlsMutex;
55 } // namespace
56 
57 static std::string uniqueKey(llvm::StringRef S) {
58   return utostr(xxh3_64bits(S));
59 }
60 
61 // Returns a binary BuildID as a normalized hex string.
62 // Uses lowercase for compatibility with common debuginfod servers.
63 static std::string buildIDToString(BuildIDRef ID) {
64   return llvm::toHex(ID, /*LowerCase=*/true);
65 }
66 
67 bool canUseDebuginfod() {
68   return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();
69 }
70 
71 SmallVector<StringRef> getDefaultDebuginfodUrls() {
72   std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex);
73   if (!DebuginfodUrls) {
74     // Only read from the environment variable if the user hasn't already
75     // set the value
76     ReadGuard.unlock();
77     std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
78     DebuginfodUrls = SmallVector<StringRef>();
79     if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) {
80       StringRef(DebuginfodUrlsEnv)
81           .split(DebuginfodUrls.value(), " ", -1, false);
82     }
83     WriteGuard.unlock();
84     ReadGuard.lock();
85   }
86   return DebuginfodUrls.value();
87 }
88 
89 // Set the default debuginfod URL list, override the environment variable
90 void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) {
91   std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
92   DebuginfodUrls = URLs;
93 }
94 
95 /// Finds a default local file caching directory for the debuginfod client,
96 /// first checking DEBUGINFOD_CACHE_PATH.
97 Expected<std::string> getDefaultDebuginfodCacheDirectory() {
98   if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
99     return CacheDirectoryEnv;
100 
101   SmallString<64> CacheDirectory;
102   if (!sys::path::cache_directory(CacheDirectory))
103     return createStringError(
104         errc::io_error, "Unable to determine appropriate cache directory.");
105   sys::path::append(CacheDirectory, "llvm-debuginfod", "client");
106   return std::string(CacheDirectory);
107 }
108 
109 std::chrono::milliseconds getDefaultDebuginfodTimeout() {
110   long Timeout;
111   const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
112   if (DebuginfodTimeoutEnv &&
113       to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
114     return std::chrono::milliseconds(Timeout * 1000);
115 
116   return std::chrono::milliseconds(90 * 1000);
117 }
118 
119 /// The following functions fetch a debuginfod artifact to a file in a local
120 /// cache and return the cached file path. They first search the local cache,
121 /// followed by the debuginfod servers.
122 
123 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
124                                                 StringRef SourceFilePath) {
125   SmallString<64> UrlPath;
126   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
127                     buildIDToString(ID), "source",
128                     sys::path::convert_to_slash(SourceFilePath));
129   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
130 }
131 
132 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
133   SmallString<64> UrlPath;
134   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
135                     buildIDToString(ID), "executable");
136   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
137 }
138 
139 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
140   SmallString<64> UrlPath;
141   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
142                     buildIDToString(ID), "debuginfo");
143   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
144 }
145 
146 // General fetching function.
147 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
148                                                   StringRef UrlPath) {
149   SmallString<10> CacheDir;
150 
151   Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
152   if (!CacheDirOrErr)
153     return CacheDirOrErr.takeError();
154   CacheDir = *CacheDirOrErr;
155 
156   return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
157                                      getDefaultDebuginfodUrls(),
158                                      getDefaultDebuginfodTimeout());
159 }
160 
161 namespace {
162 
163 /// A simple handler which streams the returned data to a cache file. The cache
164 /// file is only created if a 200 OK status is observed.
165 class StreamedHTTPResponseHandler : public HTTPResponseHandler {
166   using CreateStreamFn =
167       std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
168   CreateStreamFn CreateStream;
169   HTTPClient &Client;
170   std::unique_ptr<CachedFileStream> FileStream;
171 
172 public:
173   StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
174       : CreateStream(CreateStream), Client(Client) {}
175   virtual ~StreamedHTTPResponseHandler() = default;
176 
177   Error handleBodyChunk(StringRef BodyChunk) override;
178 };
179 
180 } // namespace
181 
182 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
183   if (!FileStream) {
184     unsigned Code = Client.responseCode();
185     if (Code && Code != 200)
186       return Error::success();
187     Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
188         CreateStream();
189     if (!FileStreamOrError)
190       return FileStreamOrError.takeError();
191     FileStream = std::move(*FileStreamOrError);
192   }
193   *FileStream->OS << BodyChunk;
194   return Error::success();
195 }
196 
197 // An over-accepting simplification of the HTTP RFC 7230 spec.
198 static bool isHeader(StringRef S) {
199   StringRef Name;
200   StringRef Value;
201   std::tie(Name, Value) = S.split(':');
202   if (Name.empty() || Value.empty())
203     return false;
204   return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) &&
205          all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; });
206 }
207 
208 static SmallVector<std::string, 0> getHeaders() {
209   const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE");
210   if (!Filename)
211     return {};
212   ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =
213       MemoryBuffer::getFile(Filename, /*IsText=*/true);
214   if (!HeadersFile)
215     return {};
216 
217   SmallVector<std::string, 0> Headers;
218   uint64_t LineNumber = 0;
219   for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) {
220     LineNumber++;
221     if (!Line.empty() && Line.back() == '\r')
222       Line = Line.drop_back();
223     if (!isHeader(Line)) {
224       if (!all_of(Line, llvm::isSpace))
225         WithColor::warning()
226             << "could not parse debuginfod header: " << Filename << ':'
227             << LineNumber << '\n';
228       continue;
229     }
230     Headers.emplace_back(Line);
231   }
232   return Headers;
233 }
234 
235 Expected<std::string> getCachedOrDownloadArtifact(
236     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
237     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
238   SmallString<64> AbsCachedArtifactPath;
239   sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
240                     "llvmcache-" + UniqueKey);
241 
242   Expected<FileCache> CacheOrErr =
243       localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
244   if (!CacheOrErr)
245     return CacheOrErr.takeError();
246 
247   FileCache Cache = *CacheOrErr;
248   // We choose an arbitrary Task parameter as we do not make use of it.
249   unsigned Task = 0;
250   Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");
251   if (!CacheAddStreamOrErr)
252     return CacheAddStreamOrErr.takeError();
253   AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
254   if (!CacheAddStream)
255     return std::string(AbsCachedArtifactPath);
256   // The artifact was not found in the local cache, query the debuginfod
257   // servers.
258   if (!HTTPClient::isAvailable())
259     return createStringError(errc::io_error,
260                              "No working HTTP client is available.");
261 
262   if (!HTTPClient::IsInitialized)
263     return createStringError(
264         errc::io_error,
265         "A working HTTP client is available, but it is not initialized. To "
266         "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
267         "at the beginning of main.");
268 
269   HTTPClient Client;
270   Client.setTimeout(Timeout);
271   for (StringRef ServerUrl : DebuginfodUrls) {
272     SmallString<64> ArtifactUrl;
273     sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
274 
275     // Perform the HTTP request and if successful, write the response body to
276     // the cache.
277     {
278       StreamedHTTPResponseHandler Handler(
279           [&]() { return CacheAddStream(Task, ""); }, Client);
280       HTTPRequest Request(ArtifactUrl);
281       Request.Headers = getHeaders();
282       Error Err = Client.perform(Request, Handler);
283       if (Err)
284         return std::move(Err);
285 
286       unsigned Code = Client.responseCode();
287       if (Code && Code != 200)
288         continue;
289     }
290 
291     Expected<CachePruningPolicy> PruningPolicyOrErr =
292         parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY"));
293     if (!PruningPolicyOrErr)
294       return PruningPolicyOrErr.takeError();
295     pruneCache(CacheDirectoryPath, *PruningPolicyOrErr);
296 
297     // Return the path to the artifact on disk.
298     return std::string(AbsCachedArtifactPath);
299   }
300 
301   return createStringError(errc::argument_out_of_domain, "build id not found");
302 }
303 
304 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
305     : Message(Message.str()) {}
306 
307 void DebuginfodLog::push(const Twine &Message) {
308   push(DebuginfodLogEntry(Message));
309 }
310 
311 void DebuginfodLog::push(DebuginfodLogEntry Entry) {
312   {
313     std::lock_guard<std::mutex> Guard(QueueMutex);
314     LogEntryQueue.push(Entry);
315   }
316   QueueCondition.notify_one();
317 }
318 
319 DebuginfodLogEntry DebuginfodLog::pop() {
320   {
321     std::unique_lock<std::mutex> Guard(QueueMutex);
322     // Wait for messages to be pushed into the queue.
323     QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
324   }
325   std::lock_guard<std::mutex> Guard(QueueMutex);
326   if (!LogEntryQueue.size())
327     llvm_unreachable("Expected message in the queue.");
328 
329   DebuginfodLogEntry Entry = LogEntryQueue.front();
330   LogEntryQueue.pop();
331   return Entry;
332 }
333 
334 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
335                                            DebuginfodLog &Log, ThreadPool &Pool,
336                                            double MinInterval)
337     : Log(Log), Pool(Pool), MinInterval(MinInterval) {
338   for (StringRef Path : PathsRef)
339     Paths.push_back(Path.str());
340 }
341 
342 Error DebuginfodCollection::update() {
343   std::lock_guard<sys::Mutex> Guard(UpdateMutex);
344   if (UpdateTimer.isRunning())
345     UpdateTimer.stopTimer();
346   UpdateTimer.clear();
347   for (const std::string &Path : Paths) {
348     Log.push("Updating binaries at path " + Path);
349     if (Error Err = findBinaries(Path))
350       return Err;
351   }
352   Log.push("Updated collection");
353   UpdateTimer.startTimer();
354   return Error::success();
355 }
356 
357 Expected<bool> DebuginfodCollection::updateIfStale() {
358   if (!UpdateTimer.isRunning())
359     return false;
360   UpdateTimer.stopTimer();
361   double Time = UpdateTimer.getTotalTime().getWallTime();
362   UpdateTimer.startTimer();
363   if (Time < MinInterval)
364     return false;
365   if (Error Err = update())
366     return std::move(Err);
367   return true;
368 }
369 
370 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
371   while (true) {
372     if (Error Err = update())
373       return Err;
374     std::this_thread::sleep_for(Interval);
375   }
376   llvm_unreachable("updateForever loop should never end");
377 }
378 
379 static bool hasELFMagic(StringRef FilePath) {
380   file_magic Type;
381   std::error_code EC = identify_magic(FilePath, Type);
382   if (EC)
383     return false;
384   switch (Type) {
385   case file_magic::elf:
386   case file_magic::elf_relocatable:
387   case file_magic::elf_executable:
388   case file_magic::elf_shared_object:
389   case file_magic::elf_core:
390     return true;
391   default:
392     return false;
393   }
394 }
395 
396 Error DebuginfodCollection::findBinaries(StringRef Path) {
397   std::error_code EC;
398   sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
399   std::mutex IteratorMutex;
400   ThreadPoolTaskGroup IteratorGroup(Pool);
401   for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
402        WorkerIndex++) {
403     IteratorGroup.async([&, this]() -> void {
404       std::string FilePath;
405       while (true) {
406         {
407           // Check if iteration is over or there is an error during iteration
408           std::lock_guard<std::mutex> Guard(IteratorMutex);
409           if (I == E || EC)
410             return;
411           // Grab a file path from the directory iterator and advance the
412           // iterator.
413           FilePath = I->path();
414           I.increment(EC);
415         }
416 
417         // Inspect the file at this path to determine if it is debuginfo.
418         if (!hasELFMagic(FilePath))
419           continue;
420 
421         Expected<object::OwningBinary<object::Binary>> BinOrErr =
422             object::createBinary(FilePath);
423 
424         if (!BinOrErr) {
425           consumeError(BinOrErr.takeError());
426           continue;
427         }
428         object::Binary *Bin = std::move(BinOrErr.get().getBinary());
429         if (!Bin->isObject())
430           continue;
431 
432         // TODO: Support non-ELF binaries
433         object::ELFObjectFileBase *Object =
434             dyn_cast<object::ELFObjectFileBase>(Bin);
435         if (!Object)
436           continue;
437 
438         BuildIDRef ID = getBuildID(Object);
439         if (ID.empty())
440           continue;
441 
442         std::string IDString = buildIDToString(ID);
443         if (Object->hasDebugInfo()) {
444           std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
445           (void)DebugBinaries.try_emplace(IDString, std::move(FilePath));
446         } else {
447           std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
448           (void)Binaries.try_emplace(IDString, std::move(FilePath));
449         }
450       }
451     });
452   }
453   IteratorGroup.wait();
454   std::unique_lock<std::mutex> Guard(IteratorMutex);
455   if (EC)
456     return errorCodeToError(EC);
457   return Error::success();
458 }
459 
460 Expected<std::optional<std::string>>
461 DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
462   Log.push("getting binary path of ID " + buildIDToString(ID));
463   std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
464   auto Loc = Binaries.find(buildIDToString(ID));
465   if (Loc != Binaries.end()) {
466     std::string Path = Loc->getValue();
467     return Path;
468   }
469   return std::nullopt;
470 }
471 
472 Expected<std::optional<std::string>>
473 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
474   Log.push("getting debug binary path of ID " + buildIDToString(ID));
475   std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
476   auto Loc = DebugBinaries.find(buildIDToString(ID));
477   if (Loc != DebugBinaries.end()) {
478     std::string Path = Loc->getValue();
479     return Path;
480   }
481   return std::nullopt;
482 }
483 
484 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
485   {
486     // Check collection; perform on-demand update if stale.
487     Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID);
488     if (!PathOrErr)
489       return PathOrErr.takeError();
490     std::optional<std::string> Path = *PathOrErr;
491     if (!Path) {
492       Expected<bool> UpdatedOrErr = updateIfStale();
493       if (!UpdatedOrErr)
494         return UpdatedOrErr.takeError();
495       if (*UpdatedOrErr) {
496         // Try once more.
497         PathOrErr = getBinaryPath(ID);
498         if (!PathOrErr)
499           return PathOrErr.takeError();
500         Path = *PathOrErr;
501       }
502     }
503     if (Path)
504       return *Path;
505   }
506 
507   // Try federation.
508   Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
509   if (!PathOrErr)
510     consumeError(PathOrErr.takeError());
511 
512   // Fall back to debug binary.
513   return findDebugBinaryPath(ID);
514 }
515 
516 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
517   // Check collection; perform on-demand update if stale.
518   Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
519   if (!PathOrErr)
520     return PathOrErr.takeError();
521   std::optional<std::string> Path = *PathOrErr;
522   if (!Path) {
523     Expected<bool> UpdatedOrErr = updateIfStale();
524     if (!UpdatedOrErr)
525       return UpdatedOrErr.takeError();
526     if (*UpdatedOrErr) {
527       // Try once more.
528       PathOrErr = getBinaryPath(ID);
529       if (!PathOrErr)
530         return PathOrErr.takeError();
531       Path = *PathOrErr;
532     }
533   }
534   if (Path)
535     return *Path;
536 
537   // Try federation.
538   return getCachedOrDownloadDebuginfo(ID);
539 }
540 
541 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
542                                    DebuginfodCollection &Collection)
543     : Log(Log), Collection(Collection) {
544   cantFail(
545       Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
546         Log.push("GET " + Request.UrlPath);
547         std::string IDString;
548         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
549           Request.setResponse(
550               {404, "text/plain", "Build ID is not a hex string\n"});
551           return;
552         }
553         object::BuildID ID(IDString.begin(), IDString.end());
554         Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
555         if (Error Err = PathOrErr.takeError()) {
556           consumeError(std::move(Err));
557           Request.setResponse({404, "text/plain", "Build ID not found\n"});
558           return;
559         }
560         streamFile(Request, *PathOrErr);
561       }));
562   cantFail(
563       Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
564         Log.push("GET " + Request.UrlPath);
565         std::string IDString;
566         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
567           Request.setResponse(
568               {404, "text/plain", "Build ID is not a hex string\n"});
569           return;
570         }
571         object::BuildID ID(IDString.begin(), IDString.end());
572         Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
573         if (Error Err = PathOrErr.takeError()) {
574           consumeError(std::move(Err));
575           Request.setResponse({404, "text/plain", "Build ID not found\n"});
576           return;
577         }
578         streamFile(Request, *PathOrErr);
579       }));
580 }
581 
582 } // namespace llvm
583