xref: /llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp (revision 0d1d1b363d9588c192152cec4f256f3edfea7e48)
1 //===-- LLVMSymbolize.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation for LLVM symbolization library.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
14 
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/DebugInfo/BTF/BTFContext.h"
17 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
18 #include "llvm/DebugInfo/PDB/PDB.h"
19 #include "llvm/DebugInfo/PDB/PDBContext.h"
20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
21 #include "llvm/Demangle/Demangle.h"
22 #include "llvm/Object/BuildID.h"
23 #include "llvm/Object/COFF.h"
24 #include "llvm/Object/ELFObjectFile.h"
25 #include "llvm/Object/MachO.h"
26 #include "llvm/Object/MachOUniversal.h"
27 #include "llvm/Support/CRC.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/DataExtractor.h"
30 #include "llvm/Support/Errc.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/MemoryBuffer.h"
33 #include "llvm/Support/Path.h"
34 #include <cassert>
35 #include <cstring>
36 
37 namespace llvm {
38 namespace codeview {
39 union DebugInfo;
40 }
41 namespace symbolize {
42 
43 LLVMSymbolizer::LLVMSymbolizer() = default;
44 
45 LLVMSymbolizer::LLVMSymbolizer(const Options &Opts)
46     : Opts(Opts),
47       BIDFetcher(std::make_unique<BuildIDFetcher>(Opts.DebugFileDirectory)) {}
48 
49 LLVMSymbolizer::~LLVMSymbolizer() = default;
50 
51 template <typename T>
52 Expected<DILineInfo>
53 LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier,
54                                     object::SectionedAddress ModuleOffset) {
55 
56   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
57   if (!InfoOrErr)
58     return InfoOrErr.takeError();
59 
60   SymbolizableModule *Info = *InfoOrErr;
61 
62   // A null module means an error has already been reported. Return an empty
63   // result.
64   if (!Info)
65     return DILineInfo();
66 
67   // If the user is giving us relative addresses, add the preferred base of the
68   // object to the offset before we do the query. It's what DIContext expects.
69   if (Opts.RelativeAddresses)
70     ModuleOffset.Address += Info->getModulePreferredBase();
71 
72   DILineInfo LineInfo = Info->symbolizeCode(
73       ModuleOffset,
74       DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions,
75                           Opts.SkipLineZero),
76       Opts.UseSymbolTable);
77   if (Opts.Demangle)
78     LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
79   return LineInfo;
80 }
81 
82 Expected<DILineInfo>
83 LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj,
84                               object::SectionedAddress ModuleOffset) {
85   return symbolizeCodeCommon(Obj, ModuleOffset);
86 }
87 
88 Expected<DILineInfo>
89 LLVMSymbolizer::symbolizeCode(StringRef ModuleName,
90                               object::SectionedAddress ModuleOffset) {
91   return symbolizeCodeCommon(ModuleName, ModuleOffset);
92 }
93 
94 Expected<DILineInfo>
95 LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID,
96                               object::SectionedAddress ModuleOffset) {
97   return symbolizeCodeCommon(BuildID, ModuleOffset);
98 }
99 
100 template <typename T>
101 Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon(
102     const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) {
103   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
104   if (!InfoOrErr)
105     return InfoOrErr.takeError();
106 
107   SymbolizableModule *Info = *InfoOrErr;
108 
109   // A null module means an error has already been reported. Return an empty
110   // result.
111   if (!Info)
112     return DIInliningInfo();
113 
114   // If the user is giving us relative addresses, add the preferred base of the
115   // object to the offset before we do the query. It's what DIContext expects.
116   if (Opts.RelativeAddresses)
117     ModuleOffset.Address += Info->getModulePreferredBase();
118 
119   DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
120       ModuleOffset,
121       DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions,
122                           Opts.SkipLineZero),
123       Opts.UseSymbolTable);
124   if (Opts.Demangle) {
125     for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
126       auto *Frame = InlinedContext.getMutableFrame(i);
127       Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
128     }
129   }
130   return InlinedContext;
131 }
132 
133 Expected<DIInliningInfo>
134 LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj,
135                                      object::SectionedAddress ModuleOffset) {
136   return symbolizeInlinedCodeCommon(Obj, ModuleOffset);
137 }
138 
139 Expected<DIInliningInfo>
140 LLVMSymbolizer::symbolizeInlinedCode(StringRef ModuleName,
141                                      object::SectionedAddress ModuleOffset) {
142   return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset);
143 }
144 
145 Expected<DIInliningInfo>
146 LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
147                                      object::SectionedAddress ModuleOffset) {
148   return symbolizeInlinedCodeCommon(BuildID, ModuleOffset);
149 }
150 
151 template <typename T>
152 Expected<DIGlobal>
153 LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier,
154                                     object::SectionedAddress ModuleOffset) {
155 
156   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
157   if (!InfoOrErr)
158     return InfoOrErr.takeError();
159 
160   SymbolizableModule *Info = *InfoOrErr;
161   // A null module means an error has already been reported. Return an empty
162   // result.
163   if (!Info)
164     return DIGlobal();
165 
166   // If the user is giving us relative addresses, add the preferred base of
167   // the object to the offset before we do the query. It's what DIContext
168   // expects.
169   if (Opts.RelativeAddresses)
170     ModuleOffset.Address += Info->getModulePreferredBase();
171 
172   DIGlobal Global = Info->symbolizeData(ModuleOffset);
173   if (Opts.Demangle)
174     Global.Name = DemangleName(Global.Name, Info);
175   return Global;
176 }
177 
178 Expected<DIGlobal>
179 LLVMSymbolizer::symbolizeData(const ObjectFile &Obj,
180                               object::SectionedAddress ModuleOffset) {
181   return symbolizeDataCommon(Obj, ModuleOffset);
182 }
183 
184 Expected<DIGlobal>
185 LLVMSymbolizer::symbolizeData(StringRef ModuleName,
186                               object::SectionedAddress ModuleOffset) {
187   return symbolizeDataCommon(ModuleName, ModuleOffset);
188 }
189 
190 Expected<DIGlobal>
191 LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID,
192                               object::SectionedAddress ModuleOffset) {
193   return symbolizeDataCommon(BuildID, ModuleOffset);
194 }
195 
196 template <typename T>
197 Expected<std::vector<DILocal>>
198 LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier,
199                                      object::SectionedAddress ModuleOffset) {
200   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
201   if (!InfoOrErr)
202     return InfoOrErr.takeError();
203 
204   SymbolizableModule *Info = *InfoOrErr;
205   // A null module means an error has already been reported. Return an empty
206   // result.
207   if (!Info)
208     return std::vector<DILocal>();
209 
210   // If the user is giving us relative addresses, add the preferred base of
211   // the object to the offset before we do the query. It's what DIContext
212   // expects.
213   if (Opts.RelativeAddresses)
214     ModuleOffset.Address += Info->getModulePreferredBase();
215 
216   return Info->symbolizeFrame(ModuleOffset);
217 }
218 
219 Expected<std::vector<DILocal>>
220 LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj,
221                                object::SectionedAddress ModuleOffset) {
222   return symbolizeFrameCommon(Obj, ModuleOffset);
223 }
224 
225 Expected<std::vector<DILocal>>
226 LLVMSymbolizer::symbolizeFrame(StringRef ModuleName,
227                                object::SectionedAddress ModuleOffset) {
228   return symbolizeFrameCommon(ModuleName, ModuleOffset);
229 }
230 
231 Expected<std::vector<DILocal>>
232 LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
233                                object::SectionedAddress ModuleOffset) {
234   return symbolizeFrameCommon(BuildID, ModuleOffset);
235 }
236 
237 template <typename T>
238 Expected<std::vector<DILineInfo>>
239 LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
240                                  uint64_t Offset) {
241   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
242   if (!InfoOrErr)
243     return InfoOrErr.takeError();
244 
245   SymbolizableModule *Info = *InfoOrErr;
246   std::vector<DILineInfo> Result;
247 
248   // A null module means an error has already been reported. Return an empty
249   // result.
250   if (!Info)
251     return Result;
252 
253   for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) {
254     DILineInfo LineInfo = Info->symbolizeCode(
255         A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
256         Opts.UseSymbolTable);
257     if (LineInfo.FileName != DILineInfo::BadString) {
258       if (Opts.Demangle)
259         LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
260       Result.push_back(LineInfo);
261     }
262   }
263 
264   return Result;
265 }
266 
267 Expected<std::vector<DILineInfo>>
268 LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol,
269                            uint64_t Offset) {
270   return findSymbolCommon(Obj, Symbol, Offset);
271 }
272 
273 Expected<std::vector<DILineInfo>>
274 LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol,
275                            uint64_t Offset) {
276   return findSymbolCommon(ModuleName, Symbol, Offset);
277 }
278 
279 Expected<std::vector<DILineInfo>>
280 LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
281                            uint64_t Offset) {
282   return findSymbolCommon(BuildID, Symbol, Offset);
283 }
284 
285 void LLVMSymbolizer::flush() {
286   ObjectForUBPathAndArch.clear();
287   LRUBinaries.clear();
288   CacheSize = 0;
289   BinaryForPath.clear();
290   ObjectPairForPathArch.clear();
291   Modules.clear();
292   BuildIDPaths.clear();
293 }
294 
295 namespace {
296 
297 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in
298 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
299 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
300 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
301 std::string getDarwinDWARFResourceForPath(const std::string &Path,
302                                           const std::string &Basename) {
303   SmallString<16> ResourceName = StringRef(Path);
304   if (sys::path::extension(Path) != ".dSYM") {
305     ResourceName += ".dSYM";
306   }
307   sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
308   sys::path::append(ResourceName, Basename);
309   return std::string(ResourceName);
310 }
311 
312 bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
313   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
314       MemoryBuffer::getFileOrSTDIN(Path);
315   if (!MB)
316     return false;
317   return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer()));
318 }
319 
320 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
321                              uint32_t &CRCHash) {
322   if (!Obj)
323     return false;
324   for (const SectionRef &Section : Obj->sections()) {
325     StringRef Name;
326     consumeError(Section.getName().moveInto(Name));
327 
328     Name = Name.substr(Name.find_first_not_of("._"));
329     if (Name == "gnu_debuglink") {
330       Expected<StringRef> ContentsOrErr = Section.getContents();
331       if (!ContentsOrErr) {
332         consumeError(ContentsOrErr.takeError());
333         return false;
334       }
335       DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
336       uint64_t Offset = 0;
337       if (const char *DebugNameStr = DE.getCStr(&Offset)) {
338         // 4-byte align the offset.
339         Offset = (Offset + 3) & ~0x3;
340         if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
341           DebugName = DebugNameStr;
342           CRCHash = DE.getU32(&Offset);
343           return true;
344         }
345       }
346       break;
347     }
348   }
349   return false;
350 }
351 
352 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
353                              const MachOObjectFile *Obj) {
354   ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
355   ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
356   if (dbg_uuid.empty() || bin_uuid.empty())
357     return false;
358   return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
359 }
360 
361 } // end anonymous namespace
362 
363 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
364                                            const MachOObjectFile *MachExeObj,
365                                            const std::string &ArchName) {
366   // On Darwin we may find DWARF in separate object file in
367   // resource directory.
368   std::vector<std::string> DsymPaths;
369   StringRef Filename = sys::path::filename(ExePath);
370   DsymPaths.push_back(
371       getDarwinDWARFResourceForPath(ExePath, std::string(Filename)));
372   for (const auto &Path : Opts.DsymHints) {
373     DsymPaths.push_back(
374         getDarwinDWARFResourceForPath(Path, std::string(Filename)));
375   }
376   for (const auto &Path : DsymPaths) {
377     auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
378     if (!DbgObjOrErr) {
379       // Ignore errors, the file might not exist.
380       consumeError(DbgObjOrErr.takeError());
381       continue;
382     }
383     ObjectFile *DbgObj = DbgObjOrErr.get();
384     if (!DbgObj)
385       continue;
386     const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
387     if (!MachDbgObj)
388       continue;
389     if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
390       return DbgObj;
391   }
392   return nullptr;
393 }
394 
395 ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
396                                                   const ObjectFile *Obj,
397                                                   const std::string &ArchName) {
398   std::string DebuglinkName;
399   uint32_t CRCHash;
400   std::string DebugBinaryPath;
401   if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
402     return nullptr;
403   if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
404     return nullptr;
405   auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
406   if (!DbgObjOrErr) {
407     // Ignore errors, the file might not exist.
408     consumeError(DbgObjOrErr.takeError());
409     return nullptr;
410   }
411   return DbgObjOrErr.get();
412 }
413 
414 ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
415                                                 const ELFObjectFileBase *Obj,
416                                                 const std::string &ArchName) {
417   auto BuildID = getBuildID(Obj);
418   if (BuildID.size() < 2)
419     return nullptr;
420   std::string DebugBinaryPath;
421   if (!getOrFindDebugBinary(BuildID, DebugBinaryPath))
422     return nullptr;
423   auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
424   if (!DbgObjOrErr) {
425     consumeError(DbgObjOrErr.takeError());
426     return nullptr;
427   }
428   return DbgObjOrErr.get();
429 }
430 
431 bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath,
432                                      const std::string &DebuglinkName,
433                                      uint32_t CRCHash, std::string &Result) {
434   SmallString<16> OrigDir(OrigPath);
435   llvm::sys::path::remove_filename(OrigDir);
436   SmallString<16> DebugPath = OrigDir;
437   // Try relative/path/to/original_binary/debuglink_name
438   llvm::sys::path::append(DebugPath, DebuglinkName);
439   if (checkFileCRC(DebugPath, CRCHash)) {
440     Result = std::string(DebugPath);
441     return true;
442   }
443   // Try relative/path/to/original_binary/.debug/debuglink_name
444   DebugPath = OrigDir;
445   llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
446   if (checkFileCRC(DebugPath, CRCHash)) {
447     Result = std::string(DebugPath);
448     return true;
449   }
450   // Make the path absolute so that lookups will go to
451   // "/usr/lib/debug/full/path/to/debug", not
452   // "/usr/lib/debug/to/debug"
453   llvm::sys::fs::make_absolute(OrigDir);
454   if (!Opts.FallbackDebugPath.empty()) {
455     // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
456     DebugPath = Opts.FallbackDebugPath;
457   } else {
458 #if defined(__NetBSD__)
459     // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
460     DebugPath = "/usr/libdata/debug";
461 #else
462     // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
463     DebugPath = "/usr/lib/debug";
464 #endif
465   }
466   llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
467                           DebuglinkName);
468   if (checkFileCRC(DebugPath, CRCHash)) {
469     Result = std::string(DebugPath);
470     return true;
471   }
472   return false;
473 }
474 
475 static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) {
476   return StringRef(reinterpret_cast<const char *>(BuildID.data()),
477                    BuildID.size());
478 }
479 
480 bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
481                                           std::string &Result) {
482   StringRef BuildIDStr = getBuildIDStr(BuildID);
483   auto I = BuildIDPaths.find(BuildIDStr);
484   if (I != BuildIDPaths.end()) {
485     Result = I->second;
486     return true;
487   }
488   if (!BIDFetcher)
489     return false;
490   if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) {
491     Result = *Path;
492     auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result});
493     assert(InsertResult.second);
494     (void)InsertResult;
495     return true;
496   }
497 
498   return false;
499 }
500 
501 Expected<LLVMSymbolizer::ObjectPair>
502 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
503                                       const std::string &ArchName) {
504   auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
505   if (I != ObjectPairForPathArch.end()) {
506     recordAccess(BinaryForPath.find(Path)->second);
507     return I->second;
508   }
509 
510   auto ObjOrErr = getOrCreateObject(Path, ArchName);
511   if (!ObjOrErr) {
512     ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName),
513                                   ObjectPair(nullptr, nullptr));
514     return ObjOrErr.takeError();
515   }
516 
517   ObjectFile *Obj = ObjOrErr.get();
518   assert(Obj != nullptr);
519   ObjectFile *DbgObj = nullptr;
520 
521   if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
522     DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
523   else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj))
524     DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName);
525   if (!DbgObj)
526     DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
527   if (!DbgObj)
528     DbgObj = Obj;
529   ObjectPair Res = std::make_pair(Obj, DbgObj);
530   std::string DbgObjPath = DbgObj->getFileName().str();
531   auto Pair =
532       ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
533   BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
534     ObjectPairForPathArch.erase(I);
535   });
536   return Res;
537 }
538 
539 Expected<ObjectFile *>
540 LLVMSymbolizer::getOrCreateObject(const std::string &Path,
541                                   const std::string &ArchName) {
542   Binary *Bin;
543   auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
544   if (!Pair.second) {
545     Bin = Pair.first->second->getBinary();
546     recordAccess(Pair.first->second);
547   } else {
548     Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
549     if (!BinOrErr)
550       return BinOrErr.takeError();
551 
552     CachedBinary &CachedBin = Pair.first->second;
553     CachedBin = std::move(BinOrErr.get());
554     CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
555     LRUBinaries.push_back(CachedBin);
556     CacheSize += CachedBin.size();
557     Bin = CachedBin->getBinary();
558   }
559 
560   if (!Bin)
561     return static_cast<ObjectFile *>(nullptr);
562 
563   if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
564     auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
565     if (I != ObjectForUBPathAndArch.end())
566       return I->second.get();
567 
568     Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
569         UB->getMachOObjectForArch(ArchName);
570     if (!ObjOrErr) {
571       ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
572                                      std::unique_ptr<ObjectFile>());
573       return ObjOrErr.takeError();
574     }
575     ObjectFile *Res = ObjOrErr->get();
576     auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
577                                                std::move(ObjOrErr.get()));
578     BinaryForPath.find(Path)->second.pushEvictor(
579         [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); });
580     return Res;
581   }
582   if (Bin->isObject()) {
583     return cast<ObjectFile>(Bin);
584   }
585   return errorCodeToError(object_error::arch_not_found);
586 }
587 
588 Expected<SymbolizableModule *>
589 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
590                                  std::unique_ptr<DIContext> Context,
591                                  StringRef ModuleName) {
592   auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context),
593                                                   Opts.UntagAddresses);
594   std::unique_ptr<SymbolizableModule> SymMod;
595   if (InfoOrErr)
596     SymMod = std::move(*InfoOrErr);
597   auto InsertResult = Modules.insert(
598       std::make_pair(std::string(ModuleName), std::move(SymMod)));
599   assert(InsertResult.second);
600   if (!InfoOrErr)
601     return InfoOrErr.takeError();
602   return InsertResult.first->second.get();
603 }
604 
605 Expected<SymbolizableModule *>
606 LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
607   StringRef BinaryName = ModuleName;
608   StringRef ArchName = Opts.DefaultArch;
609   size_t ColonPos = ModuleName.find_last_of(':');
610   // Verify that substring after colon form a valid arch name.
611   if (ColonPos != std::string::npos) {
612     StringRef ArchStr = ModuleName.substr(ColonPos + 1);
613     if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
614       BinaryName = ModuleName.substr(0, ColonPos);
615       ArchName = ArchStr;
616     }
617   }
618 
619   auto I = Modules.find(ModuleName);
620   if (I != Modules.end()) {
621     recordAccess(BinaryForPath.find(BinaryName)->second);
622     return I->second.get();
623   }
624 
625   auto ObjectsOrErr =
626       getOrCreateObjectPair(std::string{BinaryName}, std::string{ArchName});
627   if (!ObjectsOrErr) {
628     // Failed to find valid object file.
629     Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
630     return ObjectsOrErr.takeError();
631   }
632   ObjectPair Objects = ObjectsOrErr.get();
633 
634   std::unique_ptr<DIContext> Context;
635   // If this is a COFF object containing PDB info and not containing DWARF
636   // section, use a PDBContext to symbolize. Otherwise, use DWARF.
637   if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
638     const codeview::DebugInfo *DebugInfo;
639     StringRef PDBFileName;
640     auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
641     // Use DWARF if there're DWARF sections.
642     bool HasDwarf =
643         llvm::any_of(Objects.first->sections(), [](SectionRef Section) -> bool {
644           if (Expected<StringRef> SectionName = Section.getName())
645             return SectionName.get() == ".debug_info";
646           return false;
647         });
648     if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) {
649       using namespace pdb;
650       std::unique_ptr<IPDBSession> Session;
651 
652       PDB_ReaderType ReaderType =
653           Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
654       if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(),
655                                     Session)) {
656         Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
657         // Return along the PDB filename to provide more context
658         return createFileError(PDBFileName, std::move(Err));
659       }
660       Context.reset(new PDBContext(*CoffObject, std::move(Session)));
661     }
662   }
663   if (!Context)
664     Context = DWARFContext::create(
665         *Objects.second, DWARFContext::ProcessDebugRelocations::Process,
666         nullptr, Opts.DWPName);
667   auto ModuleOrErr =
668       createModuleInfo(Objects.first, std::move(Context), ModuleName);
669   if (ModuleOrErr) {
670     auto I = Modules.find(ModuleName);
671     BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() {
672       Modules.erase(I);
673     });
674   }
675   return ModuleOrErr;
676 }
677 
678 // For BPF programs .BTF.ext section contains line numbers information,
679 // use it if regular DWARF is not available (e.g. for stripped binary).
680 static bool useBTFContext(const ObjectFile &Obj) {
681   return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() &&
682          BTFParser::hasBTFSections(Obj);
683 }
684 
685 Expected<SymbolizableModule *>
686 LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
687   StringRef ObjName = Obj.getFileName();
688   auto I = Modules.find(ObjName);
689   if (I != Modules.end())
690     return I->second.get();
691 
692   std::unique_ptr<DIContext> Context;
693   if (useBTFContext(Obj))
694     Context = BTFContext::create(Obj);
695   else
696     Context = DWARFContext::create(Obj);
697   // FIXME: handle COFF object with PDB info to use PDBContext
698   return createModuleInfo(&Obj, std::move(Context), ObjName);
699 }
700 
701 Expected<SymbolizableModule *>
702 LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) {
703   std::string Path;
704   if (!getOrFindDebugBinary(BuildID, Path)) {
705     return createStringError(errc::no_such_file_or_directory,
706                              "could not find build ID");
707   }
708   return getOrCreateModuleInfo(Path);
709 }
710 
711 namespace {
712 
713 // Undo these various manglings for Win32 extern "C" functions:
714 // cdecl       - _foo
715 // stdcall     - _foo@12
716 // fastcall    - @foo@12
717 // vectorcall  - foo@@12
718 // These are all different linkage names for 'foo'.
719 StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
720   char Front = SymbolName.empty() ? '\0' : SymbolName[0];
721 
722   // Remove any '@[0-9]+' suffix.
723   bool HasAtNumSuffix = false;
724   if (Front != '?') {
725     size_t AtPos = SymbolName.rfind('@');
726     if (AtPos != StringRef::npos &&
727         all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) {
728       SymbolName = SymbolName.substr(0, AtPos);
729       HasAtNumSuffix = true;
730     }
731   }
732 
733   // Remove any ending '@' for vectorcall.
734   bool IsVectorCall = false;
735   if (HasAtNumSuffix && SymbolName.ends_with("@")) {
736     SymbolName = SymbolName.drop_back();
737     IsVectorCall = true;
738   }
739 
740   // If not vectorcall, remove any '_' or '@' prefix.
741   if (!IsVectorCall && (Front == '_' || Front == '@'))
742     SymbolName = SymbolName.drop_front();
743 
744   return SymbolName;
745 }
746 
747 } // end anonymous namespace
748 
749 std::string
750 LLVMSymbolizer::DemangleName(StringRef Name,
751                              const SymbolizableModule *DbiModuleDescriptor) {
752   std::string Result;
753   if (nonMicrosoftDemangle(Name, Result))
754     return Result;
755 
756   if (!Name.empty() && Name.front() == '?') {
757     // Only do MSVC C++ demangling on symbols starting with '?'.
758     int status = 0;
759     char *DemangledName = microsoftDemangle(
760         Name, nullptr, &status,
761         MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention |
762                         MSDF_NoMemberType | MSDF_NoReturnType));
763     if (status != 0)
764       return std::string{Name};
765     Result = DemangledName;
766     free(DemangledName);
767     return Result;
768   }
769 
770   if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) {
771     std::string DemangledCName(demanglePE32ExternCFunc(Name));
772     // On i386 Windows, the C name mangling for different calling conventions
773     // may also be applied on top of the Itanium or Rust name mangling.
774     if (nonMicrosoftDemangle(DemangledCName, Result))
775       return Result;
776     return DemangledCName;
777   }
778   return std::string{Name};
779 }
780 
781 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
782   if (Bin->getBinary())
783     LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator());
784 }
785 
786 void LLVMSymbolizer::pruneCache() {
787   // Evict the LRU binary until the max cache size is reached or there's <= 1
788   // item in the cache. The MRU binary is always kept to avoid thrashing if it's
789   // larger than the cache size.
790   while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() &&
791          std::next(LRUBinaries.begin()) != LRUBinaries.end()) {
792     CachedBinary &Bin = LRUBinaries.front();
793     CacheSize -= Bin.size();
794     LRUBinaries.pop_front();
795     Bin.evict();
796   }
797 }
798 
799 void CachedBinary::pushEvictor(std::function<void()> NewEvictor) {
800   if (Evictor) {
801     this->Evictor = [OldEvictor = std::move(this->Evictor),
802                      NewEvictor = std::move(NewEvictor)]() {
803       NewEvictor();
804       OldEvictor();
805     };
806   } else {
807     this->Evictor = std::move(NewEvictor);
808   }
809 }
810 
811 } // namespace symbolize
812 } // namespace llvm
813