xref: /llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp (revision ecf6466f01c52ebd8c86575dfdc03fe08ad1b2e0)
1 //=--------- MachOLinkGraphBuilder.cpp - MachO LinkGraph builder ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Generic MachO LinkGraph building code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MachOLinkGraphBuilder.h"
14 
15 #define DEBUG_TYPE "jitlink"
16 
/// Name given to the graph section created by getCommonSection(). The pointer
/// itself is const so this file-local name cannot be reseated by accident.
static const char *const CommonSectionName = "__common";
18 
19 namespace llvm {
20 namespace jitlink {
21 
22 MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {}
23 
24 Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
25 
26   // Sanity check: we only operate on relocatable objects.
27   if (!Obj.isRelocatableObject())
28     return make_error<JITLinkError>("Object is not a relocatable MachO");
29 
30   if (auto Err = createNormalizedSections())
31     return std::move(Err);
32 
33   if (auto Err = createNormalizedSymbols())
34     return std::move(Err);
35 
36   if (auto Err = graphifyRegularSymbols())
37     return std::move(Err);
38 
39   if (auto Err = graphifySectionsWithCustomParsers())
40     return std::move(Err);
41 
42   if (auto Err = addRelocations())
43     return std::move(Err);
44 
45   return std::move(G);
46 }
47 
48 MachOLinkGraphBuilder::MachOLinkGraphBuilder(
49     const object::MachOObjectFile &Obj, Triple TT,
50     LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
51     : Obj(Obj),
52       G(std::make_unique<LinkGraph>(
53           std::string(Obj.getFileName()), std::move(TT), getPointerSize(Obj),
54           getEndianness(Obj), std::move(GetEdgeKindName))) {}
55 
56 void MachOLinkGraphBuilder::addCustomSectionParser(
57     StringRef SectionName, SectionParserFunction Parser) {
58   assert(!CustomSectionParserFunctions.count(SectionName) &&
59          "Custom parser for this section already exists");
60   CustomSectionParserFunctions[SectionName] = std::move(Parser);
61 }
62 
63 Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) {
64   if ((Desc & MachO::N_WEAK_DEF) || (Desc & MachO::N_WEAK_REF))
65     return Linkage::Weak;
66   return Linkage::Strong;
67 }
68 
69 Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) {
70   if (Type & MachO::N_EXT) {
71     if ((Type & MachO::N_PEXT) || Name.startswith("l"))
72       return Scope::Hidden;
73     else
74       return Scope::Default;
75   }
76   return Scope::Local;
77 }
78 
79 bool MachOLinkGraphBuilder::isAltEntry(const NormalizedSymbol &NSym) {
80   return NSym.Desc & MachO::N_ALT_ENTRY;
81 }
82 
83 bool MachOLinkGraphBuilder::isDebugSection(const NormalizedSection &NSec) {
84   return (NSec.Flags & MachO::S_ATTR_DEBUG &&
85           strcmp(NSec.SegName, "__DWARF") == 0);
86 }
87 
88 unsigned
89 MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
90   return Obj.is64Bit() ? 8 : 4;
91 }
92 
93 support::endianness
94 MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
95   return Obj.isLittleEndian() ? support::little : support::big;
96 }
97 
98 Section &MachOLinkGraphBuilder::getCommonSection() {
99   if (!CommonSection) {
100     auto Prot = static_cast<sys::Memory::ProtectionFlags>(
101         sys::Memory::MF_READ | sys::Memory::MF_WRITE);
102     CommonSection = &G->createSection(CommonSectionName, Prot);
103   }
104   return *CommonSection;
105 }
106 
107 Error MachOLinkGraphBuilder::createNormalizedSections() {
108   // Build normalized sections. Verifies that section data is in-range (for
109   // sections with content) and that address ranges are non-overlapping.
110 
111   LLVM_DEBUG(dbgs() << "Creating normalized sections...\n");
112 
113   for (auto &SecRef : Obj.sections()) {
114     NormalizedSection NSec;
115     uint32_t DataOffset = 0;
116 
117     auto SecIndex = Obj.getSectionIndex(SecRef.getRawDataRefImpl());
118 
119     auto Name = SecRef.getName();
120     if (!Name)
121       return Name.takeError();
122 
123     if (Obj.is64Bit()) {
124       const MachO::section_64 &Sec64 =
125           Obj.getSection64(SecRef.getRawDataRefImpl());
126 
127       memcpy(&NSec.SectName, &Sec64.sectname, 16);
128       NSec.SectName[16] = '\0';
129       memcpy(&NSec.SegName, Sec64.segname, 16);
130       NSec.SegName[16] = '\0';
131 
132       NSec.Address = Sec64.addr;
133       NSec.Size = Sec64.size;
134       NSec.Alignment = 1ULL << Sec64.align;
135       NSec.Flags = Sec64.flags;
136       DataOffset = Sec64.offset;
137     } else {
138       const MachO::section &Sec32 = Obj.getSection(SecRef.getRawDataRefImpl());
139 
140       memcpy(&NSec.SectName, &Sec32.sectname, 16);
141       NSec.SectName[16] = '\0';
142       memcpy(&NSec.SegName, Sec32.segname, 16);
143       NSec.SegName[16] = '\0';
144 
145       NSec.Address = Sec32.addr;
146       NSec.Size = Sec32.size;
147       NSec.Alignment = 1ULL << Sec32.align;
148       NSec.Flags = Sec32.flags;
149       DataOffset = Sec32.offset;
150     }
151 
152     LLVM_DEBUG({
153       dbgs() << "  " << *Name << ": " << formatv("{0:x16}", NSec.Address)
154              << " -- " << formatv("{0:x16}", NSec.Address + NSec.Size)
155              << ", align: " << NSec.Alignment << ", index: " << SecIndex
156              << "\n";
157     });
158 
159     // Get the section data if any.
160     {
161       unsigned SectionType = NSec.Flags & MachO::SECTION_TYPE;
162       if (SectionType != MachO::S_ZEROFILL &&
163           SectionType != MachO::S_GB_ZEROFILL) {
164 
165         if (DataOffset + NSec.Size > Obj.getData().size())
166           return make_error<JITLinkError>(
167               "Section data extends past end of file");
168 
169         NSec.Data = Obj.getData().data() + DataOffset;
170       }
171     }
172 
173     // Get prot flags.
174     // FIXME: Make sure this test is correct (it's probably missing cases
175     // as-is).
176     sys::Memory::ProtectionFlags Prot;
177     if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS)
178       Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
179                                                        sys::Memory::MF_EXEC);
180     else
181       Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
182                                                        sys::Memory::MF_WRITE);
183 
184     if (!isDebugSection(NSec))
185       NSec.GraphSection = &G->createSection(*Name, Prot);
186     else
187       LLVM_DEBUG({
188         dbgs() << "    " << *Name
189                << " is a debug section: No graph section will be created.\n";
190       });
191 
192     IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
193   }
194 
195   std::vector<NormalizedSection *> Sections;
196   Sections.reserve(IndexToSection.size());
197   for (auto &KV : IndexToSection)
198     Sections.push_back(&KV.second);
199 
200   // If we didn't end up creating any sections then bail out. The code below
201   // assumes that we have at least one section.
202   if (Sections.empty())
203     return Error::success();
204 
205   llvm::sort(Sections,
206              [](const NormalizedSection *LHS, const NormalizedSection *RHS) {
207                assert(LHS && RHS && "Null section?");
208                if (LHS->Address != RHS->Address)
209                  return LHS->Address < RHS->Address;
210                return LHS->Size < RHS->Size;
211              });
212 
213   for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) {
214     auto &Cur = *Sections[I];
215     auto &Next = *Sections[I + 1];
216     if (Next.Address < Cur.Address + Cur.Size)
217       return make_error<JITLinkError>(
218           "Address range for section " +
219           formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Cur.SegName,
220                   Cur.SectName, Cur.Address, Cur.Address + Cur.Size) +
221           "overlaps section \"" + Next.SegName + "/" + Next.SectName + "\"" +
222           formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Next.SegName,
223                   Next.SectName, Next.Address, Next.Address + Next.Size));
224   }
225 
226   return Error::success();
227 }
228 
229 Error MachOLinkGraphBuilder::createNormalizedSymbols() {
230   LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n");
231 
232   for (auto &SymRef : Obj.symbols()) {
233 
234     unsigned SymbolIndex = Obj.getSymbolIndex(SymRef.getRawDataRefImpl());
235     uint64_t Value;
236     uint32_t NStrX;
237     uint8_t Type;
238     uint8_t Sect;
239     uint16_t Desc;
240 
241     if (Obj.is64Bit()) {
242       const MachO::nlist_64 &NL64 =
243           Obj.getSymbol64TableEntry(SymRef.getRawDataRefImpl());
244       Value = NL64.n_value;
245       NStrX = NL64.n_strx;
246       Type = NL64.n_type;
247       Sect = NL64.n_sect;
248       Desc = NL64.n_desc;
249     } else {
250       const MachO::nlist &NL32 =
251           Obj.getSymbolTableEntry(SymRef.getRawDataRefImpl());
252       Value = NL32.n_value;
253       NStrX = NL32.n_strx;
254       Type = NL32.n_type;
255       Sect = NL32.n_sect;
256       Desc = NL32.n_desc;
257     }
258 
259     // Skip stabs.
260     // FIXME: Are there other symbols we should be skipping?
261     if (Type & MachO::N_STAB)
262       continue;
263 
264     Optional<StringRef> Name;
265     if (NStrX) {
266       if (auto NameOrErr = SymRef.getName())
267         Name = *NameOrErr;
268       else
269         return NameOrErr.takeError();
270     }
271 
272     LLVM_DEBUG({
273       dbgs() << "  ";
274       if (!Name)
275         dbgs() << "<anonymous symbol>";
276       else
277         dbgs() << *Name;
278       dbgs() << ": value = " << formatv("{0:x16}", Value)
279              << ", type = " << formatv("{0:x2}", Type)
280              << ", desc = " << formatv("{0:x4}", Desc) << ", sect = ";
281       if (Sect)
282         dbgs() << static_cast<unsigned>(Sect - 1);
283       else
284         dbgs() << "none";
285       dbgs() << "\n";
286     });
287 
288     // If this symbol has a section, sanity check that the addresses line up.
289     if (Sect != 0) {
290       auto NSec = findSectionByIndex(Sect - 1);
291       if (!NSec)
292         return NSec.takeError();
293 
294       if (Value < NSec->Address || Value > NSec->Address + NSec->Size)
295         return make_error<JITLinkError>("Symbol address does not fall within "
296                                         "section");
297 
298       if (!NSec->GraphSection) {
299         LLVM_DEBUG({
300           dbgs() << "  Skipping: Symbol is in section " << NSec->SegName << "/"
301                  << NSec->SectName
302                  << " which has no associated graph section.\n";
303         });
304         continue;
305       }
306     }
307 
308     IndexToSymbol[SymbolIndex] =
309         &createNormalizedSymbol(*Name, Value, Type, Sect, Desc,
310                                 getLinkage(Desc), getScope(*Name, Type));
311   }
312 
313   return Error::success();
314 }
315 
316 void MachOLinkGraphBuilder::addSectionStartSymAndBlock(
317     Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size,
318     uint32_t Alignment, bool IsLive) {
319   Block &B =
320       Data ? G->createContentBlock(GraphSec, StringRef(Data, Size), Address,
321                                    Alignment, 0)
322            : G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0);
323   auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive);
324   assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) &&
325          "Anonymous block start symbol clashes with existing symbol address");
326   AddrToCanonicalSymbol[Sym.getAddress()] = &Sym;
327 }
328 
329 Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
330 
331   LLVM_DEBUG(dbgs() << "Creating graph symbols...\n");
332 
333   /// We only have 256 section indexes: Use a vector rather than a map.
334   std::vector<std::vector<NormalizedSymbol *>> SecIndexToSymbols;
335   SecIndexToSymbols.resize(256);
336 
337   // Create commons, externs, and absolutes, and partition all other symbols by
338   // section.
339   for (auto &KV : IndexToSymbol) {
340     auto &NSym = *KV.second;
341 
342     switch (NSym.Type & MachO::N_TYPE) {
343     case MachO::N_UNDF:
344       if (NSym.Value) {
345         if (!NSym.Name)
346           return make_error<JITLinkError>("Anonymous common symbol at index " +
347                                           Twine(KV.first));
348         NSym.GraphSymbol = &G->addCommonSymbol(
349             *NSym.Name, NSym.S, getCommonSection(), 0, NSym.Value,
350             1ull << MachO::GET_COMM_ALIGN(NSym.Desc),
351             NSym.Desc & MachO::N_NO_DEAD_STRIP);
352       } else {
353         if (!NSym.Name)
354           return make_error<JITLinkError>("Anonymous external symbol at "
355                                           "index " +
356                                           Twine(KV.first));
357         NSym.GraphSymbol = &G->addExternalSymbol(
358             *NSym.Name, 0,
359             NSym.Desc & MachO::N_WEAK_REF ? Linkage::Weak : Linkage::Strong);
360       }
361       break;
362     case MachO::N_ABS:
363       if (!NSym.Name)
364         return make_error<JITLinkError>("Anonymous absolute symbol at index " +
365                                         Twine(KV.first));
366       NSym.GraphSymbol = &G->addAbsoluteSymbol(
367           *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default,
368           NSym.Desc & MachO::N_NO_DEAD_STRIP);
369       break;
370     case MachO::N_SECT:
371       SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym);
372       break;
373     case MachO::N_PBUD:
374       return make_error<JITLinkError>(
375           "Unupported N_PBUD symbol " +
376           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
377           " at index " + Twine(KV.first));
378     case MachO::N_INDR:
379       return make_error<JITLinkError>(
380           "Unupported N_INDR symbol " +
381           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
382           " at index " + Twine(KV.first));
383     default:
384       return make_error<JITLinkError>(
385           "Unrecognized symbol type " + Twine(NSym.Type & MachO::N_TYPE) +
386           " for symbol " +
387           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
388           " at index " + Twine(KV.first));
389     }
390   }
391 
392   // Loop over sections performing regular graphification for those that
393   // don't have custom parsers.
394   for (auto &KV : IndexToSection) {
395     auto SecIndex = KV.first;
396     auto &NSec = KV.second;
397 
398     if (!NSec.GraphSection) {
399       LLVM_DEBUG({
400         dbgs() << "  " << NSec.SegName << "/" << NSec.SectName
401                << " has no graph section. Skipping.\n";
402       });
403       continue;
404     }
405 
406     // Skip sections with custom parsers.
407     if (CustomSectionParserFunctions.count(NSec.GraphSection->getName())) {
408       LLVM_DEBUG({
409         dbgs() << "  Skipping section " << NSec.GraphSection->getName()
410                << " as it has a custom parser.\n";
411       });
412       continue;
413     } else
414       LLVM_DEBUG({
415         dbgs() << "  Processing section " << NSec.GraphSection->getName()
416                << "...\n";
417       });
418 
419     bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP;
420     bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
421 
422     auto &SecNSymStack = SecIndexToSymbols[SecIndex];
423 
424     // If this section is non-empty but there are no symbols covering it then
425     // create one block and anonymous symbol to cover the entire section.
426     if (SecNSymStack.empty()) {
427       if (NSec.Size > 0) {
428         LLVM_DEBUG({
429           dbgs() << "    Section non-empty, but contains no symbols. "
430                     "Creating anonymous block to cover "
431                  << formatv("{0:x16}", NSec.Address) << " -- "
432                  << formatv("{0:x16}", NSec.Address + NSec.Size) << "\n";
433         });
434         addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
435                                    NSec.Size, NSec.Alignment,
436                                    SectionIsNoDeadStrip);
437       } else
438         LLVM_DEBUG({
439           dbgs() << "    Section empty and contains no symbols. Skipping.\n";
440         });
441       continue;
442     }
443 
444     // Sort the symbol stack in by address, alt-entry status, scope, and name.
445     // We sort in reverse order so that symbols will be visited in the right
446     // order when we pop off the stack below.
447     llvm::sort(SecNSymStack, [](const NormalizedSymbol *LHS,
448                                 const NormalizedSymbol *RHS) {
449       if (LHS->Value != RHS->Value)
450         return LHS->Value > RHS->Value;
451       if (isAltEntry(*LHS) != isAltEntry(*RHS))
452         return isAltEntry(*RHS);
453       if (LHS->S != RHS->S)
454         return static_cast<uint8_t>(LHS->S) < static_cast<uint8_t>(RHS->S);
455       return LHS->Name < RHS->Name;
456     });
457 
458     // The first symbol in a section can not be an alt-entry symbol.
459     if (!SecNSymStack.empty() && isAltEntry(*SecNSymStack.back()))
460       return make_error<JITLinkError>(
461           "First symbol in " + NSec.GraphSection->getName() + " is alt-entry");
462 
463     // If the section is non-empty but there is no symbol covering the start
464     // address then add an anonymous one.
465     if (SecNSymStack.back()->Value != NSec.Address) {
466       auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address;
467       LLVM_DEBUG({
468         dbgs() << "    Section start not covered by symbol. "
469                << "Creating anonymous block to cover [ "
470                << formatv("{0:x16}", NSec.Address) << " -- "
471                << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n";
472       });
473       addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
474                                  AnonBlockSize, NSec.Alignment,
475                                  SectionIsNoDeadStrip);
476     }
477 
478     // Visit section symbols in order by popping off the reverse-sorted stack,
479     // building blocks for each alt-entry chain and creating symbols as we go.
480     while (!SecNSymStack.empty()) {
481       SmallVector<NormalizedSymbol *, 8> BlockSyms;
482 
483       BlockSyms.push_back(SecNSymStack.back());
484       SecNSymStack.pop_back();
485       while (!SecNSymStack.empty() &&
486              (isAltEntry(*SecNSymStack.back()) ||
487               SecNSymStack.back()->Value == BlockSyms.back()->Value)) {
488         BlockSyms.push_back(SecNSymStack.back());
489         SecNSymStack.pop_back();
490       }
491 
492       // BlockNSyms now contains the block symbols in reverse canonical order.
493       JITTargetAddress BlockStart = BlockSyms.front()->Value;
494       JITTargetAddress BlockEnd = SecNSymStack.empty()
495                                       ? NSec.Address + NSec.Size
496                                       : SecNSymStack.back()->Value;
497       JITTargetAddress BlockOffset = BlockStart - NSec.Address;
498       JITTargetAddress BlockSize = BlockEnd - BlockStart;
499 
500       LLVM_DEBUG({
501         dbgs() << "    Creating block for " << formatv("{0:x16}", BlockStart)
502                << " -- " << formatv("{0:x16}", BlockEnd) << ": "
503                << NSec.GraphSection->getName() << " + "
504                << formatv("{0:x16}", BlockOffset) << " with "
505                << BlockSyms.size() << " symbol(s)...\n";
506       });
507 
508       Block &B =
509           NSec.Data
510               ? G->createContentBlock(
511                     *NSec.GraphSection,
512                     StringRef(NSec.Data + BlockOffset, BlockSize), BlockStart,
513                     NSec.Alignment, BlockStart % NSec.Alignment)
514               : G->createZeroFillBlock(*NSec.GraphSection, BlockSize,
515                                        BlockStart, NSec.Alignment,
516                                        BlockStart % NSec.Alignment);
517 
518       Optional<JITTargetAddress> LastCanonicalAddr;
519       JITTargetAddress SymEnd = BlockEnd;
520       while (!BlockSyms.empty()) {
521         auto &NSym = *BlockSyms.back();
522         BlockSyms.pop_back();
523 
524         bool SymLive =
525             (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip;
526 
527         LLVM_DEBUG({
528           dbgs() << "      " << formatv("{0:x16}", NSym.Value) << " -- "
529                  << formatv("{0:x16}", SymEnd) << ": ";
530           if (!NSym.Name)
531             dbgs() << "<anonymous symbol>";
532           else
533             dbgs() << NSym.Name;
534           if (SymLive)
535             dbgs() << " [no-dead-strip]";
536           if (LastCanonicalAddr == NSym.Value)
537             dbgs() << " [non-canonical]";
538           dbgs() << "\n";
539         });
540 
541         auto &Sym =
542             NSym.Name
543                 ? G->addDefinedSymbol(B, NSym.Value - BlockStart, *NSym.Name,
544                                       SymEnd - NSym.Value, NSym.L, NSym.S,
545                                       SectionIsText, SymLive)
546                 : G->addAnonymousSymbol(B, NSym.Value - BlockStart,
547                                         SymEnd - NSym.Value, SectionIsText,
548                                         SymLive);
549         NSym.GraphSymbol = &Sym;
550         if (LastCanonicalAddr != Sym.getAddress()) {
551           if (LastCanonicalAddr)
552             SymEnd = *LastCanonicalAddr;
553           LastCanonicalAddr = Sym.getAddress();
554           setCanonicalSymbol(Sym);
555         }
556       }
557     }
558   }
559 
560   return Error::success();
561 }
562 
563 Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() {
564   // Graphify special sections.
565   for (auto &KV : IndexToSection) {
566     auto &NSec = KV.second;
567 
568     // Skip non-graph sections.
569     if (!NSec.GraphSection)
570       continue;
571 
572     auto HI = CustomSectionParserFunctions.find(NSec.GraphSection->getName());
573     if (HI != CustomSectionParserFunctions.end()) {
574       auto &Parse = HI->second;
575       if (auto Err = Parse(NSec))
576         return Err;
577     }
578   }
579 
580   return Error::success();
581 }
582 
583 } // end namespace jitlink
584 } // end namespace llvm
585