xref: /llvm-project/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp (revision 2cc64df0bd6a802eab592dbc282463c3e4a4281c)
1 //=--------- MachOLinkGraphBuilder.cpp - MachO LinkGraph builder ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Generic MachO LinkGraph building code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MachOLinkGraphBuilder.h"
14 #include <optional>
15 
16 #define DEBUG_TYPE "jitlink"
17 
// Name given to the graph section that getCommonSection() creates on demand.
static const char *CommonSectionName = "__common";
19 
20 namespace llvm {
21 namespace jitlink {
22 
// Out-of-line defaulted destructor.
MachOLinkGraphBuilder::~MachOLinkGraphBuilder() = default;
24 
25 Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
26 
27   // We only operate on relocatable objects.
28   if (!Obj.isRelocatableObject())
29     return make_error<JITLinkError>("Object is not a relocatable MachO");
30 
31   if (auto Err = createNormalizedSections())
32     return std::move(Err);
33 
34   if (auto Err = createNormalizedSymbols())
35     return std::move(Err);
36 
37   if (auto Err = graphifyRegularSymbols())
38     return std::move(Err);
39 
40   if (auto Err = graphifySectionsWithCustomParsers())
41     return std::move(Err);
42 
43   if (auto Err = addRelocations())
44     return std::move(Err);
45 
46   return std::move(G);
47 }
48 
49 MachOLinkGraphBuilder::MachOLinkGraphBuilder(
50     const object::MachOObjectFile &Obj, Triple TT,
51     LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
52     : Obj(Obj),
53       G(std::make_unique<LinkGraph>(
54           std::string(Obj.getFileName()), std::move(TT), getPointerSize(Obj),
55           getEndianness(Obj), std::move(GetEdgeKindName))) {
56   auto &MachHeader = Obj.getHeader64();
57   SubsectionsViaSymbols = MachHeader.flags & MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
58 }
59 
60 void MachOLinkGraphBuilder::addCustomSectionParser(
61     StringRef SectionName, SectionParserFunction Parser) {
62   assert(!CustomSectionParserFunctions.count(SectionName) &&
63          "Custom parser for this section already exists");
64   CustomSectionParserFunctions[SectionName] = std::move(Parser);
65 }
66 
67 Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) {
68   if ((Desc & MachO::N_WEAK_DEF) || (Desc & MachO::N_WEAK_REF))
69     return Linkage::Weak;
70   return Linkage::Strong;
71 }
72 
73 Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) {
74   if (Type & MachO::N_EXT) {
75     if ((Type & MachO::N_PEXT) || Name.startswith("l"))
76       return Scope::Hidden;
77     else
78       return Scope::Default;
79   }
80   return Scope::Local;
81 }
82 
83 bool MachOLinkGraphBuilder::isAltEntry(const NormalizedSymbol &NSym) {
84   return NSym.Desc & MachO::N_ALT_ENTRY;
85 }
86 
87 bool MachOLinkGraphBuilder::isDebugSection(const NormalizedSection &NSec) {
88   return (NSec.Flags & MachO::S_ATTR_DEBUG &&
89           strcmp(NSec.SegName, "__DWARF") == 0);
90 }
91 
92 bool MachOLinkGraphBuilder::isZeroFillSection(const NormalizedSection &NSec) {
93   switch (NSec.Flags & MachO::SECTION_TYPE) {
94   case MachO::S_ZEROFILL:
95   case MachO::S_GB_ZEROFILL:
96   case MachO::S_THREAD_LOCAL_ZEROFILL:
97     return true;
98   default:
99     return false;
100   }
101 }
102 
103 unsigned
104 MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
105   return Obj.is64Bit() ? 8 : 4;
106 }
107 
108 support::endianness
109 MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
110   return Obj.isLittleEndian() ? support::little : support::big;
111 }
112 
113 Section &MachOLinkGraphBuilder::getCommonSection() {
114   if (!CommonSection)
115     CommonSection = &G->createSection(CommonSectionName,
116                                       orc::MemProt::Read | orc::MemProt::Write);
117   return *CommonSection;
118 }
119 
120 Error MachOLinkGraphBuilder::createNormalizedSections() {
121   // Build normalized sections. Verifies that section data is in-range (for
122   // sections with content) and that address ranges are non-overlapping.
123 
124   LLVM_DEBUG(dbgs() << "Creating normalized sections...\n");
125 
126   for (auto &SecRef : Obj.sections()) {
127     NormalizedSection NSec;
128     uint32_t DataOffset = 0;
129 
130     auto SecIndex = Obj.getSectionIndex(SecRef.getRawDataRefImpl());
131 
132     if (Obj.is64Bit()) {
133       const MachO::section_64 &Sec64 =
134           Obj.getSection64(SecRef.getRawDataRefImpl());
135 
136       memcpy(&NSec.SectName, &Sec64.sectname, 16);
137       NSec.SectName[16] = '\0';
138       memcpy(&NSec.SegName, Sec64.segname, 16);
139       NSec.SegName[16] = '\0';
140 
141       NSec.Address = orc::ExecutorAddr(Sec64.addr);
142       NSec.Size = Sec64.size;
143       NSec.Alignment = 1ULL << Sec64.align;
144       NSec.Flags = Sec64.flags;
145       DataOffset = Sec64.offset;
146     } else {
147       const MachO::section &Sec32 = Obj.getSection(SecRef.getRawDataRefImpl());
148 
149       memcpy(&NSec.SectName, &Sec32.sectname, 16);
150       NSec.SectName[16] = '\0';
151       memcpy(&NSec.SegName, Sec32.segname, 16);
152       NSec.SegName[16] = '\0';
153 
154       NSec.Address = orc::ExecutorAddr(Sec32.addr);
155       NSec.Size = Sec32.size;
156       NSec.Alignment = 1ULL << Sec32.align;
157       NSec.Flags = Sec32.flags;
158       DataOffset = Sec32.offset;
159     }
160 
161     LLVM_DEBUG({
162       dbgs() << "  " << NSec.SegName << "," << NSec.SectName << ": "
163              << formatv("{0:x16}", NSec.Address) << " -- "
164              << formatv("{0:x16}", NSec.Address + NSec.Size)
165              << ", align: " << NSec.Alignment << ", index: " << SecIndex
166              << "\n";
167     });
168 
169     // Get the section data if any.
170     if (!isZeroFillSection(NSec)) {
171       if (DataOffset + NSec.Size > Obj.getData().size())
172         return make_error<JITLinkError>(
173             "Section data extends past end of file");
174 
175       NSec.Data = Obj.getData().data() + DataOffset;
176     }
177 
178     // Get prot flags.
179     // FIXME: Make sure this test is correct (it's probably missing cases
180     // as-is).
181     orc::MemProt Prot;
182     if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS)
183       Prot = orc::MemProt::Read | orc::MemProt::Exec;
184     else
185       Prot = orc::MemProt::Read | orc::MemProt::Write;
186 
187     auto FullyQualifiedName =
188         G->allocateContent(StringRef(NSec.SegName) + "," + NSec.SectName);
189     NSec.GraphSection = &G->createSection(
190         StringRef(FullyQualifiedName.data(), FullyQualifiedName.size()), Prot);
191 
192     // TODO: Are there any other criteria for NoAlloc lifetime?
193     if (NSec.Flags & MachO::S_ATTR_DEBUG)
194       NSec.GraphSection->setMemLifetimePolicy(orc::MemLifetimePolicy::NoAlloc);
195 
196     IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
197   }
198 
199   std::vector<NormalizedSection *> Sections;
200   Sections.reserve(IndexToSection.size());
201   for (auto &KV : IndexToSection)
202     Sections.push_back(&KV.second);
203 
204   // If we didn't end up creating any sections then bail out. The code below
205   // assumes that we have at least one section.
206   if (Sections.empty())
207     return Error::success();
208 
209   llvm::sort(Sections,
210              [](const NormalizedSection *LHS, const NormalizedSection *RHS) {
211                assert(LHS && RHS && "Null section?");
212                if (LHS->Address != RHS->Address)
213                  return LHS->Address < RHS->Address;
214                return LHS->Size < RHS->Size;
215              });
216 
217   for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) {
218     auto &Cur = *Sections[I];
219     auto &Next = *Sections[I + 1];
220     if (Next.Address < Cur.Address + Cur.Size)
221       return make_error<JITLinkError>(
222           "Address range for section " +
223           formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Cur.SegName,
224                   Cur.SectName, Cur.Address, Cur.Address + Cur.Size) +
225           "overlaps section \"" + Next.SegName + "/" + Next.SectName + "\"" +
226           formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Next.SegName,
227                   Next.SectName, Next.Address, Next.Address + Next.Size));
228   }
229 
230   return Error::success();
231 }
232 
233 Error MachOLinkGraphBuilder::createNormalizedSymbols() {
234   LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n");
235 
236   for (auto &SymRef : Obj.symbols()) {
237 
238     unsigned SymbolIndex = Obj.getSymbolIndex(SymRef.getRawDataRefImpl());
239     uint64_t Value;
240     uint32_t NStrX;
241     uint8_t Type;
242     uint8_t Sect;
243     uint16_t Desc;
244 
245     if (Obj.is64Bit()) {
246       const MachO::nlist_64 &NL64 =
247           Obj.getSymbol64TableEntry(SymRef.getRawDataRefImpl());
248       Value = NL64.n_value;
249       NStrX = NL64.n_strx;
250       Type = NL64.n_type;
251       Sect = NL64.n_sect;
252       Desc = NL64.n_desc;
253     } else {
254       const MachO::nlist &NL32 =
255           Obj.getSymbolTableEntry(SymRef.getRawDataRefImpl());
256       Value = NL32.n_value;
257       NStrX = NL32.n_strx;
258       Type = NL32.n_type;
259       Sect = NL32.n_sect;
260       Desc = NL32.n_desc;
261     }
262 
263     // Skip stabs.
264     // FIXME: Are there other symbols we should be skipping?
265     if (Type & MachO::N_STAB)
266       continue;
267 
268     std::optional<StringRef> Name;
269     if (NStrX) {
270       if (auto NameOrErr = SymRef.getName())
271         Name = *NameOrErr;
272       else
273         return NameOrErr.takeError();
274     } else if (Type & MachO::N_EXT)
275       return make_error<JITLinkError>("Symbol at index " +
276                                       formatv("{0}", SymbolIndex) +
277                                       " has no name (string table index 0), "
278                                       "but N_EXT bit is set");
279 
280     LLVM_DEBUG({
281       dbgs() << "  ";
282       if (!Name)
283         dbgs() << "<anonymous symbol>";
284       else
285         dbgs() << *Name;
286       dbgs() << ": value = " << formatv("{0:x16}", Value)
287              << ", type = " << formatv("{0:x2}", Type)
288              << ", desc = " << formatv("{0:x4}", Desc) << ", sect = ";
289       if (Sect)
290         dbgs() << static_cast<unsigned>(Sect - 1);
291       else
292         dbgs() << "none";
293       dbgs() << "\n";
294     });
295 
296     // If this symbol has a section, verify that the addresses line up.
297     if (Sect != 0) {
298       auto NSec = findSectionByIndex(Sect - 1);
299       if (!NSec)
300         return NSec.takeError();
301 
302       if (orc::ExecutorAddr(Value) < NSec->Address ||
303           orc::ExecutorAddr(Value) > NSec->Address + NSec->Size)
304         return make_error<JITLinkError>("Address " + formatv("{0:x}", Value) +
305                                         " for symbol " + *Name +
306                                         " does not fall within section");
307 
308       if (!NSec->GraphSection) {
309         LLVM_DEBUG({
310           dbgs() << "  Skipping: Symbol is in section " << NSec->SegName << "/"
311                  << NSec->SectName
312                  << " which has no associated graph section.\n";
313         });
314         continue;
315       }
316     }
317 
318     IndexToSymbol[SymbolIndex] =
319         &createNormalizedSymbol(*Name, Value, Type, Sect, Desc,
320                                 getLinkage(Desc), getScope(*Name, Type));
321   }
322 
323   return Error::success();
324 }
325 
326 void MachOLinkGraphBuilder::addSectionStartSymAndBlock(
327     unsigned SecIndex, Section &GraphSec, orc::ExecutorAddr Address,
328     const char *Data, orc::ExecutorAddrDiff Size, uint32_t Alignment,
329     bool IsLive) {
330   Block &B =
331       Data ? G->createContentBlock(GraphSec, ArrayRef<char>(Data, Size),
332                                    Address, Alignment, 0)
333            : G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0);
334   auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive);
335   auto SecI = IndexToSection.find(SecIndex);
336   assert(SecI != IndexToSection.end() && "SecIndex invalid");
337   auto &NSec = SecI->second;
338   assert(!NSec.CanonicalSymbols.count(Sym.getAddress()) &&
339          "Anonymous block start symbol clashes with existing symbol address");
340   NSec.CanonicalSymbols[Sym.getAddress()] = &Sym;
341 }
342 
343 Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
344 
345   LLVM_DEBUG(dbgs() << "Creating graph symbols...\n");
346 
347   /// We only have 256 section indexes: Use a vector rather than a map.
348   std::vector<std::vector<NormalizedSymbol *>> SecIndexToSymbols;
349   SecIndexToSymbols.resize(256);
350 
351   // Create commons, externs, and absolutes, and partition all other symbols by
352   // section.
353   for (auto &KV : IndexToSymbol) {
354     auto &NSym = *KV.second;
355 
356     switch (NSym.Type & MachO::N_TYPE) {
357     case MachO::N_UNDF:
358       if (NSym.Value) {
359         if (!NSym.Name)
360           return make_error<JITLinkError>("Anonymous common symbol at index " +
361                                           Twine(KV.first));
362         NSym.GraphSymbol = &G->addDefinedSymbol(
363             G->createZeroFillBlock(getCommonSection(),
364                                    orc::ExecutorAddrDiff(NSym.Value),
365                                    orc::ExecutorAddr(),
366                                    1ull << MachO::GET_COMM_ALIGN(NSym.Desc), 0),
367             0, *NSym.Name, orc::ExecutorAddrDiff(NSym.Value), Linkage::Strong,
368             NSym.S, false, NSym.Desc & MachO::N_NO_DEAD_STRIP);
369       } else {
370         if (!NSym.Name)
371           return make_error<JITLinkError>("Anonymous external symbol at "
372                                           "index " +
373                                           Twine(KV.first));
374         NSym.GraphSymbol = &G->addExternalSymbol(
375             *NSym.Name, 0, (NSym.Desc & MachO::N_WEAK_REF) != 0);
376       }
377       break;
378     case MachO::N_ABS:
379       if (!NSym.Name)
380         return make_error<JITLinkError>("Anonymous absolute symbol at index " +
381                                         Twine(KV.first));
382       NSym.GraphSymbol = &G->addAbsoluteSymbol(
383           *NSym.Name, orc::ExecutorAddr(NSym.Value), 0, Linkage::Strong,
384           getScope(*NSym.Name, NSym.Type), NSym.Desc & MachO::N_NO_DEAD_STRIP);
385       break;
386     case MachO::N_SECT:
387       SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym);
388       break;
389     case MachO::N_PBUD:
390       return make_error<JITLinkError>(
391           "Unupported N_PBUD symbol " +
392           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
393           " at index " + Twine(KV.first));
394     case MachO::N_INDR:
395       return make_error<JITLinkError>(
396           "Unupported N_INDR symbol " +
397           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
398           " at index " + Twine(KV.first));
399     default:
400       return make_error<JITLinkError>(
401           "Unrecognized symbol type " + Twine(NSym.Type & MachO::N_TYPE) +
402           " for symbol " +
403           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
404           " at index " + Twine(KV.first));
405     }
406   }
407 
408   // Loop over sections performing regular graphification for those that
409   // don't have custom parsers.
410   for (auto &KV : IndexToSection) {
411     auto SecIndex = KV.first;
412     auto &NSec = KV.second;
413 
414     if (!NSec.GraphSection) {
415       LLVM_DEBUG({
416         dbgs() << "  " << NSec.SegName << "/" << NSec.SectName
417                << " has no graph section. Skipping.\n";
418       });
419       continue;
420     }
421 
422     // Skip sections with custom parsers.
423     if (CustomSectionParserFunctions.count(NSec.GraphSection->getName())) {
424       LLVM_DEBUG({
425         dbgs() << "  Skipping section " << NSec.GraphSection->getName()
426                << " as it has a custom parser.\n";
427       });
428       continue;
429     } else if ((NSec.Flags & MachO::SECTION_TYPE) ==
430                MachO::S_CSTRING_LITERALS) {
431       if (auto Err = graphifyCStringSection(
432               NSec, std::move(SecIndexToSymbols[SecIndex])))
433         return Err;
434       continue;
435     } else
436       LLVM_DEBUG({
437         dbgs() << "  Graphifying regular section "
438                << NSec.GraphSection->getName() << "...\n";
439       });
440 
441     bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP;
442     bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
443 
444     auto &SecNSymStack = SecIndexToSymbols[SecIndex];
445 
446     // If this section is non-empty but there are no symbols covering it then
447     // create one block and anonymous symbol to cover the entire section.
448     if (SecNSymStack.empty()) {
449       if (NSec.Size > 0) {
450         LLVM_DEBUG({
451           dbgs() << "    Section non-empty, but contains no symbols. "
452                     "Creating anonymous block to cover "
453                  << formatv("{0:x16}", NSec.Address) << " -- "
454                  << formatv("{0:x16}", NSec.Address + NSec.Size) << "\n";
455         });
456         addSectionStartSymAndBlock(SecIndex, *NSec.GraphSection, NSec.Address,
457                                    NSec.Data, NSec.Size, NSec.Alignment,
458                                    SectionIsNoDeadStrip);
459       } else
460         LLVM_DEBUG({
461           dbgs() << "    Section empty and contains no symbols. Skipping.\n";
462         });
463       continue;
464     }
465 
466     // Sort the symbol stack in by address, alt-entry status, scope, and name.
467     // We sort in reverse order so that symbols will be visited in the right
468     // order when we pop off the stack below.
469     llvm::sort(SecNSymStack, [](const NormalizedSymbol *LHS,
470                                 const NormalizedSymbol *RHS) {
471       if (LHS->Value != RHS->Value)
472         return LHS->Value > RHS->Value;
473       if (isAltEntry(*LHS) != isAltEntry(*RHS))
474         return isAltEntry(*RHS);
475       if (LHS->S != RHS->S)
476         return static_cast<uint8_t>(LHS->S) < static_cast<uint8_t>(RHS->S);
477       return LHS->Name < RHS->Name;
478     });
479 
480     // The first symbol in a section can not be an alt-entry symbol.
481     if (!SecNSymStack.empty() && isAltEntry(*SecNSymStack.back()))
482       return make_error<JITLinkError>(
483           "First symbol in " + NSec.GraphSection->getName() + " is alt-entry");
484 
485     // If the section is non-empty but there is no symbol covering the start
486     // address then add an anonymous one.
487     if (orc::ExecutorAddr(SecNSymStack.back()->Value) != NSec.Address) {
488       auto AnonBlockSize =
489           orc::ExecutorAddr(SecNSymStack.back()->Value) - NSec.Address;
490       LLVM_DEBUG({
491         dbgs() << "    Section start not covered by symbol. "
492                << "Creating anonymous block to cover [ " << NSec.Address
493                << " -- " << (NSec.Address + AnonBlockSize) << " ]\n";
494       });
495       addSectionStartSymAndBlock(SecIndex, *NSec.GraphSection, NSec.Address,
496                                  NSec.Data, AnonBlockSize, NSec.Alignment,
497                                  SectionIsNoDeadStrip);
498     }
499 
500     // Visit section symbols in order by popping off the reverse-sorted stack,
501     // building graph symbols as we go.
502     //
503     // If MH_SUBSECTIONS_VIA_SYMBOLS is set we'll build a block for each
504     // alt-entry chain.
505     //
506     // If MH_SUBSECTIONS_VIA_SYMBOLS is not set then we'll just build one block
507     // for the whole section.
508     while (!SecNSymStack.empty()) {
509       SmallVector<NormalizedSymbol *, 8> BlockSyms;
510 
511       // Get the symbols in this alt-entry chain, or the whole section (if
512       // !SubsectionsViaSymbols).
513       BlockSyms.push_back(SecNSymStack.back());
514       SecNSymStack.pop_back();
515       while (!SecNSymStack.empty() &&
516              (isAltEntry(*SecNSymStack.back()) ||
517               SecNSymStack.back()->Value == BlockSyms.back()->Value ||
518              !SubsectionsViaSymbols)) {
519         BlockSyms.push_back(SecNSymStack.back());
520         SecNSymStack.pop_back();
521       }
522 
523       // BlockNSyms now contains the block symbols in reverse canonical order.
524       auto BlockStart = orc::ExecutorAddr(BlockSyms.front()->Value);
525       orc::ExecutorAddr BlockEnd =
526           SecNSymStack.empty() ? NSec.Address + NSec.Size
527                                : orc::ExecutorAddr(SecNSymStack.back()->Value);
528       orc::ExecutorAddrDiff BlockOffset = BlockStart - NSec.Address;
529       orc::ExecutorAddrDiff BlockSize = BlockEnd - BlockStart;
530 
531       LLVM_DEBUG({
532         dbgs() << "    Creating block for " << formatv("{0:x16}", BlockStart)
533                << " -- " << formatv("{0:x16}", BlockEnd) << ": "
534                << NSec.GraphSection->getName() << " + "
535                << formatv("{0:x16}", BlockOffset) << " with "
536                << BlockSyms.size() << " symbol(s)...\n";
537       });
538 
539       Block &B =
540           NSec.Data
541               ? G->createContentBlock(
542                     *NSec.GraphSection,
543                     ArrayRef<char>(NSec.Data + BlockOffset, BlockSize),
544                     BlockStart, NSec.Alignment, BlockStart % NSec.Alignment)
545               : G->createZeroFillBlock(*NSec.GraphSection, BlockSize,
546                                        BlockStart, NSec.Alignment,
547                                        BlockStart % NSec.Alignment);
548 
549       std::optional<orc::ExecutorAddr> LastCanonicalAddr;
550       auto SymEnd = BlockEnd;
551       while (!BlockSyms.empty()) {
552         auto &NSym = *BlockSyms.back();
553         BlockSyms.pop_back();
554 
555         bool SymLive =
556             (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip;
557 
558         auto &Sym = createStandardGraphSymbol(
559             NSym, B, SymEnd - orc::ExecutorAddr(NSym.Value), SectionIsText,
560             SymLive, LastCanonicalAddr != orc::ExecutorAddr(NSym.Value));
561 
562         if (LastCanonicalAddr != Sym.getAddress()) {
563           if (LastCanonicalAddr)
564             SymEnd = *LastCanonicalAddr;
565           LastCanonicalAddr = Sym.getAddress();
566         }
567       }
568     }
569   }
570 
571   return Error::success();
572 }
573 
574 Symbol &MachOLinkGraphBuilder::createStandardGraphSymbol(NormalizedSymbol &NSym,
575                                                          Block &B, size_t Size,
576                                                          bool IsText,
577                                                          bool IsNoDeadStrip,
578                                                          bool IsCanonical) {
579 
580   LLVM_DEBUG({
581     dbgs() << "      " << formatv("{0:x16}", NSym.Value) << " -- "
582            << formatv("{0:x16}", NSym.Value + Size) << ": ";
583     if (!NSym.Name)
584       dbgs() << "<anonymous symbol>";
585     else
586       dbgs() << NSym.Name;
587     if (IsText)
588       dbgs() << " [text]";
589     if (IsNoDeadStrip)
590       dbgs() << " [no-dead-strip]";
591     if (!IsCanonical)
592       dbgs() << " [non-canonical]";
593     dbgs() << "\n";
594   });
595 
596   auto SymOffset = orc::ExecutorAddr(NSym.Value) - B.getAddress();
597   auto &Sym =
598       NSym.Name
599           ? G->addDefinedSymbol(B, SymOffset, *NSym.Name, Size, NSym.L, NSym.S,
600                                 IsText, IsNoDeadStrip)
601           : G->addAnonymousSymbol(B, SymOffset, Size, IsText, IsNoDeadStrip);
602   NSym.GraphSymbol = &Sym;
603 
604   if (IsCanonical)
605     setCanonicalSymbol(getSectionByIndex(NSym.Sect - 1), Sym);
606 
607   return Sym;
608 }
609 
610 Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() {
611   // Graphify special sections.
612   for (auto &KV : IndexToSection) {
613     auto &NSec = KV.second;
614 
615     // Skip non-graph sections.
616     if (!NSec.GraphSection)
617       continue;
618 
619     auto HI = CustomSectionParserFunctions.find(NSec.GraphSection->getName());
620     if (HI != CustomSectionParserFunctions.end()) {
621       auto &Parse = HI->second;
622       if (auto Err = Parse(NSec))
623         return Err;
624     }
625   }
626 
627   return Error::success();
628 }
629 
630 Error MachOLinkGraphBuilder::graphifyCStringSection(
631     NormalizedSection &NSec, std::vector<NormalizedSymbol *> NSyms) {
632   assert(NSec.GraphSection && "C string literal section missing graph section");
633   assert(NSec.Data && "C string literal section has no data");
634 
635   LLVM_DEBUG({
636     dbgs() << "  Graphifying C-string literal section "
637            << NSec.GraphSection->getName() << "\n";
638   });
639 
640   if (NSec.Data[NSec.Size - 1] != '\0')
641     return make_error<JITLinkError>("C string literal section " +
642                                     NSec.GraphSection->getName() +
643                                     " does not end with null terminator");
644 
645   /// Sort into reverse order to use as a stack.
646   llvm::sort(NSyms,
647              [](const NormalizedSymbol *LHS, const NormalizedSymbol *RHS) {
648                if (LHS->Value != RHS->Value)
649                  return LHS->Value > RHS->Value;
650                if (LHS->L != RHS->L)
651                  return LHS->L > RHS->L;
652                if (LHS->S != RHS->S)
653                  return LHS->S > RHS->S;
654                if (RHS->Name) {
655                  if (!LHS->Name)
656                    return true;
657                  return *LHS->Name > *RHS->Name;
658                }
659                return false;
660              });
661 
662   bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP;
663   bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
664   orc::ExecutorAddrDiff BlockStart = 0;
665 
666   // Scan section for null characters.
667   for (size_t I = 0; I != NSec.Size; ++I) {
668     if (NSec.Data[I] == '\0') {
669       size_t BlockSize = I + 1 - BlockStart;
670       // Create a block for this null terminated string.
671       auto &B = G->createContentBlock(*NSec.GraphSection,
672                                       {NSec.Data + BlockStart, BlockSize},
673                                       NSec.Address + BlockStart, NSec.Alignment,
674                                       BlockStart % NSec.Alignment);
675 
676       LLVM_DEBUG({
677         dbgs() << "    Created block " << B.getRange()
678                << ", align = " << B.getAlignment()
679                << ", align-ofs = " << B.getAlignmentOffset() << " for \"";
680         for (size_t J = 0; J != std::min(B.getSize(), size_t(16)); ++J)
681           switch (B.getContent()[J]) {
682           case '\0': break;
683           case '\n': dbgs() << "\\n"; break;
684           case '\t': dbgs() << "\\t"; break;
685           default:   dbgs() << B.getContent()[J]; break;
686           }
687         if (B.getSize() > 16)
688           dbgs() << "...";
689         dbgs() << "\"\n";
690       });
691 
692       // If there's no symbol at the start of this block then create one.
693       if (NSyms.empty() ||
694           orc::ExecutorAddr(NSyms.back()->Value) != B.getAddress()) {
695         auto &S = G->addAnonymousSymbol(B, 0, BlockSize, false, false);
696         setCanonicalSymbol(NSec, S);
697         LLVM_DEBUG({
698           dbgs() << "      Adding symbol for c-string block " << B.getRange()
699                  << ": <anonymous symbol> at offset 0\n";
700         });
701       }
702 
703       // Process any remaining symbols that point into this block.
704       auto LastCanonicalAddr = B.getAddress() + BlockSize;
705       while (!NSyms.empty() && orc::ExecutorAddr(NSyms.back()->Value) <
706                                    B.getAddress() + BlockSize) {
707         auto &NSym = *NSyms.back();
708         size_t SymSize = (B.getAddress() + BlockSize) -
709                          orc::ExecutorAddr(NSyms.back()->Value);
710         bool SymLive =
711             (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip;
712 
713         bool IsCanonical = false;
714         if (LastCanonicalAddr != orc::ExecutorAddr(NSym.Value)) {
715           IsCanonical = true;
716           LastCanonicalAddr = orc::ExecutorAddr(NSym.Value);
717         }
718 
719         auto &Sym = createStandardGraphSymbol(NSym, B, SymSize, SectionIsText,
720                                               SymLive, IsCanonical);
721         (void)Sym;
722         LLVM_DEBUG({
723           dbgs() << "      Adding symbol for c-string block " << B.getRange()
724                  << ": "
725                  << (Sym.hasName() ? Sym.getName() : "<anonymous symbol>")
726                  << " at offset " << formatv("{0:x}", Sym.getOffset()) << "\n";
727         });
728 
729         NSyms.pop_back();
730       }
731 
732       BlockStart += BlockSize;
733     }
734   }
735 
736   assert(llvm::all_of(NSec.GraphSection->blocks(),
737                       [](Block *B) { return isCStringBlock(*B); }) &&
738          "All blocks in section should hold single c-strings");
739 
740   return Error::success();
741 }
742 
743 Error CompactUnwindSplitter::operator()(LinkGraph &G) {
744   auto *CUSec = G.findSectionByName(CompactUnwindSectionName);
745   if (!CUSec)
746     return Error::success();
747 
748   if (!G.getTargetTriple().isOSBinFormatMachO())
749     return make_error<JITLinkError>(
750         "Error linking " + G.getName() +
751         ": compact unwind splitting not supported on non-macho target " +
752         G.getTargetTriple().str());
753 
754   unsigned CURecordSize = 0;
755   unsigned PersonalityEdgeOffset = 0;
756   unsigned LSDAEdgeOffset = 0;
757   switch (G.getTargetTriple().getArch()) {
758   case Triple::aarch64:
759   case Triple::x86_64:
760     // 64-bit compact-unwind record format:
761     // Range start: 8 bytes.
762     // Range size:  4 bytes.
763     // CU encoding: 4 bytes.
764     // Personality: 8 bytes.
765     // LSDA:        8 bytes.
766     CURecordSize = 32;
767     PersonalityEdgeOffset = 16;
768     LSDAEdgeOffset = 24;
769     break;
770   default:
771     return make_error<JITLinkError>(
772         "Error linking " + G.getName() +
773         ": compact unwind splitting not supported on " +
774         G.getTargetTriple().getArchName());
775   }
776 
777   std::vector<Block *> OriginalBlocks(CUSec->blocks().begin(),
778                                       CUSec->blocks().end());
779   LLVM_DEBUG({
780     dbgs() << "In " << G.getName() << " splitting compact unwind section "
781            << CompactUnwindSectionName << " containing "
782            << OriginalBlocks.size() << " initial blocks...\n";
783   });
784 
785   while (!OriginalBlocks.empty()) {
786     auto *B = OriginalBlocks.back();
787     OriginalBlocks.pop_back();
788 
789     if (B->getSize() == 0) {
790       LLVM_DEBUG({
791         dbgs() << "  Skipping empty block at "
792                << formatv("{0:x16}", B->getAddress()) << "\n";
793       });
794       continue;
795     }
796 
797     LLVM_DEBUG({
798       dbgs() << "  Splitting block at " << formatv("{0:x16}", B->getAddress())
799              << " into " << (B->getSize() / CURecordSize)
800              << " compact unwind record(s)\n";
801     });
802 
803     if (B->getSize() % CURecordSize)
804       return make_error<JITLinkError>(
805           "Error splitting compact unwind record in " + G.getName() +
806           ": block at " + formatv("{0:x}", B->getAddress()) + " has size " +
807           formatv("{0:x}", B->getSize()) +
808           " (not a multiple of CU record size of " +
809           formatv("{0:x}", CURecordSize) + ")");
810 
811     unsigned NumBlocks = B->getSize() / CURecordSize;
812     LinkGraph::SplitBlockCache C;
813 
814     for (unsigned I = 0; I != NumBlocks; ++I) {
815       auto &CURec = G.splitBlock(*B, CURecordSize, &C);
816       bool AddedKeepAlive = false;
817 
818       for (auto &E : CURec.edges()) {
819         if (E.getOffset() == 0) {
820           LLVM_DEBUG({
821             dbgs() << "    Updating compact unwind record at "
822                    << formatv("{0:x16}", CURec.getAddress()) << " to point to "
823                    << (E.getTarget().hasName() ? E.getTarget().getName()
824                                                : StringRef())
825                    << " (at " << formatv("{0:x16}", E.getTarget().getAddress())
826                    << ")\n";
827           });
828 
829           if (E.getTarget().isExternal())
830             return make_error<JITLinkError>(
831                 "Error adding keep-alive edge for compact unwind record at " +
832                 formatv("{0:x}", CURec.getAddress()) + ": target " +
833                 E.getTarget().getName() + " is an external symbol");
834           auto &TgtBlock = E.getTarget().getBlock();
835           auto &CURecSym =
836               G.addAnonymousSymbol(CURec, 0, CURecordSize, false, false);
837           TgtBlock.addEdge(Edge::KeepAlive, 0, CURecSym, 0);
838           AddedKeepAlive = true;
839         } else if (E.getOffset() != PersonalityEdgeOffset &&
840                    E.getOffset() != LSDAEdgeOffset)
841           return make_error<JITLinkError>("Unexpected edge at offset " +
842                                           formatv("{0:x}", E.getOffset()) +
843                                           " in compact unwind record at " +
844                                           formatv("{0:x}", CURec.getAddress()));
845       }
846 
847       if (!AddedKeepAlive)
848         return make_error<JITLinkError>(
849             "Error adding keep-alive edge for compact unwind record at " +
850             formatv("{0:x}", CURec.getAddress()) +
851             ": no outgoing target edge at offset 0");
852     }
853   }
854   return Error::success();
855 }
856 
857 } // end namespace jitlink
858 } // end namespace llvm
859