xref: /llvm-project/bolt/lib/Core/Exceptions.cpp (revision 9921197920fc3e9ad9605bd8fe0e835ca2dd41a5)
1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for handling C++ exception meta data.
10 //
11 // Some of the code is taken from examples/ExceptionDemo
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/Dwarf.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <map>
29 
30 #undef  DEBUG_TYPE
31 #define DEBUG_TYPE "bolt-exceptions"
32 
33 using namespace llvm::dwarf;
34 
35 namespace opts {
36 
37 extern llvm::cl::OptionCategory BoltCategory;
38 
39 extern llvm::cl::opt<unsigned> Verbosity;
40 
41 static llvm::cl::opt<bool>
42     PrintExceptions("print-exceptions",
43                     llvm::cl::desc("print exception handling data"),
44                     llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
45 
46 } // namespace opts
47 
48 namespace llvm {
49 namespace bolt {
50 
// Read and dump the .gcc_except_table section entry.
52 //
53 // .gcc_except_table section contains a set of Language-Specific Data Areas -
54 // a fancy name for exception handling tables. There's one  LSDA entry per
55 // function. However, we can't actually tell which function LSDA refers to
56 // unless we parse .eh_frame entry that refers to the LSDA.
57 // Then inside LSDA most addresses are encoded relative to the function start,
58 // so we need the function context in order to get to real addresses.
59 //
60 // The best visual representation of the tables comprising LSDA and
61 // relationships between them is illustrated at:
62 //   https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
63 // Keep in mind that GCC implementation deviates slightly from that document.
64 //
65 // To summarize, there are 4 tables in LSDA: call site table, actions table,
66 // types table, and types index table (for indirection). The main table contains
67 // call site entries. Each call site includes a PC range that can throw an
68 // exception, a handler (landing pad), and a reference to an entry in the action
69 // table. The handler and/or action could be 0. The action entry is a head
70 // of a list of actions associated with a call site. The action table contains
71 // all such lists (it could be optimized to share list tails). Each action could
72 // be either to catch an exception of a given type, to perform a cleanup, or to
73 // propagate the exception after filtering it out (e.g. to make sure function
74 // exception specification is not violated). Catch action contains a reference
75 // to an entry in the type table, and filter action refers to an entry in the
76 // type index table to encode a set of types to filter.
77 //
78 // Call site table follows LSDA header. Action table immediately follows the
79 // call site table.
80 //
81 // Both types table and type index table start at the same location, but they
82 // grow in opposite directions (types go up, indices go down). The beginning of
83 // these tables is encoded in LSDA header. Sizes for both of the tables are not
84 // included anywhere.
85 //
86 // We have to parse all of the tables to determine their sizes. Then we have
87 // to parse the call site table and associate discovered information with
88 // actual call instructions and landing pad blocks.
89 //
90 // For the purpose of rewriting exception handling tables, we can reuse action,
91 // and type index tables in their original binary format.
92 //
93 // Type table could be encoded using position-independent references, and thus
94 // may require relocation.
95 //
96 // Ideally we should be able to re-write LSDA in-place, without the need to
97 // allocate a new space for it. Sadly there's no guarantee that the new call
98 // site table will be the same size as GCC uses uleb encodings for PC offsets.
99 //
100 // Note: some functions have LSDA entries with 0 call site entries.
101 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
102                                uint64_t LSDASectionAddress) {
103   assert(CurrentState == State::Disassembled && "unexpected function state");
104 
105   if (!getLSDAAddress())
106     return;
107 
108   DWARFDataExtractor Data(
109       StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110                 LSDASectionData.size()),
111       BC.DwCtx->getDWARFObj().isLittleEndian(), 8);
112   uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
113   assert(Data.isValidOffset(Offset) && "wrong LSDA address");
114 
115   uint8_t LPStartEncoding = Data.getU8(&Offset);
116   uint64_t LPStart = 0;
117   // Convert to offset if LPStartEncoding is typed absptr DW_EH_PE_absptr
118   if (std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
119           &Offset, LPStartEncoding, Offset + LSDASectionAddress))
120     LPStart = (LPStartEncoding && 0xFF == 0) ? *MaybeLPStart
121                                              : *MaybeLPStart - Address;
122 
123   const uint8_t TTypeEncoding = Data.getU8(&Offset);
124   LSDATypeEncoding = TTypeEncoding;
125   size_t TTypeEncodingSize = 0;
126   uintptr_t TTypeEnd = 0;
127   if (TTypeEncoding != DW_EH_PE_omit) {
128     TTypeEnd = Data.getULEB128(&Offset);
129     TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
130   }
131 
132   if (opts::PrintExceptions) {
133     outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
134            << " for function " << *this << "]:\n";
135     outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
136            << '\n';
137     outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
138     outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
139     outs() << "TType End = " << TTypeEnd << '\n';
140   }
141 
142   // Table to store list of indices in type table. Entries are uleb128 values.
143   const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
144 
145   // Offset past the last decoded index.
146   uint64_t MaxTypeIndexTableOffset = 0;
147 
148   // Max positive index used in type table.
149   unsigned MaxTypeIndex = 0;
150 
151   // The actual type info table starts at the same location, but grows in
152   // opposite direction. TTypeEncoding is used to encode stored values.
153   const uint64_t TypeTableStart = Offset + TTypeEnd;
154 
155   uint8_t CallSiteEncoding = Data.getU8(&Offset);
156   uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
157   uint64_t CallSiteTableStart = Offset;
158   uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
159   uint64_t CallSitePtr = CallSiteTableStart;
160   uint64_t ActionTableStart = CallSiteTableEnd;
161 
162   if (opts::PrintExceptions) {
163     outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
164     outs() << "CallSite table length = " << CallSiteTableLength << '\n';
165     outs() << '\n';
166   }
167 
168   this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
169   const uint64_t RangeBase = getAddress();
170   while (CallSitePtr < CallSiteTableEnd) {
171     uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
172                                              CallSitePtr + LSDASectionAddress);
173     uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
174                                               CallSitePtr + LSDASectionAddress);
175     uint64_t LandingPad = *Data.getEncodedPointer(
176         &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
177     uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
178 
179     uint64_t LPOffset = LPStart + LandingPad;
180     uint64_t LPAddress = Address + LPOffset;
181 
182     // Verify if landing pad code is located outside current function
183     // Support landing pad to builtin_unreachable
184     if (LPAddress < Address || LPAddress > Address + getSize()) {
185       BinaryFunction *Fragment =
186           BC.getBinaryFunctionContainingAddress(LPAddress);
187       assert(Fragment != nullptr &&
188              "BOLT-ERROR: cannot find landing pad fragment");
189       BC.addInterproceduralReference(this, Fragment->getAddress());
190       BC.processInterproceduralReferences();
191       auto isFragmentOf = [](BinaryFunction *Fragment,
192                              BinaryFunction *Parent) -> bool {
193         return (Fragment->isFragment() && Fragment->isParentFragment(Parent));
194       };
195       (void)isFragmentOf;
196       assert((isFragmentOf(this, Fragment) || isFragmentOf(Fragment, this)) &&
197              "BOLT-ERROR: cannot have landing pads in different "
198              "functions");
199       setHasIndirectTargetToSplitFragment(true);
200       BC.addFragmentsToSkip(this);
201       return;
202     }
203 
204     if (opts::PrintExceptions) {
205       outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
206              << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
207              << "); landing pad: 0x" << Twine::utohexstr(LPOffset)
208              << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
209       outs() << "  current offset is " << (CallSitePtr - CallSiteTableStart)
210              << '\n';
211     }
212 
213     // Create a handler entry if necessary.
214     MCSymbol *LPSymbol = nullptr;
215     if (LPOffset) {
216       if (!getInstructionAtOffset(LPOffset)) {
217         if (opts::Verbosity >= 1)
218           errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LPOffset)
219                  << " not pointing to an instruction in function " << *this
220                  << " - ignoring.\n";
221       } else {
222         auto Label = Labels.find(LPOffset);
223         if (Label != Labels.end()) {
224           LPSymbol = Label->second;
225         } else {
226           LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
227           Labels[LPOffset] = LPSymbol;
228         }
229       }
230     }
231 
232     // Mark all call instructions in the range.
233     auto II = Instructions.find(Start);
234     auto IE = Instructions.end();
235     assert(II != IE && "exception range not pointing to an instruction");
236     do {
237       MCInst &Instruction = II->second;
238       if (BC.MIB->isCall(Instruction) &&
239           !BC.MIB->getConditionalTailCall(Instruction)) {
240         assert(!BC.MIB->isInvoke(Instruction) &&
241                "overlapping exception ranges detected");
242         // Add extra operands to a call instruction making it an invoke from
243         // now on.
244         BC.MIB->addEHInfo(Instruction,
245                           MCPlus::MCLandingPad(LPSymbol, ActionEntry));
246       }
247       ++II;
248     } while (II != IE && II->first < Start + Length);
249 
250     if (ActionEntry != 0) {
251       auto printType = [&](int Index, raw_ostream &OS) {
252         assert(Index > 0 && "only positive indices are valid");
253         uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
254         const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
255         uint64_t TypeAddress =
256             *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
257         if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
258           TypeAddress = 0;
259         if (TypeAddress == 0) {
260           OS << "<all>";
261           return;
262         }
263         if (TTypeEncoding & DW_EH_PE_indirect) {
264           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
265           assert(PointerOrErr && "failed to decode indirect address");
266           TypeAddress = *PointerOrErr;
267         }
268         if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
269           OS << TypeSymBD->getName();
270         else
271           OS << "0x" << Twine::utohexstr(TypeAddress);
272       };
273       if (opts::PrintExceptions)
274         outs() << "    actions: ";
275       uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
276       int64_t ActionType;
277       int64_t ActionNext;
278       const char *Sep = "";
279       do {
280         ActionType = Data.getSLEB128(&ActionPtr);
281         const uint32_t Self = ActionPtr;
282         ActionNext = Data.getSLEB128(&ActionPtr);
283         if (opts::PrintExceptions)
284           outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
285         if (ActionType == 0) {
286           if (opts::PrintExceptions)
287             outs() << "cleanup";
288         } else if (ActionType > 0) {
289           // It's an index into a type table.
290           MaxTypeIndex =
291               std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
292           if (opts::PrintExceptions) {
293             outs() << "catch type ";
294             printType(ActionType, outs());
295           }
296         } else { // ActionType < 0
297           if (opts::PrintExceptions)
298             outs() << "filter exception types ";
299           const char *TSep = "";
300           // ActionType is a negative *byte* offset into *uleb128-encoded* table
301           // of indices with base 1.
302           // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
303           // encoded using uleb128 thus we cannot directly dereference them.
304           uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
305           while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
306             MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
307             if (opts::PrintExceptions) {
308               outs() << TSep;
309               printType(Index, outs());
310               TSep = ", ";
311             }
312           }
313           MaxTypeIndexTableOffset = std::max(
314               MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
315         }
316 
317         Sep = "; ";
318 
319         ActionPtr = Self + ActionNext;
320       } while (ActionNext);
321       if (opts::PrintExceptions)
322         outs() << '\n';
323     }
324   }
325   if (opts::PrintExceptions)
326     outs() << '\n';
327 
328   assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
329              Data.getData().size() &&
330          "LSDA entry has crossed section boundary");
331 
332   if (TTypeEnd) {
333     LSDAActionTable = LSDASectionData.slice(
334         ActionTableStart, TypeIndexTableStart -
335                               MaxTypeIndex * TTypeEncodingSize -
336                               ActionTableStart);
337     for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
338       uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
339       const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
340       uint64_t TypeAddress =
341           *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
342       if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
343         TypeAddress = 0;
344       if (TTypeEncoding & DW_EH_PE_indirect) {
345         LSDATypeAddressTable.emplace_back(TypeAddress);
346         if (TypeAddress) {
347           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
348           assert(PointerOrErr && "failed to decode indirect address");
349           TypeAddress = *PointerOrErr;
350         }
351       }
352       LSDATypeTable.emplace_back(TypeAddress);
353     }
354     LSDATypeIndexTable =
355         LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
356   }
357 }
358 
359 void BinaryFunction::updateEHRanges() {
360   if (getSize() == 0)
361     return;
362 
363   assert(CurrentState == State::CFG_Finalized && "unexpected state");
364 
365   // Build call sites table.
366   struct EHInfo {
367     const MCSymbol *LP; // landing pad
368     uint64_t Action;
369   };
370 
371   // Sites to update.
372   CallSitesList Sites;
373 
374   for (FunctionFragment &FF : getLayout().fragments()) {
375     // If previous call can throw, this is its exception handler.
376     EHInfo PreviousEH = {nullptr, 0};
377 
378     // Marker for the beginning of exceptions range.
379     const MCSymbol *StartRange = nullptr;
380 
381     for (BinaryBasicBlock *const BB : FF) {
382       for (auto II = BB->begin(); II != BB->end(); ++II) {
383         if (!BC.MIB->isCall(*II))
384           continue;
385 
386         // Instruction can throw an exception that should be handled.
387         const bool Throws = BC.MIB->isInvoke(*II);
388 
389         // Ignore the call if it's a continuation of a no-throw gap.
390         if (!Throws && !StartRange)
391           continue;
392 
393         // Extract exception handling information from the instruction.
394         const MCSymbol *LP = nullptr;
395         uint64_t Action = 0;
396         if (const std::optional<MCPlus::MCLandingPad> EHInfo =
397                 BC.MIB->getEHInfo(*II))
398           std::tie(LP, Action) = *EHInfo;
399 
400         // No action if the exception handler has not changed.
401         if (Throws && StartRange && PreviousEH.LP == LP &&
402             PreviousEH.Action == Action)
403           continue;
404 
405         // Same symbol is used for the beginning and the end of the range.
406         const MCSymbol *EHSymbol;
407         MCInst EHLabel;
408         {
409           std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
410           EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
411           BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
412         }
413 
414         II = std::next(BB->insertPseudoInstr(II, EHLabel));
415 
416         // At this point we could be in one of the following states:
417         //
418         // I. Exception handler has changed and we need to close previous range
419         //    and start a new one.
420         //
421         // II. Start a new exception range after the gap.
422         //
423         // III. Close current exception range and start a new gap.
424         const MCSymbol *EndRange;
425         if (StartRange) {
426           // I, III:
427           EndRange = EHSymbol;
428         } else {
429           // II:
430           StartRange = EHSymbol;
431           EndRange = nullptr;
432         }
433 
434         // Close the previous range.
435         if (EndRange)
436           Sites.emplace_back(
437               FF.getFragmentNum(),
438               CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
439 
440         if (Throws) {
441           // I, II:
442           StartRange = EHSymbol;
443           PreviousEH = EHInfo{LP, Action};
444         } else {
445           StartRange = nullptr;
446         }
447       }
448     }
449 
450     // Check if we need to close the range.
451     if (StartRange) {
452       const MCSymbol *EndRange = getFunctionEndLabel(FF.getFragmentNum());
453       Sites.emplace_back(
454           FF.getFragmentNum(),
455           CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
456     }
457   }
458 
459   addCallSites(Sites);
460 }
461 
462 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
463 
464 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
465   // Prepare FDEs for fast lookup
466   for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
467     const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
468     // Skip CIEs.
469     if (!CurFDE)
470       continue;
471     // There could me multiple FDEs with the same initial address, and perhaps
472     // different sizes (address ranges). Use the first entry with non-zero size.
473     auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
474     if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
475       if (CurFDE->getAddressRange()) {
476         if (FDEI->second->getAddressRange() == 0) {
477           FDEI->second = CurFDE;
478         } else if (opts::Verbosity > 0) {
479           errs() << "BOLT-WARNING: different FDEs for function at 0x"
480                  << Twine::utohexstr(FDEI->first)
481                  << " detected; sizes: " << FDEI->second->getAddressRange()
482                  << " and " << CurFDE->getAddressRange() << '\n';
483         }
484       }
485     } else {
486       FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
487     }
488   }
489 }
490 
491 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
492   uint64_t Address = Function.getAddress();
493   auto I = FDEs.find(Address);
494   // Ignore zero-length FDE ranges.
495   if (I == FDEs.end() || !I->second->getAddressRange())
496     return true;
497 
498   const FDE &CurFDE = *I->second;
499   std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
500   Function.setLSDAAddress(LSDA ? *LSDA : 0);
501 
502   uint64_t Offset = Function.getFirstInstructionOffset();
503   uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
504   uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
505   if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
506     Function.setPersonalityFunction(
507         *CurFDE.getLinkedCIE()->getPersonalityAddress());
508     Function.setPersonalityEncoding(
509         *CurFDE.getLinkedCIE()->getPersonalityEncoding());
510   }
511 
512   auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment,
513                                  DataAlignment](
514                                     const CFIProgram::Instruction &Instr) {
515     uint8_t Opcode = Instr.Opcode;
516     if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
517       Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
518     switch (Instr.Opcode) {
519     case DW_CFA_nop:
520       break;
521     case DW_CFA_advance_loc4:
522     case DW_CFA_advance_loc2:
523     case DW_CFA_advance_loc1:
524     case DW_CFA_advance_loc:
525       // Advance our current address
526       Offset += CodeAlignment * int64_t(Instr.Ops[0]);
527       break;
528     case DW_CFA_offset_extended_sf:
529       Function.addCFIInstruction(
530           Offset,
531           MCCFIInstruction::createOffset(
532               nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
533       break;
534     case DW_CFA_offset_extended:
535     case DW_CFA_offset:
536       Function.addCFIInstruction(
537           Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
538                                                  DataAlignment * Instr.Ops[1]));
539       break;
540     case DW_CFA_restore_extended:
541     case DW_CFA_restore:
542       Function.addCFIInstruction(
543           Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
544       break;
545     case DW_CFA_set_loc:
546       assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
547       assert(Instr.Ops[0] <= Address + Function.getSize() &&
548              "set_loc out of function bounds");
549       Offset = Instr.Ops[0] - Address;
550       break;
551 
552     case DW_CFA_undefined:
553       Function.addCFIInstruction(
554           Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
555       break;
556     case DW_CFA_same_value:
557       Function.addCFIInstruction(
558           Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
559       break;
560     case DW_CFA_register:
561       Function.addCFIInstruction(
562           Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
563                                                    Instr.Ops[1]));
564       break;
565     case DW_CFA_remember_state:
566       Function.addCFIInstruction(
567           Offset, MCCFIInstruction::createRememberState(nullptr));
568       break;
569     case DW_CFA_restore_state:
570       Function.addCFIInstruction(Offset,
571                                  MCCFIInstruction::createRestoreState(nullptr));
572       break;
573     case DW_CFA_def_cfa:
574       Function.addCFIInstruction(
575           Offset,
576           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
577       break;
578     case DW_CFA_def_cfa_sf:
579       Function.addCFIInstruction(
580           Offset,
581           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
582                                       DataAlignment * int64_t(Instr.Ops[1])));
583       break;
584     case DW_CFA_def_cfa_register:
585       Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
586                                              nullptr, Instr.Ops[0]));
587       break;
588     case DW_CFA_def_cfa_offset:
589       Function.addCFIInstruction(
590           Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
591       break;
592     case DW_CFA_def_cfa_offset_sf:
593       Function.addCFIInstruction(
594           Offset, MCCFIInstruction::cfiDefCfaOffset(
595                       nullptr, DataAlignment * int64_t(Instr.Ops[0])));
596       break;
597     case DW_CFA_GNU_args_size:
598       Function.addCFIInstruction(
599           Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
600       Function.setUsesGnuArgsSize();
601       break;
602     case DW_CFA_val_offset_sf:
603     case DW_CFA_val_offset:
604       if (opts::Verbosity >= 1) {
605         errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
606       }
607       return false;
608     case DW_CFA_def_cfa_expression:
609     case DW_CFA_val_expression:
610     case DW_CFA_expression: {
611       StringRef ExprBytes = Instr.Expression->getData();
612       std::string Str;
613       raw_string_ostream OS(Str);
614       // Manually encode this instruction using CFI escape
615       OS << Opcode;
616       if (Opcode != DW_CFA_def_cfa_expression)
617         encodeULEB128(Instr.Ops[0], OS);
618       encodeULEB128(ExprBytes.size(), OS);
619       OS << ExprBytes;
620       Function.addCFIInstruction(
621           Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
622       break;
623     }
624     case DW_CFA_MIPS_advance_loc8:
625       if (opts::Verbosity >= 1)
626         errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
627       return false;
628     case DW_CFA_GNU_window_save:
629       // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
630       // id but mean different things. The latter is used in AArch64.
631       if (Function.getBinaryContext().isAArch64()) {
632         Function.addCFIInstruction(
633             Offset, MCCFIInstruction::createNegateRAState(nullptr));
634         break;
635       }
636       if (opts::Verbosity >= 1)
637         errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
638       return false;
639     case DW_CFA_lo_user:
640     case DW_CFA_hi_user:
641       if (opts::Verbosity >= 1)
642         errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
643       return false;
644     default:
645       if (opts::Verbosity >= 1)
646         errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
647                << '\n';
648       return false;
649     }
650 
651     return true;
652   };
653 
654   for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
655     if (!decodeFrameInstruction(Instr))
656       return false;
657 
658   for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
659     if (!decodeFrameInstruction(Instr))
660       return false;
661 
662   return true;
663 }
664 
665 std::vector<char> CFIReaderWriter::generateEHFrameHeader(
666     const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame,
667     uint64_t EHFrameHeaderAddress,
668     std::vector<uint64_t> &FailedAddresses) const {
669   // Common PC -> FDE map to be written into .eh_frame_hdr.
670   std::map<uint64_t, uint64_t> PCToFDE;
671 
672   // Presort array for binary search.
673   llvm::sort(FailedAddresses);
674 
675   // Initialize PCToFDE using NewEHFrame.
676   for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
677     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
678     if (FDE == nullptr)
679       continue;
680     const uint64_t FuncAddress = FDE->getInitialLocation();
681     const uint64_t FDEAddress =
682         NewEHFrame.getEHFrameAddress() + FDE->getOffset();
683 
684     // Ignore unused FDEs.
685     if (FuncAddress == 0)
686       continue;
687 
688     // Add the address to the map unless we failed to write it.
689     if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
690                             FuncAddress)) {
691       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
692                         << Twine::utohexstr(FuncAddress) << " is at 0x"
693                         << Twine::utohexstr(FDEAddress) << '\n');
694       PCToFDE[FuncAddress] = FDEAddress;
695     }
696   };
697 
698   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
699                     << llvm::size(NewEHFrame.entries()) << " entries\n");
700 
701   // Add entries from the original .eh_frame corresponding to the functions
702   // that we did not update.
703   for (const dwarf::FrameEntry &Entry : OldEHFrame) {
704     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
705     if (FDE == nullptr)
706       continue;
707     const uint64_t FuncAddress = FDE->getInitialLocation();
708     const uint64_t FDEAddress =
709         OldEHFrame.getEHFrameAddress() + FDE->getOffset();
710 
711     // Add the address if we failed to write it.
712     if (PCToFDE.count(FuncAddress) == 0) {
713       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
714                         << Twine::utohexstr(FuncAddress) << " is at 0x"
715                         << Twine::utohexstr(FDEAddress) << '\n');
716       PCToFDE[FuncAddress] = FDEAddress;
717     }
718   };
719 
720   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
721                     << llvm::size(OldEHFrame.entries()) << " entries\n");
722 
723   // Generate a new .eh_frame_hdr based on the new map.
724 
725   // Header plus table of entries of size 8 bytes.
726   std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
727 
728   // Version is 1.
729   EHFrameHeader[0] = 1;
730   // Encoding of the eh_frame pointer.
731   EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
732   // Encoding of the count field to follow.
733   EHFrameHeader[2] = DW_EH_PE_udata4;
734   // Encoding of the table entries - 4-byte offset from the start of the header.
735   EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
736 
737   // Address of eh_frame. Use the new one.
738   support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
739       NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
740 
741   // Number of entries in the table (FDE count).
742   support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
743 
744   // Write the table at offset 12.
745   char *Ptr = EHFrameHeader.data();
746   uint32_t Offset = 12;
747   for (const auto &PCI : PCToFDE) {
748     int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
749     assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
750     support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
751     Offset += 4;
752     int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
753     assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
754     support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
755     Offset += 4;
756   }
757 
758   return EHFrameHeader;
759 }
760 
761 Error EHFrameParser::parseCIE(uint64_t StartOffset) {
762   uint8_t Version = Data.getU8(&Offset);
763   const char *Augmentation = Data.getCStr(&Offset);
764   StringRef AugmentationString(Augmentation ? Augmentation : "");
765   uint8_t AddressSize =
766       Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
767   Data.setAddressSize(AddressSize);
768   // Skip segment descriptor size
769   if (Version >= 4)
770     Offset += 1;
771   // Skip code alignment factor
772   Data.getULEB128(&Offset);
773   // Skip data alignment
774   Data.getSLEB128(&Offset);
775   // Skip return address register
776   if (Version == 1)
777     Offset += 1;
778   else
779     Data.getULEB128(&Offset);
780 
781   uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
782   uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
783   // Walk the augmentation string to get all the augmentation data.
784   for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
785     switch (AugmentationString[i]) {
786     default:
787       return createStringError(
788           errc::invalid_argument,
789           "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
790     case 'L':
791       LSDAPointerEncoding = Data.getU8(&Offset);
792       break;
793     case 'P': {
794       uint32_t PersonalityEncoding = Data.getU8(&Offset);
795       std::optional<uint64_t> Personality =
796           Data.getEncodedPointer(&Offset, PersonalityEncoding,
797                                  EHFrameAddress ? EHFrameAddress + Offset : 0);
798       // Patch personality address
799       if (Personality)
800         PatcherCallback(*Personality, Offset, PersonalityEncoding);
801       break;
802     }
803     case 'R':
804       FDEPointerEncoding = Data.getU8(&Offset);
805       break;
806     case 'z':
807       if (i)
808         return createStringError(
809             errc::invalid_argument,
810             "'z' must be the first character at 0x%" PRIx64, StartOffset);
811       // Skip augmentation length
812       Data.getULEB128(&Offset);
813       break;
814     case 'S':
815     case 'B':
816       break;
817     }
818   }
819   Entries.emplace_back(std::make_unique<CIEInfo>(
820       FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
821   CIEs[StartOffset] = &*Entries.back();
822   return Error::success();
823 }
824 
825 Error EHFrameParser::parseFDE(uint64_t CIEPointer,
826                               uint64_t StartStructureOffset) {
827   std::optional<uint64_t> LSDAAddress;
828   CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
829 
830   // The address size is encoded in the CIE we reference.
831   if (!Cie)
832     return createStringError(errc::invalid_argument,
833                              "parsing FDE data at 0x%" PRIx64
834                              " failed due to missing CIE",
835                              StartStructureOffset);
836   // Patch initial location
837   if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
838                                         EHFrameAddress + Offset)) {
839     PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
840   }
841   // Skip address range
842   Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
843 
844   // Process augmentation data for this FDE.
845   StringRef AugmentationString = Cie->AugmentationString;
846   if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
847     // Skip augmentation length
848     Data.getULEB128(&Offset);
849     LSDAAddress =
850         Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
851                                EHFrameAddress ? Offset + EHFrameAddress : 0);
852     // Patch LSDA address
853     PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
854   }
855   return Error::success();
856 }
857 
858 Error EHFrameParser::parse() {
859   while (Data.isValidOffset(Offset)) {
860     const uint64_t StartOffset = Offset;
861 
862     uint64_t Length;
863     DwarfFormat Format;
864     std::tie(Length, Format) = Data.getInitialLength(&Offset);
865 
866     // If the Length is 0, then this CIE is a terminator
867     if (Length == 0)
868       break;
869 
870     const uint64_t StartStructureOffset = Offset;
871     const uint64_t EndStructureOffset = Offset + Length;
872 
873     Error Err = Error::success();
874     const uint64_t Id = Data.getRelocatedValue(4, &Offset,
875                                                /*SectionIndex=*/nullptr, &Err);
876     if (Err)
877       return Err;
878 
879     if (!Id) {
880       if (Error Err = parseCIE(StartOffset))
881         return Err;
882     } else {
883       if (Error Err = parseFDE(Id, StartStructureOffset))
884         return Err;
885     }
886     Offset = EndStructureOffset;
887   }
888 
889   return Error::success();
890 }
891 
892 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
893                            PatcherCallbackTy PatcherCallback) {
894   EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
895   return Parser.parse();
896 }
897 
898 } // namespace bolt
899 } // namespace llvm
900