xref: /llvm-project/bolt/lib/Core/Exceptions.cpp (revision 52cf07116bf0a8cab87b0f55176d198bcaa02575)
1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for handling C++ exception meta data.
10 //
11 // Some of the code is taken from examples/ExceptionDemo
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/Dwarf.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <map>
29 
30 #undef  DEBUG_TYPE
31 #define DEBUG_TYPE "bolt-exceptions"
32 
33 using namespace llvm::dwarf;
34 
// Command-line options consulted by the exception-handling code in this file.
namespace opts {

// Option category declared here and defined in another translation unit; the
// option below is registered under it.
extern llvm::cl::OptionCategory BoltCategory;

// Verbosity level declared here and defined elsewhere; several warnings in
// this file are only printed when it is at least 1.
extern llvm::cl::opt<unsigned> Verbosity;

// Hidden flag: when set, BinaryFunction::parseLSDA() dumps the decoded
// exception handling tables while parsing them.
static llvm::cl::opt<bool>
    PrintExceptions("print-exceptions",
                    llvm::cl::desc("print exception handling data"),
                    llvm::cl::Hidden, llvm::cl::cat(BoltCategory));

} // namespace opts
47 
48 namespace llvm {
49 namespace bolt {
50 
51 // Read and dump the .gcc_exception_table section entry.
52 //
53 // .gcc_except_table section contains a set of Language-Specific Data Areas -
54 // a fancy name for exception handling tables. There's one  LSDA entry per
55 // function. However, we can't actually tell which function LSDA refers to
56 // unless we parse .eh_frame entry that refers to the LSDA.
57 // Then inside LSDA most addresses are encoded relative to the function start,
58 // so we need the function context in order to get to real addresses.
59 //
60 // The best visual representation of the tables comprising LSDA and
61 // relationships between them is illustrated at:
62 //   https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
63 // Keep in mind that GCC implementation deviates slightly from that document.
64 //
65 // To summarize, there are 4 tables in LSDA: call site table, actions table,
66 // types table, and types index table (for indirection). The main table contains
67 // call site entries. Each call site includes a PC range that can throw an
68 // exception, a handler (landing pad), and a reference to an entry in the action
69 // table. The handler and/or action could be 0. The action entry is a head
70 // of a list of actions associated with a call site. The action table contains
71 // all such lists (it could be optimized to share list tails). Each action could
72 // be either to catch an exception of a given type, to perform a cleanup, or to
73 // propagate the exception after filtering it out (e.g. to make sure function
74 // exception specification is not violated). Catch action contains a reference
75 // to an entry in the type table, and filter action refers to an entry in the
76 // type index table to encode a set of types to filter.
77 //
78 // Call site table follows LSDA header. Action table immediately follows the
79 // call site table.
80 //
81 // Both types table and type index table start at the same location, but they
82 // grow in opposite directions (types go up, indices go down). The beginning of
83 // these tables is encoded in LSDA header. Sizes for both of the tables are not
84 // included anywhere.
85 //
86 // We have to parse all of the tables to determine their sizes. Then we have
87 // to parse the call site table and associate discovered information with
88 // actual call instructions and landing pad blocks.
89 //
90 // For the purpose of rewriting exception handling tables, we can reuse action,
91 // and type index tables in their original binary format.
92 //
93 // Type table could be encoded using position-independent references, and thus
94 // may require relocation.
95 //
96 // Ideally we should be able to re-write LSDA in-place, without the need to
97 // allocate a new space for it. Sadly there's no guarantee that the new call
98 // site table will be the same size as GCC uses uleb encodings for PC offsets.
99 //
100 // Note: some functions have LSDA entries with 0 call site entries.
101 Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
102                                 uint64_t LSDASectionAddress) {
103   assert(CurrentState == State::Disassembled && "unexpected function state");
104 
105   if (!getLSDAAddress())
106     return Error::success();
107 
108   DWARFDataExtractor Data(
109       StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110                 LSDASectionData.size()),
111       BC.DwCtx->getDWARFObj().isLittleEndian(),
112       BC.DwCtx->getDWARFObj().getAddressSize());
113   uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
114   assert(Data.isValidOffset(Offset) && "wrong LSDA address");
115 
116   const uint8_t LPStartEncoding = Data.getU8(&Offset);
117   uint64_t LPStart = Address;
118   if (LPStartEncoding != dwarf::DW_EH_PE_omit) {
119     std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
120         &Offset, LPStartEncoding, Offset + LSDASectionAddress);
121     if (!MaybeLPStart) {
122       BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: "
123                 << (unsigned)LPStartEncoding << '\n';
124       return createFatalBOLTError("");
125     }
126     LPStart = *MaybeLPStart;
127   }
128 
129   const uint8_t TTypeEncoding = Data.getU8(&Offset);
130   LSDATypeEncoding = TTypeEncoding;
131   size_t TTypeEncodingSize = 0;
132   uintptr_t TTypeEnd = 0;
133   if (TTypeEncoding != DW_EH_PE_omit) {
134     TTypeEnd = Data.getULEB128(&Offset);
135     TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
136   }
137 
138   if (opts::PrintExceptions) {
139     BC.outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
140               << " for function " << *this << "]:\n";
141     BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
142               << '\n';
143     BC.outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
144     BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding)
145               << '\n';
146     BC.outs() << "TType End = " << TTypeEnd << '\n';
147   }
148 
149   // Table to store list of indices in type table. Entries are uleb128 values.
150   const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
151 
152   // Offset past the last decoded index.
153   uint64_t MaxTypeIndexTableOffset = 0;
154 
155   // Max positive index used in type table.
156   unsigned MaxTypeIndex = 0;
157 
158   // The actual type info table starts at the same location, but grows in
159   // opposite direction. TTypeEncoding is used to encode stored values.
160   const uint64_t TypeTableStart = Offset + TTypeEnd;
161 
162   uint8_t CallSiteEncoding = Data.getU8(&Offset);
163   uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
164   uint64_t CallSiteTableStart = Offset;
165   uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
166   uint64_t CallSitePtr = CallSiteTableStart;
167   uint64_t ActionTableStart = CallSiteTableEnd;
168 
169   if (opts::PrintExceptions) {
170     BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
171     BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n';
172     BC.outs() << '\n';
173   }
174 
175   this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
176   const uint64_t RangeBase = getAddress();
177   while (CallSitePtr < CallSiteTableEnd) {
178     uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
179                                              CallSitePtr + LSDASectionAddress);
180     uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
181                                               CallSitePtr + LSDASectionAddress);
182     uint64_t LandingPad = *Data.getEncodedPointer(
183         &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
184     uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
185     if (LandingPad)
186       LandingPad += LPStart;
187 
188     if (opts::PrintExceptions) {
189       BC.outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
190                 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
191                 << "); landing pad: 0x" << Twine::utohexstr(LandingPad)
192                 << "; action entry: 0x" << Twine::utohexstr(ActionEntry)
193                 << "\n";
194       BC.outs() << "  current offset is " << (CallSitePtr - CallSiteTableStart)
195                 << '\n';
196     }
197 
198     // Create a handler entry if necessary.
199     MCSymbol *LPSymbol = nullptr;
200     if (LandingPad) {
201       // Verify if landing pad code is located outside current function
202       // Support landing pad to builtin_unreachable
203       if (LandingPad < Address || LandingPad > Address + getSize()) {
204         BinaryFunction *Fragment =
205             BC.getBinaryFunctionContainingAddress(LandingPad);
206         assert(Fragment != nullptr &&
207                "BOLT-ERROR: cannot find landing pad fragment");
208         BC.addInterproceduralReference(this, Fragment->getAddress());
209         BC.processInterproceduralReferences();
210         assert(isParentOrChildOf(*Fragment) &&
211                "BOLT-ERROR: cannot have landing pads in different functions");
212         setHasIndirectTargetToSplitFragment(true);
213         BC.addFragmentsToSkip(this);
214         return Error::success();
215       }
216 
217       const uint64_t LPOffset = LandingPad - getAddress();
218       if (!getInstructionAtOffset(LPOffset)) {
219         if (opts::Verbosity >= 1)
220           BC.errs() << "BOLT-WARNING: landing pad "
221                     << Twine::utohexstr(LPOffset)
222                     << " not pointing to an instruction in function " << *this
223                     << " - ignoring.\n";
224       } else {
225         auto Label = Labels.find(LPOffset);
226         if (Label != Labels.end()) {
227           LPSymbol = Label->second;
228         } else {
229           LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
230           Labels[LPOffset] = LPSymbol;
231         }
232       }
233     }
234 
235     // Mark all call instructions in the range.
236     auto II = Instructions.find(Start);
237     auto IE = Instructions.end();
238     assert(II != IE && "exception range not pointing to an instruction");
239     do {
240       MCInst &Instruction = II->second;
241       if (BC.MIB->isCall(Instruction) &&
242           !BC.MIB->getConditionalTailCall(Instruction)) {
243         assert(!BC.MIB->isInvoke(Instruction) &&
244                "overlapping exception ranges detected");
245         // Add extra operands to a call instruction making it an invoke from
246         // now on.
247         BC.MIB->addEHInfo(Instruction,
248                           MCPlus::MCLandingPad(LPSymbol, ActionEntry));
249       }
250       ++II;
251     } while (II != IE && II->first < Start + Length);
252 
253     if (ActionEntry != 0) {
254       auto printType = [&](int Index, raw_ostream &OS) {
255         assert(Index > 0 && "only positive indices are valid");
256         uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
257         const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
258         uint64_t TypeAddress =
259             *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
260         if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
261           TypeAddress = 0;
262         if (TypeAddress == 0) {
263           OS << "<all>";
264           return;
265         }
266         if (TTypeEncoding & DW_EH_PE_indirect) {
267           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
268           assert(PointerOrErr && "failed to decode indirect address");
269           TypeAddress = *PointerOrErr;
270         }
271         if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
272           OS << TypeSymBD->getName();
273         else
274           OS << "0x" << Twine::utohexstr(TypeAddress);
275       };
276       if (opts::PrintExceptions)
277         BC.outs() << "    actions: ";
278       uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
279       int64_t ActionType;
280       int64_t ActionNext;
281       const char *Sep = "";
282       do {
283         ActionType = Data.getSLEB128(&ActionPtr);
284         const uint32_t Self = ActionPtr;
285         ActionNext = Data.getSLEB128(&ActionPtr);
286         if (opts::PrintExceptions)
287           BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
288         if (ActionType == 0) {
289           if (opts::PrintExceptions)
290             BC.outs() << "cleanup";
291         } else if (ActionType > 0) {
292           // It's an index into a type table.
293           MaxTypeIndex =
294               std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
295           if (opts::PrintExceptions) {
296             BC.outs() << "catch type ";
297             printType(ActionType, BC.outs());
298           }
299         } else { // ActionType < 0
300           if (opts::PrintExceptions)
301             BC.outs() << "filter exception types ";
302           const char *TSep = "";
303           // ActionType is a negative *byte* offset into *uleb128-encoded* table
304           // of indices with base 1.
305           // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
306           // encoded using uleb128 thus we cannot directly dereference them.
307           uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
308           while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
309             MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
310             if (opts::PrintExceptions) {
311               BC.outs() << TSep;
312               printType(Index, BC.outs());
313               TSep = ", ";
314             }
315           }
316           MaxTypeIndexTableOffset = std::max(
317               MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
318         }
319 
320         Sep = "; ";
321 
322         ActionPtr = Self + ActionNext;
323       } while (ActionNext);
324       if (opts::PrintExceptions)
325         BC.outs() << '\n';
326     }
327   }
328   if (opts::PrintExceptions)
329     BC.outs() << '\n';
330 
331   assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
332              Data.getData().size() &&
333          "LSDA entry has crossed section boundary");
334 
335   if (TTypeEnd) {
336     LSDAActionTable = LSDASectionData.slice(
337         ActionTableStart, TypeIndexTableStart -
338                               MaxTypeIndex * TTypeEncodingSize -
339                               ActionTableStart);
340     for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
341       uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
342       const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
343       uint64_t TypeAddress =
344           *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
345       if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
346         TypeAddress = 0;
347       if (TTypeEncoding & DW_EH_PE_indirect) {
348         LSDATypeAddressTable.emplace_back(TypeAddress);
349         if (TypeAddress) {
350           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
351           assert(PointerOrErr && "failed to decode indirect address");
352           TypeAddress = *PointerOrErr;
353         }
354       }
355       LSDATypeTable.emplace_back(TypeAddress);
356     }
357     LSDATypeIndexTable =
358         LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
359   }
360   return Error::success();
361 }
362 
363 void BinaryFunction::updateEHRanges() {
364   if (getSize() == 0)
365     return;
366 
367   assert(CurrentState == State::CFG_Finalized && "unexpected state");
368 
369   // Build call sites table.
370   struct EHInfo {
371     const MCSymbol *LP; // landing pad
372     uint64_t Action;
373   };
374 
375   // Sites to update.
376   CallSitesList Sites;
377 
378   for (FunctionFragment &FF : getLayout().fragments()) {
379     // If previous call can throw, this is its exception handler.
380     EHInfo PreviousEH = {nullptr, 0};
381 
382     // Marker for the beginning of exceptions range.
383     const MCSymbol *StartRange = nullptr;
384 
385     for (BinaryBasicBlock *const BB : FF) {
386       for (MCInst &Instr : *BB) {
387         if (!BC.MIB->isCall(Instr))
388           continue;
389 
390         // Instruction can throw an exception that should be handled.
391         const bool Throws = BC.MIB->isInvoke(Instr);
392 
393         // Ignore the call if it's a continuation of a no-throw gap.
394         if (!Throws && !StartRange)
395           continue;
396 
397         // Extract exception handling information from the instruction.
398         const MCSymbol *LP = nullptr;
399         uint64_t Action = 0;
400         if (const std::optional<MCPlus::MCLandingPad> EHInfo =
401                 BC.MIB->getEHInfo(Instr))
402           std::tie(LP, Action) = *EHInfo;
403 
404         // No action if the exception handler has not changed.
405         if (Throws && StartRange && PreviousEH.LP == LP &&
406             PreviousEH.Action == Action)
407           continue;
408 
409         // Same symbol is used for the beginning and the end of the range.
410         MCSymbol *EHSymbol;
411         if (MCSymbol *InstrLabel = BC.MIB->getLabel(Instr)) {
412           EHSymbol = InstrLabel;
413         } else {
414           std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
415           EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
416           BC.MIB->setLabel(Instr, EHSymbol);
417         }
418 
419         // At this point we could be in one of the following states:
420         //
421         // I. Exception handler has changed and we need to close previous range
422         //    and start a new one.
423         //
424         // II. Start a new exception range after the gap.
425         //
426         // III. Close current exception range and start a new gap.
427         const MCSymbol *EndRange;
428         if (StartRange) {
429           // I, III:
430           EndRange = EHSymbol;
431         } else {
432           // II:
433           StartRange = EHSymbol;
434           EndRange = nullptr;
435         }
436 
437         // Close the previous range.
438         if (EndRange)
439           Sites.emplace_back(
440               FF.getFragmentNum(),
441               CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
442 
443         if (Throws) {
444           // I, II:
445           StartRange = EHSymbol;
446           PreviousEH = EHInfo{LP, Action};
447         } else {
448           StartRange = nullptr;
449         }
450       }
451     }
452 
453     // Check if we need to close the range.
454     if (StartRange) {
455       const MCSymbol *EndRange = getFunctionEndLabel(FF.getFragmentNum());
456       Sites.emplace_back(
457           FF.getFragmentNum(),
458           CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
459     }
460   }
461 
462   addCallSites(Sites);
463 }
464 
// Mask selecting the two most significant bits of a CFI opcode byte.
// CFIReaderWriter::fillCFIInfoFor() reduces an opcode to just these bits
// whenever any of them is set.
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
466 
467 CFIReaderWriter::CFIReaderWriter(BinaryContext &BC,
468                                  const DWARFDebugFrame &EHFrame)
469     : BC(BC) {
470   // Prepare FDEs for fast lookup
471   for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
472     const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
473     // Skip CIEs.
474     if (!CurFDE)
475       continue;
476     // There could me multiple FDEs with the same initial address, and perhaps
477     // different sizes (address ranges). Use the first entry with non-zero size.
478     auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
479     if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
480       if (CurFDE->getAddressRange()) {
481         if (FDEI->second->getAddressRange() == 0) {
482           FDEI->second = CurFDE;
483         } else if (opts::Verbosity > 0) {
484           BC.errs() << "BOLT-WARNING: different FDEs for function at 0x"
485                     << Twine::utohexstr(FDEI->first)
486                     << " detected; sizes: " << FDEI->second->getAddressRange()
487                     << " and " << CurFDE->getAddressRange() << '\n';
488         }
489       }
490     } else {
491       FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
492     }
493   }
494 }
495 
496 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
497   uint64_t Address = Function.getAddress();
498   auto I = FDEs.find(Address);
499   // Ignore zero-length FDE ranges.
500   if (I == FDEs.end() || !I->second->getAddressRange())
501     return true;
502 
503   const FDE &CurFDE = *I->second;
504   std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
505   Function.setLSDAAddress(LSDA ? *LSDA : 0);
506 
507   uint64_t Offset = Function.getFirstInstructionOffset();
508   uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
509   uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
510   if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
511     Function.setPersonalityFunction(
512         *CurFDE.getLinkedCIE()->getPersonalityAddress());
513     Function.setPersonalityEncoding(
514         *CurFDE.getLinkedCIE()->getPersonalityEncoding());
515   }
516 
517   auto decodeFrameInstruction = [this, &Function, &Offset, Address,
518                                  CodeAlignment, DataAlignment](
519                                     const CFIProgram::Instruction &Instr) {
520     uint8_t Opcode = Instr.Opcode;
521     if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
522       Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
523     switch (Instr.Opcode) {
524     case DW_CFA_nop:
525       break;
526     case DW_CFA_advance_loc4:
527     case DW_CFA_advance_loc2:
528     case DW_CFA_advance_loc1:
529     case DW_CFA_advance_loc:
530       // Advance our current address
531       Offset += CodeAlignment * int64_t(Instr.Ops[0]);
532       break;
533     case DW_CFA_offset_extended_sf:
534       Function.addCFIInstruction(
535           Offset,
536           MCCFIInstruction::createOffset(
537               nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
538       break;
539     case DW_CFA_offset_extended:
540     case DW_CFA_offset:
541       Function.addCFIInstruction(
542           Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
543                                                  DataAlignment * Instr.Ops[1]));
544       break;
545     case DW_CFA_restore_extended:
546     case DW_CFA_restore:
547       Function.addCFIInstruction(
548           Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
549       break;
550     case DW_CFA_set_loc:
551       assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
552       assert(Instr.Ops[0] <= Address + Function.getSize() &&
553              "set_loc out of function bounds");
554       Offset = Instr.Ops[0] - Address;
555       break;
556 
557     case DW_CFA_undefined:
558       Function.addCFIInstruction(
559           Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
560       break;
561     case DW_CFA_same_value:
562       Function.addCFIInstruction(
563           Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
564       break;
565     case DW_CFA_register:
566       Function.addCFIInstruction(
567           Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
568                                                    Instr.Ops[1]));
569       break;
570     case DW_CFA_remember_state:
571       Function.addCFIInstruction(
572           Offset, MCCFIInstruction::createRememberState(nullptr));
573       break;
574     case DW_CFA_restore_state:
575       Function.addCFIInstruction(Offset,
576                                  MCCFIInstruction::createRestoreState(nullptr));
577       break;
578     case DW_CFA_def_cfa:
579       Function.addCFIInstruction(
580           Offset,
581           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
582       break;
583     case DW_CFA_def_cfa_sf:
584       Function.addCFIInstruction(
585           Offset,
586           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
587                                       DataAlignment * int64_t(Instr.Ops[1])));
588       break;
589     case DW_CFA_def_cfa_register:
590       Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
591                                              nullptr, Instr.Ops[0]));
592       break;
593     case DW_CFA_def_cfa_offset:
594       Function.addCFIInstruction(
595           Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
596       break;
597     case DW_CFA_def_cfa_offset_sf:
598       Function.addCFIInstruction(
599           Offset, MCCFIInstruction::cfiDefCfaOffset(
600                       nullptr, DataAlignment * int64_t(Instr.Ops[0])));
601       break;
602     case DW_CFA_GNU_args_size:
603       Function.addCFIInstruction(
604           Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
605       Function.setUsesGnuArgsSize();
606       break;
607     case DW_CFA_val_offset_sf:
608     case DW_CFA_val_offset:
609       if (opts::Verbosity >= 1) {
610         BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
611       }
612       return false;
613     case DW_CFA_def_cfa_expression:
614     case DW_CFA_val_expression:
615     case DW_CFA_expression: {
616       StringRef ExprBytes = Instr.Expression->getData();
617       std::string Str;
618       raw_string_ostream OS(Str);
619       // Manually encode this instruction using CFI escape
620       OS << Opcode;
621       if (Opcode != DW_CFA_def_cfa_expression)
622         encodeULEB128(Instr.Ops[0], OS);
623       encodeULEB128(ExprBytes.size(), OS);
624       OS << ExprBytes;
625       Function.addCFIInstruction(
626           Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
627       break;
628     }
629     case DW_CFA_MIPS_advance_loc8:
630       if (opts::Verbosity >= 1)
631         BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
632       return false;
633     case DW_CFA_GNU_window_save:
634       // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
635       // id but mean different things. The latter is used in AArch64.
636       if (Function.getBinaryContext().isAArch64()) {
637         Function.addCFIInstruction(
638             Offset, MCCFIInstruction::createNegateRAState(nullptr));
639         break;
640       }
641       if (opts::Verbosity >= 1)
642         BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
643       return false;
644     case DW_CFA_lo_user:
645     case DW_CFA_hi_user:
646       if (opts::Verbosity >= 1)
647         BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
648       return false;
649     default:
650       if (opts::Verbosity >= 1)
651         BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: "
652                   << Instr.Opcode << '\n';
653       return false;
654     }
655 
656     return true;
657   };
658 
659   for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
660     if (!decodeFrameInstruction(Instr))
661       return false;
662 
663   for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
664     if (!decodeFrameInstruction(Instr))
665       return false;
666 
667   return true;
668 }
669 
670 std::vector<char> CFIReaderWriter::generateEHFrameHeader(
671     const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame,
672     uint64_t EHFrameHeaderAddress,
673     std::vector<uint64_t> &FailedAddresses) const {
674   // Common PC -> FDE map to be written into .eh_frame_hdr.
675   std::map<uint64_t, uint64_t> PCToFDE;
676 
677   // Presort array for binary search.
678   llvm::sort(FailedAddresses);
679 
680   // Initialize PCToFDE using NewEHFrame.
681   for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
682     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
683     if (FDE == nullptr)
684       continue;
685     const uint64_t FuncAddress = FDE->getInitialLocation();
686     const uint64_t FDEAddress =
687         NewEHFrame.getEHFrameAddress() + FDE->getOffset();
688 
689     // Ignore unused FDEs.
690     if (FuncAddress == 0)
691       continue;
692 
693     // Add the address to the map unless we failed to write it.
694     if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
695                             FuncAddress)) {
696       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
697                         << Twine::utohexstr(FuncAddress) << " is at 0x"
698                         << Twine::utohexstr(FDEAddress) << '\n');
699       PCToFDE[FuncAddress] = FDEAddress;
700     }
701   };
702 
703   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
704                     << llvm::size(NewEHFrame.entries()) << " entries\n");
705 
706   // Add entries from the original .eh_frame corresponding to the functions
707   // that we did not update.
708   for (const dwarf::FrameEntry &Entry : OldEHFrame) {
709     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
710     if (FDE == nullptr)
711       continue;
712     const uint64_t FuncAddress = FDE->getInitialLocation();
713     const uint64_t FDEAddress =
714         OldEHFrame.getEHFrameAddress() + FDE->getOffset();
715 
716     // Add the address if we failed to write it.
717     if (PCToFDE.count(FuncAddress) == 0) {
718       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
719                         << Twine::utohexstr(FuncAddress) << " is at 0x"
720                         << Twine::utohexstr(FDEAddress) << '\n');
721       PCToFDE[FuncAddress] = FDEAddress;
722     }
723   };
724 
725   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
726                     << llvm::size(OldEHFrame.entries()) << " entries\n");
727 
728   // Generate a new .eh_frame_hdr based on the new map.
729 
730   // Header plus table of entries of size 8 bytes.
731   std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
732 
733   // Version is 1.
734   EHFrameHeader[0] = 1;
735   // Encoding of the eh_frame pointer.
736   EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
737   // Encoding of the count field to follow.
738   EHFrameHeader[2] = DW_EH_PE_udata4;
739   // Encoding of the table entries - 4-byte offset from the start of the header.
740   EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
741 
742   // Address of eh_frame. Use the new one.
743   support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
744       NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
745 
746   // Number of entries in the table (FDE count).
747   support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
748 
749   // Write the table at offset 12.
750   char *Ptr = EHFrameHeader.data();
751   uint32_t Offset = 12;
752   for (const auto &PCI : PCToFDE) {
753     int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
754     assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
755     support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
756     Offset += 4;
757     int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
758     assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
759     support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
760     Offset += 4;
761   }
762 
763   return EHFrameHeader;
764 }
765 
766 Error EHFrameParser::parseCIE(uint64_t StartOffset) {
767   uint8_t Version = Data.getU8(&Offset);
768   const char *Augmentation = Data.getCStr(&Offset);
769   StringRef AugmentationString(Augmentation ? Augmentation : "");
770   uint8_t AddressSize =
771       Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
772   Data.setAddressSize(AddressSize);
773   // Skip segment descriptor size
774   if (Version >= 4)
775     Offset += 1;
776   // Skip code alignment factor
777   Data.getULEB128(&Offset);
778   // Skip data alignment
779   Data.getSLEB128(&Offset);
780   // Skip return address register
781   if (Version == 1)
782     Offset += 1;
783   else
784     Data.getULEB128(&Offset);
785 
786   uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
787   uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
788   // Walk the augmentation string to get all the augmentation data.
789   for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
790     switch (AugmentationString[i]) {
791     default:
792       return createStringError(
793           errc::invalid_argument,
794           "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
795     case 'L':
796       LSDAPointerEncoding = Data.getU8(&Offset);
797       break;
798     case 'P': {
799       uint32_t PersonalityEncoding = Data.getU8(&Offset);
800       std::optional<uint64_t> Personality =
801           Data.getEncodedPointer(&Offset, PersonalityEncoding,
802                                  EHFrameAddress ? EHFrameAddress + Offset : 0);
803       // Patch personality address
804       if (Personality)
805         PatcherCallback(*Personality, Offset, PersonalityEncoding);
806       break;
807     }
808     case 'R':
809       FDEPointerEncoding = Data.getU8(&Offset);
810       break;
811     case 'z':
812       if (i)
813         return createStringError(
814             errc::invalid_argument,
815             "'z' must be the first character at 0x%" PRIx64, StartOffset);
816       // Skip augmentation length
817       Data.getULEB128(&Offset);
818       break;
819     case 'S':
820     case 'B':
821       break;
822     }
823   }
824   Entries.emplace_back(std::make_unique<CIEInfo>(
825       FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
826   CIEs[StartOffset] = &*Entries.back();
827   return Error::success();
828 }
829 
830 Error EHFrameParser::parseFDE(uint64_t CIEPointer,
831                               uint64_t StartStructureOffset) {
832   std::optional<uint64_t> LSDAAddress;
833   CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
834 
835   // The address size is encoded in the CIE we reference.
836   if (!Cie)
837     return createStringError(errc::invalid_argument,
838                              "parsing FDE data at 0x%" PRIx64
839                              " failed due to missing CIE",
840                              StartStructureOffset);
841   // Patch initial location
842   if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
843                                         EHFrameAddress + Offset)) {
844     PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
845   }
846   // Skip address range
847   Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
848 
849   // Process augmentation data for this FDE.
850   StringRef AugmentationString = Cie->AugmentationString;
851   if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
852     // Skip augmentation length
853     Data.getULEB128(&Offset);
854     LSDAAddress =
855         Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
856                                EHFrameAddress ? Offset + EHFrameAddress : 0);
857     // Patch LSDA address
858     PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
859   }
860   return Error::success();
861 }
862 
863 Error EHFrameParser::parse() {
864   while (Data.isValidOffset(Offset)) {
865     const uint64_t StartOffset = Offset;
866 
867     uint64_t Length;
868     DwarfFormat Format;
869     std::tie(Length, Format) = Data.getInitialLength(&Offset);
870 
871     // If the Length is 0, then this CIE is a terminator
872     if (Length == 0)
873       break;
874 
875     const uint64_t StartStructureOffset = Offset;
876     const uint64_t EndStructureOffset = Offset + Length;
877 
878     Error Err = Error::success();
879     const uint64_t Id = Data.getRelocatedValue(4, &Offset,
880                                                /*SectionIndex=*/nullptr, &Err);
881     if (Err)
882       return Err;
883 
884     if (!Id) {
885       if (Error Err = parseCIE(StartOffset))
886         return Err;
887     } else {
888       if (Error Err = parseFDE(Id, StartStructureOffset))
889         return Err;
890     }
891     Offset = EndStructureOffset;
892   }
893 
894   return Error::success();
895 }
896 
897 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
898                            PatcherCallbackTy PatcherCallback) {
899   EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
900   return Parser.parse();
901 }
902 
903 } // namespace bolt
904 } // namespace llvm
905