xref: /llvm-project/bolt/lib/Core/Exceptions.cpp (revision 1b8e0cf090a08b2c517eb2a3e101332d692063c2)
1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for handling C++ exception meta data.
10 //
11 // Some of the code is taken from examples/ExceptionDemo
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/Dwarf.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <map>
29 
30 #undef  DEBUG_TYPE
31 #define DEBUG_TYPE "bolt-exceptions"
32 
33 using namespace llvm::dwarf;
34 
35 namespace opts {
36 
37 extern llvm::cl::OptionCategory BoltCategory;
38 
39 extern llvm::cl::opt<unsigned> Verbosity;
40 
41 static llvm::cl::opt<bool>
42     PrintExceptions("print-exceptions",
43                     llvm::cl::desc("print exception handling data"),
44                     llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
45 
46 } // namespace opts
47 
48 namespace llvm {
49 namespace bolt {
50 
// Read and dump the .gcc_except_table section entry.
52 //
53 // .gcc_except_table section contains a set of Language-Specific Data Areas -
54 // a fancy name for exception handling tables. There's one  LSDA entry per
55 // function. However, we can't actually tell which function LSDA refers to
56 // unless we parse .eh_frame entry that refers to the LSDA.
57 // Then inside LSDA most addresses are encoded relative to the function start,
58 // so we need the function context in order to get to real addresses.
59 //
60 // The best visual representation of the tables comprising LSDA and
61 // relationships between them is illustrated at:
62 //   https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
63 // Keep in mind that GCC implementation deviates slightly from that document.
64 //
65 // To summarize, there are 4 tables in LSDA: call site table, actions table,
66 // types table, and types index table (for indirection). The main table contains
67 // call site entries. Each call site includes a PC range that can throw an
68 // exception, a handler (landing pad), and a reference to an entry in the action
69 // table. The handler and/or action could be 0. The action entry is a head
70 // of a list of actions associated with a call site. The action table contains
71 // all such lists (it could be optimized to share list tails). Each action could
72 // be either to catch an exception of a given type, to perform a cleanup, or to
73 // propagate the exception after filtering it out (e.g. to make sure function
74 // exception specification is not violated). Catch action contains a reference
75 // to an entry in the type table, and filter action refers to an entry in the
76 // type index table to encode a set of types to filter.
77 //
78 // Call site table follows LSDA header. Action table immediately follows the
79 // call site table.
80 //
81 // Both types table and type index table start at the same location, but they
82 // grow in opposite directions (types go up, indices go down). The beginning of
83 // these tables is encoded in LSDA header. Sizes for both of the tables are not
84 // included anywhere.
85 //
86 // We have to parse all of the tables to determine their sizes. Then we have
87 // to parse the call site table and associate discovered information with
88 // actual call instructions and landing pad blocks.
89 //
90 // For the purpose of rewriting exception handling tables, we can reuse action,
91 // and type index tables in their original binary format.
92 //
93 // Type table could be encoded using position-independent references, and thus
94 // may require relocation.
95 //
96 // Ideally we should be able to re-write LSDA in-place, without the need to
97 // allocate a new space for it. Sadly there's no guarantee that the new call
98 // site table will be the same size as GCC uses uleb encodings for PC offsets.
99 //
100 // Note: some functions have LSDA entries with 0 call site entries.
101 Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
102                                 uint64_t LSDASectionAddress) {
103   assert(CurrentState == State::Disassembled && "unexpected function state");
104 
105   if (!getLSDAAddress())
106     return Error::success();
107 
108   DWARFDataExtractor Data(
109       StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110                 LSDASectionData.size()),
111       BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
112   uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
113   assert(Data.isValidOffset(Offset) && "wrong LSDA address");
114 
115   const uint8_t LPStartEncoding = Data.getU8(&Offset);
116   uint64_t LPStart = Address;
117   if (LPStartEncoding != dwarf::DW_EH_PE_omit) {
118     std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
119         &Offset, LPStartEncoding, Offset + LSDASectionAddress);
120     if (!MaybeLPStart) {
121       BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: "
122                 << (unsigned)LPStartEncoding << '\n';
123       return createFatalBOLTError("");
124     }
125     LPStart = *MaybeLPStart;
126   }
127 
128   const uint8_t TTypeEncoding = Data.getU8(&Offset);
129   LSDATypeEncoding = TTypeEncoding;
130   size_t TTypeEncodingSize = 0;
131   uintptr_t TTypeEnd = 0;
132   if (TTypeEncoding != DW_EH_PE_omit) {
133     TTypeEnd = Data.getULEB128(&Offset);
134     TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
135   }
136 
137   if (opts::PrintExceptions) {
138     BC.outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
139               << " for function " << *this << "]:\n";
140     BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
141               << '\n';
142     BC.outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
143     BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding)
144               << '\n';
145     BC.outs() << "TType End = " << TTypeEnd << '\n';
146   }
147 
148   // Table to store list of indices in type table. Entries are uleb128 values.
149   const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
150 
151   // Offset past the last decoded index.
152   uint64_t MaxTypeIndexTableOffset = 0;
153 
154   // Max positive index used in type table.
155   unsigned MaxTypeIndex = 0;
156 
157   // The actual type info table starts at the same location, but grows in
158   // opposite direction. TTypeEncoding is used to encode stored values.
159   const uint64_t TypeTableStart = Offset + TTypeEnd;
160 
161   uint8_t CallSiteEncoding = Data.getU8(&Offset);
162   uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
163   uint64_t CallSiteTableStart = Offset;
164   uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
165   uint64_t CallSitePtr = CallSiteTableStart;
166   uint64_t ActionTableStart = CallSiteTableEnd;
167 
168   if (opts::PrintExceptions) {
169     BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
170     BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n';
171     BC.outs() << '\n';
172   }
173 
174   this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
175   const uint64_t RangeBase = getAddress();
176   while (CallSitePtr < CallSiteTableEnd) {
177     uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
178                                              CallSitePtr + LSDASectionAddress);
179     uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
180                                               CallSitePtr + LSDASectionAddress);
181     uint64_t LandingPad = *Data.getEncodedPointer(
182         &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
183     uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
184     if (LandingPad)
185       LandingPad += LPStart;
186 
187     if (opts::PrintExceptions) {
188       BC.outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
189                 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
190                 << "); landing pad: 0x" << Twine::utohexstr(LandingPad)
191                 << "; action entry: 0x" << Twine::utohexstr(ActionEntry)
192                 << "\n";
193       BC.outs() << "  current offset is " << (CallSitePtr - CallSiteTableStart)
194                 << '\n';
195     }
196 
197     // Create a handler entry if necessary.
198     MCSymbol *LPSymbol = nullptr;
199     if (LandingPad) {
200       // Verify if landing pad code is located outside current function
201       // Support landing pad to builtin_unreachable
202       if (LandingPad < Address || LandingPad > Address + getSize()) {
203         BinaryFunction *Fragment =
204             BC.getBinaryFunctionContainingAddress(LandingPad);
205         assert(Fragment != nullptr &&
206                "BOLT-ERROR: cannot find landing pad fragment");
207         BC.addInterproceduralReference(this, Fragment->getAddress());
208         BC.processInterproceduralReferences();
209         assert(BC.areRelatedFragments(this, Fragment) &&
210                "BOLT-ERROR: cannot have landing pads in different functions");
211         setHasIndirectTargetToSplitFragment(true);
212         BC.addFragmentsToSkip(this);
213         return Error::success();
214       }
215 
216       const uint64_t LPOffset = LandingPad - getAddress();
217       if (!getInstructionAtOffset(LPOffset)) {
218         if (opts::Verbosity >= 1)
219           BC.errs() << "BOLT-WARNING: landing pad "
220                     << Twine::utohexstr(LPOffset)
221                     << " not pointing to an instruction in function " << *this
222                     << " - ignoring.\n";
223       } else {
224         auto Label = Labels.find(LPOffset);
225         if (Label != Labels.end()) {
226           LPSymbol = Label->second;
227         } else {
228           LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
229           Labels[LPOffset] = LPSymbol;
230         }
231       }
232     }
233 
234     // Mark all call instructions in the range.
235     auto II = Instructions.find(Start);
236     auto IE = Instructions.end();
237     assert(II != IE && "exception range not pointing to an instruction");
238     do {
239       MCInst &Instruction = II->second;
240       if (BC.MIB->isCall(Instruction) &&
241           !BC.MIB->getConditionalTailCall(Instruction)) {
242         assert(!BC.MIB->isInvoke(Instruction) &&
243                "overlapping exception ranges detected");
244         // Add extra operands to a call instruction making it an invoke from
245         // now on.
246         BC.MIB->addEHInfo(Instruction,
247                           MCPlus::MCLandingPad(LPSymbol, ActionEntry));
248       }
249       ++II;
250     } while (II != IE && II->first < Start + Length);
251 
252     if (ActionEntry != 0) {
253       auto printType = [&](int Index, raw_ostream &OS) {
254         assert(Index > 0 && "only positive indices are valid");
255         uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
256         const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
257         uint64_t TypeAddress =
258             *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
259         if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
260           TypeAddress = 0;
261         if (TypeAddress == 0) {
262           OS << "<all>";
263           return;
264         }
265         if (TTypeEncoding & DW_EH_PE_indirect) {
266           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
267           assert(PointerOrErr && "failed to decode indirect address");
268           TypeAddress = *PointerOrErr;
269         }
270         if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
271           OS << TypeSymBD->getName();
272         else
273           OS << "0x" << Twine::utohexstr(TypeAddress);
274       };
275       if (opts::PrintExceptions)
276         BC.outs() << "    actions: ";
277       uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
278       int64_t ActionType;
279       int64_t ActionNext;
280       const char *Sep = "";
281       do {
282         ActionType = Data.getSLEB128(&ActionPtr);
283         const uint32_t Self = ActionPtr;
284         ActionNext = Data.getSLEB128(&ActionPtr);
285         if (opts::PrintExceptions)
286           BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
287         if (ActionType == 0) {
288           if (opts::PrintExceptions)
289             BC.outs() << "cleanup";
290         } else if (ActionType > 0) {
291           // It's an index into a type table.
292           MaxTypeIndex =
293               std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
294           if (opts::PrintExceptions) {
295             BC.outs() << "catch type ";
296             printType(ActionType, BC.outs());
297           }
298         } else { // ActionType < 0
299           if (opts::PrintExceptions)
300             BC.outs() << "filter exception types ";
301           const char *TSep = "";
302           // ActionType is a negative *byte* offset into *uleb128-encoded* table
303           // of indices with base 1.
304           // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
305           // encoded using uleb128 thus we cannot directly dereference them.
306           uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
307           while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
308             MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
309             if (opts::PrintExceptions) {
310               BC.outs() << TSep;
311               printType(Index, BC.outs());
312               TSep = ", ";
313             }
314           }
315           MaxTypeIndexTableOffset = std::max(
316               MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
317         }
318 
319         Sep = "; ";
320 
321         ActionPtr = Self + ActionNext;
322       } while (ActionNext);
323       if (opts::PrintExceptions)
324         BC.outs() << '\n';
325     }
326   }
327   if (opts::PrintExceptions)
328     BC.outs() << '\n';
329 
330   assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
331              Data.getData().size() &&
332          "LSDA entry has crossed section boundary");
333 
334   if (TTypeEnd) {
335     LSDAActionTable = LSDASectionData.slice(
336         ActionTableStart, TypeIndexTableStart -
337                               MaxTypeIndex * TTypeEncodingSize -
338                               ActionTableStart);
339     for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
340       uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
341       const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
342       uint64_t TypeAddress =
343           *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
344       if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
345         TypeAddress = 0;
346       if (TTypeEncoding & DW_EH_PE_indirect) {
347         LSDATypeAddressTable.emplace_back(TypeAddress);
348         if (TypeAddress) {
349           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
350           assert(PointerOrErr && "failed to decode indirect address");
351           TypeAddress = *PointerOrErr;
352         }
353       }
354       LSDATypeTable.emplace_back(TypeAddress);
355     }
356     LSDATypeIndexTable =
357         LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
358   }
359   return Error::success();
360 }
361 
362 void BinaryFunction::updateEHRanges() {
363   if (getSize() == 0)
364     return;
365 
366   assert(CurrentState == State::CFG_Finalized && "unexpected state");
367 
368   // Build call sites table.
369   struct EHInfo {
370     const MCSymbol *LP; // landing pad
371     uint64_t Action;
372   };
373 
374   // Sites to update.
375   CallSitesList Sites;
376 
377   for (FunctionFragment &FF : getLayout().fragments()) {
378     // If previous call can throw, this is its exception handler.
379     EHInfo PreviousEH = {nullptr, 0};
380 
381     // Marker for the beginning of exceptions range.
382     const MCSymbol *StartRange = nullptr;
383 
384     for (BinaryBasicBlock *const BB : FF) {
385       for (MCInst &Instr : *BB) {
386         if (!BC.MIB->isCall(Instr))
387           continue;
388 
389         // Instruction can throw an exception that should be handled.
390         const bool Throws = BC.MIB->isInvoke(Instr);
391 
392         // Ignore the call if it's a continuation of a no-throw gap.
393         if (!Throws && !StartRange)
394           continue;
395 
396         // Extract exception handling information from the instruction.
397         const MCSymbol *LP = nullptr;
398         uint64_t Action = 0;
399         if (const std::optional<MCPlus::MCLandingPad> EHInfo =
400                 BC.MIB->getEHInfo(Instr))
401           std::tie(LP, Action) = *EHInfo;
402 
403         // No action if the exception handler has not changed.
404         if (Throws && StartRange && PreviousEH.LP == LP &&
405             PreviousEH.Action == Action)
406           continue;
407 
408         // Same symbol is used for the beginning and the end of the range.
409         MCSymbol *EHSymbol;
410         if (MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr)) {
411           EHSymbol = InstrLabel;
412         } else {
413           std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
414           EHSymbol = BC.MIB->getOrCreateInstLabel(Instr, "EH", BC.Ctx.get());
415         }
416 
417         // At this point we could be in one of the following states:
418         //
419         // I. Exception handler has changed and we need to close previous range
420         //    and start a new one.
421         //
422         // II. Start a new exception range after the gap.
423         //
424         // III. Close current exception range and start a new gap.
425         const MCSymbol *EndRange;
426         if (StartRange) {
427           // I, III:
428           EndRange = EHSymbol;
429         } else {
430           // II:
431           StartRange = EHSymbol;
432           EndRange = nullptr;
433         }
434 
435         // Close the previous range.
436         if (EndRange)
437           Sites.emplace_back(
438               FF.getFragmentNum(),
439               CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
440 
441         if (Throws) {
442           // I, II:
443           StartRange = EHSymbol;
444           PreviousEH = EHInfo{LP, Action};
445         } else {
446           StartRange = nullptr;
447         }
448       }
449     }
450 
451     // Check if we need to close the range.
452     if (StartRange) {
453       const MCSymbol *EndRange = getFunctionEndLabel(FF.getFragmentNum());
454       Sites.emplace_back(
455           FF.getFragmentNum(),
456           CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
457     }
458   }
459 
460   addCallSites(Sites);
461 }
462 
// Bits of a CFI opcode byte that are tested to recognize a primary opcode
// (top two bits set in some combination).
constexpr uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xC0;
464 
465 CFIReaderWriter::CFIReaderWriter(BinaryContext &BC,
466                                  const DWARFDebugFrame &EHFrame)
467     : BC(BC) {
468   // Prepare FDEs for fast lookup
469   for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
470     const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
471     // Skip CIEs.
472     if (!CurFDE)
473       continue;
474     // There could me multiple FDEs with the same initial address, and perhaps
475     // different sizes (address ranges). Use the first entry with non-zero size.
476     auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
477     if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
478       if (CurFDE->getAddressRange()) {
479         if (FDEI->second->getAddressRange() == 0) {
480           FDEI->second = CurFDE;
481         } else if (opts::Verbosity > 0) {
482           BC.errs() << "BOLT-WARNING: different FDEs for function at 0x"
483                     << Twine::utohexstr(FDEI->first)
484                     << " detected; sizes: " << FDEI->second->getAddressRange()
485                     << " and " << CurFDE->getAddressRange() << '\n';
486         }
487       }
488     } else {
489       FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
490     }
491   }
492 }
493 
494 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
495   uint64_t Address = Function.getAddress();
496   auto I = FDEs.find(Address);
497   // Ignore zero-length FDE ranges.
498   if (I == FDEs.end() || !I->second->getAddressRange())
499     return true;
500 
501   const FDE &CurFDE = *I->second;
502   std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
503   Function.setLSDAAddress(LSDA ? *LSDA : 0);
504 
505   uint64_t Offset = Function.getFirstInstructionOffset();
506   uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
507   uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
508   if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
509     Function.setPersonalityFunction(
510         *CurFDE.getLinkedCIE()->getPersonalityAddress());
511     Function.setPersonalityEncoding(
512         *CurFDE.getLinkedCIE()->getPersonalityEncoding());
513   }
514 
515   auto decodeFrameInstruction = [this, &Function, &Offset, Address,
516                                  CodeAlignment, DataAlignment](
517                                     const CFIProgram::Instruction &Instr) {
518     uint8_t Opcode = Instr.Opcode;
519     if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
520       Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
521     switch (Instr.Opcode) {
522     case DW_CFA_nop:
523       break;
524     case DW_CFA_advance_loc4:
525     case DW_CFA_advance_loc2:
526     case DW_CFA_advance_loc1:
527     case DW_CFA_advance_loc:
528       // Advance our current address
529       Offset += CodeAlignment * int64_t(Instr.Ops[0]);
530       break;
531     case DW_CFA_offset_extended_sf:
532       Function.addCFIInstruction(
533           Offset,
534           MCCFIInstruction::createOffset(
535               nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
536       break;
537     case DW_CFA_offset_extended:
538     case DW_CFA_offset:
539       Function.addCFIInstruction(
540           Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
541                                                  DataAlignment * Instr.Ops[1]));
542       break;
543     case DW_CFA_restore_extended:
544     case DW_CFA_restore:
545       Function.addCFIInstruction(
546           Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
547       break;
548     case DW_CFA_set_loc:
549       assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
550       assert(Instr.Ops[0] <= Address + Function.getSize() &&
551              "set_loc out of function bounds");
552       Offset = Instr.Ops[0] - Address;
553       break;
554 
555     case DW_CFA_undefined:
556       Function.addCFIInstruction(
557           Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
558       break;
559     case DW_CFA_same_value:
560       Function.addCFIInstruction(
561           Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
562       break;
563     case DW_CFA_register:
564       Function.addCFIInstruction(
565           Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
566                                                    Instr.Ops[1]));
567       break;
568     case DW_CFA_remember_state:
569       Function.addCFIInstruction(
570           Offset, MCCFIInstruction::createRememberState(nullptr));
571       break;
572     case DW_CFA_restore_state:
573       Function.addCFIInstruction(Offset,
574                                  MCCFIInstruction::createRestoreState(nullptr));
575       break;
576     case DW_CFA_def_cfa:
577       Function.addCFIInstruction(
578           Offset,
579           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
580       break;
581     case DW_CFA_def_cfa_sf:
582       Function.addCFIInstruction(
583           Offset,
584           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
585                                       DataAlignment * int64_t(Instr.Ops[1])));
586       break;
587     case DW_CFA_def_cfa_register:
588       Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
589                                              nullptr, Instr.Ops[0]));
590       break;
591     case DW_CFA_def_cfa_offset:
592       Function.addCFIInstruction(
593           Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
594       break;
595     case DW_CFA_def_cfa_offset_sf:
596       Function.addCFIInstruction(
597           Offset, MCCFIInstruction::cfiDefCfaOffset(
598                       nullptr, DataAlignment * int64_t(Instr.Ops[0])));
599       break;
600     case DW_CFA_GNU_args_size:
601       Function.addCFIInstruction(
602           Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
603       Function.setUsesGnuArgsSize();
604       break;
605     case DW_CFA_val_offset_sf:
606     case DW_CFA_val_offset:
607       if (opts::Verbosity >= 1) {
608         BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
609       }
610       return false;
611     case DW_CFA_def_cfa_expression:
612     case DW_CFA_val_expression:
613     case DW_CFA_expression: {
614       StringRef ExprBytes = Instr.Expression->getData();
615       std::string Str;
616       raw_string_ostream OS(Str);
617       // Manually encode this instruction using CFI escape
618       OS << Opcode;
619       if (Opcode != DW_CFA_def_cfa_expression)
620         encodeULEB128(Instr.Ops[0], OS);
621       encodeULEB128(ExprBytes.size(), OS);
622       OS << ExprBytes;
623       Function.addCFIInstruction(
624           Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
625       break;
626     }
627     case DW_CFA_MIPS_advance_loc8:
628       if (opts::Verbosity >= 1)
629         BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
630       return false;
631     case DW_CFA_GNU_window_save:
632       // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
633       // id but mean different things. The latter is used in AArch64.
634       if (Function.getBinaryContext().isAArch64()) {
635         Function.addCFIInstruction(
636             Offset, MCCFIInstruction::createNegateRAState(nullptr));
637         break;
638       }
639       if (opts::Verbosity >= 1)
640         BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
641       return false;
642     case DW_CFA_lo_user:
643     case DW_CFA_hi_user:
644       if (opts::Verbosity >= 1)
645         BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
646       return false;
647     default:
648       if (opts::Verbosity >= 1)
649         BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: "
650                   << Instr.Opcode << '\n';
651       return false;
652     }
653 
654     return true;
655   };
656 
657   for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
658     if (!decodeFrameInstruction(Instr))
659       return false;
660 
661   for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
662     if (!decodeFrameInstruction(Instr))
663       return false;
664 
665   return true;
666 }
667 
668 std::vector<char>
669 CFIReaderWriter::generateEHFrameHeader(const DWARFDebugFrame &OldEHFrame,
670                                        const DWARFDebugFrame &NewEHFrame,
671                                        uint64_t EHFrameHeaderAddress) const {
672   // Common PC -> FDE map to be written into .eh_frame_hdr.
673   std::map<uint64_t, uint64_t> PCToFDE;
674 
675   // Initialize PCToFDE using NewEHFrame.
676   for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
677     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
678     if (FDE == nullptr)
679       continue;
680     const uint64_t FuncAddress = FDE->getInitialLocation();
681     const uint64_t FDEAddress =
682         NewEHFrame.getEHFrameAddress() + FDE->getOffset();
683 
684     // Ignore unused FDEs.
685     if (FuncAddress == 0)
686       continue;
687 
688     // Add the address to the map unless we failed to write it.
689     PCToFDE[FuncAddress] = FDEAddress;
690   };
691 
692   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
693                     << llvm::size(NewEHFrame.entries()) << " entries\n");
694 
695   // Add entries from the original .eh_frame corresponding to the functions
696   // that we did not update.
697   for (const dwarf::FrameEntry &Entry : OldEHFrame) {
698     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
699     if (FDE == nullptr)
700       continue;
701     const uint64_t FuncAddress = FDE->getInitialLocation();
702     const uint64_t FDEAddress =
703         OldEHFrame.getEHFrameAddress() + FDE->getOffset();
704 
705     // Add the address if we failed to write it.
706     if (PCToFDE.count(FuncAddress) == 0) {
707       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
708                         << Twine::utohexstr(FuncAddress) << " is at 0x"
709                         << Twine::utohexstr(FDEAddress) << '\n');
710       PCToFDE[FuncAddress] = FDEAddress;
711     }
712   };
713 
714   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
715                     << llvm::size(OldEHFrame.entries()) << " entries\n");
716 
717   // Generate a new .eh_frame_hdr based on the new map.
718 
719   // Header plus table of entries of size 8 bytes.
720   std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
721 
722   // Version is 1.
723   EHFrameHeader[0] = 1;
724   // Encoding of the eh_frame pointer.
725   EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
726   // Encoding of the count field to follow.
727   EHFrameHeader[2] = DW_EH_PE_udata4;
728   // Encoding of the table entries - 4-byte offset from the start of the header.
729   EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
730 
731   // Address of eh_frame. Use the new one.
732   support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
733       NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
734 
735   // Number of entries in the table (FDE count).
736   support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
737 
738   // Write the table at offset 12.
739   char *Ptr = EHFrameHeader.data();
740   uint32_t Offset = 12;
741   for (const auto &PCI : PCToFDE) {
742     int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
743     assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
744     support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
745     Offset += 4;
746     int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
747     assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
748     support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
749     Offset += 4;
750   }
751 
752   return EHFrameHeader;
753 }
754 
755 Error EHFrameParser::parseCIE(uint64_t StartOffset) {
756   uint8_t Version = Data.getU8(&Offset);
757   const char *Augmentation = Data.getCStr(&Offset);
758   StringRef AugmentationString(Augmentation ? Augmentation : "");
759   uint8_t AddressSize =
760       Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
761   Data.setAddressSize(AddressSize);
762   // Skip segment descriptor size
763   if (Version >= 4)
764     Offset += 1;
765   // Skip code alignment factor
766   Data.getULEB128(&Offset);
767   // Skip data alignment
768   Data.getSLEB128(&Offset);
769   // Skip return address register
770   if (Version == 1)
771     Offset += 1;
772   else
773     Data.getULEB128(&Offset);
774 
775   uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
776   uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
777   // Walk the augmentation string to get all the augmentation data.
778   for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
779     switch (AugmentationString[i]) {
780     default:
781       return createStringError(
782           errc::invalid_argument,
783           "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
784     case 'L':
785       LSDAPointerEncoding = Data.getU8(&Offset);
786       break;
787     case 'P': {
788       uint32_t PersonalityEncoding = Data.getU8(&Offset);
789       std::optional<uint64_t> Personality =
790           Data.getEncodedPointer(&Offset, PersonalityEncoding,
791                                  EHFrameAddress ? EHFrameAddress + Offset : 0);
792       // Patch personality address
793       if (Personality)
794         PatcherCallback(*Personality, Offset, PersonalityEncoding);
795       break;
796     }
797     case 'R':
798       FDEPointerEncoding = Data.getU8(&Offset);
799       break;
800     case 'z':
801       if (i)
802         return createStringError(
803             errc::invalid_argument,
804             "'z' must be the first character at 0x%" PRIx64, StartOffset);
805       // Skip augmentation length
806       Data.getULEB128(&Offset);
807       break;
808     case 'S':
809     case 'B':
810       break;
811     }
812   }
813   Entries.emplace_back(std::make_unique<CIEInfo>(
814       FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
815   CIEs[StartOffset] = &*Entries.back();
816   return Error::success();
817 }
818 
819 Error EHFrameParser::parseFDE(uint64_t CIEPointer,
820                               uint64_t StartStructureOffset) {
821   std::optional<uint64_t> LSDAAddress;
822   CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
823 
824   // The address size is encoded in the CIE we reference.
825   if (!Cie)
826     return createStringError(errc::invalid_argument,
827                              "parsing FDE data at 0x%" PRIx64
828                              " failed due to missing CIE",
829                              StartStructureOffset);
830   // Patch initial location
831   if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
832                                         EHFrameAddress + Offset)) {
833     PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
834   }
835   // Skip address range
836   Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
837 
838   // Process augmentation data for this FDE.
839   StringRef AugmentationString = Cie->AugmentationString;
840   if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
841     // Skip augmentation length
842     Data.getULEB128(&Offset);
843     LSDAAddress =
844         Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
845                                EHFrameAddress ? Offset + EHFrameAddress : 0);
846     // Patch LSDA address
847     PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
848   }
849   return Error::success();
850 }
851 
852 Error EHFrameParser::parse() {
853   while (Data.isValidOffset(Offset)) {
854     const uint64_t StartOffset = Offset;
855 
856     uint64_t Length;
857     DwarfFormat Format;
858     std::tie(Length, Format) = Data.getInitialLength(&Offset);
859 
860     // If the Length is 0, then this CIE is a terminator
861     if (Length == 0)
862       break;
863 
864     const uint64_t StartStructureOffset = Offset;
865     const uint64_t EndStructureOffset = Offset + Length;
866 
867     Error Err = Error::success();
868     const uint64_t Id = Data.getRelocatedValue(4, &Offset,
869                                                /*SectionIndex=*/nullptr, &Err);
870     if (Err)
871       return Err;
872 
873     if (!Id) {
874       if (Error Err = parseCIE(StartOffset))
875         return Err;
876     } else {
877       if (Error Err = parseFDE(Id, StartStructureOffset))
878         return Err;
879     }
880     Offset = EndStructureOffset;
881   }
882 
883   return Error::success();
884 }
885 
886 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
887                            PatcherCallbackTy PatcherCallback) {
888   EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
889   return Parser.parse();
890 }
891 
892 } // namespace bolt
893 } // namespace llvm
894