xref: /llvm-project/bolt/lib/Core/Exceptions.cpp (revision 290e482342826ee4c65bd6d2aece25736d3f0c7b)
1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for handling C++ exception meta data.
10 //
11 // Some of the code is taken from examples/ExceptionDemo
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/Dwarf.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <map>
29 
30 #undef  DEBUG_TYPE
31 #define DEBUG_TYPE "bolt-exceptions"
32 
33 using namespace llvm::dwarf;
34 
35 namespace opts {
36 
37 extern llvm::cl::OptionCategory BoltCategory;
38 
39 extern llvm::cl::opt<unsigned> Verbosity;
40 
41 static llvm::cl::opt<bool>
42 PrintExceptions("print-exceptions",
43   llvm::cl::desc("print exception handling data"),
44   llvm::cl::ZeroOrMore,
45   llvm::cl::Hidden,
46   llvm::cl::cat(BoltCategory));
47 
48 } // namespace opts
49 
50 namespace llvm {
51 namespace bolt {
52 
53 // Read and dump the .gcc_exception_table section entry.
54 //
55 // .gcc_except_table section contains a set of Language-Specific Data Areas -
56 // a fancy name for exception handling tables. There's one  LSDA entry per
57 // function. However, we can't actually tell which function LSDA refers to
58 // unless we parse .eh_frame entry that refers to the LSDA.
59 // Then inside LSDA most addresses are encoded relative to the function start,
60 // so we need the function context in order to get to real addresses.
61 //
62 // The best visual representation of the tables comprising LSDA and
63 // relationships between them is illustrated at:
64 //   https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
65 // Keep in mind that GCC implementation deviates slightly from that document.
66 //
67 // To summarize, there are 4 tables in LSDA: call site table, actions table,
68 // types table, and types index table (for indirection). The main table contains
69 // call site entries. Each call site includes a PC range that can throw an
70 // exception, a handler (landing pad), and a reference to an entry in the action
71 // table. The handler and/or action could be 0. The action entry is a head
72 // of a list of actions associated with a call site. The action table contains
73 // all such lists (it could be optimized to share list tails). Each action could
74 // be either to catch an exception of a given type, to perform a cleanup, or to
75 // propagate the exception after filtering it out (e.g. to make sure function
76 // exception specification is not violated). Catch action contains a reference
77 // to an entry in the type table, and filter action refers to an entry in the
78 // type index table to encode a set of types to filter.
79 //
80 // Call site table follows LSDA header. Action table immediately follows the
81 // call site table.
82 //
83 // Both types table and type index table start at the same location, but they
84 // grow in opposite directions (types go up, indices go down). The beginning of
85 // these tables is encoded in LSDA header. Sizes for both of the tables are not
86 // included anywhere.
87 //
88 // We have to parse all of the tables to determine their sizes. Then we have
89 // to parse the call site table and associate discovered information with
90 // actual call instructions and landing pad blocks.
91 //
92 // For the purpose of rewriting exception handling tables, we can reuse action,
93 // and type index tables in their original binary format.
94 //
95 // Type table could be encoded using position-independent references, and thus
96 // may require relocation.
97 //
98 // Ideally we should be able to re-write LSDA in-place, without the need to
99 // allocate a new space for it. Sadly there's no guarantee that the new call
100 // site table will be the same size as GCC uses uleb encodings for PC offsets.
101 //
102 // Note: some functions have LSDA entries with 0 call site entries.
103 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
104                                uint64_t LSDASectionAddress) {
105   assert(CurrentState == State::Disassembled && "unexpected function state");
106 
107   if (!getLSDAAddress())
108     return;
109 
110   DWARFDataExtractor Data(
111       StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
112                 LSDASectionData.size()),
113       BC.DwCtx->getDWARFObj().isLittleEndian(), 8);
114   uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
115   assert(Data.isValidOffset(Offset) && "wrong LSDA address");
116 
117   uint8_t LPStartEncoding = Data.getU8(&Offset);
118   uint64_t LPStart = 0;
119   if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
120           &Offset, LPStartEncoding, Offset + LSDASectionAddress))
121     LPStart = *MaybeLPStart;
122 
123   assert(LPStart == 0 && "support for split functions not implemented");
124 
125   const uint8_t TTypeEncoding = Data.getU8(&Offset);
126   size_t TTypeEncodingSize = 0;
127   uintptr_t TTypeEnd = 0;
128   if (TTypeEncoding != DW_EH_PE_omit) {
129     TTypeEnd = Data.getULEB128(&Offset);
130     TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
131   }
132 
133   if (opts::PrintExceptions) {
134     outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
135            << " for function " << *this << "]:\n";
136     outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
137            << '\n';
138     outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
139     outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
140     outs() << "TType End = " << TTypeEnd << '\n';
141   }
142 
143   // Table to store list of indices in type table. Entries are uleb128 values.
144   const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
145 
146   // Offset past the last decoded index.
147   uint64_t MaxTypeIndexTableOffset = 0;
148 
149   // Max positive index used in type table.
150   unsigned MaxTypeIndex = 0;
151 
152   // The actual type info table starts at the same location, but grows in
153   // opposite direction. TTypeEncoding is used to encode stored values.
154   const uint64_t TypeTableStart = Offset + TTypeEnd;
155 
156   uint8_t CallSiteEncoding = Data.getU8(&Offset);
157   uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
158   uint64_t CallSiteTableStart = Offset;
159   uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
160   uint64_t CallSitePtr = CallSiteTableStart;
161   uint64_t ActionTableStart = CallSiteTableEnd;
162 
163   if (opts::PrintExceptions) {
164     outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
165     outs() << "CallSite table length = " << CallSiteTableLength << '\n';
166     outs() << '\n';
167   }
168 
169   this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
170   const uint64_t RangeBase = getAddress();
171   while (CallSitePtr < CallSiteTableEnd) {
172     uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
173                                              CallSitePtr + LSDASectionAddress);
174     uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
175                                               CallSitePtr + LSDASectionAddress);
176     uint64_t LandingPad = *Data.getEncodedPointer(
177         &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
178     uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
179 
180     if (opts::PrintExceptions) {
181       outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
182              << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
183              << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad)
184              << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
185       outs() << "  current offset is " << (CallSitePtr - CallSiteTableStart)
186              << '\n';
187     }
188 
189     // Create a handler entry if necessary.
190     MCSymbol *LPSymbol = nullptr;
191     if (LandingPad) {
192       if (!getInstructionAtOffset(LandingPad)) {
193         if (opts::Verbosity >= 1)
194           errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad)
195                  << " not pointing to an instruction in function " << *this
196                  << " - ignoring.\n";
197       } else {
198         auto Label = Labels.find(LandingPad);
199         if (Label != Labels.end()) {
200           LPSymbol = Label->second;
201         } else {
202           LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
203           Labels[LandingPad] = LPSymbol;
204         }
205       }
206     }
207 
208     // Mark all call instructions in the range.
209     auto II = Instructions.find(Start);
210     auto IE = Instructions.end();
211     assert(II != IE && "exception range not pointing to an instruction");
212     do {
213       MCInst &Instruction = II->second;
214       if (BC.MIB->isCall(Instruction) &&
215           !BC.MIB->getConditionalTailCall(Instruction)) {
216         assert(!BC.MIB->isInvoke(Instruction) &&
217                "overlapping exception ranges detected");
218         // Add extra operands to a call instruction making it an invoke from
219         // now on.
220         BC.MIB->addEHInfo(Instruction,
221                           MCPlus::MCLandingPad(LPSymbol, ActionEntry));
222       }
223       ++II;
224     } while (II != IE && II->first < Start + Length);
225 
226     if (ActionEntry != 0) {
227       auto printType = [&](int Index, raw_ostream &OS) {
228         assert(Index > 0 && "only positive indices are valid");
229         uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
230         const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
231         uint64_t TypeAddress =
232             *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
233         if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
234           TypeAddress = 0;
235         if (TypeAddress == 0) {
236           OS << "<all>";
237           return;
238         }
239         if (TTypeEncoding & DW_EH_PE_indirect) {
240           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
241           assert(PointerOrErr && "failed to decode indirect address");
242           TypeAddress = *PointerOrErr;
243         }
244         if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
245           OS << TypeSymBD->getName();
246         else
247           OS << "0x" << Twine::utohexstr(TypeAddress);
248       };
249       if (opts::PrintExceptions)
250         outs() << "    actions: ";
251       uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
252       int64_t ActionType;
253       int64_t ActionNext;
254       const char *Sep = "";
255       do {
256         ActionType = Data.getSLEB128(&ActionPtr);
257         const uint32_t Self = ActionPtr;
258         ActionNext = Data.getSLEB128(&ActionPtr);
259         if (opts::PrintExceptions)
260           outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
261         if (ActionType == 0) {
262           if (opts::PrintExceptions)
263             outs() << "cleanup";
264         } else if (ActionType > 0) {
265           // It's an index into a type table.
266           MaxTypeIndex =
267               std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
268           if (opts::PrintExceptions) {
269             outs() << "catch type ";
270             printType(ActionType, outs());
271           }
272         } else { // ActionType < 0
273           if (opts::PrintExceptions)
274             outs() << "filter exception types ";
275           const char *TSep = "";
276           // ActionType is a negative *byte* offset into *uleb128-encoded* table
277           // of indices with base 1.
278           // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
279           // encoded using uleb128 thus we cannot directly dereference them.
280           uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
281           while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
282             MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
283             if (opts::PrintExceptions) {
284               outs() << TSep;
285               printType(Index, outs());
286               TSep = ", ";
287             }
288           }
289           MaxTypeIndexTableOffset = std::max(
290               MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
291         }
292 
293         Sep = "; ";
294 
295         ActionPtr = Self + ActionNext;
296       } while (ActionNext);
297       if (opts::PrintExceptions)
298         outs() << '\n';
299     }
300   }
301   if (opts::PrintExceptions)
302     outs() << '\n';
303 
304   assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
305              Data.getData().size() &&
306          "LSDA entry has crossed section boundary");
307 
308   if (TTypeEnd) {
309     LSDAActionTable = LSDASectionData.slice(
310         ActionTableStart, TypeIndexTableStart -
311                               MaxTypeIndex * TTypeEncodingSize -
312                               ActionTableStart);
313     for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
314       uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
315       const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
316       uint64_t TypeAddress =
317           *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
318       if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
319         TypeAddress = 0;
320       if (TTypeEncoding & DW_EH_PE_indirect) {
321         LSDATypeAddressTable.emplace_back(TypeAddress);
322         if (TypeAddress) {
323           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
324           assert(PointerOrErr && "failed to decode indirect address");
325           TypeAddress = *PointerOrErr;
326         }
327       }
328       LSDATypeTable.emplace_back(TypeAddress);
329     }
330     LSDATypeIndexTable =
331         LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
332   }
333 }
334 
335 void BinaryFunction::updateEHRanges() {
336   if (getSize() == 0)
337     return;
338 
339   assert(CurrentState == State::CFG_Finalized && "unexpected state");
340 
341   // Build call sites table.
342   struct EHInfo {
343     const MCSymbol *LP; // landing pad
344     uint64_t Action;
345   };
346 
347   // If previous call can throw, this is its exception handler.
348   EHInfo PreviousEH = {nullptr, 0};
349 
350   // Marker for the beginning of exceptions range.
351   const MCSymbol *StartRange = nullptr;
352 
353   // Indicates whether the start range is located in a cold part.
354   bool IsStartInCold = false;
355 
356   // Have we crossed hot/cold border for split functions?
357   bool SeenCold = false;
358 
359   // Sites to update - either regular or cold.
360   CallSitesType *Sites = &CallSites;
361 
362   for (BinaryBasicBlock *&BB : BasicBlocksLayout) {
363 
364     if (BB->isCold() && !SeenCold) {
365       SeenCold = true;
366 
367       // Close the range (if any) and change the target call sites.
368       if (StartRange) {
369         Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(),
370                                      PreviousEH.LP, PreviousEH.Action});
371       }
372       Sites = &ColdCallSites;
373 
374       // Reset the range.
375       StartRange = nullptr;
376       PreviousEH = {nullptr, 0};
377     }
378 
379     for (auto II = BB->begin(); II != BB->end(); ++II) {
380       if (!BC.MIB->isCall(*II))
381         continue;
382 
383       // Instruction can throw an exception that should be handled.
384       const bool Throws = BC.MIB->isInvoke(*II);
385 
386       // Ignore the call if it's a continuation of a no-throw gap.
387       if (!Throws && !StartRange)
388         continue;
389 
390       // Extract exception handling information from the instruction.
391       const MCSymbol *LP = nullptr;
392       uint64_t Action = 0;
393       if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II))
394         std::tie(LP, Action) = *EHInfo;
395 
396       // No action if the exception handler has not changed.
397       if (Throws && StartRange && PreviousEH.LP == LP &&
398           PreviousEH.Action == Action)
399         continue;
400 
401       // Same symbol is used for the beginning and the end of the range.
402       const MCSymbol *EHSymbol;
403       MCInst EHLabel;
404       {
405         std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
406         EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
407         BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
408       }
409 
410       II = std::next(BB->insertPseudoInstr(II, EHLabel));
411 
412       // At this point we could be in one of the following states:
413       //
414       // I. Exception handler has changed and we need to close previous range
415       //    and start a new one.
416       //
417       // II. Start a new exception range after the gap.
418       //
419       // III. Close current exception range and start a new gap.
420       const MCSymbol *EndRange;
421       if (StartRange) {
422         // I, III:
423         EndRange = EHSymbol;
424       } else {
425         // II:
426         StartRange = EHSymbol;
427         IsStartInCold = SeenCold;
428         EndRange = nullptr;
429       }
430 
431       // Close the previous range.
432       if (EndRange) {
433         Sites->emplace_back(
434             CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
435       }
436 
437       if (Throws) {
438         // I, II:
439         StartRange = EHSymbol;
440         IsStartInCold = SeenCold;
441         PreviousEH = EHInfo{LP, Action};
442       } else {
443         StartRange = nullptr;
444       }
445     }
446   }
447 
448   // Check if we need to close the range.
449   if (StartRange) {
450     assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch");
451     const MCSymbol *EndRange =
452         IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel();
453     Sites->emplace_back(
454         CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
455   }
456 }
457 
// Mask covering the two most significant bits of a CFI opcode byte.
constexpr uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
459 
460 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
461   // Prepare FDEs for fast lookup
462   for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
463     const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
464     // Skip CIEs.
465     if (!CurFDE)
466       continue;
467     // There could me multiple FDEs with the same initial address, and perhaps
468     // different sizes (address ranges). Use the first entry with non-zero size.
469     auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
470     if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
471       if (CurFDE->getAddressRange()) {
472         if (FDEI->second->getAddressRange() == 0) {
473           FDEI->second = CurFDE;
474         } else if (opts::Verbosity > 0) {
475           errs() << "BOLT-WARNING: different FDEs for function at 0x"
476                  << Twine::utohexstr(FDEI->first)
477                  << " detected; sizes: " << FDEI->second->getAddressRange()
478                  << " and " << CurFDE->getAddressRange() << '\n';
479         }
480       }
481     } else {
482       FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
483     }
484   }
485 }
486 
487 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
488   uint64_t Address = Function.getAddress();
489   auto I = FDEs.find(Address);
490   // Ignore zero-length FDE ranges.
491   if (I == FDEs.end() || !I->second->getAddressRange())
492     return true;
493 
494   const FDE &CurFDE = *I->second;
495   Optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
496   Function.setLSDAAddress(LSDA ? *LSDA : 0);
497 
498   uint64_t Offset = 0;
499   uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
500   uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
501   if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
502     Function.setPersonalityFunction(
503         *CurFDE.getLinkedCIE()->getPersonalityAddress());
504     Function.setPersonalityEncoding(
505         *CurFDE.getLinkedCIE()->getPersonalityEncoding());
506   }
507 
508   auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment,
509                                  DataAlignment](
510                                     const CFIProgram::Instruction &Instr) {
511     uint8_t Opcode = Instr.Opcode;
512     if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
513       Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
514     switch (Instr.Opcode) {
515     case DW_CFA_nop:
516       break;
517     case DW_CFA_advance_loc4:
518     case DW_CFA_advance_loc2:
519     case DW_CFA_advance_loc1:
520     case DW_CFA_advance_loc:
521       // Advance our current address
522       Offset += CodeAlignment * int64_t(Instr.Ops[0]);
523       break;
524     case DW_CFA_offset_extended_sf:
525       Function.addCFIInstruction(
526           Offset,
527           MCCFIInstruction::createOffset(
528               nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
529       break;
530     case DW_CFA_offset_extended:
531     case DW_CFA_offset:
532       Function.addCFIInstruction(
533           Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
534                                                  DataAlignment * Instr.Ops[1]));
535       break;
536     case DW_CFA_restore_extended:
537     case DW_CFA_restore:
538       Function.addCFIInstruction(
539           Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
540       break;
541     case DW_CFA_set_loc:
542       assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
543       assert(Instr.Ops[0] <= Address + Function.getSize() &&
544              "set_loc out of function bounds");
545       Offset = Instr.Ops[0] - Address;
546       break;
547 
548     case DW_CFA_undefined:
549       Function.addCFIInstruction(
550           Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
551       break;
552     case DW_CFA_same_value:
553       Function.addCFIInstruction(
554           Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
555       break;
556     case DW_CFA_register:
557       Function.addCFIInstruction(
558           Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
559                                                    Instr.Ops[1]));
560       break;
561     case DW_CFA_remember_state:
562       Function.addCFIInstruction(
563           Offset, MCCFIInstruction::createRememberState(nullptr));
564       break;
565     case DW_CFA_restore_state:
566       Function.addCFIInstruction(Offset,
567                                  MCCFIInstruction::createRestoreState(nullptr));
568       break;
569     case DW_CFA_def_cfa:
570       Function.addCFIInstruction(
571           Offset,
572           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
573       break;
574     case DW_CFA_def_cfa_sf:
575       Function.addCFIInstruction(
576           Offset,
577           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
578                                       DataAlignment * int64_t(Instr.Ops[1])));
579       break;
580     case DW_CFA_def_cfa_register:
581       Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
582                                              nullptr, Instr.Ops[0]));
583       break;
584     case DW_CFA_def_cfa_offset:
585       Function.addCFIInstruction(
586           Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
587       break;
588     case DW_CFA_def_cfa_offset_sf:
589       Function.addCFIInstruction(
590           Offset, MCCFIInstruction::cfiDefCfaOffset(
591                       nullptr, DataAlignment * int64_t(Instr.Ops[0])));
592       break;
593     case DW_CFA_GNU_args_size:
594       Function.addCFIInstruction(
595           Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
596       Function.setUsesGnuArgsSize();
597       break;
598     case DW_CFA_val_offset_sf:
599     case DW_CFA_val_offset:
600       if (opts::Verbosity >= 1) {
601         errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
602       }
603       return false;
604     case DW_CFA_def_cfa_expression:
605     case DW_CFA_val_expression:
606     case DW_CFA_expression: {
607       StringRef ExprBytes = Instr.Expression->getData();
608       std::string Str;
609       raw_string_ostream OS(Str);
610       // Manually encode this instruction using CFI escape
611       OS << Opcode;
612       if (Opcode != DW_CFA_def_cfa_expression)
613         encodeULEB128(Instr.Ops[0], OS);
614       encodeULEB128(ExprBytes.size(), OS);
615       OS << ExprBytes;
616       Function.addCFIInstruction(
617           Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
618       break;
619     }
620     case DW_CFA_MIPS_advance_loc8:
621       if (opts::Verbosity >= 1)
622         errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
623       return false;
624     case DW_CFA_GNU_window_save:
625     case DW_CFA_lo_user:
626     case DW_CFA_hi_user:
627       if (opts::Verbosity >= 1) {
628         errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
629                   "unimplemented\n";
630       }
631       return false;
632     default:
633       if (opts::Verbosity >= 1) {
634         errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
635                << '\n';
636       }
637       return false;
638     }
639 
640     return true;
641   };
642 
643   for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
644     if (!decodeFrameInstruction(Instr))
645       return false;
646 
647   for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
648     if (!decodeFrameInstruction(Instr))
649       return false;
650 
651   return true;
652 }
653 
654 std::vector<char> CFIReaderWriter::generateEHFrameHeader(
655     const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame,
656     uint64_t EHFrameHeaderAddress,
657     std::vector<uint64_t> &FailedAddresses) const {
658   // Common PC -> FDE map to be written into .eh_frame_hdr.
659   std::map<uint64_t, uint64_t> PCToFDE;
660 
661   // Presort array for binary search.
662   std::sort(FailedAddresses.begin(), FailedAddresses.end());
663 
664   // Initialize PCToFDE using NewEHFrame.
665   for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
666     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
667     if (FDE == nullptr)
668       continue;
669     const uint64_t FuncAddress = FDE->getInitialLocation();
670     const uint64_t FDEAddress =
671         NewEHFrame.getEHFrameAddress() + FDE->getOffset();
672 
673     // Ignore unused FDEs.
674     if (FuncAddress == 0)
675       continue;
676 
677     // Add the address to the map unless we failed to write it.
678     if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
679                             FuncAddress)) {
680       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
681                         << Twine::utohexstr(FuncAddress) << " is at 0x"
682                         << Twine::utohexstr(FDEAddress) << '\n');
683       PCToFDE[FuncAddress] = FDEAddress;
684     }
685   };
686 
687   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
688                     << std::distance(NewEHFrame.entries().begin(),
689                                      NewEHFrame.entries().end())
690                     << " entries\n");
691 
692   // Add entries from the original .eh_frame corresponding to the functions
693   // that we did not update.
694   for (const dwarf::FrameEntry &Entry : OldEHFrame) {
695     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
696     if (FDE == nullptr)
697       continue;
698     const uint64_t FuncAddress = FDE->getInitialLocation();
699     const uint64_t FDEAddress =
700         OldEHFrame.getEHFrameAddress() + FDE->getOffset();
701 
702     // Add the address if we failed to write it.
703     if (PCToFDE.count(FuncAddress) == 0) {
704       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
705                         << Twine::utohexstr(FuncAddress) << " is at 0x"
706                         << Twine::utohexstr(FDEAddress) << '\n');
707       PCToFDE[FuncAddress] = FDEAddress;
708     }
709   };
710 
711   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
712                     << std::distance(OldEHFrame.entries().begin(),
713                                      OldEHFrame.entries().end())
714                     << " entries\n");
715 
716   // Generate a new .eh_frame_hdr based on the new map.
717 
718   // Header plus table of entries of size 8 bytes.
719   std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
720 
721   // Version is 1.
722   EHFrameHeader[0] = 1;
723   // Encoding of the eh_frame pointer.
724   EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
725   // Encoding of the count field to follow.
726   EHFrameHeader[2] = DW_EH_PE_udata4;
727   // Encoding of the table entries - 4-byte offset from the start of the header.
728   EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
729 
730   // Address of eh_frame. Use the new one.
731   support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
732       NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
733 
734   // Number of entries in the table (FDE count).
735   support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
736 
737   // Write the table at offset 12.
738   char *Ptr = EHFrameHeader.data();
739   uint32_t Offset = 12;
740   for (const auto &PCI : PCToFDE) {
741     int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
742     assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
743     support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
744     Offset += 4;
745     int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
746     assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
747     support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
748     Offset += 4;
749   }
750 
751   return EHFrameHeader;
752 }
753 
754 Error EHFrameParser::parseCIE(uint64_t StartOffset) {
755   uint8_t Version = Data.getU8(&Offset);
756   const char *Augmentation = Data.getCStr(&Offset);
757   StringRef AugmentationString(Augmentation ? Augmentation : "");
758   uint8_t AddressSize =
759       Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
760   Data.setAddressSize(AddressSize);
761   // Skip segment descriptor size
762   if (Version >= 4)
763     Offset += 1;
764   // Skip code alignment factor
765   Data.getULEB128(&Offset);
766   // Skip data alignment
767   Data.getSLEB128(&Offset);
768   // Skip return address register
769   if (Version == 1)
770     Offset += 1;
771   else
772     Data.getULEB128(&Offset);
773 
774   uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
775   uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
776   // Walk the augmentation string to get all the augmentation data.
777   for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
778     switch (AugmentationString[i]) {
779     default:
780       return createStringError(
781           errc::invalid_argument,
782           "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
783     case 'L':
784       LSDAPointerEncoding = Data.getU8(&Offset);
785       break;
786     case 'P': {
787       uint32_t PersonalityEncoding = Data.getU8(&Offset);
788       Optional<uint64_t> Personality =
789           Data.getEncodedPointer(&Offset, PersonalityEncoding,
790                                  EHFrameAddress ? EHFrameAddress + Offset : 0);
791       // Patch personality address
792       if (Personality)
793         PatcherCallback(*Personality, Offset, PersonalityEncoding);
794       break;
795     }
796     case 'R':
797       FDEPointerEncoding = Data.getU8(&Offset);
798       break;
799     case 'z':
800       if (i)
801         return createStringError(
802             errc::invalid_argument,
803             "'z' must be the first character at 0x%" PRIx64, StartOffset);
804       // Skip augmentation length
805       Data.getULEB128(&Offset);
806       break;
807     case 'S':
808     case 'B':
809       break;
810     }
811   }
812   Entries.emplace_back(std::make_unique<CIEInfo>(
813       FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
814   CIEs[StartOffset] = &*Entries.back();
815   return Error::success();
816 }
817 
818 Error EHFrameParser::parseFDE(uint64_t CIEPointer,
819                               uint64_t StartStructureOffset) {
820   Optional<uint64_t> LSDAAddress;
821   CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
822 
823   // The address size is encoded in the CIE we reference.
824   if (!Cie)
825     return createStringError(errc::invalid_argument,
826                              "parsing FDE data at 0x%" PRIx64
827                              " failed due to missing CIE",
828                              StartStructureOffset);
829   // Patch initial location
830   if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
831                                         EHFrameAddress + Offset)) {
832     PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
833   }
834   // Skip address range
835   Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
836 
837   // Process augmentation data for this FDE.
838   StringRef AugmentationString = Cie->AugmentationString;
839   if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
840     // Skip augmentation length
841     Data.getULEB128(&Offset);
842     LSDAAddress =
843         Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
844                                EHFrameAddress ? Offset + EHFrameAddress : 0);
845     // Patch LSDA address
846     PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
847   }
848   return Error::success();
849 }
850 
851 Error EHFrameParser::parse() {
852   while (Data.isValidOffset(Offset)) {
853     const uint64_t StartOffset = Offset;
854 
855     uint64_t Length;
856     DwarfFormat Format;
857     std::tie(Length, Format) = Data.getInitialLength(&Offset);
858 
859     // If the Length is 0, then this CIE is a terminator
860     if (Length == 0)
861       break;
862 
863     const uint64_t StartStructureOffset = Offset;
864     const uint64_t EndStructureOffset = Offset + Length;
865 
866     Error Err = Error::success();
867     const uint64_t Id = Data.getRelocatedValue(4, &Offset,
868                                                /*SectionIndex=*/nullptr, &Err);
869     if (Err)
870       return Err;
871 
872     if (!Id) {
873       if (Error Err = parseCIE(StartOffset))
874         return Err;
875     } else {
876       if (Error Err = parseFDE(Id, StartStructureOffset))
877         return Err;
878     }
879     Offset = EndStructureOffset;
880   }
881 
882   return Error::success();
883 }
884 
885 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
886                            PatcherCallbackTy PatcherCallback) {
887   EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
888   return Parser.parse();
889 }
890 
891 } // namespace bolt
892 } // namespace llvm
893