xref: /llvm-project/bolt/lib/Core/Exceptions.cpp (revision 0b7e8baf83befdb9cd7c282d943492581f2dd17c)
1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for handling C++ exception meta data.
10 //
11 // Some of the code is taken from examples/ExceptionDemo
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/Dwarf.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <map>
29 
30 #undef  DEBUG_TYPE
31 #define DEBUG_TYPE "bolt-exceptions"
32 
33 using namespace llvm::dwarf;
34 
35 namespace opts {
36 
37 extern llvm::cl::OptionCategory BoltCategory;
38 
39 extern llvm::cl::opt<unsigned> Verbosity;
40 
41 static llvm::cl::opt<bool>
42     PrintExceptions("print-exceptions",
43                     llvm::cl::desc("print exception handling data"),
44                     llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
45 
46 } // namespace opts
47 
48 namespace llvm {
49 namespace bolt {
50 
51 // Read and dump the .gcc_exception_table section entry.
52 //
53 // .gcc_except_table section contains a set of Language-Specific Data Areas -
54 // a fancy name for exception handling tables. There's one  LSDA entry per
55 // function. However, we can't actually tell which function LSDA refers to
56 // unless we parse .eh_frame entry that refers to the LSDA.
57 // Then inside LSDA most addresses are encoded relative to the function start,
58 // so we need the function context in order to get to real addresses.
59 //
60 // The best visual representation of the tables comprising LSDA and
61 // relationships between them is illustrated at:
62 //   https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
63 // Keep in mind that GCC implementation deviates slightly from that document.
64 //
65 // To summarize, there are 4 tables in LSDA: call site table, actions table,
66 // types table, and types index table (for indirection). The main table contains
67 // call site entries. Each call site includes a PC range that can throw an
68 // exception, a handler (landing pad), and a reference to an entry in the action
69 // table. The handler and/or action could be 0. The action entry is a head
70 // of a list of actions associated with a call site. The action table contains
71 // all such lists (it could be optimized to share list tails). Each action could
72 // be either to catch an exception of a given type, to perform a cleanup, or to
73 // propagate the exception after filtering it out (e.g. to make sure function
74 // exception specification is not violated). Catch action contains a reference
75 // to an entry in the type table, and filter action refers to an entry in the
76 // type index table to encode a set of types to filter.
77 //
78 // Call site table follows LSDA header. Action table immediately follows the
79 // call site table.
80 //
81 // Both types table and type index table start at the same location, but they
82 // grow in opposite directions (types go up, indices go down). The beginning of
83 // these tables is encoded in LSDA header. Sizes for both of the tables are not
84 // included anywhere.
85 //
86 // We have to parse all of the tables to determine their sizes. Then we have
87 // to parse the call site table and associate discovered information with
88 // actual call instructions and landing pad blocks.
89 //
90 // For the purpose of rewriting exception handling tables, we can reuse action,
91 // and type index tables in their original binary format.
92 //
93 // Type table could be encoded using position-independent references, and thus
94 // may require relocation.
95 //
96 // Ideally we should be able to re-write LSDA in-place, without the need to
97 // allocate a new space for it. Sadly there's no guarantee that the new call
98 // site table will be the same size as GCC uses uleb encodings for PC offsets.
99 //
100 // Note: some functions have LSDA entries with 0 call site entries.
101 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
102                                uint64_t LSDASectionAddress) {
103   assert(CurrentState == State::Disassembled && "unexpected function state");
104 
105   if (!getLSDAAddress())
106     return;
107 
108   DWARFDataExtractor Data(
109       StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110                 LSDASectionData.size()),
111       BC.DwCtx->getDWARFObj().isLittleEndian(), 8);
112   uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
113   assert(Data.isValidOffset(Offset) && "wrong LSDA address");
114 
115   uint8_t LPStartEncoding = Data.getU8(&Offset);
116   uint64_t LPStart = 0;
117   if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
118           &Offset, LPStartEncoding, Offset + LSDASectionAddress))
119     LPStart = *MaybeLPStart;
120 
121   assert(LPStart == 0 && "support for split functions not implemented");
122 
123   const uint8_t TTypeEncoding = Data.getU8(&Offset);
124   size_t TTypeEncodingSize = 0;
125   uintptr_t TTypeEnd = 0;
126   if (TTypeEncoding != DW_EH_PE_omit) {
127     TTypeEnd = Data.getULEB128(&Offset);
128     TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
129   }
130 
131   if (opts::PrintExceptions) {
132     outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
133            << " for function " << *this << "]:\n";
134     outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
135            << '\n';
136     outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
137     outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
138     outs() << "TType End = " << TTypeEnd << '\n';
139   }
140 
141   // Table to store list of indices in type table. Entries are uleb128 values.
142   const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
143 
144   // Offset past the last decoded index.
145   uint64_t MaxTypeIndexTableOffset = 0;
146 
147   // Max positive index used in type table.
148   unsigned MaxTypeIndex = 0;
149 
150   // The actual type info table starts at the same location, but grows in
151   // opposite direction. TTypeEncoding is used to encode stored values.
152   const uint64_t TypeTableStart = Offset + TTypeEnd;
153 
154   uint8_t CallSiteEncoding = Data.getU8(&Offset);
155   uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
156   uint64_t CallSiteTableStart = Offset;
157   uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
158   uint64_t CallSitePtr = CallSiteTableStart;
159   uint64_t ActionTableStart = CallSiteTableEnd;
160 
161   if (opts::PrintExceptions) {
162     outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
163     outs() << "CallSite table length = " << CallSiteTableLength << '\n';
164     outs() << '\n';
165   }
166 
167   this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
168   const uint64_t RangeBase = getAddress();
169   while (CallSitePtr < CallSiteTableEnd) {
170     uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
171                                              CallSitePtr + LSDASectionAddress);
172     uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
173                                               CallSitePtr + LSDASectionAddress);
174     uint64_t LandingPad = *Data.getEncodedPointer(
175         &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
176     uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
177 
178     if (opts::PrintExceptions) {
179       outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
180              << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
181              << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad)
182              << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
183       outs() << "  current offset is " << (CallSitePtr - CallSiteTableStart)
184              << '\n';
185     }
186 
187     // Create a handler entry if necessary.
188     MCSymbol *LPSymbol = nullptr;
189     if (LandingPad) {
190       if (!getInstructionAtOffset(LandingPad)) {
191         if (opts::Verbosity >= 1)
192           errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad)
193                  << " not pointing to an instruction in function " << *this
194                  << " - ignoring.\n";
195       } else {
196         auto Label = Labels.find(LandingPad);
197         if (Label != Labels.end()) {
198           LPSymbol = Label->second;
199         } else {
200           LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
201           Labels[LandingPad] = LPSymbol;
202         }
203       }
204     }
205 
206     // Mark all call instructions in the range.
207     auto II = Instructions.find(Start);
208     auto IE = Instructions.end();
209     assert(II != IE && "exception range not pointing to an instruction");
210     do {
211       MCInst &Instruction = II->second;
212       if (BC.MIB->isCall(Instruction) &&
213           !BC.MIB->getConditionalTailCall(Instruction)) {
214         assert(!BC.MIB->isInvoke(Instruction) &&
215                "overlapping exception ranges detected");
216         // Add extra operands to a call instruction making it an invoke from
217         // now on.
218         BC.MIB->addEHInfo(Instruction,
219                           MCPlus::MCLandingPad(LPSymbol, ActionEntry));
220       }
221       ++II;
222     } while (II != IE && II->first < Start + Length);
223 
224     if (ActionEntry != 0) {
225       auto printType = [&](int Index, raw_ostream &OS) {
226         assert(Index > 0 && "only positive indices are valid");
227         uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
228         const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
229         uint64_t TypeAddress =
230             *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
231         if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
232           TypeAddress = 0;
233         if (TypeAddress == 0) {
234           OS << "<all>";
235           return;
236         }
237         if (TTypeEncoding & DW_EH_PE_indirect) {
238           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
239           assert(PointerOrErr && "failed to decode indirect address");
240           TypeAddress = *PointerOrErr;
241         }
242         if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
243           OS << TypeSymBD->getName();
244         else
245           OS << "0x" << Twine::utohexstr(TypeAddress);
246       };
247       if (opts::PrintExceptions)
248         outs() << "    actions: ";
249       uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
250       int64_t ActionType;
251       int64_t ActionNext;
252       const char *Sep = "";
253       do {
254         ActionType = Data.getSLEB128(&ActionPtr);
255         const uint32_t Self = ActionPtr;
256         ActionNext = Data.getSLEB128(&ActionPtr);
257         if (opts::PrintExceptions)
258           outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
259         if (ActionType == 0) {
260           if (opts::PrintExceptions)
261             outs() << "cleanup";
262         } else if (ActionType > 0) {
263           // It's an index into a type table.
264           MaxTypeIndex =
265               std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
266           if (opts::PrintExceptions) {
267             outs() << "catch type ";
268             printType(ActionType, outs());
269           }
270         } else { // ActionType < 0
271           if (opts::PrintExceptions)
272             outs() << "filter exception types ";
273           const char *TSep = "";
274           // ActionType is a negative *byte* offset into *uleb128-encoded* table
275           // of indices with base 1.
276           // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
277           // encoded using uleb128 thus we cannot directly dereference them.
278           uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
279           while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
280             MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
281             if (opts::PrintExceptions) {
282               outs() << TSep;
283               printType(Index, outs());
284               TSep = ", ";
285             }
286           }
287           MaxTypeIndexTableOffset = std::max(
288               MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
289         }
290 
291         Sep = "; ";
292 
293         ActionPtr = Self + ActionNext;
294       } while (ActionNext);
295       if (opts::PrintExceptions)
296         outs() << '\n';
297     }
298   }
299   if (opts::PrintExceptions)
300     outs() << '\n';
301 
302   assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
303              Data.getData().size() &&
304          "LSDA entry has crossed section boundary");
305 
306   if (TTypeEnd) {
307     LSDAActionTable = LSDASectionData.slice(
308         ActionTableStart, TypeIndexTableStart -
309                               MaxTypeIndex * TTypeEncodingSize -
310                               ActionTableStart);
311     for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
312       uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
313       const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
314       uint64_t TypeAddress =
315           *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
316       if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
317         TypeAddress = 0;
318       if (TTypeEncoding & DW_EH_PE_indirect) {
319         LSDATypeAddressTable.emplace_back(TypeAddress);
320         if (TypeAddress) {
321           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
322           assert(PointerOrErr && "failed to decode indirect address");
323           TypeAddress = *PointerOrErr;
324         }
325       }
326       LSDATypeTable.emplace_back(TypeAddress);
327     }
328     LSDATypeIndexTable =
329         LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
330   }
331 }
332 
333 void BinaryFunction::updateEHRanges() {
334   if (getSize() == 0)
335     return;
336 
337   assert(CurrentState == State::CFG_Finalized && "unexpected state");
338 
339   // Build call sites table.
340   struct EHInfo {
341     const MCSymbol *LP; // landing pad
342     uint64_t Action;
343   };
344 
345   // If previous call can throw, this is its exception handler.
346   EHInfo PreviousEH = {nullptr, 0};
347 
348   // Marker for the beginning of exceptions range.
349   const MCSymbol *StartRange = nullptr;
350 
351   // Indicates whether the start range is located in a cold part.
352   bool IsStartInCold = false;
353 
354   // Have we crossed hot/cold border for split functions?
355   bool SeenCold = false;
356 
357   // Sites to update - either regular or cold.
358   CallSitesType *Sites = &CallSites;
359 
360   for (BinaryBasicBlock *&BB : BasicBlocksLayout) {
361 
362     if (BB->isCold() && !SeenCold) {
363       SeenCold = true;
364 
365       // Close the range (if any) and change the target call sites.
366       if (StartRange) {
367         Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(),
368                                      PreviousEH.LP, PreviousEH.Action});
369       }
370       Sites = &ColdCallSites;
371 
372       // Reset the range.
373       StartRange = nullptr;
374       PreviousEH = {nullptr, 0};
375     }
376 
377     for (auto II = BB->begin(); II != BB->end(); ++II) {
378       if (!BC.MIB->isCall(*II))
379         continue;
380 
381       // Instruction can throw an exception that should be handled.
382       const bool Throws = BC.MIB->isInvoke(*II);
383 
384       // Ignore the call if it's a continuation of a no-throw gap.
385       if (!Throws && !StartRange)
386         continue;
387 
388       // Extract exception handling information from the instruction.
389       const MCSymbol *LP = nullptr;
390       uint64_t Action = 0;
391       if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II))
392         std::tie(LP, Action) = *EHInfo;
393 
394       // No action if the exception handler has not changed.
395       if (Throws && StartRange && PreviousEH.LP == LP &&
396           PreviousEH.Action == Action)
397         continue;
398 
399       // Same symbol is used for the beginning and the end of the range.
400       const MCSymbol *EHSymbol;
401       MCInst EHLabel;
402       {
403         std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
404         EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
405         BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
406       }
407 
408       II = std::next(BB->insertPseudoInstr(II, EHLabel));
409 
410       // At this point we could be in one of the following states:
411       //
412       // I. Exception handler has changed and we need to close previous range
413       //    and start a new one.
414       //
415       // II. Start a new exception range after the gap.
416       //
417       // III. Close current exception range and start a new gap.
418       const MCSymbol *EndRange;
419       if (StartRange) {
420         // I, III:
421         EndRange = EHSymbol;
422       } else {
423         // II:
424         StartRange = EHSymbol;
425         IsStartInCold = SeenCold;
426         EndRange = nullptr;
427       }
428 
429       // Close the previous range.
430       if (EndRange) {
431         Sites->emplace_back(
432             CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
433       }
434 
435       if (Throws) {
436         // I, II:
437         StartRange = EHSymbol;
438         IsStartInCold = SeenCold;
439         PreviousEH = EHInfo{LP, Action};
440       } else {
441         StartRange = nullptr;
442       }
443     }
444   }
445 
446   // Check if we need to close the range.
447   if (StartRange) {
448     assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch");
449     const MCSymbol *EndRange =
450         IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel();
451     Sites->emplace_back(
452         CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
453   }
454 }
455 
456 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
457 
458 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
459   // Prepare FDEs for fast lookup
460   for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
461     const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
462     // Skip CIEs.
463     if (!CurFDE)
464       continue;
465     // There could me multiple FDEs with the same initial address, and perhaps
466     // different sizes (address ranges). Use the first entry with non-zero size.
467     auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
468     if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
469       if (CurFDE->getAddressRange()) {
470         if (FDEI->second->getAddressRange() == 0) {
471           FDEI->second = CurFDE;
472         } else if (opts::Verbosity > 0) {
473           errs() << "BOLT-WARNING: different FDEs for function at 0x"
474                  << Twine::utohexstr(FDEI->first)
475                  << " detected; sizes: " << FDEI->second->getAddressRange()
476                  << " and " << CurFDE->getAddressRange() << '\n';
477         }
478       }
479     } else {
480       FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
481     }
482   }
483 }
484 
485 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
486   uint64_t Address = Function.getAddress();
487   auto I = FDEs.find(Address);
488   // Ignore zero-length FDE ranges.
489   if (I == FDEs.end() || !I->second->getAddressRange())
490     return true;
491 
492   const FDE &CurFDE = *I->second;
493   Optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
494   Function.setLSDAAddress(LSDA ? *LSDA : 0);
495 
496   uint64_t Offset = Function.getFirstInstructionOffset();
497   uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
498   uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
499   if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
500     Function.setPersonalityFunction(
501         *CurFDE.getLinkedCIE()->getPersonalityAddress());
502     Function.setPersonalityEncoding(
503         *CurFDE.getLinkedCIE()->getPersonalityEncoding());
504   }
505 
506   auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment,
507                                  DataAlignment](
508                                     const CFIProgram::Instruction &Instr) {
509     uint8_t Opcode = Instr.Opcode;
510     if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
511       Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
512     switch (Instr.Opcode) {
513     case DW_CFA_nop:
514       break;
515     case DW_CFA_advance_loc4:
516     case DW_CFA_advance_loc2:
517     case DW_CFA_advance_loc1:
518     case DW_CFA_advance_loc:
519       // Advance our current address
520       Offset += CodeAlignment * int64_t(Instr.Ops[0]);
521       break;
522     case DW_CFA_offset_extended_sf:
523       Function.addCFIInstruction(
524           Offset,
525           MCCFIInstruction::createOffset(
526               nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
527       break;
528     case DW_CFA_offset_extended:
529     case DW_CFA_offset:
530       Function.addCFIInstruction(
531           Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
532                                                  DataAlignment * Instr.Ops[1]));
533       break;
534     case DW_CFA_restore_extended:
535     case DW_CFA_restore:
536       Function.addCFIInstruction(
537           Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
538       break;
539     case DW_CFA_set_loc:
540       assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
541       assert(Instr.Ops[0] <= Address + Function.getSize() &&
542              "set_loc out of function bounds");
543       Offset = Instr.Ops[0] - Address;
544       break;
545 
546     case DW_CFA_undefined:
547       Function.addCFIInstruction(
548           Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
549       break;
550     case DW_CFA_same_value:
551       Function.addCFIInstruction(
552           Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
553       break;
554     case DW_CFA_register:
555       Function.addCFIInstruction(
556           Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
557                                                    Instr.Ops[1]));
558       break;
559     case DW_CFA_remember_state:
560       Function.addCFIInstruction(
561           Offset, MCCFIInstruction::createRememberState(nullptr));
562       break;
563     case DW_CFA_restore_state:
564       Function.addCFIInstruction(Offset,
565                                  MCCFIInstruction::createRestoreState(nullptr));
566       break;
567     case DW_CFA_def_cfa:
568       Function.addCFIInstruction(
569           Offset,
570           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
571       break;
572     case DW_CFA_def_cfa_sf:
573       Function.addCFIInstruction(
574           Offset,
575           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
576                                       DataAlignment * int64_t(Instr.Ops[1])));
577       break;
578     case DW_CFA_def_cfa_register:
579       Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
580                                              nullptr, Instr.Ops[0]));
581       break;
582     case DW_CFA_def_cfa_offset:
583       Function.addCFIInstruction(
584           Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
585       break;
586     case DW_CFA_def_cfa_offset_sf:
587       Function.addCFIInstruction(
588           Offset, MCCFIInstruction::cfiDefCfaOffset(
589                       nullptr, DataAlignment * int64_t(Instr.Ops[0])));
590       break;
591     case DW_CFA_GNU_args_size:
592       Function.addCFIInstruction(
593           Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
594       Function.setUsesGnuArgsSize();
595       break;
596     case DW_CFA_val_offset_sf:
597     case DW_CFA_val_offset:
598       if (opts::Verbosity >= 1) {
599         errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
600       }
601       return false;
602     case DW_CFA_def_cfa_expression:
603     case DW_CFA_val_expression:
604     case DW_CFA_expression: {
605       StringRef ExprBytes = Instr.Expression->getData();
606       std::string Str;
607       raw_string_ostream OS(Str);
608       // Manually encode this instruction using CFI escape
609       OS << Opcode;
610       if (Opcode != DW_CFA_def_cfa_expression)
611         encodeULEB128(Instr.Ops[0], OS);
612       encodeULEB128(ExprBytes.size(), OS);
613       OS << ExprBytes;
614       Function.addCFIInstruction(
615           Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
616       break;
617     }
618     case DW_CFA_MIPS_advance_loc8:
619       if (opts::Verbosity >= 1)
620         errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
621       return false;
622     case DW_CFA_GNU_window_save:
623     case DW_CFA_lo_user:
624     case DW_CFA_hi_user:
625       if (opts::Verbosity >= 1) {
626         errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
627                   "unimplemented\n";
628       }
629       return false;
630     default:
631       if (opts::Verbosity >= 1) {
632         errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
633                << '\n';
634       }
635       return false;
636     }
637 
638     return true;
639   };
640 
641   for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
642     if (!decodeFrameInstruction(Instr))
643       return false;
644 
645   for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
646     if (!decodeFrameInstruction(Instr))
647       return false;
648 
649   return true;
650 }
651 
652 std::vector<char> CFIReaderWriter::generateEHFrameHeader(
653     const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame,
654     uint64_t EHFrameHeaderAddress,
655     std::vector<uint64_t> &FailedAddresses) const {
656   // Common PC -> FDE map to be written into .eh_frame_hdr.
657   std::map<uint64_t, uint64_t> PCToFDE;
658 
659   // Presort array for binary search.
660   std::sort(FailedAddresses.begin(), FailedAddresses.end());
661 
662   // Initialize PCToFDE using NewEHFrame.
663   for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
664     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
665     if (FDE == nullptr)
666       continue;
667     const uint64_t FuncAddress = FDE->getInitialLocation();
668     const uint64_t FDEAddress =
669         NewEHFrame.getEHFrameAddress() + FDE->getOffset();
670 
671     // Ignore unused FDEs.
672     if (FuncAddress == 0)
673       continue;
674 
675     // Add the address to the map unless we failed to write it.
676     if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
677                             FuncAddress)) {
678       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
679                         << Twine::utohexstr(FuncAddress) << " is at 0x"
680                         << Twine::utohexstr(FDEAddress) << '\n');
681       PCToFDE[FuncAddress] = FDEAddress;
682     }
683   };
684 
685   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
686                     << std::distance(NewEHFrame.entries().begin(),
687                                      NewEHFrame.entries().end())
688                     << " entries\n");
689 
690   // Add entries from the original .eh_frame corresponding to the functions
691   // that we did not update.
692   for (const dwarf::FrameEntry &Entry : OldEHFrame) {
693     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
694     if (FDE == nullptr)
695       continue;
696     const uint64_t FuncAddress = FDE->getInitialLocation();
697     const uint64_t FDEAddress =
698         OldEHFrame.getEHFrameAddress() + FDE->getOffset();
699 
700     // Add the address if we failed to write it.
701     if (PCToFDE.count(FuncAddress) == 0) {
702       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
703                         << Twine::utohexstr(FuncAddress) << " is at 0x"
704                         << Twine::utohexstr(FDEAddress) << '\n');
705       PCToFDE[FuncAddress] = FDEAddress;
706     }
707   };
708 
709   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
710                     << std::distance(OldEHFrame.entries().begin(),
711                                      OldEHFrame.entries().end())
712                     << " entries\n");
713 
714   // Generate a new .eh_frame_hdr based on the new map.
715 
716   // Header plus table of entries of size 8 bytes.
717   std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
718 
719   // Version is 1.
720   EHFrameHeader[0] = 1;
721   // Encoding of the eh_frame pointer.
722   EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
723   // Encoding of the count field to follow.
724   EHFrameHeader[2] = DW_EH_PE_udata4;
725   // Encoding of the table entries - 4-byte offset from the start of the header.
726   EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
727 
728   // Address of eh_frame. Use the new one.
729   support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
730       NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
731 
732   // Number of entries in the table (FDE count).
733   support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
734 
735   // Write the table at offset 12.
736   char *Ptr = EHFrameHeader.data();
737   uint32_t Offset = 12;
738   for (const auto &PCI : PCToFDE) {
739     int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
740     assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
741     support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
742     Offset += 4;
743     int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
744     assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
745     support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
746     Offset += 4;
747   }
748 
749   return EHFrameHeader;
750 }
751 
752 Error EHFrameParser::parseCIE(uint64_t StartOffset) {
753   uint8_t Version = Data.getU8(&Offset);
754   const char *Augmentation = Data.getCStr(&Offset);
755   StringRef AugmentationString(Augmentation ? Augmentation : "");
756   uint8_t AddressSize =
757       Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
758   Data.setAddressSize(AddressSize);
759   // Skip segment descriptor size
760   if (Version >= 4)
761     Offset += 1;
762   // Skip code alignment factor
763   Data.getULEB128(&Offset);
764   // Skip data alignment
765   Data.getSLEB128(&Offset);
766   // Skip return address register
767   if (Version == 1)
768     Offset += 1;
769   else
770     Data.getULEB128(&Offset);
771 
772   uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
773   uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
774   // Walk the augmentation string to get all the augmentation data.
775   for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
776     switch (AugmentationString[i]) {
777     default:
778       return createStringError(
779           errc::invalid_argument,
780           "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
781     case 'L':
782       LSDAPointerEncoding = Data.getU8(&Offset);
783       break;
784     case 'P': {
785       uint32_t PersonalityEncoding = Data.getU8(&Offset);
786       Optional<uint64_t> Personality =
787           Data.getEncodedPointer(&Offset, PersonalityEncoding,
788                                  EHFrameAddress ? EHFrameAddress + Offset : 0);
789       // Patch personality address
790       if (Personality)
791         PatcherCallback(*Personality, Offset, PersonalityEncoding);
792       break;
793     }
794     case 'R':
795       FDEPointerEncoding = Data.getU8(&Offset);
796       break;
797     case 'z':
798       if (i)
799         return createStringError(
800             errc::invalid_argument,
801             "'z' must be the first character at 0x%" PRIx64, StartOffset);
802       // Skip augmentation length
803       Data.getULEB128(&Offset);
804       break;
805     case 'S':
806     case 'B':
807       break;
808     }
809   }
810   Entries.emplace_back(std::make_unique<CIEInfo>(
811       FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
812   CIEs[StartOffset] = &*Entries.back();
813   return Error::success();
814 }
815 
816 Error EHFrameParser::parseFDE(uint64_t CIEPointer,
817                               uint64_t StartStructureOffset) {
818   Optional<uint64_t> LSDAAddress;
819   CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
820 
821   // The address size is encoded in the CIE we reference.
822   if (!Cie)
823     return createStringError(errc::invalid_argument,
824                              "parsing FDE data at 0x%" PRIx64
825                              " failed due to missing CIE",
826                              StartStructureOffset);
827   // Patch initial location
828   if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
829                                         EHFrameAddress + Offset)) {
830     PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
831   }
832   // Skip address range
833   Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
834 
835   // Process augmentation data for this FDE.
836   StringRef AugmentationString = Cie->AugmentationString;
837   if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
838     // Skip augmentation length
839     Data.getULEB128(&Offset);
840     LSDAAddress =
841         Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
842                                EHFrameAddress ? Offset + EHFrameAddress : 0);
843     // Patch LSDA address
844     PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
845   }
846   return Error::success();
847 }
848 
849 Error EHFrameParser::parse() {
850   while (Data.isValidOffset(Offset)) {
851     const uint64_t StartOffset = Offset;
852 
853     uint64_t Length;
854     DwarfFormat Format;
855     std::tie(Length, Format) = Data.getInitialLength(&Offset);
856 
857     // If the Length is 0, then this CIE is a terminator
858     if (Length == 0)
859       break;
860 
861     const uint64_t StartStructureOffset = Offset;
862     const uint64_t EndStructureOffset = Offset + Length;
863 
864     Error Err = Error::success();
865     const uint64_t Id = Data.getRelocatedValue(4, &Offset,
866                                                /*SectionIndex=*/nullptr, &Err);
867     if (Err)
868       return Err;
869 
870     if (!Id) {
871       if (Error Err = parseCIE(StartOffset))
872         return Err;
873     } else {
874       if (Error Err = parseFDE(Id, StartStructureOffset))
875         return Err;
876     }
877     Offset = EndStructureOffset;
878   }
879 
880   return Error::success();
881 }
882 
883 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
884                            PatcherCallbackTy PatcherCallback) {
885   EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
886   return Parser.parse();
887 }
888 
889 } // namespace bolt
890 } // namespace llvm
891