xref: /llvm-project/bolt/lib/Core/Exceptions.cpp (revision ebe51c4d23deacbe10b0129b004f694a350fd2b6)
1 //===-- Exceptions.cpp - Helpers for processing C++ exceptions ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Some of the code is taken from examples/ExceptionDemo
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/Exceptions.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/BinaryFormat/Dwarf.h"
18 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
19 #include "llvm/Support/Casting.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/LEB128.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <map>
26 
27 #undef  DEBUG_TYPE
28 #define DEBUG_TYPE "bolt-exceptions"
29 
30 using namespace llvm::dwarf;
31 
32 namespace opts {
33 
34 extern llvm::cl::OptionCategory BoltCategory;
35 
36 extern llvm::cl::opt<unsigned> Verbosity;
37 
38 static llvm::cl::opt<bool>
39 PrintExceptions("print-exceptions",
40   llvm::cl::desc("print exception handling data"),
41   llvm::cl::ZeroOrMore,
42   llvm::cl::Hidden,
43   llvm::cl::cat(BoltCategory));
44 
45 } // namespace opts
46 
47 namespace llvm {
48 namespace bolt {
49 
50 // Read and dump the .gcc_except_table section entry.
51 //
52 // .gcc_except_table section contains a set of Language-Specific Data Areas -
53 // a fancy name for exception handling tables. There's one LSDA entry per
54 // function. However, we can't actually tell which function LSDA refers to
55 // unless we parse .eh_frame entry that refers to the LSDA.
56 // Then inside LSDA most addresses are encoded relative to the function start,
57 // so we need the function context in order to get to real addresses.
58 //
59 // The best visual representation of the tables comprising LSDA and
60 // relationships between them is illustrated at:
61 //   https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
62 // Keep in mind that GCC implementation deviates slightly from that document.
63 //
64 // To summarize, there are 4 tables in LSDA: call site table, actions table,
65 // types table, and types index table (for indirection). The main table contains
66 // call site entries. Each call site includes a PC range that can throw an
67 // exception, a handler (landing pad), and a reference to an entry in the action
68 // table. The handler and/or action could be 0. The action entry is a head
69 // of a list of actions associated with a call site. The action table contains
70 // all such lists (it could be optimized to share list tails). Each action could
71 // be either to catch an exception of a given type, to perform a cleanup, or to
72 // propagate the exception after filtering it out (e.g. to make sure function
73 // exception specification is not violated). Catch action contains a reference
74 // to an entry in the type table, and filter action refers to an entry in the
75 // type index table to encode a set of types to filter.
76 //
77 // Call site table follows LSDA header. Action table immediately follows the
78 // call site table.
79 //
80 // Both types table and type index table start at the same location, but they
81 // grow in opposite directions (types go up, indices go down). The beginning of
82 // these tables is encoded in LSDA header. Sizes for both of the tables are not
83 // included anywhere.
84 //
85 // We have to parse all of the tables to determine their sizes. Then we have
86 // to parse the call site table and associate discovered information with
87 // actual call instructions and landing pad blocks.
88 //
89 // For the purpose of rewriting exception handling tables, we can reuse action,
90 // and type index tables in their original binary format.
91 //
92 // Type table could be encoded using position-independent references, and thus
93 // may require relocation.
94 //
95 // Ideally we should be able to re-write LSDA in-place, without the need to
96 // allocate a new space for it. Sadly there's no guarantee that the new call
97 // site table will be the same size, as GCC uses uleb encodings for PC offsets.
98 //
99 // Note: some functions have LSDA entries with 0 call site entries.
100 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
101                                uint64_t LSDASectionAddress) {
102   assert(CurrentState == State::Disassembled && "unexpected function state");
103 
104   if (!getLSDAAddress())
105     return;
106 
107   DWARFDataExtractor Data(
108       StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
109                 LSDASectionData.size()),
110       BC.DwCtx->getDWARFObj().isLittleEndian(), 8);
111   uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
112   assert(Data.isValidOffset(Offset) && "wrong LSDA address");
113 
114   uint8_t LPStartEncoding = Data.getU8(&Offset);
115   uint64_t LPStart = 0;
116   if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
117           &Offset, LPStartEncoding, Offset + LSDASectionAddress))
118     LPStart = *MaybeLPStart;
119 
120   assert(LPStart == 0 && "support for split functions not implemented");
121 
122   const uint8_t TTypeEncoding = Data.getU8(&Offset);
123   size_t TTypeEncodingSize = 0;
124   uintptr_t TTypeEnd = 0;
125   if (TTypeEncoding != DW_EH_PE_omit) {
126     TTypeEnd = Data.getULEB128(&Offset);
127     TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
128   }
129 
130   if (opts::PrintExceptions) {
131     outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
132            << " for function " << *this << "]:\n";
133     outs() << "LPStart Encoding = 0x"
134            << Twine::utohexstr(LPStartEncoding) << '\n';
135     outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
136     outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
137     outs() << "TType End = " << TTypeEnd << '\n';
138   }
139 
140   // Table to store list of indices in type table. Entries are uleb128 values.
141   const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
142 
143   // Offset past the last decoded index.
144   uint64_t MaxTypeIndexTableOffset = 0;
145 
146   // Max positive index used in type table.
147   unsigned MaxTypeIndex = 0;
148 
149   // The actual type info table starts at the same location, but grows in
150   // opposite direction. TTypeEncoding is used to encode stored values.
151   const uint64_t TypeTableStart = Offset + TTypeEnd;
152 
153   uint8_t CallSiteEncoding = Data.getU8(&Offset);
154   uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
155   uint64_t CallSiteTableStart = Offset;
156   uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
157   uint64_t CallSitePtr = CallSiteTableStart;
158   uint64_t ActionTableStart = CallSiteTableEnd;
159 
160   if (opts::PrintExceptions) {
161     outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
162     outs() << "CallSite table length = " << CallSiteTableLength << '\n';
163     outs() << '\n';
164   }
165 
166   this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
167   const uint64_t RangeBase = getAddress();
168   while (CallSitePtr < CallSiteTableEnd) {
169     uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
170                                              CallSitePtr + LSDASectionAddress);
171     uint64_t Length = *Data.getEncodedPointer(
172         &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
173     uint64_t LandingPad = *Data.getEncodedPointer(
174         &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
175     uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
176 
177     if (opts::PrintExceptions) {
178       outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
179              << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
180              << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad)
181              << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
182       outs() << "  current offset is " << (CallSitePtr - CallSiteTableStart)
183              << '\n';
184     }
185 
186     // Create a handler entry if necessary.
187     MCSymbol *LPSymbol = nullptr;
188     if (LandingPad) {
189       if (Instructions.find(LandingPad) == Instructions.end()) {
190         if (opts::Verbosity >= 1) {
191           errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad)
192                  << " not pointing to an instruction in function "
193                  << *this << " - ignoring.\n";
194         }
195       } else {
196         auto Label = Labels.find(LandingPad);
197         if (Label != Labels.end()) {
198           LPSymbol = Label->second;
199         } else {
200           LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
201           Labels[LandingPad] = LPSymbol;
202         }
203       }
204     }
205 
206     // Mark all call instructions in the range.
207     auto II = Instructions.find(Start);
208     auto IE = Instructions.end();
209     assert(II != IE && "exception range not pointing to an instruction");
210     do {
211       MCInst &Instruction = II->second;
212       if (BC.MIB->isCall(Instruction) &&
213           !BC.MIB->getConditionalTailCall(Instruction)) {
214         assert(!BC.MIB->isInvoke(Instruction) &&
215                "overlapping exception ranges detected");
216         // Add extra operands to a call instruction making it an invoke from
217         // now on.
218         BC.MIB->addEHInfo(Instruction,
219                           MCPlus::MCLandingPad(LPSymbol, ActionEntry));
220       }
221       ++II;
222     } while (II != IE && II->first < Start + Length);
223 
224     if (ActionEntry != 0) {
225       auto printType = [&](int Index, raw_ostream &OS) {
226         assert(Index > 0 && "only positive indices are valid");
227         uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
228         const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
229         uint64_t TypeAddress =
230             *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
231         if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) {
232           TypeAddress = 0;
233         }
234         if (TypeAddress == 0) {
235           OS << "<all>";
236           return;
237         }
238         if (TTypeEncoding & DW_EH_PE_indirect) {
239           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
240           assert(PointerOrErr && "failed to decode indirect address");
241           TypeAddress = *PointerOrErr;
242         }
243         if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) {
244           OS << TypeSymBD->getName();
245         } else {
246           OS << "0x" << Twine::utohexstr(TypeAddress);
247         }
248       };
249       if (opts::PrintExceptions)
250         outs() << "    actions: ";
251       uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
252       int64_t ActionType;
253       int64_t ActionNext;
254       const char *Sep = "";
255       do {
256         ActionType = Data.getSLEB128(&ActionPtr);
257         const uint32_t Self = ActionPtr;
258         ActionNext = Data.getSLEB128(&ActionPtr);
259         if (opts::PrintExceptions)
260           outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
261         if (ActionType == 0) {
262           if (opts::PrintExceptions)
263             outs() << "cleanup";
264         } else if (ActionType > 0) {
265           // It's an index into a type table.
266           MaxTypeIndex = std::max(MaxTypeIndex,
267                                   static_cast<unsigned>(ActionType));
268           if (opts::PrintExceptions) {
269             outs() << "catch type ";
270             printType(ActionType, outs());
271           }
272         } else { // ActionType < 0
273           if (opts::PrintExceptions)
274             outs() << "filter exception types ";
275           const char *TSep = "";
276           // ActionType is a negative *byte* offset into *uleb128-encoded* table
277           // of indices with base 1.
278           // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
279           // encoded using uleb128 thus we cannot directly dereference them.
280           uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
281           while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
282             MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
283             if (opts::PrintExceptions) {
284               outs() << TSep;
285               printType(Index, outs());
286               TSep = ", ";
287             }
288           }
289           MaxTypeIndexTableOffset =
290               std::max(MaxTypeIndexTableOffset,
291                        TypeIndexTablePtr - TypeIndexTableStart);
292         }
293 
294         Sep = "; ";
295 
296         ActionPtr = Self + ActionNext;
297       } while (ActionNext);
298       if (opts::PrintExceptions)
299         outs() << '\n';
300     }
301   }
302   if (opts::PrintExceptions)
303     outs() << '\n';
304 
305   assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
306              Data.getData().size() &&
307          "LSDA entry has crossed section boundary");
308 
309   if (TTypeEnd) {
310     LSDAActionTable = LSDASectionData.slice(
311         ActionTableStart, TypeIndexTableStart -
312                               MaxTypeIndex * TTypeEncodingSize -
313                               ActionTableStart);
314     for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
315       uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
316       const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
317       uint64_t TypeAddress =
318           *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
319       if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
320         TypeAddress = 0;
321       if (TTypeEncoding & DW_EH_PE_indirect) {
322         LSDATypeAddressTable.emplace_back(TypeAddress);
323         if (TypeAddress) {
324           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
325           assert(PointerOrErr && "failed to decode indirect address");
326           TypeAddress = *PointerOrErr;
327         }
328       }
329       LSDATypeTable.emplace_back(TypeAddress);
330     }
331     LSDATypeIndexTable =
332         LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
333   }
334 }
335 
336 void BinaryFunction::updateEHRanges() {
337   if (getSize() == 0)
338     return;
339 
340   assert(CurrentState == State::CFG_Finalized && "unexpected state");
341 
342   // Build call sites table.
343   struct EHInfo {
344     const MCSymbol *LP; // landing pad
345     uint64_t Action;
346   };
347 
348   // If previous call can throw, this is its exception handler.
349   EHInfo PreviousEH = {nullptr, 0};
350 
351   // Marker for the beginning of exceptions range.
352   const MCSymbol *StartRange = nullptr;
353 
354   // Indicates whether the start range is located in a cold part.
355   bool IsStartInCold = false;
356 
357   // Have we crossed hot/cold border for split functions?
358   bool SeenCold = false;
359 
360   // Sites to update - either regular or cold.
361   CallSitesType *Sites = &CallSites;
362 
363   for (BinaryBasicBlock *&BB : BasicBlocksLayout) {
364 
365     if (BB->isCold() && !SeenCold) {
366       SeenCold = true;
367 
368       // Close the range (if any) and change the target call sites.
369       if (StartRange) {
370         Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(),
371                                      PreviousEH.LP, PreviousEH.Action});
372       }
373       Sites = &ColdCallSites;
374 
375       // Reset the range.
376       StartRange = nullptr;
377       PreviousEH = {nullptr, 0};
378     }
379 
380     for (auto II = BB->begin(); II != BB->end(); ++II) {
381       if (!BC.MIB->isCall(*II))
382         continue;
383 
384       // Instruction can throw an exception that should be handled.
385       const bool Throws = BC.MIB->isInvoke(*II);
386 
387       // Ignore the call if it's a continuation of a no-throw gap.
388       if (!Throws && !StartRange)
389         continue;
390 
391       // Extract exception handling information from the instruction.
392       const MCSymbol *LP = nullptr;
393       uint64_t Action = 0;
394       if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II))
395         std::tie(LP, Action) = *EHInfo;
396 
397       // No action if the exception handler has not changed.
398       if (Throws &&
399           StartRange &&
400           PreviousEH.LP == LP &&
401           PreviousEH.Action == Action)
402         continue;
403 
404       // Same symbol is used for the beginning and the end of the range.
405       const MCSymbol *EHSymbol;
406       MCInst EHLabel;
407       {
408         std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
409         EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
410         BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
411       }
412 
413       II = std::next(BB->insertPseudoInstr(II, EHLabel));
414 
415       // At this point we could be in one of the following states:
416       //
417       // I. Exception handler has changed and we need to close previous range
418       //    and start a new one.
419       //
420       // II. Start a new exception range after the gap.
421       //
422       // III. Close current exception range and start a new gap.
423       const MCSymbol *EndRange;
424       if (StartRange) {
425         // I, III:
426         EndRange = EHSymbol;
427       } else {
428         // II:
429         StartRange = EHSymbol;
430         IsStartInCold = SeenCold;
431         EndRange = nullptr;
432       }
433 
434       // Close the previous range.
435       if (EndRange) {
436         Sites->emplace_back(CallSite{StartRange, EndRange,
437                                      PreviousEH.LP, PreviousEH.Action});
438       }
439 
440       if (Throws) {
441         // I, II:
442         StartRange = EHSymbol;
443         IsStartInCold = SeenCold;
444         PreviousEH = EHInfo{LP, Action};
445       } else {
446         StartRange = nullptr;
447       }
448     }
449   }
450 
451   // Check if we need to close the range.
452   if (StartRange) {
453     assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch");
454     const MCSymbol *EndRange =
455         IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel();
456     Sites->emplace_back(CallSite{StartRange, EndRange,
457                                  PreviousEH.LP, PreviousEH.Action});
458   }
459 }
460 
// Mask covering the two most significant bits of a CFI opcode byte. Opcodes
// that have either of these bits set are reduced to just those bits by
// CFIReaderWriter::fillCFIInfoFor().
constexpr uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
462 
463 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
464   // Prepare FDEs for fast lookup
465   for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
466     const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
467     // Skip CIEs.
468     if (!CurFDE)
469       continue;
470     // There could me multiple FDEs with the same initial address, and perhaps
471     // different sizes (address ranges). Use the first entry with non-zero size.
472     auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
473     if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
474       if (CurFDE->getAddressRange()) {
475         if (FDEI->second->getAddressRange() == 0) {
476           FDEI->second = CurFDE;
477         } else if (opts::Verbosity > 0) {
478           errs() << "BOLT-WARNING: different FDEs for function at 0x"
479                  << Twine::utohexstr(FDEI->first)
480                  << " detected; sizes: "
481                  << FDEI->second->getAddressRange() << " and "
482                  << CurFDE->getAddressRange() << '\n';
483         }
484       }
485     } else {
486       FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
487     }
488   }
489 }
490 
491 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
492   uint64_t Address = Function.getAddress();
493   auto I = FDEs.find(Address);
494   // Ignore zero-length FDE ranges.
495   if (I == FDEs.end() || !I->second->getAddressRange())
496     return true;
497 
498   const FDE &CurFDE = *I->second;
499   Optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
500   Function.setLSDAAddress(LSDA ? *LSDA : 0);
501 
502   uint64_t Offset = 0;
503   uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
504   uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
505   if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
506     Function.setPersonalityFunction(
507         *CurFDE.getLinkedCIE()->getPersonalityAddress());
508     Function.setPersonalityEncoding(
509         *CurFDE.getLinkedCIE()->getPersonalityEncoding());
510   }
511 
512   auto decodeFrameInstruction =
513       [&Function, &Offset, Address, CodeAlignment, DataAlignment](
514           const CFIProgram::Instruction &Instr) {
515         uint8_t Opcode = Instr.Opcode;
516         if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
517           Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
518         switch (Instr.Opcode) {
519         case DW_CFA_nop:
520           break;
521         case DW_CFA_advance_loc4:
522         case DW_CFA_advance_loc2:
523         case DW_CFA_advance_loc1:
524         case DW_CFA_advance_loc:
525           // Advance our current address
526           Offset += CodeAlignment * int64_t(Instr.Ops[0]);
527           break;
528         case DW_CFA_offset_extended_sf:
529           Function.addCFIInstruction(
530               Offset, MCCFIInstruction::createOffset(
531                           nullptr, Instr.Ops[0],
532                           DataAlignment * int64_t(Instr.Ops[1])));
533           break;
534         case DW_CFA_offset_extended:
535         case DW_CFA_offset:
536           Function.addCFIInstruction(
537               Offset, MCCFIInstruction::createOffset(
538                           nullptr, Instr.Ops[0], DataAlignment * Instr.Ops[1]));
539           break;
540         case DW_CFA_restore_extended:
541         case DW_CFA_restore:
542           Function.addCFIInstruction(
543               Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
544           break;
545         case DW_CFA_set_loc:
546           assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
547           assert(Instr.Ops[0] <= Address + Function.getSize() &&
548                  "set_loc out of function bounds");
549           Offset = Instr.Ops[0] - Address;
550           break;
551 
552         case DW_CFA_undefined:
553           Function.addCFIInstruction(
554               Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
555           break;
556         case DW_CFA_same_value:
557           Function.addCFIInstruction(
558               Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
559           break;
560         case DW_CFA_register:
561           Function.addCFIInstruction(
562               Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
563                                                        Instr.Ops[1]));
564           break;
565         case DW_CFA_remember_state:
566           Function.addCFIInstruction(
567               Offset, MCCFIInstruction::createRememberState(nullptr));
568           break;
569         case DW_CFA_restore_state:
570           Function.addCFIInstruction(
571               Offset, MCCFIInstruction::createRestoreState(nullptr));
572           break;
573         case DW_CFA_def_cfa:
574           Function.addCFIInstruction(
575               Offset, MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
576                                                   Instr.Ops[1]));
577           break;
578         case DW_CFA_def_cfa_sf:
579           Function.addCFIInstruction(
580               Offset, MCCFIInstruction::cfiDefCfa(
581                           nullptr, Instr.Ops[0],
582                           DataAlignment * int64_t(Instr.Ops[1])));
583           break;
584         case DW_CFA_def_cfa_register:
585           Function.addCFIInstruction(
586               Offset,
587               MCCFIInstruction::createDefCfaRegister(nullptr, Instr.Ops[0]));
588           break;
589         case DW_CFA_def_cfa_offset:
590           Function.addCFIInstruction(
591               Offset,
592               MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
593           break;
594         case DW_CFA_def_cfa_offset_sf:
595           Function.addCFIInstruction(
596               Offset, MCCFIInstruction::cfiDefCfaOffset(
597                           nullptr, DataAlignment * int64_t(Instr.Ops[0])));
598           break;
599         case DW_CFA_GNU_args_size:
600           Function.addCFIInstruction(
601               Offset,
602               MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
603           Function.setUsesGnuArgsSize();
604           break;
605         case DW_CFA_val_offset_sf:
606         case DW_CFA_val_offset:
607           if (opts::Verbosity >= 1) {
608             errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
609           }
610           return false;
611         case DW_CFA_def_cfa_expression:
612         case DW_CFA_val_expression:
613         case DW_CFA_expression: {
614           StringRef ExprBytes = Instr.Expression->getData();
615           std::string Str;
616           raw_string_ostream OS(Str);
617           // Manually encode this instruction using CFI escape
618           OS << Opcode;
619           if (Opcode != DW_CFA_def_cfa_expression) {
620             encodeULEB128(Instr.Ops[0], OS);
621           }
622           encodeULEB128(ExprBytes.size(), OS);
623           OS << ExprBytes;
624           Function.addCFIInstruction(
625               Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
626           break;
627         }
628         case DW_CFA_MIPS_advance_loc8:
629           if (opts::Verbosity >= 1) {
630             errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
631           }
632           return false;
633         case DW_CFA_GNU_window_save:
634         case DW_CFA_lo_user:
635         case DW_CFA_hi_user:
636           if (opts::Verbosity >= 1) {
637             errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
638                       "unimplemented\n";
639           }
640           return false;
641         default:
642           if (opts::Verbosity >= 1) {
643             errs() << "BOLT-WARNING: Unrecognized CFI instruction: "
644                    << Instr.Opcode << '\n';
645           }
646           return false;
647         }
648 
649         return true;
650       };
651 
652   for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) {
653     if (!decodeFrameInstruction(Instr))
654       return false;
655   }
656 
657   for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) {
658     if (!decodeFrameInstruction(Instr))
659       return false;
660   }
661 
662   return true;
663 }
664 
665 std::vector<char> CFIReaderWriter::generateEHFrameHeader(
666     const DWARFDebugFrame &OldEHFrame,
667     const DWARFDebugFrame &NewEHFrame,
668     uint64_t EHFrameHeaderAddress,
669     std::vector<uint64_t> &FailedAddresses) const {
670   // Common PC -> FDE map to be written into .eh_frame_hdr.
671   std::map<uint64_t, uint64_t> PCToFDE;
672 
673   // Presort array for binary search.
674   std::sort(FailedAddresses.begin(), FailedAddresses.end());
675 
676   // Initialize PCToFDE using NewEHFrame.
677   for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
678     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
679     if (FDE == nullptr)
680       continue;
681     const uint64_t FuncAddress = FDE->getInitialLocation();
682     const uint64_t FDEAddress =
683         NewEHFrame.getEHFrameAddress() + FDE->getOffset();
684 
685     // Ignore unused FDEs.
686     if (FuncAddress == 0)
687       continue;
688 
689     // Add the address to the map unless we failed to write it.
690     if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
691                             FuncAddress)) {
692       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
693                         << Twine::utohexstr(FuncAddress) << " is at 0x"
694                         << Twine::utohexstr(FDEAddress) << '\n');
695       PCToFDE[FuncAddress] = FDEAddress;
696     }
697   };
698 
699   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
700                     << std::distance(NewEHFrame.entries().begin(),
701                                      NewEHFrame.entries().end())
702                     << " entries\n");
703 
704   // Add entries from the original .eh_frame corresponding to the functions
705   // that we did not update.
706   for (const dwarf::FrameEntry &Entry : OldEHFrame) {
707     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
708     if (FDE == nullptr)
709       continue;
710     const uint64_t FuncAddress = FDE->getInitialLocation();
711     const uint64_t FDEAddress =
712         OldEHFrame.getEHFrameAddress() + FDE->getOffset();
713 
714     // Add the address if we failed to write it.
715     if (PCToFDE.count(FuncAddress) == 0) {
716       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
717                         << Twine::utohexstr(FuncAddress) << " is at 0x"
718                         << Twine::utohexstr(FDEAddress) << '\n');
719       PCToFDE[FuncAddress] = FDEAddress;
720     }
721   };
722 
723   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
724                     << std::distance(OldEHFrame.entries().begin(),
725                                      OldEHFrame.entries().end())
726                     << " entries\n");
727 
728   // Generate a new .eh_frame_hdr based on the new map.
729 
730   // Header plus table of entries of size 8 bytes.
731   std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
732 
733   // Version is 1.
734   EHFrameHeader[0] = 1;
735   // Encoding of the eh_frame pointer.
736   EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
737   // Encoding of the count field to follow.
738   EHFrameHeader[2] = DW_EH_PE_udata4;
739   // Encoding of the table entries - 4-byte offset from the start of the header.
740   EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
741 
742   // Address of eh_frame. Use the new one.
743   support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
744     NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
745 
746   // Number of entries in the table (FDE count).
747   support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
748 
749   // Write the table at offset 12.
750   char *Ptr = EHFrameHeader.data();
751   uint32_t Offset = 12;
752   for (const auto &PCI : PCToFDE) {
753     int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
754     assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
755     support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
756     Offset += 4;
757     int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
758     assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
759     support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
760     Offset += 4;
761   }
762 
763   return EHFrameHeader;
764 }
765 
766 Error EHFrameParser::parseCIE(uint64_t StartOffset) {
767   uint8_t Version = Data.getU8(&Offset);
768   const char *Augmentation = Data.getCStr(&Offset);
769   StringRef AugmentationString(Augmentation ? Augmentation : "");
770   uint8_t AddressSize =
771       Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
772   Data.setAddressSize(AddressSize);
773   // Skip segment descriptor size
774   if (Version >= 4)
775     Offset += 1;
776   // Skip code alignment factor
777   Data.getULEB128(&Offset);
778   // Skip data alignment
779   Data.getSLEB128(&Offset);
780   // Skip return address register
781   if (Version == 1) {
782     Offset += 1;
783   } else {
784     Data.getULEB128(&Offset);
785   }
786 
787   uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
788   uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
789   // Walk the augmentation string to get all the augmentation data.
790   for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
791     switch (AugmentationString[i]) {
792     default:
793       return createStringError(
794           errc::invalid_argument,
795           "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
796     case 'L':
797       LSDAPointerEncoding = Data.getU8(&Offset);
798       break;
799     case 'P': {
800       uint32_t PersonalityEncoding = Data.getU8(&Offset);
801       Optional<uint64_t> Personality =
802           Data.getEncodedPointer(&Offset, PersonalityEncoding,
803                                  EHFrameAddress ? EHFrameAddress + Offset : 0);
804       // Patch personality address
805       if (Personality)
806         PatcherCallback(*Personality, Offset, PersonalityEncoding);
807       break;
808     }
809     case 'R':
810       FDEPointerEncoding = Data.getU8(&Offset);
811       break;
812     case 'z':
813       if (i)
814         return createStringError(
815             errc::invalid_argument,
816             "'z' must be the first character at 0x%" PRIx64, StartOffset);
817       // Skip augmentation length
818       Data.getULEB128(&Offset);
819       break;
820     case 'S':
821     case 'B':
822       break;
823     }
824   }
825   Entries.emplace_back(std::make_unique<CIEInfo>(
826       FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
827   CIEs[StartOffset] = &*Entries.back();
828   return Error::success();
829 }
830 
831 Error EHFrameParser::parseFDE(uint64_t CIEPointer,
832                                uint64_t StartStructureOffset) {
833   Optional<uint64_t> LSDAAddress;
834   CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
835 
836   // The address size is encoded in the CIE we reference.
837   if (!Cie)
838     return createStringError(errc::invalid_argument,
839                              "parsing FDE data at 0x%" PRIx64
840                              " failed due to missing CIE",
841                              StartStructureOffset);
842   // Patch initial location
843   if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
844                                         EHFrameAddress + Offset)) {
845     PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
846   }
847   // Skip address range
848   Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
849 
850   // Process augmentation data for this FDE.
851   StringRef AugmentationString = Cie->AugmentationString;
852   if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
853     // Skip augmentation length
854     Data.getULEB128(&Offset);
855     LSDAAddress =
856         Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
857                                EHFrameAddress ? Offset + EHFrameAddress : 0);
858     // Patch LSDA address
859     PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
860   }
861   return Error::success();
862 }
863 
864 Error EHFrameParser::parse() {
865   while (Data.isValidOffset(Offset)) {
866     const uint64_t StartOffset = Offset;
867 
868     uint64_t Length;
869     DwarfFormat Format;
870     std::tie(Length, Format) = Data.getInitialLength(&Offset);
871 
872     // If the Length is 0, then this CIE is a terminator
873     if (Length == 0)
874       break;
875 
876     const uint64_t StartStructureOffset = Offset;
877     const uint64_t EndStructureOffset = Offset + Length;
878 
879     Error Err = Error::success();
880     const uint64_t Id = Data.getRelocatedValue(4, &Offset,
881                                                /*SectionIndex=*/nullptr, &Err);
882     if (Err)
883       return Err;
884 
885     if (!Id) {
886       if (Error Err = parseCIE(StartOffset))
887         return Err;
888     } else {
889       if (Error Err = parseFDE(Id, StartStructureOffset))
890         return Err;
891     }
892     Offset = EndStructureOffset;
893   }
894 
895   return Error::success();
896 }
897 
898 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
899                             PatcherCallbackTy PatcherCallback) {
900   EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
901   return Parser.parse();
902 }
903 
904 } // namespace bolt
905 } // namespace llvm
906