xref: /llvm-project/bolt/lib/Core/Exceptions.cpp (revision 2f09f445b2d6b3ef197aecd8d1e06d08140380f3)
1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for handling C++ exception meta data.
10 //
11 // Some of the code is taken from examples/ExceptionDemo
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/Dwarf.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/LEB128.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <map>
28 
29 #undef  DEBUG_TYPE
30 #define DEBUG_TYPE "bolt-exceptions"
31 
32 using namespace llvm::dwarf;
33 
34 namespace opts {
35 
36 extern llvm::cl::OptionCategory BoltCategory;
37 
38 extern llvm::cl::opt<unsigned> Verbosity;
39 
40 static llvm::cl::opt<bool>
41 PrintExceptions("print-exceptions",
42   llvm::cl::desc("print exception handling data"),
43   llvm::cl::ZeroOrMore,
44   llvm::cl::Hidden,
45   llvm::cl::cat(BoltCategory));
46 
47 } // namespace opts
48 
49 namespace llvm {
50 namespace bolt {
51 
// Read and dump the .gcc_except_table section entry.
53 //
54 // .gcc_except_table section contains a set of Language-Specific Data Areas -
55 // a fancy name for exception handling tables. There's one  LSDA entry per
56 // function. However, we can't actually tell which function LSDA refers to
57 // unless we parse .eh_frame entry that refers to the LSDA.
58 // Then inside LSDA most addresses are encoded relative to the function start,
59 // so we need the function context in order to get to real addresses.
60 //
61 // The best visual representation of the tables comprising LSDA and
62 // relationships between them is illustrated at:
63 //   https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
64 // Keep in mind that GCC implementation deviates slightly from that document.
65 //
66 // To summarize, there are 4 tables in LSDA: call site table, actions table,
67 // types table, and types index table (for indirection). The main table contains
68 // call site entries. Each call site includes a PC range that can throw an
69 // exception, a handler (landing pad), and a reference to an entry in the action
70 // table. The handler and/or action could be 0. The action entry is a head
71 // of a list of actions associated with a call site. The action table contains
72 // all such lists (it could be optimized to share list tails). Each action could
73 // be either to catch an exception of a given type, to perform a cleanup, or to
74 // propagate the exception after filtering it out (e.g. to make sure function
75 // exception specification is not violated). Catch action contains a reference
76 // to an entry in the type table, and filter action refers to an entry in the
77 // type index table to encode a set of types to filter.
78 //
79 // Call site table follows LSDA header. Action table immediately follows the
80 // call site table.
81 //
82 // Both types table and type index table start at the same location, but they
83 // grow in opposite directions (types go up, indices go down). The beginning of
84 // these tables is encoded in LSDA header. Sizes for both of the tables are not
85 // included anywhere.
86 //
87 // We have to parse all of the tables to determine their sizes. Then we have
88 // to parse the call site table and associate discovered information with
89 // actual call instructions and landing pad blocks.
90 //
91 // For the purpose of rewriting exception handling tables, we can reuse action,
92 // and type index tables in their original binary format.
93 //
94 // Type table could be encoded using position-independent references, and thus
95 // may require relocation.
96 //
97 // Ideally we should be able to re-write LSDA in-place, without the need to
98 // allocate a new space for it. Sadly there's no guarantee that the new call
99 // site table will be the same size as GCC uses uleb encodings for PC offsets.
100 //
101 // Note: some functions have LSDA entries with 0 call site entries.
102 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
103                                uint64_t LSDASectionAddress) {
104   assert(CurrentState == State::Disassembled && "unexpected function state");
105 
106   if (!getLSDAAddress())
107     return;
108 
109   DWARFDataExtractor Data(
110       StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
111                 LSDASectionData.size()),
112       BC.DwCtx->getDWARFObj().isLittleEndian(), 8);
113   uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
114   assert(Data.isValidOffset(Offset) && "wrong LSDA address");
115 
116   uint8_t LPStartEncoding = Data.getU8(&Offset);
117   uint64_t LPStart = 0;
118   if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
119           &Offset, LPStartEncoding, Offset + LSDASectionAddress))
120     LPStart = *MaybeLPStart;
121 
122   assert(LPStart == 0 && "support for split functions not implemented");
123 
124   const uint8_t TTypeEncoding = Data.getU8(&Offset);
125   size_t TTypeEncodingSize = 0;
126   uintptr_t TTypeEnd = 0;
127   if (TTypeEncoding != DW_EH_PE_omit) {
128     TTypeEnd = Data.getULEB128(&Offset);
129     TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
130   }
131 
132   if (opts::PrintExceptions) {
133     outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
134            << " for function " << *this << "]:\n";
135     outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
136            << '\n';
137     outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
138     outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
139     outs() << "TType End = " << TTypeEnd << '\n';
140   }
141 
142   // Table to store list of indices in type table. Entries are uleb128 values.
143   const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
144 
145   // Offset past the last decoded index.
146   uint64_t MaxTypeIndexTableOffset = 0;
147 
148   // Max positive index used in type table.
149   unsigned MaxTypeIndex = 0;
150 
151   // The actual type info table starts at the same location, but grows in
152   // opposite direction. TTypeEncoding is used to encode stored values.
153   const uint64_t TypeTableStart = Offset + TTypeEnd;
154 
155   uint8_t CallSiteEncoding = Data.getU8(&Offset);
156   uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
157   uint64_t CallSiteTableStart = Offset;
158   uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
159   uint64_t CallSitePtr = CallSiteTableStart;
160   uint64_t ActionTableStart = CallSiteTableEnd;
161 
162   if (opts::PrintExceptions) {
163     outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
164     outs() << "CallSite table length = " << CallSiteTableLength << '\n';
165     outs() << '\n';
166   }
167 
168   this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
169   const uint64_t RangeBase = getAddress();
170   while (CallSitePtr < CallSiteTableEnd) {
171     uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
172                                              CallSitePtr + LSDASectionAddress);
173     uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
174                                               CallSitePtr + LSDASectionAddress);
175     uint64_t LandingPad = *Data.getEncodedPointer(
176         &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
177     uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
178 
179     if (opts::PrintExceptions) {
180       outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
181              << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
182              << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad)
183              << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
184       outs() << "  current offset is " << (CallSitePtr - CallSiteTableStart)
185              << '\n';
186     }
187 
188     // Create a handler entry if necessary.
189     MCSymbol *LPSymbol = nullptr;
190     if (LandingPad) {
191       if (!getInstructionAtOffset(LandingPad)) {
192         if (opts::Verbosity >= 1)
193           errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad)
194                  << " not pointing to an instruction in function " << *this
195                  << " - ignoring.\n";
196       } else {
197         auto Label = Labels.find(LandingPad);
198         if (Label != Labels.end()) {
199           LPSymbol = Label->second;
200         } else {
201           LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
202           Labels[LandingPad] = LPSymbol;
203         }
204       }
205     }
206 
207     // Mark all call instructions in the range.
208     auto II = Instructions.find(Start);
209     auto IE = Instructions.end();
210     assert(II != IE && "exception range not pointing to an instruction");
211     do {
212       MCInst &Instruction = II->second;
213       if (BC.MIB->isCall(Instruction) &&
214           !BC.MIB->getConditionalTailCall(Instruction)) {
215         assert(!BC.MIB->isInvoke(Instruction) &&
216                "overlapping exception ranges detected");
217         // Add extra operands to a call instruction making it an invoke from
218         // now on.
219         BC.MIB->addEHInfo(Instruction,
220                           MCPlus::MCLandingPad(LPSymbol, ActionEntry));
221       }
222       ++II;
223     } while (II != IE && II->first < Start + Length);
224 
225     if (ActionEntry != 0) {
226       auto printType = [&](int Index, raw_ostream &OS) {
227         assert(Index > 0 && "only positive indices are valid");
228         uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
229         const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
230         uint64_t TypeAddress =
231             *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
232         if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) {
233           TypeAddress = 0;
234         }
235         if (TypeAddress == 0) {
236           OS << "<all>";
237           return;
238         }
239         if (TTypeEncoding & DW_EH_PE_indirect) {
240           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
241           assert(PointerOrErr && "failed to decode indirect address");
242           TypeAddress = *PointerOrErr;
243         }
244         if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) {
245           OS << TypeSymBD->getName();
246         } else {
247           OS << "0x" << Twine::utohexstr(TypeAddress);
248         }
249       };
250       if (opts::PrintExceptions)
251         outs() << "    actions: ";
252       uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
253       int64_t ActionType;
254       int64_t ActionNext;
255       const char *Sep = "";
256       do {
257         ActionType = Data.getSLEB128(&ActionPtr);
258         const uint32_t Self = ActionPtr;
259         ActionNext = Data.getSLEB128(&ActionPtr);
260         if (opts::PrintExceptions)
261           outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
262         if (ActionType == 0) {
263           if (opts::PrintExceptions)
264             outs() << "cleanup";
265         } else if (ActionType > 0) {
266           // It's an index into a type table.
267           MaxTypeIndex =
268               std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
269           if (opts::PrintExceptions) {
270             outs() << "catch type ";
271             printType(ActionType, outs());
272           }
273         } else { // ActionType < 0
274           if (opts::PrintExceptions)
275             outs() << "filter exception types ";
276           const char *TSep = "";
277           // ActionType is a negative *byte* offset into *uleb128-encoded* table
278           // of indices with base 1.
279           // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
280           // encoded using uleb128 thus we cannot directly dereference them.
281           uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
282           while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
283             MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
284             if (opts::PrintExceptions) {
285               outs() << TSep;
286               printType(Index, outs());
287               TSep = ", ";
288             }
289           }
290           MaxTypeIndexTableOffset = std::max(
291               MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
292         }
293 
294         Sep = "; ";
295 
296         ActionPtr = Self + ActionNext;
297       } while (ActionNext);
298       if (opts::PrintExceptions)
299         outs() << '\n';
300     }
301   }
302   if (opts::PrintExceptions)
303     outs() << '\n';
304 
305   assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
306              Data.getData().size() &&
307          "LSDA entry has crossed section boundary");
308 
309   if (TTypeEnd) {
310     LSDAActionTable = LSDASectionData.slice(
311         ActionTableStart, TypeIndexTableStart -
312                               MaxTypeIndex * TTypeEncodingSize -
313                               ActionTableStart);
314     for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
315       uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
316       const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
317       uint64_t TypeAddress =
318           *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
319       if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
320         TypeAddress = 0;
321       if (TTypeEncoding & DW_EH_PE_indirect) {
322         LSDATypeAddressTable.emplace_back(TypeAddress);
323         if (TypeAddress) {
324           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
325           assert(PointerOrErr && "failed to decode indirect address");
326           TypeAddress = *PointerOrErr;
327         }
328       }
329       LSDATypeTable.emplace_back(TypeAddress);
330     }
331     LSDATypeIndexTable =
332         LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
333   }
334 }
335 
336 void BinaryFunction::updateEHRanges() {
337   if (getSize() == 0)
338     return;
339 
340   assert(CurrentState == State::CFG_Finalized && "unexpected state");
341 
342   // Build call sites table.
343   struct EHInfo {
344     const MCSymbol *LP; // landing pad
345     uint64_t Action;
346   };
347 
348   // If previous call can throw, this is its exception handler.
349   EHInfo PreviousEH = {nullptr, 0};
350 
351   // Marker for the beginning of exceptions range.
352   const MCSymbol *StartRange = nullptr;
353 
354   // Indicates whether the start range is located in a cold part.
355   bool IsStartInCold = false;
356 
357   // Have we crossed hot/cold border for split functions?
358   bool SeenCold = false;
359 
360   // Sites to update - either regular or cold.
361   CallSitesType *Sites = &CallSites;
362 
363   for (BinaryBasicBlock *&BB : BasicBlocksLayout) {
364 
365     if (BB->isCold() && !SeenCold) {
366       SeenCold = true;
367 
368       // Close the range (if any) and change the target call sites.
369       if (StartRange) {
370         Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(),
371                                      PreviousEH.LP, PreviousEH.Action});
372       }
373       Sites = &ColdCallSites;
374 
375       // Reset the range.
376       StartRange = nullptr;
377       PreviousEH = {nullptr, 0};
378     }
379 
380     for (auto II = BB->begin(); II != BB->end(); ++II) {
381       if (!BC.MIB->isCall(*II))
382         continue;
383 
384       // Instruction can throw an exception that should be handled.
385       const bool Throws = BC.MIB->isInvoke(*II);
386 
387       // Ignore the call if it's a continuation of a no-throw gap.
388       if (!Throws && !StartRange)
389         continue;
390 
391       // Extract exception handling information from the instruction.
392       const MCSymbol *LP = nullptr;
393       uint64_t Action = 0;
394       if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II))
395         std::tie(LP, Action) = *EHInfo;
396 
397       // No action if the exception handler has not changed.
398       if (Throws && StartRange && PreviousEH.LP == LP &&
399           PreviousEH.Action == Action)
400         continue;
401 
402       // Same symbol is used for the beginning and the end of the range.
403       const MCSymbol *EHSymbol;
404       MCInst EHLabel;
405       {
406         std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
407         EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
408         BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
409       }
410 
411       II = std::next(BB->insertPseudoInstr(II, EHLabel));
412 
413       // At this point we could be in one of the following states:
414       //
415       // I. Exception handler has changed and we need to close previous range
416       //    and start a new one.
417       //
418       // II. Start a new exception range after the gap.
419       //
420       // III. Close current exception range and start a new gap.
421       const MCSymbol *EndRange;
422       if (StartRange) {
423         // I, III:
424         EndRange = EHSymbol;
425       } else {
426         // II:
427         StartRange = EHSymbol;
428         IsStartInCold = SeenCold;
429         EndRange = nullptr;
430       }
431 
432       // Close the previous range.
433       if (EndRange) {
434         Sites->emplace_back(
435             CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
436       }
437 
438       if (Throws) {
439         // I, II:
440         StartRange = EHSymbol;
441         IsStartInCold = SeenCold;
442         PreviousEH = EHInfo{LP, Action};
443       } else {
444         StartRange = nullptr;
445       }
446     }
447   }
448 
449   // Check if we need to close the range.
450   if (StartRange) {
451     assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch");
452     const MCSymbol *EndRange =
453         IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel();
454     Sites->emplace_back(
455         CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
456   }
457 }
458 
// Mask selecting the two most significant bits of a CFI opcode byte.
constexpr uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
460 
461 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
462   // Prepare FDEs for fast lookup
463   for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
464     const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
465     // Skip CIEs.
466     if (!CurFDE)
467       continue;
468     // There could me multiple FDEs with the same initial address, and perhaps
469     // different sizes (address ranges). Use the first entry with non-zero size.
470     auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
471     if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
472       if (CurFDE->getAddressRange()) {
473         if (FDEI->second->getAddressRange() == 0) {
474           FDEI->second = CurFDE;
475         } else if (opts::Verbosity > 0) {
476           errs() << "BOLT-WARNING: different FDEs for function at 0x"
477                  << Twine::utohexstr(FDEI->first)
478                  << " detected; sizes: " << FDEI->second->getAddressRange()
479                  << " and " << CurFDE->getAddressRange() << '\n';
480         }
481       }
482     } else {
483       FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
484     }
485   }
486 }
487 
488 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
489   uint64_t Address = Function.getAddress();
490   auto I = FDEs.find(Address);
491   // Ignore zero-length FDE ranges.
492   if (I == FDEs.end() || !I->second->getAddressRange())
493     return true;
494 
495   const FDE &CurFDE = *I->second;
496   Optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
497   Function.setLSDAAddress(LSDA ? *LSDA : 0);
498 
499   uint64_t Offset = 0;
500   uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
501   uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
502   if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
503     Function.setPersonalityFunction(
504         *CurFDE.getLinkedCIE()->getPersonalityAddress());
505     Function.setPersonalityEncoding(
506         *CurFDE.getLinkedCIE()->getPersonalityEncoding());
507   }
508 
509   auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment,
510                                  DataAlignment](
511                                     const CFIProgram::Instruction &Instr) {
512     uint8_t Opcode = Instr.Opcode;
513     if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
514       Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
515     switch (Instr.Opcode) {
516     case DW_CFA_nop:
517       break;
518     case DW_CFA_advance_loc4:
519     case DW_CFA_advance_loc2:
520     case DW_CFA_advance_loc1:
521     case DW_CFA_advance_loc:
522       // Advance our current address
523       Offset += CodeAlignment * int64_t(Instr.Ops[0]);
524       break;
525     case DW_CFA_offset_extended_sf:
526       Function.addCFIInstruction(
527           Offset,
528           MCCFIInstruction::createOffset(
529               nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
530       break;
531     case DW_CFA_offset_extended:
532     case DW_CFA_offset:
533       Function.addCFIInstruction(
534           Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
535                                                  DataAlignment * Instr.Ops[1]));
536       break;
537     case DW_CFA_restore_extended:
538     case DW_CFA_restore:
539       Function.addCFIInstruction(
540           Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
541       break;
542     case DW_CFA_set_loc:
543       assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
544       assert(Instr.Ops[0] <= Address + Function.getSize() &&
545              "set_loc out of function bounds");
546       Offset = Instr.Ops[0] - Address;
547       break;
548 
549     case DW_CFA_undefined:
550       Function.addCFIInstruction(
551           Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
552       break;
553     case DW_CFA_same_value:
554       Function.addCFIInstruction(
555           Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
556       break;
557     case DW_CFA_register:
558       Function.addCFIInstruction(
559           Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
560                                                    Instr.Ops[1]));
561       break;
562     case DW_CFA_remember_state:
563       Function.addCFIInstruction(
564           Offset, MCCFIInstruction::createRememberState(nullptr));
565       break;
566     case DW_CFA_restore_state:
567       Function.addCFIInstruction(Offset,
568                                  MCCFIInstruction::createRestoreState(nullptr));
569       break;
570     case DW_CFA_def_cfa:
571       Function.addCFIInstruction(
572           Offset,
573           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
574       break;
575     case DW_CFA_def_cfa_sf:
576       Function.addCFIInstruction(
577           Offset,
578           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
579                                       DataAlignment * int64_t(Instr.Ops[1])));
580       break;
581     case DW_CFA_def_cfa_register:
582       Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
583                                              nullptr, Instr.Ops[0]));
584       break;
585     case DW_CFA_def_cfa_offset:
586       Function.addCFIInstruction(
587           Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
588       break;
589     case DW_CFA_def_cfa_offset_sf:
590       Function.addCFIInstruction(
591           Offset, MCCFIInstruction::cfiDefCfaOffset(
592                       nullptr, DataAlignment * int64_t(Instr.Ops[0])));
593       break;
594     case DW_CFA_GNU_args_size:
595       Function.addCFIInstruction(
596           Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
597       Function.setUsesGnuArgsSize();
598       break;
599     case DW_CFA_val_offset_sf:
600     case DW_CFA_val_offset:
601       if (opts::Verbosity >= 1) {
602         errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
603       }
604       return false;
605     case DW_CFA_def_cfa_expression:
606     case DW_CFA_val_expression:
607     case DW_CFA_expression: {
608       StringRef ExprBytes = Instr.Expression->getData();
609       std::string Str;
610       raw_string_ostream OS(Str);
611       // Manually encode this instruction using CFI escape
612       OS << Opcode;
613       if (Opcode != DW_CFA_def_cfa_expression) {
614         encodeULEB128(Instr.Ops[0], OS);
615       }
616       encodeULEB128(ExprBytes.size(), OS);
617       OS << ExprBytes;
618       Function.addCFIInstruction(
619           Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
620       break;
621     }
622     case DW_CFA_MIPS_advance_loc8:
623       if (opts::Verbosity >= 1) {
624         errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
625       }
626       return false;
627     case DW_CFA_GNU_window_save:
628     case DW_CFA_lo_user:
629     case DW_CFA_hi_user:
630       if (opts::Verbosity >= 1) {
631         errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
632                   "unimplemented\n";
633       }
634       return false;
635     default:
636       if (opts::Verbosity >= 1) {
637         errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
638                << '\n';
639       }
640       return false;
641     }
642 
643     return true;
644   };
645 
646   for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) {
647     if (!decodeFrameInstruction(Instr))
648       return false;
649   }
650 
651   for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) {
652     if (!decodeFrameInstruction(Instr))
653       return false;
654   }
655 
656   return true;
657 }
658 
659 std::vector<char> CFIReaderWriter::generateEHFrameHeader(
660     const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame,
661     uint64_t EHFrameHeaderAddress,
662     std::vector<uint64_t> &FailedAddresses) const {
663   // Common PC -> FDE map to be written into .eh_frame_hdr.
664   std::map<uint64_t, uint64_t> PCToFDE;
665 
666   // Presort array for binary search.
667   std::sort(FailedAddresses.begin(), FailedAddresses.end());
668 
669   // Initialize PCToFDE using NewEHFrame.
670   for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
671     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
672     if (FDE == nullptr)
673       continue;
674     const uint64_t FuncAddress = FDE->getInitialLocation();
675     const uint64_t FDEAddress =
676         NewEHFrame.getEHFrameAddress() + FDE->getOffset();
677 
678     // Ignore unused FDEs.
679     if (FuncAddress == 0)
680       continue;
681 
682     // Add the address to the map unless we failed to write it.
683     if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
684                             FuncAddress)) {
685       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
686                         << Twine::utohexstr(FuncAddress) << " is at 0x"
687                         << Twine::utohexstr(FDEAddress) << '\n');
688       PCToFDE[FuncAddress] = FDEAddress;
689     }
690   };
691 
692   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
693                     << std::distance(NewEHFrame.entries().begin(),
694                                      NewEHFrame.entries().end())
695                     << " entries\n");
696 
697   // Add entries from the original .eh_frame corresponding to the functions
698   // that we did not update.
699   for (const dwarf::FrameEntry &Entry : OldEHFrame) {
700     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
701     if (FDE == nullptr)
702       continue;
703     const uint64_t FuncAddress = FDE->getInitialLocation();
704     const uint64_t FDEAddress =
705         OldEHFrame.getEHFrameAddress() + FDE->getOffset();
706 
707     // Add the address if we failed to write it.
708     if (PCToFDE.count(FuncAddress) == 0) {
709       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
710                         << Twine::utohexstr(FuncAddress) << " is at 0x"
711                         << Twine::utohexstr(FDEAddress) << '\n');
712       PCToFDE[FuncAddress] = FDEAddress;
713     }
714   };
715 
716   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
717                     << std::distance(OldEHFrame.entries().begin(),
718                                      OldEHFrame.entries().end())
719                     << " entries\n");
720 
721   // Generate a new .eh_frame_hdr based on the new map.
722 
723   // Header plus table of entries of size 8 bytes.
724   std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
725 
726   // Version is 1.
727   EHFrameHeader[0] = 1;
728   // Encoding of the eh_frame pointer.
729   EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
730   // Encoding of the count field to follow.
731   EHFrameHeader[2] = DW_EH_PE_udata4;
732   // Encoding of the table entries - 4-byte offset from the start of the header.
733   EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
734 
735   // Address of eh_frame. Use the new one.
736   support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
737       NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
738 
739   // Number of entries in the table (FDE count).
740   support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
741 
742   // Write the table at offset 12.
743   char *Ptr = EHFrameHeader.data();
744   uint32_t Offset = 12;
745   for (const auto &PCI : PCToFDE) {
746     int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
747     assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
748     support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
749     Offset += 4;
750     int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
751     assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
752     support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
753     Offset += 4;
754   }
755 
756   return EHFrameHeader;
757 }
758 
759 Error EHFrameParser::parseCIE(uint64_t StartOffset) {
760   uint8_t Version = Data.getU8(&Offset);
761   const char *Augmentation = Data.getCStr(&Offset);
762   StringRef AugmentationString(Augmentation ? Augmentation : "");
763   uint8_t AddressSize =
764       Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
765   Data.setAddressSize(AddressSize);
766   // Skip segment descriptor size
767   if (Version >= 4)
768     Offset += 1;
769   // Skip code alignment factor
770   Data.getULEB128(&Offset);
771   // Skip data alignment
772   Data.getSLEB128(&Offset);
773   // Skip return address register
774   if (Version == 1) {
775     Offset += 1;
776   } else {
777     Data.getULEB128(&Offset);
778   }
779 
780   uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
781   uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
782   // Walk the augmentation string to get all the augmentation data.
783   for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
784     switch (AugmentationString[i]) {
785     default:
786       return createStringError(
787           errc::invalid_argument,
788           "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
789     case 'L':
790       LSDAPointerEncoding = Data.getU8(&Offset);
791       break;
792     case 'P': {
793       uint32_t PersonalityEncoding = Data.getU8(&Offset);
794       Optional<uint64_t> Personality =
795           Data.getEncodedPointer(&Offset, PersonalityEncoding,
796                                  EHFrameAddress ? EHFrameAddress + Offset : 0);
797       // Patch personality address
798       if (Personality)
799         PatcherCallback(*Personality, Offset, PersonalityEncoding);
800       break;
801     }
802     case 'R':
803       FDEPointerEncoding = Data.getU8(&Offset);
804       break;
805     case 'z':
806       if (i)
807         return createStringError(
808             errc::invalid_argument,
809             "'z' must be the first character at 0x%" PRIx64, StartOffset);
810       // Skip augmentation length
811       Data.getULEB128(&Offset);
812       break;
813     case 'S':
814     case 'B':
815       break;
816     }
817   }
818   Entries.emplace_back(std::make_unique<CIEInfo>(
819       FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
820   CIEs[StartOffset] = &*Entries.back();
821   return Error::success();
822 }
823 
824 Error EHFrameParser::parseFDE(uint64_t CIEPointer,
825                               uint64_t StartStructureOffset) {
826   Optional<uint64_t> LSDAAddress;
827   CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
828 
829   // The address size is encoded in the CIE we reference.
830   if (!Cie)
831     return createStringError(errc::invalid_argument,
832                              "parsing FDE data at 0x%" PRIx64
833                              " failed due to missing CIE",
834                              StartStructureOffset);
835   // Patch initial location
836   if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
837                                         EHFrameAddress + Offset)) {
838     PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
839   }
840   // Skip address range
841   Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
842 
843   // Process augmentation data for this FDE.
844   StringRef AugmentationString = Cie->AugmentationString;
845   if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
846     // Skip augmentation length
847     Data.getULEB128(&Offset);
848     LSDAAddress =
849         Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
850                                EHFrameAddress ? Offset + EHFrameAddress : 0);
851     // Patch LSDA address
852     PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
853   }
854   return Error::success();
855 }
856 
857 Error EHFrameParser::parse() {
858   while (Data.isValidOffset(Offset)) {
859     const uint64_t StartOffset = Offset;
860 
861     uint64_t Length;
862     DwarfFormat Format;
863     std::tie(Length, Format) = Data.getInitialLength(&Offset);
864 
865     // If the Length is 0, then this CIE is a terminator
866     if (Length == 0)
867       break;
868 
869     const uint64_t StartStructureOffset = Offset;
870     const uint64_t EndStructureOffset = Offset + Length;
871 
872     Error Err = Error::success();
873     const uint64_t Id = Data.getRelocatedValue(4, &Offset,
874                                                /*SectionIndex=*/nullptr, &Err);
875     if (Err)
876       return Err;
877 
878     if (!Id) {
879       if (Error Err = parseCIE(StartOffset))
880         return Err;
881     } else {
882       if (Error Err = parseFDE(Id, StartStructureOffset))
883         return Err;
884     }
885     Offset = EndStructureOffset;
886   }
887 
888   return Error::success();
889 }
890 
891 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
892                            PatcherCallbackTy PatcherCallback) {
893   EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
894   return Parser.parse();
895 }
896 
897 } // namespace bolt
898 } // namespace llvm
899