xref: /llvm-project/bolt/lib/Core/Exceptions.cpp (revision 1b8e0cf090a08b2c517eb2a3e101332d692063c2)
//===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements functions for handling C++ exception metadata.
//
// Some of the code is taken from examples/ExceptionDemo
//
//===----------------------------------------------------------------------===//
14a34c753fSRafael Auler 
15a34c753fSRafael Auler #include "bolt/Core/Exceptions.h"
16a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
17a34c753fSRafael Auler #include "llvm/ADT/ArrayRef.h"
18a34c753fSRafael Auler #include "llvm/ADT/Twine.h"
19a34c753fSRafael Auler #include "llvm/BinaryFormat/Dwarf.h"
20a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21a34c753fSRafael Auler #include "llvm/Support/Casting.h"
22a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
23a34c753fSRafael Auler #include "llvm/Support/Debug.h"
24290e4823Sserge-sans-paille #include "llvm/Support/Errc.h"
25a34c753fSRafael Auler #include "llvm/Support/LEB128.h"
26a34c753fSRafael Auler #include "llvm/Support/MathExtras.h"
27a34c753fSRafael Auler #include "llvm/Support/raw_ostream.h"
28a34c753fSRafael Auler #include <map>
29a34c753fSRafael Auler 
30a34c753fSRafael Auler #undef  DEBUG_TYPE
31a34c753fSRafael Auler #define DEBUG_TYPE "bolt-exceptions"
32a34c753fSRafael Auler 
33a34c753fSRafael Auler using namespace llvm::dwarf;
34a34c753fSRafael Auler 
35a34c753fSRafael Auler namespace opts {
36a34c753fSRafael Auler 
37a34c753fSRafael Auler extern llvm::cl::OptionCategory BoltCategory;
38a34c753fSRafael Auler 
39a34c753fSRafael Auler extern llvm::cl::opt<unsigned> Verbosity;
40a34c753fSRafael Auler 
41a34c753fSRafael Auler static llvm::cl::opt<bool>
42a34c753fSRafael Auler     PrintExceptions("print-exceptions",
43a34c753fSRafael Auler                     llvm::cl::desc("print exception handling data"),
44b92436efSFangrui Song                     llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
45a34c753fSRafael Auler 
46a34c753fSRafael Auler } // namespace opts
47a34c753fSRafael Auler 
48a34c753fSRafael Auler namespace llvm {
49a34c753fSRafael Auler namespace bolt {
50a34c753fSRafael Auler 
51a34c753fSRafael Auler // Read and dump the .gcc_exception_table section entry.
52a34c753fSRafael Auler //
53a34c753fSRafael Auler // .gcc_except_table section contains a set of Language-Specific Data Areas -
54a34c753fSRafael Auler // a fancy name for exception handling tables. There's one  LSDA entry per
55a34c753fSRafael Auler // function. However, we can't actually tell which function LSDA refers to
56a34c753fSRafael Auler // unless we parse .eh_frame entry that refers to the LSDA.
57a34c753fSRafael Auler // Then inside LSDA most addresses are encoded relative to the function start,
58a34c753fSRafael Auler // so we need the function context in order to get to real addresses.
59a34c753fSRafael Auler //
60a34c753fSRafael Auler // The best visual representation of the tables comprising LSDA and
61a34c753fSRafael Auler // relationships between them is illustrated at:
62a34c753fSRafael Auler //   https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
63a34c753fSRafael Auler // Keep in mind that GCC implementation deviates slightly from that document.
64a34c753fSRafael Auler //
65a34c753fSRafael Auler // To summarize, there are 4 tables in LSDA: call site table, actions table,
66a34c753fSRafael Auler // types table, and types index table (for indirection). The main table contains
67a34c753fSRafael Auler // call site entries. Each call site includes a PC range that can throw an
68a34c753fSRafael Auler // exception, a handler (landing pad), and a reference to an entry in the action
69a34c753fSRafael Auler // table. The handler and/or action could be 0. The action entry is a head
70a34c753fSRafael Auler // of a list of actions associated with a call site. The action table contains
71a34c753fSRafael Auler // all such lists (it could be optimized to share list tails). Each action could
72a34c753fSRafael Auler // be either to catch an exception of a given type, to perform a cleanup, or to
73a34c753fSRafael Auler // propagate the exception after filtering it out (e.g. to make sure function
74a34c753fSRafael Auler // exception specification is not violated). Catch action contains a reference
75a34c753fSRafael Auler // to an entry in the type table, and filter action refers to an entry in the
76a34c753fSRafael Auler // type index table to encode a set of types to filter.
77a34c753fSRafael Auler //
78a34c753fSRafael Auler // Call site table follows LSDA header. Action table immediately follows the
79a34c753fSRafael Auler // call site table.
80a34c753fSRafael Auler //
81a34c753fSRafael Auler // Both types table and type index table start at the same location, but they
82a34c753fSRafael Auler // grow in opposite directions (types go up, indices go down). The beginning of
83a34c753fSRafael Auler // these tables is encoded in LSDA header. Sizes for both of the tables are not
84a34c753fSRafael Auler // included anywhere.
85a34c753fSRafael Auler //
86a34c753fSRafael Auler // We have to parse all of the tables to determine their sizes. Then we have
87a34c753fSRafael Auler // to parse the call site table and associate discovered information with
88a34c753fSRafael Auler // actual call instructions and landing pad blocks.
89a34c753fSRafael Auler //
90a34c753fSRafael Auler // For the purpose of rewriting exception handling tables, we can reuse action,
91a34c753fSRafael Auler // and type index tables in their original binary format.
92a34c753fSRafael Auler //
93a34c753fSRafael Auler // Type table could be encoded using position-independent references, and thus
94a34c753fSRafael Auler // may require relocation.
95a34c753fSRafael Auler //
96a34c753fSRafael Auler // Ideally we should be able to re-write LSDA in-place, without the need to
97a34c753fSRafael Auler // allocate a new space for it. Sadly there's no guarantee that the new call
98a34c753fSRafael Auler // site table will be the same size as GCC uses uleb encodings for PC offsets.
99a34c753fSRafael Auler //
100a34c753fSRafael Auler // Note: some functions have LSDA entries with 0 call site entries.
10113d60ce2SAmir Ayupov Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
102a34c753fSRafael Auler                                 uint64_t LSDASectionAddress) {
103a34c753fSRafael Auler   assert(CurrentState == State::Disassembled && "unexpected function state");
104a34c753fSRafael Auler 
105a34c753fSRafael Auler   if (!getLSDAAddress())
10613d60ce2SAmir Ayupov     return Error::success();
107a34c753fSRafael Auler 
108a34c753fSRafael Auler   DWARFDataExtractor Data(
109a34c753fSRafael Auler       StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110a34c753fSRafael Auler                 LSDASectionData.size()),
111d9220453SMaksim Panchenko       BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
112a34c753fSRafael Auler   uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
113a34c753fSRafael Auler   assert(Data.isValidOffset(Offset) && "wrong LSDA address");
114a34c753fSRafael Auler 
11584602066SMaksim Panchenko   const uint8_t LPStartEncoding = Data.getU8(&Offset);
11684602066SMaksim Panchenko   uint64_t LPStart = Address;
11784602066SMaksim Panchenko   if (LPStartEncoding != dwarf::DW_EH_PE_omit) {
11884602066SMaksim Panchenko     std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
11984602066SMaksim Panchenko         &Offset, LPStartEncoding, Offset + LSDASectionAddress);
12084602066SMaksim Panchenko     if (!MaybeLPStart) {
12152cf0711SAmir Ayupov       BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: "
12284602066SMaksim Panchenko                 << (unsigned)LPStartEncoding << '\n';
12313d60ce2SAmir Ayupov       return createFatalBOLTError("");
12484602066SMaksim Panchenko     }
12584602066SMaksim Panchenko     LPStart = *MaybeLPStart;
12684602066SMaksim Panchenko   }
127a34c753fSRafael Auler 
128a34c753fSRafael Auler   const uint8_t TTypeEncoding = Data.getU8(&Offset);
129553c2389Srevunov.denis@huawei.com   LSDATypeEncoding = TTypeEncoding;
130a34c753fSRafael Auler   size_t TTypeEncodingSize = 0;
131a34c753fSRafael Auler   uintptr_t TTypeEnd = 0;
132a34c753fSRafael Auler   if (TTypeEncoding != DW_EH_PE_omit) {
133a34c753fSRafael Auler     TTypeEnd = Data.getULEB128(&Offset);
134a34c753fSRafael Auler     TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
135a34c753fSRafael Auler   }
136a34c753fSRafael Auler 
137a34c753fSRafael Auler   if (opts::PrintExceptions) {
13852cf0711SAmir Ayupov     BC.outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
139a34c753fSRafael Auler               << " for function " << *this << "]:\n";
14052cf0711SAmir Ayupov     BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
14140c2e0faSMaksim Panchenko               << '\n';
14252cf0711SAmir Ayupov     BC.outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
14352cf0711SAmir Ayupov     BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding)
14452cf0711SAmir Ayupov               << '\n';
14552cf0711SAmir Ayupov     BC.outs() << "TType End = " << TTypeEnd << '\n';
146a34c753fSRafael Auler   }
147a34c753fSRafael Auler 
148a34c753fSRafael Auler   // Table to store list of indices in type table. Entries are uleb128 values.
149a34c753fSRafael Auler   const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
150a34c753fSRafael Auler 
151a34c753fSRafael Auler   // Offset past the last decoded index.
152a34c753fSRafael Auler   uint64_t MaxTypeIndexTableOffset = 0;
153a34c753fSRafael Auler 
154a34c753fSRafael Auler   // Max positive index used in type table.
155a34c753fSRafael Auler   unsigned MaxTypeIndex = 0;
156a34c753fSRafael Auler 
157a34c753fSRafael Auler   // The actual type info table starts at the same location, but grows in
158a34c753fSRafael Auler   // opposite direction. TTypeEncoding is used to encode stored values.
159a34c753fSRafael Auler   const uint64_t TypeTableStart = Offset + TTypeEnd;
160a34c753fSRafael Auler 
161a34c753fSRafael Auler   uint8_t CallSiteEncoding = Data.getU8(&Offset);
162a34c753fSRafael Auler   uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
163a34c753fSRafael Auler   uint64_t CallSiteTableStart = Offset;
164a34c753fSRafael Auler   uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
165a34c753fSRafael Auler   uint64_t CallSitePtr = CallSiteTableStart;
166a34c753fSRafael Auler   uint64_t ActionTableStart = CallSiteTableEnd;
167a34c753fSRafael Auler 
168a34c753fSRafael Auler   if (opts::PrintExceptions) {
16952cf0711SAmir Ayupov     BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
17052cf0711SAmir Ayupov     BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n';
17152cf0711SAmir Ayupov     BC.outs() << '\n';
172a34c753fSRafael Auler   }
173a34c753fSRafael Auler 
174a34c753fSRafael Auler   this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
175a34c753fSRafael Auler   const uint64_t RangeBase = getAddress();
176a34c753fSRafael Auler   while (CallSitePtr < CallSiteTableEnd) {
177a34c753fSRafael Auler     uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
178a34c753fSRafael Auler                                              CallSitePtr + LSDASectionAddress);
17940c2e0faSMaksim Panchenko     uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
18040c2e0faSMaksim Panchenko                                               CallSitePtr + LSDASectionAddress);
181a34c753fSRafael Auler     uint64_t LandingPad = *Data.getEncodedPointer(
182a34c753fSRafael Auler         &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
183a34c753fSRafael Auler     uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
18484602066SMaksim Panchenko     if (LandingPad)
18584602066SMaksim Panchenko       LandingPad += LPStart;
186a34c753fSRafael Auler 
18784602066SMaksim Panchenko     if (opts::PrintExceptions) {
18852cf0711SAmir Ayupov       BC.outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
18984602066SMaksim Panchenko                 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
19084602066SMaksim Panchenko                 << "); landing pad: 0x" << Twine::utohexstr(LandingPad)
19152cf0711SAmir Ayupov                 << "; action entry: 0x" << Twine::utohexstr(ActionEntry)
19252cf0711SAmir Ayupov                 << "\n";
19352cf0711SAmir Ayupov       BC.outs() << "  current offset is " << (CallSitePtr - CallSiteTableStart)
19484602066SMaksim Panchenko                 << '\n';
19584602066SMaksim Panchenko     }
196ae563c91SHuan Nguyen 
19784602066SMaksim Panchenko     // Create a handler entry if necessary.
19884602066SMaksim Panchenko     MCSymbol *LPSymbol = nullptr;
19984602066SMaksim Panchenko     if (LandingPad) {
200ae563c91SHuan Nguyen       // Verify if landing pad code is located outside current function
201ae563c91SHuan Nguyen       // Support landing pad to builtin_unreachable
20284602066SMaksim Panchenko       if (LandingPad < Address || LandingPad > Address + getSize()) {
203ae563c91SHuan Nguyen         BinaryFunction *Fragment =
20484602066SMaksim Panchenko             BC.getBinaryFunctionContainingAddress(LandingPad);
205ae563c91SHuan Nguyen         assert(Fragment != nullptr &&
206ae563c91SHuan Nguyen                "BOLT-ERROR: cannot find landing pad fragment");
207ae563c91SHuan Nguyen         BC.addInterproceduralReference(this, Fragment->getAddress());
208ae563c91SHuan Nguyen         BC.processInterproceduralReferences();
20983ea7ce3SAmir Ayupov         assert(BC.areRelatedFragments(this, Fragment) &&
210068e9889SAmir Ayupov                "BOLT-ERROR: cannot have landing pads in different functions");
211ae563c91SHuan Nguyen         setHasIndirectTargetToSplitFragment(true);
212ae563c91SHuan Nguyen         BC.addFragmentsToSkip(this);
21313d60ce2SAmir Ayupov         return Error::success();
214ae563c91SHuan Nguyen       }
215ae563c91SHuan Nguyen 
21684602066SMaksim Panchenko       const uint64_t LPOffset = LandingPad - getAddress();
217ae563c91SHuan Nguyen       if (!getInstructionAtOffset(LPOffset)) {
21869706eafSMaksim Panchenko         if (opts::Verbosity >= 1)
21952cf0711SAmir Ayupov           BC.errs() << "BOLT-WARNING: landing pad "
22052cf0711SAmir Ayupov                     << Twine::utohexstr(LPOffset)
22169706eafSMaksim Panchenko                     << " not pointing to an instruction in function " << *this
22269706eafSMaksim Panchenko                     << " - ignoring.\n";
223a34c753fSRafael Auler       } else {
224ae563c91SHuan Nguyen         auto Label = Labels.find(LPOffset);
225a34c753fSRafael Auler         if (Label != Labels.end()) {
226a34c753fSRafael Auler           LPSymbol = Label->second;
227a34c753fSRafael Auler         } else {
228a34c753fSRafael Auler           LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
229ae563c91SHuan Nguyen           Labels[LPOffset] = LPSymbol;
230a34c753fSRafael Auler         }
231a34c753fSRafael Auler       }
232a34c753fSRafael Auler     }
233a34c753fSRafael Auler 
234a34c753fSRafael Auler     // Mark all call instructions in the range.
235a34c753fSRafael Auler     auto II = Instructions.find(Start);
236a34c753fSRafael Auler     auto IE = Instructions.end();
237a34c753fSRafael Auler     assert(II != IE && "exception range not pointing to an instruction");
238a34c753fSRafael Auler     do {
239a34c753fSRafael Auler       MCInst &Instruction = II->second;
240a34c753fSRafael Auler       if (BC.MIB->isCall(Instruction) &&
241a34c753fSRafael Auler           !BC.MIB->getConditionalTailCall(Instruction)) {
242a34c753fSRafael Auler         assert(!BC.MIB->isInvoke(Instruction) &&
243a34c753fSRafael Auler                "overlapping exception ranges detected");
244a34c753fSRafael Auler         // Add extra operands to a call instruction making it an invoke from
245a34c753fSRafael Auler         // now on.
246a34c753fSRafael Auler         BC.MIB->addEHInfo(Instruction,
247a34c753fSRafael Auler                           MCPlus::MCLandingPad(LPSymbol, ActionEntry));
248a34c753fSRafael Auler       }
249a34c753fSRafael Auler       ++II;
250a34c753fSRafael Auler     } while (II != IE && II->first < Start + Length);
251a34c753fSRafael Auler 
252a34c753fSRafael Auler     if (ActionEntry != 0) {
253a34c753fSRafael Auler       auto printType = [&](int Index, raw_ostream &OS) {
254a34c753fSRafael Auler         assert(Index > 0 && "only positive indices are valid");
255a34c753fSRafael Auler         uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
256a34c753fSRafael Auler         const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
257a34c753fSRafael Auler         uint64_t TypeAddress =
258a34c753fSRafael Auler             *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
2593652483cSRafael Auler         if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
260a34c753fSRafael Auler           TypeAddress = 0;
261a34c753fSRafael Auler         if (TypeAddress == 0) {
262a34c753fSRafael Auler           OS << "<all>";
263a34c753fSRafael Auler           return;
264a34c753fSRafael Auler         }
265a34c753fSRafael Auler         if (TTypeEncoding & DW_EH_PE_indirect) {
266a34c753fSRafael Auler           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
267a34c753fSRafael Auler           assert(PointerOrErr && "failed to decode indirect address");
268a34c753fSRafael Auler           TypeAddress = *PointerOrErr;
269a34c753fSRafael Auler         }
2703652483cSRafael Auler         if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
271a34c753fSRafael Auler           OS << TypeSymBD->getName();
2723652483cSRafael Auler         else
273a34c753fSRafael Auler           OS << "0x" << Twine::utohexstr(TypeAddress);
274a34c753fSRafael Auler       };
275a34c753fSRafael Auler       if (opts::PrintExceptions)
27652cf0711SAmir Ayupov         BC.outs() << "    actions: ";
277a34c753fSRafael Auler       uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
278a34c753fSRafael Auler       int64_t ActionType;
279a34c753fSRafael Auler       int64_t ActionNext;
280a34c753fSRafael Auler       const char *Sep = "";
281a34c753fSRafael Auler       do {
282a34c753fSRafael Auler         ActionType = Data.getSLEB128(&ActionPtr);
283a34c753fSRafael Auler         const uint32_t Self = ActionPtr;
284a34c753fSRafael Auler         ActionNext = Data.getSLEB128(&ActionPtr);
285a34c753fSRafael Auler         if (opts::PrintExceptions)
28652cf0711SAmir Ayupov           BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
287a34c753fSRafael Auler         if (ActionType == 0) {
288a34c753fSRafael Auler           if (opts::PrintExceptions)
28952cf0711SAmir Ayupov             BC.outs() << "cleanup";
290a34c753fSRafael Auler         } else if (ActionType > 0) {
291a34c753fSRafael Auler           // It's an index into a type table.
29240c2e0faSMaksim Panchenko           MaxTypeIndex =
29340c2e0faSMaksim Panchenko               std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
294a34c753fSRafael Auler           if (opts::PrintExceptions) {
29552cf0711SAmir Ayupov             BC.outs() << "catch type ";
29652cf0711SAmir Ayupov             printType(ActionType, BC.outs());
297a34c753fSRafael Auler           }
298a34c753fSRafael Auler         } else { // ActionType < 0
299a34c753fSRafael Auler           if (opts::PrintExceptions)
30052cf0711SAmir Ayupov             BC.outs() << "filter exception types ";
301a34c753fSRafael Auler           const char *TSep = "";
302a34c753fSRafael Auler           // ActionType is a negative *byte* offset into *uleb128-encoded* table
303a34c753fSRafael Auler           // of indices with base 1.
304a34c753fSRafael Auler           // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
305a34c753fSRafael Auler           // encoded using uleb128 thus we cannot directly dereference them.
306a34c753fSRafael Auler           uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
307a34c753fSRafael Auler           while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
308a34c753fSRafael Auler             MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
309a34c753fSRafael Auler             if (opts::PrintExceptions) {
31052cf0711SAmir Ayupov               BC.outs() << TSep;
31152cf0711SAmir Ayupov               printType(Index, BC.outs());
312a34c753fSRafael Auler               TSep = ", ";
313a34c753fSRafael Auler             }
314a34c753fSRafael Auler           }
31540c2e0faSMaksim Panchenko           MaxTypeIndexTableOffset = std::max(
31640c2e0faSMaksim Panchenko               MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
317a34c753fSRafael Auler         }
318a34c753fSRafael Auler 
319a34c753fSRafael Auler         Sep = "; ";
320a34c753fSRafael Auler 
321a34c753fSRafael Auler         ActionPtr = Self + ActionNext;
322a34c753fSRafael Auler       } while (ActionNext);
323a34c753fSRafael Auler       if (opts::PrintExceptions)
32452cf0711SAmir Ayupov         BC.outs() << '\n';
325a34c753fSRafael Auler     }
326a34c753fSRafael Auler   }
327a34c753fSRafael Auler   if (opts::PrintExceptions)
32852cf0711SAmir Ayupov     BC.outs() << '\n';
329a34c753fSRafael Auler 
330a34c753fSRafael Auler   assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
331a34c753fSRafael Auler              Data.getData().size() &&
332a34c753fSRafael Auler          "LSDA entry has crossed section boundary");
333a34c753fSRafael Auler 
334a34c753fSRafael Auler   if (TTypeEnd) {
335a34c753fSRafael Auler     LSDAActionTable = LSDASectionData.slice(
336a34c753fSRafael Auler         ActionTableStart, TypeIndexTableStart -
337a34c753fSRafael Auler                               MaxTypeIndex * TTypeEncodingSize -
338a34c753fSRafael Auler                               ActionTableStart);
339a34c753fSRafael Auler     for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
340a34c753fSRafael Auler       uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
341a34c753fSRafael Auler       const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
342a34c753fSRafael Auler       uint64_t TypeAddress =
343a34c753fSRafael Auler           *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
344a34c753fSRafael Auler       if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
345a34c753fSRafael Auler         TypeAddress = 0;
346a34c753fSRafael Auler       if (TTypeEncoding & DW_EH_PE_indirect) {
347a34c753fSRafael Auler         LSDATypeAddressTable.emplace_back(TypeAddress);
348a34c753fSRafael Auler         if (TypeAddress) {
349a34c753fSRafael Auler           ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
350a34c753fSRafael Auler           assert(PointerOrErr && "failed to decode indirect address");
351a34c753fSRafael Auler           TypeAddress = *PointerOrErr;
352a34c753fSRafael Auler         }
353a34c753fSRafael Auler       }
354a34c753fSRafael Auler       LSDATypeTable.emplace_back(TypeAddress);
355a34c753fSRafael Auler     }
356a34c753fSRafael Auler     LSDATypeIndexTable =
357a34c753fSRafael Auler         LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
358a34c753fSRafael Auler   }
35913d60ce2SAmir Ayupov   return Error::success();
360a34c753fSRafael Auler }
361a34c753fSRafael Auler 
362a34c753fSRafael Auler void BinaryFunction::updateEHRanges() {
363a34c753fSRafael Auler   if (getSize() == 0)
364a34c753fSRafael Auler     return;
365a34c753fSRafael Auler 
366a34c753fSRafael Auler   assert(CurrentState == State::CFG_Finalized && "unexpected state");
367a34c753fSRafael Auler 
368a34c753fSRafael Auler   // Build call sites table.
369a34c753fSRafael Auler   struct EHInfo {
370a34c753fSRafael Auler     const MCSymbol *LP; // landing pad
371a34c753fSRafael Auler     uint64_t Action;
372a34c753fSRafael Auler   };
373a34c753fSRafael Auler 
3743ac46f37SFabian Parzefall   // Sites to update.
3753ac46f37SFabian Parzefall   CallSitesList Sites;
376a191ea7dSFabian Parzefall 
3773ac46f37SFabian Parzefall   for (FunctionFragment &FF : getLayout().fragments()) {
378a34c753fSRafael Auler     // If previous call can throw, this is its exception handler.
379a34c753fSRafael Auler     EHInfo PreviousEH = {nullptr, 0};
380a34c753fSRafael Auler 
381a34c753fSRafael Auler     // Marker for the beginning of exceptions range.
382a34c753fSRafael Auler     const MCSymbol *StartRange = nullptr;
383a34c753fSRafael Auler 
384a191ea7dSFabian Parzefall     for (BinaryBasicBlock *const BB : FF) {
3850df15467SMaksim Panchenko       for (MCInst &Instr : *BB) {
3860df15467SMaksim Panchenko         if (!BC.MIB->isCall(Instr))
387a34c753fSRafael Auler           continue;
388a34c753fSRafael Auler 
389a34c753fSRafael Auler         // Instruction can throw an exception that should be handled.
3900df15467SMaksim Panchenko         const bool Throws = BC.MIB->isInvoke(Instr);
391a34c753fSRafael Auler 
392a34c753fSRafael Auler         // Ignore the call if it's a continuation of a no-throw gap.
393a34c753fSRafael Auler         if (!Throws && !StartRange)
394a34c753fSRafael Auler           continue;
395a34c753fSRafael Auler 
396a34c753fSRafael Auler         // Extract exception handling information from the instruction.
397a34c753fSRafael Auler         const MCSymbol *LP = nullptr;
398a34c753fSRafael Auler         uint64_t Action = 0;
3992563fd63SAmir Ayupov         if (const std::optional<MCPlus::MCLandingPad> EHInfo =
4000df15467SMaksim Panchenko                 BC.MIB->getEHInfo(Instr))
401a34c753fSRafael Auler           std::tie(LP, Action) = *EHInfo;
402a34c753fSRafael Auler 
403a34c753fSRafael Auler         // No action if the exception handler has not changed.
40440c2e0faSMaksim Panchenko         if (Throws && StartRange && PreviousEH.LP == LP &&
405a34c753fSRafael Auler             PreviousEH.Action == Action)
406a34c753fSRafael Auler           continue;
407a34c753fSRafael Auler 
408a34c753fSRafael Auler         // Same symbol is used for the beginning and the end of the range.
4090df15467SMaksim Panchenko         MCSymbol *EHSymbol;
4107c206c78SMaksim Panchenko         if (MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr)) {
411d18b4f88SMaksim Panchenko           EHSymbol = InstrLabel;
4120df15467SMaksim Panchenko         } else {
413e8ce5f1eSNico Weber           std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
4147c206c78SMaksim Panchenko           EHSymbol = BC.MIB->getOrCreateInstLabel(Instr, "EH", BC.Ctx.get());
415a34c753fSRafael Auler         }
416a34c753fSRafael Auler 
417a34c753fSRafael Auler         // At this point we could be in one of the following states:
418a34c753fSRafael Auler         //
419a34c753fSRafael Auler         // I. Exception handler has changed and we need to close previous range
420a34c753fSRafael Auler         //    and start a new one.
421a34c753fSRafael Auler         //
422a34c753fSRafael Auler         // II. Start a new exception range after the gap.
423a34c753fSRafael Auler         //
424a34c753fSRafael Auler         // III. Close current exception range and start a new gap.
425a34c753fSRafael Auler         const MCSymbol *EndRange;
426a34c753fSRafael Auler         if (StartRange) {
427a34c753fSRafael Auler           // I, III:
428a34c753fSRafael Auler           EndRange = EHSymbol;
429a34c753fSRafael Auler         } else {
430a34c753fSRafael Auler           // II:
431a34c753fSRafael Auler           StartRange = EHSymbol;
432a34c753fSRafael Auler           EndRange = nullptr;
433a34c753fSRafael Auler         }
434a34c753fSRafael Auler 
435a34c753fSRafael Auler         // Close the previous range.
4363ac46f37SFabian Parzefall         if (EndRange)
437a191ea7dSFabian Parzefall           Sites.emplace_back(
4383ac46f37SFabian Parzefall               FF.getFragmentNum(),
43940c2e0faSMaksim Panchenko               CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
440a34c753fSRafael Auler 
441a34c753fSRafael Auler         if (Throws) {
442a34c753fSRafael Auler           // I, II:
443a34c753fSRafael Auler           StartRange = EHSymbol;
444a34c753fSRafael Auler           PreviousEH = EHInfo{LP, Action};
445a34c753fSRafael Auler         } else {
446a34c753fSRafael Auler           StartRange = nullptr;
447a34c753fSRafael Auler         }
448a34c753fSRafael Auler       }
449a34c753fSRafael Auler     }
450a34c753fSRafael Auler 
451a34c753fSRafael Auler     // Check if we need to close the range.
452a34c753fSRafael Auler     if (StartRange) {
453a191ea7dSFabian Parzefall       const MCSymbol *EndRange = getFunctionEndLabel(FF.getFragmentNum());
454a191ea7dSFabian Parzefall       Sites.emplace_back(
4553ac46f37SFabian Parzefall           FF.getFragmentNum(),
45640c2e0faSMaksim Panchenko           CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
457a34c753fSRafael Auler     }
458a34c753fSRafael Auler   }
4593ac46f37SFabian Parzefall 
4603ac46f37SFabian Parzefall   addCallSites(Sites);
461a191ea7dSFabian Parzefall }
462a34c753fSRafael Auler 
// Mask selecting the two most significant bits of a CFI opcode byte. The CFI
// decoder in this file reduces an opcode to these bits whenever any of them is
// set.
constexpr uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
464a34c753fSRafael Auler 
46552cf0711SAmir Ayupov CFIReaderWriter::CFIReaderWriter(BinaryContext &BC,
46652cf0711SAmir Ayupov                                  const DWARFDebugFrame &EHFrame)
46752cf0711SAmir Ayupov     : BC(BC) {
468a34c753fSRafael Auler   // Prepare FDEs for fast lookup
469a34c753fSRafael Auler   for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
470a34c753fSRafael Auler     const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
471a34c753fSRafael Auler     // Skip CIEs.
472a34c753fSRafael Auler     if (!CurFDE)
473a34c753fSRafael Auler       continue;
474a34c753fSRafael Auler     // There could me multiple FDEs with the same initial address, and perhaps
475a34c753fSRafael Auler     // different sizes (address ranges). Use the first entry with non-zero size.
476a34c753fSRafael Auler     auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
477a34c753fSRafael Auler     if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
478a34c753fSRafael Auler       if (CurFDE->getAddressRange()) {
479a34c753fSRafael Auler         if (FDEI->second->getAddressRange() == 0) {
480a34c753fSRafael Auler           FDEI->second = CurFDE;
481a34c753fSRafael Auler         } else if (opts::Verbosity > 0) {
48252cf0711SAmir Ayupov           BC.errs() << "BOLT-WARNING: different FDEs for function at 0x"
483a34c753fSRafael Auler                     << Twine::utohexstr(FDEI->first)
48440c2e0faSMaksim Panchenko                     << " detected; sizes: " << FDEI->second->getAddressRange()
48540c2e0faSMaksim Panchenko                     << " and " << CurFDE->getAddressRange() << '\n';
486a34c753fSRafael Auler         }
487a34c753fSRafael Auler       }
488a34c753fSRafael Auler     } else {
489a34c753fSRafael Auler       FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
490a34c753fSRafael Auler     }
491a34c753fSRafael Auler   }
492a34c753fSRafael Auler }
493a34c753fSRafael Auler 
494a34c753fSRafael Auler bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
495a34c753fSRafael Auler   uint64_t Address = Function.getAddress();
496a34c753fSRafael Auler   auto I = FDEs.find(Address);
497a34c753fSRafael Auler   // Ignore zero-length FDE ranges.
498a34c753fSRafael Auler   if (I == FDEs.end() || !I->second->getAddressRange())
499a34c753fSRafael Auler     return true;
500a34c753fSRafael Auler 
501a34c753fSRafael Auler   const FDE &CurFDE = *I->second;
50289fab98eSFangrui Song   std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
503a34c753fSRafael Auler   Function.setLSDAAddress(LSDA ? *LSDA : 0);
504a34c753fSRafael Auler 
5050b7e8bafSDenis Revunov   uint64_t Offset = Function.getFirstInstructionOffset();
506a34c753fSRafael Auler   uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
507a34c753fSRafael Auler   uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
508a34c753fSRafael Auler   if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
509a34c753fSRafael Auler     Function.setPersonalityFunction(
510a34c753fSRafael Auler         *CurFDE.getLinkedCIE()->getPersonalityAddress());
511a34c753fSRafael Auler     Function.setPersonalityEncoding(
512a34c753fSRafael Auler         *CurFDE.getLinkedCIE()->getPersonalityEncoding());
513a34c753fSRafael Auler   }
514a34c753fSRafael Auler 
51552cf0711SAmir Ayupov   auto decodeFrameInstruction = [this, &Function, &Offset, Address,
51652cf0711SAmir Ayupov                                  CodeAlignment, DataAlignment](
517a34c753fSRafael Auler                                     const CFIProgram::Instruction &Instr) {
518a34c753fSRafael Auler     uint8_t Opcode = Instr.Opcode;
519a34c753fSRafael Auler     if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
520a34c753fSRafael Auler       Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
521a34c753fSRafael Auler     switch (Instr.Opcode) {
522a34c753fSRafael Auler     case DW_CFA_nop:
523a34c753fSRafael Auler       break;
524a34c753fSRafael Auler     case DW_CFA_advance_loc4:
525a34c753fSRafael Auler     case DW_CFA_advance_loc2:
526a34c753fSRafael Auler     case DW_CFA_advance_loc1:
527a34c753fSRafael Auler     case DW_CFA_advance_loc:
528a34c753fSRafael Auler       // Advance our current address
529a34c753fSRafael Auler       Offset += CodeAlignment * int64_t(Instr.Ops[0]);
530a34c753fSRafael Auler       break;
531a34c753fSRafael Auler     case DW_CFA_offset_extended_sf:
532a34c753fSRafael Auler       Function.addCFIInstruction(
53340c2e0faSMaksim Panchenko           Offset,
53440c2e0faSMaksim Panchenko           MCCFIInstruction::createOffset(
53540c2e0faSMaksim Panchenko               nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
536a34c753fSRafael Auler       break;
537a34c753fSRafael Auler     case DW_CFA_offset_extended:
538a34c753fSRafael Auler     case DW_CFA_offset:
539a34c753fSRafael Auler       Function.addCFIInstruction(
54040c2e0faSMaksim Panchenko           Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
54140c2e0faSMaksim Panchenko                                                  DataAlignment * Instr.Ops[1]));
542a34c753fSRafael Auler       break;
543a34c753fSRafael Auler     case DW_CFA_restore_extended:
544a34c753fSRafael Auler     case DW_CFA_restore:
545a34c753fSRafael Auler       Function.addCFIInstruction(
546a34c753fSRafael Auler           Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
547a34c753fSRafael Auler       break;
548a34c753fSRafael Auler     case DW_CFA_set_loc:
549a34c753fSRafael Auler       assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
550a34c753fSRafael Auler       assert(Instr.Ops[0] <= Address + Function.getSize() &&
551a34c753fSRafael Auler              "set_loc out of function bounds");
552a34c753fSRafael Auler       Offset = Instr.Ops[0] - Address;
553a34c753fSRafael Auler       break;
554a34c753fSRafael Auler 
555a34c753fSRafael Auler     case DW_CFA_undefined:
556a34c753fSRafael Auler       Function.addCFIInstruction(
557a34c753fSRafael Auler           Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
558a34c753fSRafael Auler       break;
559a34c753fSRafael Auler     case DW_CFA_same_value:
560a34c753fSRafael Auler       Function.addCFIInstruction(
561a34c753fSRafael Auler           Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
562a34c753fSRafael Auler       break;
563a34c753fSRafael Auler     case DW_CFA_register:
564a34c753fSRafael Auler       Function.addCFIInstruction(
565a34c753fSRafael Auler           Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
566a34c753fSRafael Auler                                                    Instr.Ops[1]));
567a34c753fSRafael Auler       break;
568a34c753fSRafael Auler     case DW_CFA_remember_state:
569a34c753fSRafael Auler       Function.addCFIInstruction(
570a34c753fSRafael Auler           Offset, MCCFIInstruction::createRememberState(nullptr));
571a34c753fSRafael Auler       break;
572a34c753fSRafael Auler     case DW_CFA_restore_state:
57340c2e0faSMaksim Panchenko       Function.addCFIInstruction(Offset,
57440c2e0faSMaksim Panchenko                                  MCCFIInstruction::createRestoreState(nullptr));
575a34c753fSRafael Auler       break;
576a34c753fSRafael Auler     case DW_CFA_def_cfa:
577a34c753fSRafael Auler       Function.addCFIInstruction(
57840c2e0faSMaksim Panchenko           Offset,
57940c2e0faSMaksim Panchenko           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
580a34c753fSRafael Auler       break;
581a34c753fSRafael Auler     case DW_CFA_def_cfa_sf:
582a34c753fSRafael Auler       Function.addCFIInstruction(
58340c2e0faSMaksim Panchenko           Offset,
58440c2e0faSMaksim Panchenko           MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
585a34c753fSRafael Auler                                       DataAlignment * int64_t(Instr.Ops[1])));
586a34c753fSRafael Auler       break;
587a34c753fSRafael Auler     case DW_CFA_def_cfa_register:
58840c2e0faSMaksim Panchenko       Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
58940c2e0faSMaksim Panchenko                                              nullptr, Instr.Ops[0]));
590a34c753fSRafael Auler       break;
591a34c753fSRafael Auler     case DW_CFA_def_cfa_offset:
592a34c753fSRafael Auler       Function.addCFIInstruction(
59340c2e0faSMaksim Panchenko           Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
594a34c753fSRafael Auler       break;
595a34c753fSRafael Auler     case DW_CFA_def_cfa_offset_sf:
596a34c753fSRafael Auler       Function.addCFIInstruction(
597a34c753fSRafael Auler           Offset, MCCFIInstruction::cfiDefCfaOffset(
598a34c753fSRafael Auler                       nullptr, DataAlignment * int64_t(Instr.Ops[0])));
599a34c753fSRafael Auler       break;
600a34c753fSRafael Auler     case DW_CFA_GNU_args_size:
601a34c753fSRafael Auler       Function.addCFIInstruction(
60240c2e0faSMaksim Panchenko           Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
603a34c753fSRafael Auler       Function.setUsesGnuArgsSize();
604a34c753fSRafael Auler       break;
605a34c753fSRafael Auler     case DW_CFA_val_offset_sf:
606a34c753fSRafael Auler     case DW_CFA_val_offset:
607a34c753fSRafael Auler       if (opts::Verbosity >= 1) {
60852cf0711SAmir Ayupov         BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
609a34c753fSRafael Auler       }
610a34c753fSRafael Auler       return false;
611a34c753fSRafael Auler     case DW_CFA_def_cfa_expression:
612a34c753fSRafael Auler     case DW_CFA_val_expression:
613a34c753fSRafael Auler     case DW_CFA_expression: {
614a34c753fSRafael Auler       StringRef ExprBytes = Instr.Expression->getData();
615a34c753fSRafael Auler       std::string Str;
616a34c753fSRafael Auler       raw_string_ostream OS(Str);
617a34c753fSRafael Auler       // Manually encode this instruction using CFI escape
618a34c753fSRafael Auler       OS << Opcode;
6193652483cSRafael Auler       if (Opcode != DW_CFA_def_cfa_expression)
620a34c753fSRafael Auler         encodeULEB128(Instr.Ops[0], OS);
621a34c753fSRafael Auler       encodeULEB128(ExprBytes.size(), OS);
622a34c753fSRafael Auler       OS << ExprBytes;
623a34c753fSRafael Auler       Function.addCFIInstruction(
624a34c753fSRafael Auler           Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
625a34c753fSRafael Auler       break;
626a34c753fSRafael Auler     }
627a34c753fSRafael Auler     case DW_CFA_MIPS_advance_loc8:
6283652483cSRafael Auler       if (opts::Verbosity >= 1)
62952cf0711SAmir Ayupov         BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
630a34c753fSRafael Auler       return false;
631a34c753fSRafael Auler     case DW_CFA_GNU_window_save:
63299211979SSebastian Pop       // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
63399211979SSebastian Pop       // id but mean different things. The latter is used in AArch64.
63499211979SSebastian Pop       if (Function.getBinaryContext().isAArch64()) {
63599211979SSebastian Pop         Function.addCFIInstruction(
63699211979SSebastian Pop             Offset, MCCFIInstruction::createNegateRAState(nullptr));
63799211979SSebastian Pop         break;
63899211979SSebastian Pop       }
63999211979SSebastian Pop       if (opts::Verbosity >= 1)
64052cf0711SAmir Ayupov         BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
64199211979SSebastian Pop       return false;
642a34c753fSRafael Auler     case DW_CFA_lo_user:
643a34c753fSRafael Auler     case DW_CFA_hi_user:
64499211979SSebastian Pop       if (opts::Verbosity >= 1)
64552cf0711SAmir Ayupov         BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
646a34c753fSRafael Auler       return false;
647a34c753fSRafael Auler     default:
64899211979SSebastian Pop       if (opts::Verbosity >= 1)
64952cf0711SAmir Ayupov         BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: "
65052cf0711SAmir Ayupov                   << Instr.Opcode << '\n';
651a34c753fSRafael Auler       return false;
652a34c753fSRafael Auler     }
653a34c753fSRafael Auler 
654a34c753fSRafael Auler     return true;
655a34c753fSRafael Auler   };
656a34c753fSRafael Auler 
6573652483cSRafael Auler   for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
658a34c753fSRafael Auler     if (!decodeFrameInstruction(Instr))
659a34c753fSRafael Auler       return false;
660a34c753fSRafael Auler 
6613652483cSRafael Auler   for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
662a34c753fSRafael Auler     if (!decodeFrameInstruction(Instr))
663a34c753fSRafael Auler       return false;
664a34c753fSRafael Auler 
665a34c753fSRafael Auler   return true;
666a34c753fSRafael Auler }
667a34c753fSRafael Auler 
668*1b8e0cf0SMaksim Panchenko std::vector<char>
669*1b8e0cf0SMaksim Panchenko CFIReaderWriter::generateEHFrameHeader(const DWARFDebugFrame &OldEHFrame,
670*1b8e0cf0SMaksim Panchenko                                        const DWARFDebugFrame &NewEHFrame,
671*1b8e0cf0SMaksim Panchenko                                        uint64_t EHFrameHeaderAddress) const {
672a34c753fSRafael Auler   // Common PC -> FDE map to be written into .eh_frame_hdr.
673a34c753fSRafael Auler   std::map<uint64_t, uint64_t> PCToFDE;
674a34c753fSRafael Auler 
675a34c753fSRafael Auler   // Initialize PCToFDE using NewEHFrame.
676a34c753fSRafael Auler   for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
677a34c753fSRafael Auler     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
678a34c753fSRafael Auler     if (FDE == nullptr)
679a34c753fSRafael Auler       continue;
680a34c753fSRafael Auler     const uint64_t FuncAddress = FDE->getInitialLocation();
681a34c753fSRafael Auler     const uint64_t FDEAddress =
682a34c753fSRafael Auler         NewEHFrame.getEHFrameAddress() + FDE->getOffset();
683a34c753fSRafael Auler 
684a34c753fSRafael Auler     // Ignore unused FDEs.
685a34c753fSRafael Auler     if (FuncAddress == 0)
686a34c753fSRafael Auler       continue;
687a34c753fSRafael Auler 
688a34c753fSRafael Auler     // Add the address to the map unless we failed to write it.
689a34c753fSRafael Auler     PCToFDE[FuncAddress] = FDEAddress;
690a34c753fSRafael Auler   };
691a34c753fSRafael Auler 
692a34c753fSRafael Auler   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
693d2c87699SAmir Ayupov                     << llvm::size(NewEHFrame.entries()) << " entries\n");
694a34c753fSRafael Auler 
695a34c753fSRafael Auler   // Add entries from the original .eh_frame corresponding to the functions
696a34c753fSRafael Auler   // that we did not update.
697a34c753fSRafael Auler   for (const dwarf::FrameEntry &Entry : OldEHFrame) {
698a34c753fSRafael Auler     const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
699a34c753fSRafael Auler     if (FDE == nullptr)
700a34c753fSRafael Auler       continue;
701a34c753fSRafael Auler     const uint64_t FuncAddress = FDE->getInitialLocation();
702a34c753fSRafael Auler     const uint64_t FDEAddress =
703a34c753fSRafael Auler         OldEHFrame.getEHFrameAddress() + FDE->getOffset();
704a34c753fSRafael Auler 
705a34c753fSRafael Auler     // Add the address if we failed to write it.
706a34c753fSRafael Auler     if (PCToFDE.count(FuncAddress) == 0) {
707a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
708a34c753fSRafael Auler                         << Twine::utohexstr(FuncAddress) << " is at 0x"
709a34c753fSRafael Auler                         << Twine::utohexstr(FDEAddress) << '\n');
710a34c753fSRafael Auler       PCToFDE[FuncAddress] = FDEAddress;
711a34c753fSRafael Auler     }
712a34c753fSRafael Auler   };
713a34c753fSRafael Auler 
714a34c753fSRafael Auler   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
715d2c87699SAmir Ayupov                     << llvm::size(OldEHFrame.entries()) << " entries\n");
716a34c753fSRafael Auler 
717a34c753fSRafael Auler   // Generate a new .eh_frame_hdr based on the new map.
718a34c753fSRafael Auler 
719a34c753fSRafael Auler   // Header plus table of entries of size 8 bytes.
720a34c753fSRafael Auler   std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
721a34c753fSRafael Auler 
722a34c753fSRafael Auler   // Version is 1.
723a34c753fSRafael Auler   EHFrameHeader[0] = 1;
724a34c753fSRafael Auler   // Encoding of the eh_frame pointer.
725a34c753fSRafael Auler   EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
726a34c753fSRafael Auler   // Encoding of the count field to follow.
727a34c753fSRafael Auler   EHFrameHeader[2] = DW_EH_PE_udata4;
728a34c753fSRafael Auler   // Encoding of the table entries - 4-byte offset from the start of the header.
729a34c753fSRafael Auler   EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
730a34c753fSRafael Auler 
731a34c753fSRafael Auler   // Address of eh_frame. Use the new one.
732a34c753fSRafael Auler   support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
733a34c753fSRafael Auler       NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
734a34c753fSRafael Auler 
735a34c753fSRafael Auler   // Number of entries in the table (FDE count).
736a34c753fSRafael Auler   support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
737a34c753fSRafael Auler 
738a34c753fSRafael Auler   // Write the table at offset 12.
739a34c753fSRafael Auler   char *Ptr = EHFrameHeader.data();
740a34c753fSRafael Auler   uint32_t Offset = 12;
741a34c753fSRafael Auler   for (const auto &PCI : PCToFDE) {
742a34c753fSRafael Auler     int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
743a34c753fSRafael Auler     assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
744a34c753fSRafael Auler     support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
745a34c753fSRafael Auler     Offset += 4;
746a34c753fSRafael Auler     int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
747a34c753fSRafael Auler     assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
748a34c753fSRafael Auler     support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
749a34c753fSRafael Auler     Offset += 4;
750a34c753fSRafael Auler   }
751a34c753fSRafael Auler 
752a34c753fSRafael Auler   return EHFrameHeader;
753a34c753fSRafael Auler }
754a34c753fSRafael Auler 
755a34c753fSRafael Auler Error EHFrameParser::parseCIE(uint64_t StartOffset) {
756a34c753fSRafael Auler   uint8_t Version = Data.getU8(&Offset);
757a34c753fSRafael Auler   const char *Augmentation = Data.getCStr(&Offset);
758a34c753fSRafael Auler   StringRef AugmentationString(Augmentation ? Augmentation : "");
759a34c753fSRafael Auler   uint8_t AddressSize =
760a34c753fSRafael Auler       Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
761a34c753fSRafael Auler   Data.setAddressSize(AddressSize);
762a34c753fSRafael Auler   // Skip segment descriptor size
763a34c753fSRafael Auler   if (Version >= 4)
764a34c753fSRafael Auler     Offset += 1;
765a34c753fSRafael Auler   // Skip code alignment factor
766a34c753fSRafael Auler   Data.getULEB128(&Offset);
767a34c753fSRafael Auler   // Skip data alignment
768a34c753fSRafael Auler   Data.getSLEB128(&Offset);
769a34c753fSRafael Auler   // Skip return address register
7703652483cSRafael Auler   if (Version == 1)
771a34c753fSRafael Auler     Offset += 1;
7723652483cSRafael Auler   else
773a34c753fSRafael Auler     Data.getULEB128(&Offset);
774a34c753fSRafael Auler 
775a34c753fSRafael Auler   uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
776a34c753fSRafael Auler   uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
777a34c753fSRafael Auler   // Walk the augmentation string to get all the augmentation data.
778a34c753fSRafael Auler   for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
779a34c753fSRafael Auler     switch (AugmentationString[i]) {
780a34c753fSRafael Auler     default:
781a34c753fSRafael Auler       return createStringError(
782a34c753fSRafael Auler           errc::invalid_argument,
783a34c753fSRafael Auler           "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
784a34c753fSRafael Auler     case 'L':
785a34c753fSRafael Auler       LSDAPointerEncoding = Data.getU8(&Offset);
786a34c753fSRafael Auler       break;
787a34c753fSRafael Auler     case 'P': {
788a34c753fSRafael Auler       uint32_t PersonalityEncoding = Data.getU8(&Offset);
78989fab98eSFangrui Song       std::optional<uint64_t> Personality =
790a34c753fSRafael Auler           Data.getEncodedPointer(&Offset, PersonalityEncoding,
791a34c753fSRafael Auler                                  EHFrameAddress ? EHFrameAddress + Offset : 0);
792a34c753fSRafael Auler       // Patch personality address
793a34c753fSRafael Auler       if (Personality)
794a34c753fSRafael Auler         PatcherCallback(*Personality, Offset, PersonalityEncoding);
795a34c753fSRafael Auler       break;
796a34c753fSRafael Auler     }
797a34c753fSRafael Auler     case 'R':
798a34c753fSRafael Auler       FDEPointerEncoding = Data.getU8(&Offset);
799a34c753fSRafael Auler       break;
800a34c753fSRafael Auler     case 'z':
801a34c753fSRafael Auler       if (i)
802a34c753fSRafael Auler         return createStringError(
803a34c753fSRafael Auler             errc::invalid_argument,
804a34c753fSRafael Auler             "'z' must be the first character at 0x%" PRIx64, StartOffset);
805a34c753fSRafael Auler       // Skip augmentation length
806a34c753fSRafael Auler       Data.getULEB128(&Offset);
807a34c753fSRafael Auler       break;
808a34c753fSRafael Auler     case 'S':
809a34c753fSRafael Auler     case 'B':
810a34c753fSRafael Auler       break;
811a34c753fSRafael Auler     }
812a34c753fSRafael Auler   }
813a34c753fSRafael Auler   Entries.emplace_back(std::make_unique<CIEInfo>(
814a34c753fSRafael Auler       FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
815a34c753fSRafael Auler   CIEs[StartOffset] = &*Entries.back();
816a34c753fSRafael Auler   return Error::success();
817a34c753fSRafael Auler }
818a34c753fSRafael Auler 
819a34c753fSRafael Auler Error EHFrameParser::parseFDE(uint64_t CIEPointer,
820a34c753fSRafael Auler                               uint64_t StartStructureOffset) {
82189fab98eSFangrui Song   std::optional<uint64_t> LSDAAddress;
822a34c753fSRafael Auler   CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
823a34c753fSRafael Auler 
824a34c753fSRafael Auler   // The address size is encoded in the CIE we reference.
825a34c753fSRafael Auler   if (!Cie)
826a34c753fSRafael Auler     return createStringError(errc::invalid_argument,
827a34c753fSRafael Auler                              "parsing FDE data at 0x%" PRIx64
828a34c753fSRafael Auler                              " failed due to missing CIE",
829a34c753fSRafael Auler                              StartStructureOffset);
830a34c753fSRafael Auler   // Patch initial location
831a34c753fSRafael Auler   if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
832a34c753fSRafael Auler                                         EHFrameAddress + Offset)) {
833a34c753fSRafael Auler     PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
834a34c753fSRafael Auler   }
835a34c753fSRafael Auler   // Skip address range
836a34c753fSRafael Auler   Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
837a34c753fSRafael Auler 
838a34c753fSRafael Auler   // Process augmentation data for this FDE.
839a34c753fSRafael Auler   StringRef AugmentationString = Cie->AugmentationString;
840a34c753fSRafael Auler   if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
841a34c753fSRafael Auler     // Skip augmentation length
842a34c753fSRafael Auler     Data.getULEB128(&Offset);
843a34c753fSRafael Auler     LSDAAddress =
844a34c753fSRafael Auler         Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
845a34c753fSRafael Auler                                EHFrameAddress ? Offset + EHFrameAddress : 0);
846a34c753fSRafael Auler     // Patch LSDA address
847a34c753fSRafael Auler     PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
848a34c753fSRafael Auler   }
849a34c753fSRafael Auler   return Error::success();
850a34c753fSRafael Auler }
851a34c753fSRafael Auler 
852a34c753fSRafael Auler Error EHFrameParser::parse() {
853a34c753fSRafael Auler   while (Data.isValidOffset(Offset)) {
854a34c753fSRafael Auler     const uint64_t StartOffset = Offset;
855a34c753fSRafael Auler 
856a34c753fSRafael Auler     uint64_t Length;
857a34c753fSRafael Auler     DwarfFormat Format;
858a34c753fSRafael Auler     std::tie(Length, Format) = Data.getInitialLength(&Offset);
859a34c753fSRafael Auler 
860a34c753fSRafael Auler     // If the Length is 0, then this CIE is a terminator
861a34c753fSRafael Auler     if (Length == 0)
862a34c753fSRafael Auler       break;
863a34c753fSRafael Auler 
864a34c753fSRafael Auler     const uint64_t StartStructureOffset = Offset;
865a34c753fSRafael Auler     const uint64_t EndStructureOffset = Offset + Length;
866a34c753fSRafael Auler 
867a34c753fSRafael Auler     Error Err = Error::success();
868a34c753fSRafael Auler     const uint64_t Id = Data.getRelocatedValue(4, &Offset,
869a34c753fSRafael Auler                                                /*SectionIndex=*/nullptr, &Err);
870a34c753fSRafael Auler     if (Err)
871a34c753fSRafael Auler       return Err;
872a34c753fSRafael Auler 
873a34c753fSRafael Auler     if (!Id) {
874a34c753fSRafael Auler       if (Error Err = parseCIE(StartOffset))
875a34c753fSRafael Auler         return Err;
876a34c753fSRafael Auler     } else {
877a34c753fSRafael Auler       if (Error Err = parseFDE(Id, StartStructureOffset))
878a34c753fSRafael Auler         return Err;
879a34c753fSRafael Auler     }
880a34c753fSRafael Auler     Offset = EndStructureOffset;
881a34c753fSRafael Auler   }
882a34c753fSRafael Auler 
883a34c753fSRafael Auler   return Error::success();
884a34c753fSRafael Auler }
885a34c753fSRafael Auler 
886a34c753fSRafael Auler Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
887a34c753fSRafael Auler                            PatcherCallbackTy PatcherCallback) {
888a34c753fSRafael Auler   EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
889a34c753fSRafael Auler   return Parser.parse();
890a34c753fSRafael Auler }
891a34c753fSRafael Auler 
892a34c753fSRafael Auler } // namespace bolt
893a34c753fSRafael Auler } // namespace llvm
894