1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements functions for handling C++ exception meta data. 10 // 11 // Some of the code is taken from examples/ExceptionDemo 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "bolt/Core/Exceptions.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/BinaryFormat/Dwarf.h" 20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 21 #include "llvm/Support/Casting.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/Errc.h" 25 #include "llvm/Support/LEB128.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <map> 29 30 #undef DEBUG_TYPE 31 #define DEBUG_TYPE "bolt-exceptions" 32 33 using namespace llvm::dwarf; 34 35 namespace opts { 36 37 extern llvm::cl::OptionCategory BoltCategory; 38 39 extern llvm::cl::opt<unsigned> Verbosity; 40 41 static llvm::cl::opt<bool> 42 PrintExceptions("print-exceptions", 43 llvm::cl::desc("print exception handling data"), 44 llvm::cl::Hidden, llvm::cl::cat(BoltCategory)); 45 46 } // namespace opts 47 48 namespace llvm { 49 namespace bolt { 50 51 // Read and dump the .gcc_exception_table section entry. 52 // 53 // .gcc_except_table section contains a set of Language-Specific Data Areas - 54 // a fancy name for exception handling tables. There's one LSDA entry per 55 // function. However, we can't actually tell which function LSDA refers to 56 // unless we parse .eh_frame entry that refers to the LSDA. 57 // Then inside LSDA most addresses are encoded relative to the function start, 58 // so we need the function context in order to get to real addresses. 59 // 60 // The best visual representation of the tables comprising LSDA and 61 // relationships between them is illustrated at: 62 // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf 63 // Keep in mind that GCC implementation deviates slightly from that document. 64 // 65 // To summarize, there are 4 tables in LSDA: call site table, actions table, 66 // types table, and types index table (for indirection). The main table contains 67 // call site entries. Each call site includes a PC range that can throw an 68 // exception, a handler (landing pad), and a reference to an entry in the action 69 // table. The handler and/or action could be 0. The action entry is a head 70 // of a list of actions associated with a call site. The action table contains 71 // all such lists (it could be optimized to share list tails). Each action could 72 // be either to catch an exception of a given type, to perform a cleanup, or to 73 // propagate the exception after filtering it out (e.g. to make sure function 74 // exception specification is not violated). Catch action contains a reference 75 // to an entry in the type table, and filter action refers to an entry in the 76 // type index table to encode a set of types to filter. 77 // 78 // Call site table follows LSDA header. Action table immediately follows the 79 // call site table. 80 // 81 // Both types table and type index table start at the same location, but they 82 // grow in opposite directions (types go up, indices go down). The beginning of 83 // these tables is encoded in LSDA header. Sizes for both of the tables are not 84 // included anywhere. 85 // 86 // We have to parse all of the tables to determine their sizes. Then we have 87 // to parse the call site table and associate discovered information with 88 // actual call instructions and landing pad blocks. 89 // 90 // For the purpose of rewriting exception handling tables, we can reuse action, 91 // and type index tables in their original binary format. 92 // 93 // Type table could be encoded using position-independent references, and thus 94 // may require relocation. 95 // 96 // Ideally we should be able to re-write LSDA in-place, without the need to 97 // allocate a new space for it. Sadly there's no guarantee that the new call 98 // site table will be the same size as GCC uses uleb encodings for PC offsets. 99 // 100 // Note: some functions have LSDA entries with 0 call site entries. 101 Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData, 102 uint64_t LSDASectionAddress) { 103 assert(CurrentState == State::Disassembled && "unexpected function state"); 104 105 if (!getLSDAAddress()) 106 return Error::success(); 107 108 DWARFDataExtractor Data( 109 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()), 110 LSDASectionData.size()), 111 BC.DwCtx->getDWARFObj().isLittleEndian(), 112 BC.DwCtx->getDWARFObj().getAddressSize()); 113 uint64_t Offset = getLSDAAddress() - LSDASectionAddress; 114 assert(Data.isValidOffset(Offset) && "wrong LSDA address"); 115 116 const uint8_t LPStartEncoding = Data.getU8(&Offset); 117 uint64_t LPStart = Address; 118 if (LPStartEncoding != dwarf::DW_EH_PE_omit) { 119 std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer( 120 &Offset, LPStartEncoding, Offset + LSDASectionAddress); 121 if (!MaybeLPStart) { 122 BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: " 123 << (unsigned)LPStartEncoding << '\n'; 124 return createFatalBOLTError(""); 125 } 126 LPStart = *MaybeLPStart; 127 } 128 129 const uint8_t TTypeEncoding = Data.getU8(&Offset); 130 LSDATypeEncoding = TTypeEncoding; 131 size_t TTypeEncodingSize = 0; 132 uintptr_t TTypeEnd = 0; 133 if (TTypeEncoding != DW_EH_PE_omit) { 134 TTypeEnd = Data.getULEB128(&Offset); 135 TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); 136 } 137 138 if (opts::PrintExceptions) { 139 BC.outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress()) 140 << " for function " << *this << "]:\n"; 141 BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding) 142 << '\n'; 143 BC.outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n'; 144 BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) 145 << '\n'; 146 BC.outs() << "TType End = " << TTypeEnd << '\n'; 147 } 148 149 // Table to store list of indices in type table. Entries are uleb128 values. 150 const uint64_t TypeIndexTableStart = Offset + TTypeEnd; 151 152 // Offset past the last decoded index. 153 uint64_t MaxTypeIndexTableOffset = 0; 154 155 // Max positive index used in type table. 156 unsigned MaxTypeIndex = 0; 157 158 // The actual type info table starts at the same location, but grows in 159 // opposite direction. TTypeEncoding is used to encode stored values. 160 const uint64_t TypeTableStart = Offset + TTypeEnd; 161 162 uint8_t CallSiteEncoding = Data.getU8(&Offset); 163 uint32_t CallSiteTableLength = Data.getULEB128(&Offset); 164 uint64_t CallSiteTableStart = Offset; 165 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength; 166 uint64_t CallSitePtr = CallSiteTableStart; 167 uint64_t ActionTableStart = CallSiteTableEnd; 168 169 if (opts::PrintExceptions) { 170 BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; 171 BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n'; 172 BC.outs() << '\n'; 173 } 174 175 this->HasEHRanges = CallSitePtr < CallSiteTableEnd; 176 const uint64_t RangeBase = getAddress(); 177 while (CallSitePtr < CallSiteTableEnd) { 178 uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 179 CallSitePtr + LSDASectionAddress); 180 uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 181 CallSitePtr + LSDASectionAddress); 182 uint64_t LandingPad = *Data.getEncodedPointer( 183 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress); 184 uint64_t ActionEntry = Data.getULEB128(&CallSitePtr); 185 if (LandingPad) 186 LandingPad += LPStart; 187 188 if (opts::PrintExceptions) { 189 BC.outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) 190 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) 191 << "); landing pad: 0x" << Twine::utohexstr(LandingPad) 192 << "; action entry: 0x" << Twine::utohexstr(ActionEntry) 193 << "\n"; 194 BC.outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) 195 << '\n'; 196 } 197 198 // Create a handler entry if necessary. 199 MCSymbol *LPSymbol = nullptr; 200 if (LandingPad) { 201 // Verify if landing pad code is located outside current function 202 // Support landing pad to builtin_unreachable 203 if (LandingPad < Address || LandingPad > Address + getSize()) { 204 BinaryFunction *Fragment = 205 BC.getBinaryFunctionContainingAddress(LandingPad); 206 assert(Fragment != nullptr && 207 "BOLT-ERROR: cannot find landing pad fragment"); 208 BC.addInterproceduralReference(this, Fragment->getAddress()); 209 BC.processInterproceduralReferences(); 210 assert(isParentOrChildOf(*Fragment) && 211 "BOLT-ERROR: cannot have landing pads in different functions"); 212 setHasIndirectTargetToSplitFragment(true); 213 BC.addFragmentsToSkip(this); 214 return Error::success(); 215 } 216 217 const uint64_t LPOffset = LandingPad - getAddress(); 218 if (!getInstructionAtOffset(LPOffset)) { 219 if (opts::Verbosity >= 1) 220 BC.errs() << "BOLT-WARNING: landing pad " 221 << Twine::utohexstr(LPOffset) 222 << " not pointing to an instruction in function " << *this 223 << " - ignoring.\n"; 224 } else { 225 auto Label = Labels.find(LPOffset); 226 if (Label != Labels.end()) { 227 LPSymbol = Label->second; 228 } else { 229 LPSymbol = BC.Ctx->createNamedTempSymbol("LP"); 230 Labels[LPOffset] = LPSymbol; 231 } 232 } 233 } 234 235 // Mark all call instructions in the range. 236 auto II = Instructions.find(Start); 237 auto IE = Instructions.end(); 238 assert(II != IE && "exception range not pointing to an instruction"); 239 do { 240 MCInst &Instruction = II->second; 241 if (BC.MIB->isCall(Instruction) && 242 !BC.MIB->getConditionalTailCall(Instruction)) { 243 assert(!BC.MIB->isInvoke(Instruction) && 244 "overlapping exception ranges detected"); 245 // Add extra operands to a call instruction making it an invoke from 246 // now on. 247 BC.MIB->addEHInfo(Instruction, 248 MCPlus::MCLandingPad(LPSymbol, ActionEntry)); 249 } 250 ++II; 251 } while (II != IE && II->first < Start + Length); 252 253 if (ActionEntry != 0) { 254 auto printType = [&](int Index, raw_ostream &OS) { 255 assert(Index > 0 && "only positive indices are valid"); 256 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 257 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 258 uint64_t TypeAddress = 259 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 260 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) 261 TypeAddress = 0; 262 if (TypeAddress == 0) { 263 OS << "<all>"; 264 return; 265 } 266 if (TTypeEncoding & DW_EH_PE_indirect) { 267 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 268 assert(PointerOrErr && "failed to decode indirect address"); 269 TypeAddress = *PointerOrErr; 270 } 271 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) 272 OS << TypeSymBD->getName(); 273 else 274 OS << "0x" << Twine::utohexstr(TypeAddress); 275 }; 276 if (opts::PrintExceptions) 277 BC.outs() << " actions: "; 278 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1; 279 int64_t ActionType; 280 int64_t ActionNext; 281 const char *Sep = ""; 282 do { 283 ActionType = Data.getSLEB128(&ActionPtr); 284 const uint32_t Self = ActionPtr; 285 ActionNext = Data.getSLEB128(&ActionPtr); 286 if (opts::PrintExceptions) 287 BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") "; 288 if (ActionType == 0) { 289 if (opts::PrintExceptions) 290 BC.outs() << "cleanup"; 291 } else if (ActionType > 0) { 292 // It's an index into a type table. 293 MaxTypeIndex = 294 std::max(MaxTypeIndex, static_cast<unsigned>(ActionType)); 295 if (opts::PrintExceptions) { 296 BC.outs() << "catch type "; 297 printType(ActionType, BC.outs()); 298 } 299 } else { // ActionType < 0 300 if (opts::PrintExceptions) 301 BC.outs() << "filter exception types "; 302 const char *TSep = ""; 303 // ActionType is a negative *byte* offset into *uleb128-encoded* table 304 // of indices with base 1. 305 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are 306 // encoded using uleb128 thus we cannot directly dereference them. 307 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1; 308 while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) { 309 MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index)); 310 if (opts::PrintExceptions) { 311 BC.outs() << TSep; 312 printType(Index, BC.outs()); 313 TSep = ", "; 314 } 315 } 316 MaxTypeIndexTableOffset = std::max( 317 MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart); 318 } 319 320 Sep = "; "; 321 322 ActionPtr = Self + ActionNext; 323 } while (ActionNext); 324 if (opts::PrintExceptions) 325 BC.outs() << '\n'; 326 } 327 } 328 if (opts::PrintExceptions) 329 BC.outs() << '\n'; 330 331 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <= 332 Data.getData().size() && 333 "LSDA entry has crossed section boundary"); 334 335 if (TTypeEnd) { 336 LSDAActionTable = LSDASectionData.slice( 337 ActionTableStart, TypeIndexTableStart - 338 MaxTypeIndex * TTypeEncodingSize - 339 ActionTableStart); 340 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) { 341 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 342 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 343 uint64_t TypeAddress = 344 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 345 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) 346 TypeAddress = 0; 347 if (TTypeEncoding & DW_EH_PE_indirect) { 348 LSDATypeAddressTable.emplace_back(TypeAddress); 349 if (TypeAddress) { 350 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 351 assert(PointerOrErr && "failed to decode indirect address"); 352 TypeAddress = *PointerOrErr; 353 } 354 } 355 LSDATypeTable.emplace_back(TypeAddress); 356 } 357 LSDATypeIndexTable = 358 LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset); 359 } 360 return Error::success(); 361 } 362 363 void BinaryFunction::updateEHRanges() { 364 if (getSize() == 0) 365 return; 366 367 assert(CurrentState == State::CFG_Finalized && "unexpected state"); 368 369 // Build call sites table. 370 struct EHInfo { 371 const MCSymbol *LP; // landing pad 372 uint64_t Action; 373 }; 374 375 // Sites to update. 376 CallSitesList Sites; 377 378 for (FunctionFragment &FF : getLayout().fragments()) { 379 // If previous call can throw, this is its exception handler. 380 EHInfo PreviousEH = {nullptr, 0}; 381 382 // Marker for the beginning of exceptions range. 383 const MCSymbol *StartRange = nullptr; 384 385 for (BinaryBasicBlock *const BB : FF) { 386 for (MCInst &Instr : *BB) { 387 if (!BC.MIB->isCall(Instr)) 388 continue; 389 390 // Instruction can throw an exception that should be handled. 391 const bool Throws = BC.MIB->isInvoke(Instr); 392 393 // Ignore the call if it's a continuation of a no-throw gap. 394 if (!Throws && !StartRange) 395 continue; 396 397 // Extract exception handling information from the instruction. 398 const MCSymbol *LP = nullptr; 399 uint64_t Action = 0; 400 if (const std::optional<MCPlus::MCLandingPad> EHInfo = 401 BC.MIB->getEHInfo(Instr)) 402 std::tie(LP, Action) = *EHInfo; 403 404 // No action if the exception handler has not changed. 405 if (Throws && StartRange && PreviousEH.LP == LP && 406 PreviousEH.Action == Action) 407 continue; 408 409 // Same symbol is used for the beginning and the end of the range. 410 MCSymbol *EHSymbol; 411 if (MCSymbol *InstrLabel = BC.MIB->getLabel(Instr)) { 412 EHSymbol = InstrLabel; 413 } else { 414 std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex); 415 EHSymbol = BC.Ctx->createNamedTempSymbol("EH"); 416 BC.MIB->setLabel(Instr, EHSymbol); 417 } 418 419 // At this point we could be in one of the following states: 420 // 421 // I. Exception handler has changed and we need to close previous range 422 // and start a new one. 423 // 424 // II. Start a new exception range after the gap. 425 // 426 // III. Close current exception range and start a new gap. 427 const MCSymbol *EndRange; 428 if (StartRange) { 429 // I, III: 430 EndRange = EHSymbol; 431 } else { 432 // II: 433 StartRange = EHSymbol; 434 EndRange = nullptr; 435 } 436 437 // Close the previous range. 438 if (EndRange) 439 Sites.emplace_back( 440 FF.getFragmentNum(), 441 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 442 443 if (Throws) { 444 // I, II: 445 StartRange = EHSymbol; 446 PreviousEH = EHInfo{LP, Action}; 447 } else { 448 StartRange = nullptr; 449 } 450 } 451 } 452 453 // Check if we need to close the range. 454 if (StartRange) { 455 const MCSymbol *EndRange = getFunctionEndLabel(FF.getFragmentNum()); 456 Sites.emplace_back( 457 FF.getFragmentNum(), 458 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 459 } 460 } 461 462 addCallSites(Sites); 463 } 464 465 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; 466 467 CFIReaderWriter::CFIReaderWriter(BinaryContext &BC, 468 const DWARFDebugFrame &EHFrame) 469 : BC(BC) { 470 // Prepare FDEs for fast lookup 471 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) { 472 const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry); 473 // Skip CIEs. 474 if (!CurFDE) 475 continue; 476 // There could me multiple FDEs with the same initial address, and perhaps 477 // different sizes (address ranges). Use the first entry with non-zero size. 478 auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation()); 479 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) { 480 if (CurFDE->getAddressRange()) { 481 if (FDEI->second->getAddressRange() == 0) { 482 FDEI->second = CurFDE; 483 } else if (opts::Verbosity > 0) { 484 BC.errs() << "BOLT-WARNING: different FDEs for function at 0x" 485 << Twine::utohexstr(FDEI->first) 486 << " detected; sizes: " << FDEI->second->getAddressRange() 487 << " and " << CurFDE->getAddressRange() << '\n'; 488 } 489 } 490 } else { 491 FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE); 492 } 493 } 494 } 495 496 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { 497 uint64_t Address = Function.getAddress(); 498 auto I = FDEs.find(Address); 499 // Ignore zero-length FDE ranges. 500 if (I == FDEs.end() || !I->second->getAddressRange()) 501 return true; 502 503 const FDE &CurFDE = *I->second; 504 std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); 505 Function.setLSDAAddress(LSDA ? *LSDA : 0); 506 507 uint64_t Offset = Function.getFirstInstructionOffset(); 508 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); 509 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor(); 510 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) { 511 Function.setPersonalityFunction( 512 *CurFDE.getLinkedCIE()->getPersonalityAddress()); 513 Function.setPersonalityEncoding( 514 *CurFDE.getLinkedCIE()->getPersonalityEncoding()); 515 } 516 517 auto decodeFrameInstruction = [this, &Function, &Offset, Address, 518 CodeAlignment, DataAlignment]( 519 const CFIProgram::Instruction &Instr) { 520 uint8_t Opcode = Instr.Opcode; 521 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) 522 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; 523 switch (Instr.Opcode) { 524 case DW_CFA_nop: 525 break; 526 case DW_CFA_advance_loc4: 527 case DW_CFA_advance_loc2: 528 case DW_CFA_advance_loc1: 529 case DW_CFA_advance_loc: 530 // Advance our current address 531 Offset += CodeAlignment * int64_t(Instr.Ops[0]); 532 break; 533 case DW_CFA_offset_extended_sf: 534 Function.addCFIInstruction( 535 Offset, 536 MCCFIInstruction::createOffset( 537 nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1]))); 538 break; 539 case DW_CFA_offset_extended: 540 case DW_CFA_offset: 541 Function.addCFIInstruction( 542 Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0], 543 DataAlignment * Instr.Ops[1])); 544 break; 545 case DW_CFA_restore_extended: 546 case DW_CFA_restore: 547 Function.addCFIInstruction( 548 Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0])); 549 break; 550 case DW_CFA_set_loc: 551 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds"); 552 assert(Instr.Ops[0] <= Address + Function.getSize() && 553 "set_loc out of function bounds"); 554 Offset = Instr.Ops[0] - Address; 555 break; 556 557 case DW_CFA_undefined: 558 Function.addCFIInstruction( 559 Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0])); 560 break; 561 case DW_CFA_same_value: 562 Function.addCFIInstruction( 563 Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0])); 564 break; 565 case DW_CFA_register: 566 Function.addCFIInstruction( 567 Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0], 568 Instr.Ops[1])); 569 break; 570 case DW_CFA_remember_state: 571 Function.addCFIInstruction( 572 Offset, MCCFIInstruction::createRememberState(nullptr)); 573 break; 574 case DW_CFA_restore_state: 575 Function.addCFIInstruction(Offset, 576 MCCFIInstruction::createRestoreState(nullptr)); 577 break; 578 case DW_CFA_def_cfa: 579 Function.addCFIInstruction( 580 Offset, 581 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1])); 582 break; 583 case DW_CFA_def_cfa_sf: 584 Function.addCFIInstruction( 585 Offset, 586 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], 587 DataAlignment * int64_t(Instr.Ops[1]))); 588 break; 589 case DW_CFA_def_cfa_register: 590 Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister( 591 nullptr, Instr.Ops[0])); 592 break; 593 case DW_CFA_def_cfa_offset: 594 Function.addCFIInstruction( 595 Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0])); 596 break; 597 case DW_CFA_def_cfa_offset_sf: 598 Function.addCFIInstruction( 599 Offset, MCCFIInstruction::cfiDefCfaOffset( 600 nullptr, DataAlignment * int64_t(Instr.Ops[0]))); 601 break; 602 case DW_CFA_GNU_args_size: 603 Function.addCFIInstruction( 604 Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0])); 605 Function.setUsesGnuArgsSize(); 606 break; 607 case DW_CFA_val_offset_sf: 608 case DW_CFA_val_offset: 609 if (opts::Verbosity >= 1) { 610 BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n"; 611 } 612 return false; 613 case DW_CFA_def_cfa_expression: 614 case DW_CFA_val_expression: 615 case DW_CFA_expression: { 616 StringRef ExprBytes = Instr.Expression->getData(); 617 std::string Str; 618 raw_string_ostream OS(Str); 619 // Manually encode this instruction using CFI escape 620 OS << Opcode; 621 if (Opcode != DW_CFA_def_cfa_expression) 622 encodeULEB128(Instr.Ops[0], OS); 623 encodeULEB128(ExprBytes.size(), OS); 624 OS << ExprBytes; 625 Function.addCFIInstruction( 626 Offset, MCCFIInstruction::createEscape(nullptr, OS.str())); 627 break; 628 } 629 case DW_CFA_MIPS_advance_loc8: 630 if (opts::Verbosity >= 1) 631 BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; 632 return false; 633 case DW_CFA_GNU_window_save: 634 // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same 635 // id but mean different things. The latter is used in AArch64. 636 if (Function.getBinaryContext().isAArch64()) { 637 Function.addCFIInstruction( 638 Offset, MCCFIInstruction::createNegateRAState(nullptr)); 639 break; 640 } 641 if (opts::Verbosity >= 1) 642 BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n"; 643 return false; 644 case DW_CFA_lo_user: 645 case DW_CFA_hi_user: 646 if (opts::Verbosity >= 1) 647 BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n"; 648 return false; 649 default: 650 if (opts::Verbosity >= 1) 651 BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: " 652 << Instr.Opcode << '\n'; 653 return false; 654 } 655 656 return true; 657 }; 658 659 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) 660 if (!decodeFrameInstruction(Instr)) 661 return false; 662 663 for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) 664 if (!decodeFrameInstruction(Instr)) 665 return false; 666 667 return true; 668 } 669 670 std::vector<char> CFIReaderWriter::generateEHFrameHeader( 671 const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame, 672 uint64_t EHFrameHeaderAddress, 673 std::vector<uint64_t> &FailedAddresses) const { 674 // Common PC -> FDE map to be written into .eh_frame_hdr. 675 std::map<uint64_t, uint64_t> PCToFDE; 676 677 // Presort array for binary search. 678 llvm::sort(FailedAddresses); 679 680 // Initialize PCToFDE using NewEHFrame. 681 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) { 682 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 683 if (FDE == nullptr) 684 continue; 685 const uint64_t FuncAddress = FDE->getInitialLocation(); 686 const uint64_t FDEAddress = 687 NewEHFrame.getEHFrameAddress() + FDE->getOffset(); 688 689 // Ignore unused FDEs. 690 if (FuncAddress == 0) 691 continue; 692 693 // Add the address to the map unless we failed to write it. 694 if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(), 695 FuncAddress)) { 696 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x" 697 << Twine::utohexstr(FuncAddress) << " is at 0x" 698 << Twine::utohexstr(FDEAddress) << '\n'); 699 PCToFDE[FuncAddress] = FDEAddress; 700 } 701 }; 702 703 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains " 704 << llvm::size(NewEHFrame.entries()) << " entries\n"); 705 706 // Add entries from the original .eh_frame corresponding to the functions 707 // that we did not update. 708 for (const dwarf::FrameEntry &Entry : OldEHFrame) { 709 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 710 if (FDE == nullptr) 711 continue; 712 const uint64_t FuncAddress = FDE->getInitialLocation(); 713 const uint64_t FDEAddress = 714 OldEHFrame.getEHFrameAddress() + FDE->getOffset(); 715 716 // Add the address if we failed to write it. 717 if (PCToFDE.count(FuncAddress) == 0) { 718 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x" 719 << Twine::utohexstr(FuncAddress) << " is at 0x" 720 << Twine::utohexstr(FDEAddress) << '\n'); 721 PCToFDE[FuncAddress] = FDEAddress; 722 } 723 }; 724 725 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains " 726 << llvm::size(OldEHFrame.entries()) << " entries\n"); 727 728 // Generate a new .eh_frame_hdr based on the new map. 729 730 // Header plus table of entries of size 8 bytes. 731 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8); 732 733 // Version is 1. 734 EHFrameHeader[0] = 1; 735 // Encoding of the eh_frame pointer. 736 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; 737 // Encoding of the count field to follow. 738 EHFrameHeader[2] = DW_EH_PE_udata4; 739 // Encoding of the table entries - 4-byte offset from the start of the header. 740 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; 741 742 // Address of eh_frame. Use the new one. 743 support::ulittle32_t::ref(EHFrameHeader.data() + 4) = 744 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4); 745 746 // Number of entries in the table (FDE count). 747 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size(); 748 749 // Write the table at offset 12. 750 char *Ptr = EHFrameHeader.data(); 751 uint32_t Offset = 12; 752 for (const auto &PCI : PCToFDE) { 753 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress; 754 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds"); 755 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset; 756 Offset += 4; 757 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress; 758 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds"); 759 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset; 760 Offset += 4; 761 } 762 763 return EHFrameHeader; 764 } 765 766 Error EHFrameParser::parseCIE(uint64_t StartOffset) { 767 uint8_t Version = Data.getU8(&Offset); 768 const char *Augmentation = Data.getCStr(&Offset); 769 StringRef AugmentationString(Augmentation ? Augmentation : ""); 770 uint8_t AddressSize = 771 Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); 772 Data.setAddressSize(AddressSize); 773 // Skip segment descriptor size 774 if (Version >= 4) 775 Offset += 1; 776 // Skip code alignment factor 777 Data.getULEB128(&Offset); 778 // Skip data alignment 779 Data.getSLEB128(&Offset); 780 // Skip return address register 781 if (Version == 1) 782 Offset += 1; 783 else 784 Data.getULEB128(&Offset); 785 786 uint32_t FDEPointerEncoding = DW_EH_PE_absptr; 787 uint32_t LSDAPointerEncoding = DW_EH_PE_omit; 788 // Walk the augmentation string to get all the augmentation data. 789 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { 790 switch (AugmentationString[i]) { 791 default: 792 return createStringError( 793 errc::invalid_argument, 794 "unknown augmentation character in entry at 0x%" PRIx64, StartOffset); 795 case 'L': 796 LSDAPointerEncoding = Data.getU8(&Offset); 797 break; 798 case 'P': { 799 uint32_t PersonalityEncoding = Data.getU8(&Offset); 800 std::optional<uint64_t> Personality = 801 Data.getEncodedPointer(&Offset, PersonalityEncoding, 802 EHFrameAddress ? EHFrameAddress + Offset : 0); 803 // Patch personality address 804 if (Personality) 805 PatcherCallback(*Personality, Offset, PersonalityEncoding); 806 break; 807 } 808 case 'R': 809 FDEPointerEncoding = Data.getU8(&Offset); 810 break; 811 case 'z': 812 if (i) 813 return createStringError( 814 errc::invalid_argument, 815 "'z' must be the first character at 0x%" PRIx64, StartOffset); 816 // Skip augmentation length 817 Data.getULEB128(&Offset); 818 break; 819 case 'S': 820 case 'B': 821 break; 822 } 823 } 824 Entries.emplace_back(std::make_unique<CIEInfo>( 825 FDEPointerEncoding, LSDAPointerEncoding, AugmentationString)); 826 CIEs[StartOffset] = &*Entries.back(); 827 return Error::success(); 828 } 829 830 Error EHFrameParser::parseFDE(uint64_t CIEPointer, 831 uint64_t StartStructureOffset) { 832 std::optional<uint64_t> LSDAAddress; 833 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer]; 834 835 // The address size is encoded in the CIE we reference. 836 if (!Cie) 837 return createStringError(errc::invalid_argument, 838 "parsing FDE data at 0x%" PRIx64 839 " failed due to missing CIE", 840 StartStructureOffset); 841 // Patch initial location 842 if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 843 EHFrameAddress + Offset)) { 844 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding); 845 } 846 // Skip address range 847 Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0); 848 849 // Process augmentation data for this FDE. 850 StringRef AugmentationString = Cie->AugmentationString; 851 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) { 852 // Skip augmentation length 853 Data.getULEB128(&Offset); 854 LSDAAddress = 855 Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding, 856 EHFrameAddress ? Offset + EHFrameAddress : 0); 857 // Patch LSDA address 858 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding); 859 } 860 return Error::success(); 861 } 862 863 Error EHFrameParser::parse() { 864 while (Data.isValidOffset(Offset)) { 865 const uint64_t StartOffset = Offset; 866 867 uint64_t Length; 868 DwarfFormat Format; 869 std::tie(Length, Format) = Data.getInitialLength(&Offset); 870 871 // If the Length is 0, then this CIE is a terminator 872 if (Length == 0) 873 break; 874 875 const uint64_t StartStructureOffset = Offset; 876 const uint64_t EndStructureOffset = Offset + Length; 877 878 Error Err = Error::success(); 879 const uint64_t Id = Data.getRelocatedValue(4, &Offset, 880 /*SectionIndex=*/nullptr, &Err); 881 if (Err) 882 return Err; 883 884 if (!Id) { 885 if (Error Err = parseCIE(StartOffset)) 886 return Err; 887 } else { 888 if (Error Err = parseFDE(Id, StartStructureOffset)) 889 return Err; 890 } 891 Offset = EndStructureOffset; 892 } 893 894 return Error::success(); 895 } 896 897 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress, 898 PatcherCallbackTy PatcherCallback) { 899 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback); 900 return Parser.parse(); 901 } 902 903 } // namespace bolt 904 } // namespace llvm 905