1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements functions for handling C++ exception meta data. 10 // 11 // Some of the code is taken from examples/ExceptionDemo 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "bolt/Core/Exceptions.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/BinaryFormat/Dwarf.h" 20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 21 #include "llvm/Support/Casting.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/Errc.h" 25 #include "llvm/Support/LEB128.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <map> 29 30 #undef DEBUG_TYPE 31 #define DEBUG_TYPE "bolt-exceptions" 32 33 using namespace llvm::dwarf; 34 35 namespace opts { 36 37 extern llvm::cl::OptionCategory BoltCategory; 38 39 extern llvm::cl::opt<unsigned> Verbosity; 40 41 static llvm::cl::opt<bool> 42 PrintExceptions("print-exceptions", 43 llvm::cl::desc("print exception handling data"), 44 llvm::cl::Hidden, llvm::cl::cat(BoltCategory)); 45 46 } // namespace opts 47 48 namespace llvm { 49 namespace bolt { 50 51 // Read and dump the .gcc_exception_table section entry. 52 // 53 // .gcc_except_table section contains a set of Language-Specific Data Areas - 54 // a fancy name for exception handling tables. There's one LSDA entry per 55 // function. However, we can't actually tell which function LSDA refers to 56 // unless we parse .eh_frame entry that refers to the LSDA. 57 // Then inside LSDA most addresses are encoded relative to the function start, 58 // so we need the function context in order to get to real addresses. 59 // 60 // The best visual representation of the tables comprising LSDA and 61 // relationships between them is illustrated at: 62 // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf 63 // Keep in mind that GCC implementation deviates slightly from that document. 64 // 65 // To summarize, there are 4 tables in LSDA: call site table, actions table, 66 // types table, and types index table (for indirection). The main table contains 67 // call site entries. Each call site includes a PC range that can throw an 68 // exception, a handler (landing pad), and a reference to an entry in the action 69 // table. The handler and/or action could be 0. The action entry is a head 70 // of a list of actions associated with a call site. The action table contains 71 // all such lists (it could be optimized to share list tails). Each action could 72 // be either to catch an exception of a given type, to perform a cleanup, or to 73 // propagate the exception after filtering it out (e.g. to make sure function 74 // exception specification is not violated). Catch action contains a reference 75 // to an entry in the type table, and filter action refers to an entry in the 76 // type index table to encode a set of types to filter. 77 // 78 // Call site table follows LSDA header. Action table immediately follows the 79 // call site table. 80 // 81 // Both types table and type index table start at the same location, but they 82 // grow in opposite directions (types go up, indices go down). The beginning of 83 // these tables is encoded in LSDA header. Sizes for both of the tables are not 84 // included anywhere. 85 // 86 // We have to parse all of the tables to determine their sizes. Then we have 87 // to parse the call site table and associate discovered information with 88 // actual call instructions and landing pad blocks. 89 // 90 // For the purpose of rewriting exception handling tables, we can reuse action, 91 // and type index tables in their original binary format. 92 // 93 // Type table could be encoded using position-independent references, and thus 94 // may require relocation. 95 // 96 // Ideally we should be able to re-write LSDA in-place, without the need to 97 // allocate a new space for it. Sadly there's no guarantee that the new call 98 // site table will be the same size as GCC uses uleb encodings for PC offsets. 99 // 100 // Note: some functions have LSDA entries with 0 call site entries. 101 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData, 102 uint64_t LSDASectionAddress) { 103 assert(CurrentState == State::Disassembled && "unexpected function state"); 104 105 if (!getLSDAAddress()) 106 return; 107 108 DWARFDataExtractor Data( 109 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()), 110 LSDASectionData.size()), 111 BC.DwCtx->getDWARFObj().isLittleEndian(), 8); 112 uint64_t Offset = getLSDAAddress() - LSDASectionAddress; 113 assert(Data.isValidOffset(Offset) && "wrong LSDA address"); 114 115 uint8_t LPStartEncoding = Data.getU8(&Offset); 116 uint64_t LPStart = 0; 117 // Convert to offset if LPStartEncoding is typed absptr DW_EH_PE_absptr 118 if (std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer( 119 &Offset, LPStartEncoding, Offset + LSDASectionAddress)) 120 LPStart = (LPStartEncoding && 0xFF == 0) ? *MaybeLPStart 121 : *MaybeLPStart - Address; 122 123 const uint8_t TTypeEncoding = Data.getU8(&Offset); 124 LSDATypeEncoding = TTypeEncoding; 125 size_t TTypeEncodingSize = 0; 126 uintptr_t TTypeEnd = 0; 127 if (TTypeEncoding != DW_EH_PE_omit) { 128 TTypeEnd = Data.getULEB128(&Offset); 129 TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); 130 } 131 132 if (opts::PrintExceptions) { 133 outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress()) 134 << " for function " << *this << "]:\n"; 135 outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding) 136 << '\n'; 137 outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n'; 138 outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n'; 139 outs() << "TType End = " << TTypeEnd << '\n'; 140 } 141 142 // Table to store list of indices in type table. Entries are uleb128 values. 143 const uint64_t TypeIndexTableStart = Offset + TTypeEnd; 144 145 // Offset past the last decoded index. 146 uint64_t MaxTypeIndexTableOffset = 0; 147 148 // Max positive index used in type table. 149 unsigned MaxTypeIndex = 0; 150 151 // The actual type info table starts at the same location, but grows in 152 // opposite direction. TTypeEncoding is used to encode stored values. 153 const uint64_t TypeTableStart = Offset + TTypeEnd; 154 155 uint8_t CallSiteEncoding = Data.getU8(&Offset); 156 uint32_t CallSiteTableLength = Data.getULEB128(&Offset); 157 uint64_t CallSiteTableStart = Offset; 158 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength; 159 uint64_t CallSitePtr = CallSiteTableStart; 160 uint64_t ActionTableStart = CallSiteTableEnd; 161 162 if (opts::PrintExceptions) { 163 outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; 164 outs() << "CallSite table length = " << CallSiteTableLength << '\n'; 165 outs() << '\n'; 166 } 167 168 this->HasEHRanges = CallSitePtr < CallSiteTableEnd; 169 const uint64_t RangeBase = getAddress(); 170 while (CallSitePtr < CallSiteTableEnd) { 171 uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 172 CallSitePtr + LSDASectionAddress); 173 uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 174 CallSitePtr + LSDASectionAddress); 175 uint64_t LandingPad = *Data.getEncodedPointer( 176 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress); 177 uint64_t ActionEntry = Data.getULEB128(&CallSitePtr); 178 179 uint64_t LPOffset = LPStart + LandingPad; 180 uint64_t LPAddress = Address + LPOffset; 181 182 // Verify if landing pad code is located outside current function 183 // Support landing pad to builtin_unreachable 184 if (LPAddress < Address || LPAddress > Address + getSize()) { 185 BinaryFunction *Fragment = 186 BC.getBinaryFunctionContainingAddress(LPAddress); 187 assert(Fragment != nullptr && 188 "BOLT-ERROR: cannot find landing pad fragment"); 189 BC.addInterproceduralReference(this, Fragment->getAddress()); 190 BC.processInterproceduralReferences(); 191 assert((isChildOf(*Fragment) || Fragment->isChildOf(*this)) && 192 "BOLT-ERROR: cannot have landing pads in different " 193 "functions"); 194 setHasIndirectTargetToSplitFragment(true); 195 BC.addFragmentsToSkip(this); 196 return; 197 } 198 199 if (opts::PrintExceptions) { 200 outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) 201 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) 202 << "); landing pad: 0x" << Twine::utohexstr(LPOffset) 203 << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n"; 204 outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) 205 << '\n'; 206 } 207 208 // Create a handler entry if necessary. 209 MCSymbol *LPSymbol = nullptr; 210 if (LPOffset) { 211 if (!getInstructionAtOffset(LPOffset)) { 212 if (opts::Verbosity >= 1) 213 errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LPOffset) 214 << " not pointing to an instruction in function " << *this 215 << " - ignoring.\n"; 216 } else { 217 auto Label = Labels.find(LPOffset); 218 if (Label != Labels.end()) { 219 LPSymbol = Label->second; 220 } else { 221 LPSymbol = BC.Ctx->createNamedTempSymbol("LP"); 222 Labels[LPOffset] = LPSymbol; 223 } 224 } 225 } 226 227 // Mark all call instructions in the range. 228 auto II = Instructions.find(Start); 229 auto IE = Instructions.end(); 230 assert(II != IE && "exception range not pointing to an instruction"); 231 do { 232 MCInst &Instruction = II->second; 233 if (BC.MIB->isCall(Instruction) && 234 !BC.MIB->getConditionalTailCall(Instruction)) { 235 assert(!BC.MIB->isInvoke(Instruction) && 236 "overlapping exception ranges detected"); 237 // Add extra operands to a call instruction making it an invoke from 238 // now on. 239 BC.MIB->addEHInfo(Instruction, 240 MCPlus::MCLandingPad(LPSymbol, ActionEntry)); 241 } 242 ++II; 243 } while (II != IE && II->first < Start + Length); 244 245 if (ActionEntry != 0) { 246 auto printType = [&](int Index, raw_ostream &OS) { 247 assert(Index > 0 && "only positive indices are valid"); 248 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 249 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 250 uint64_t TypeAddress = 251 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 252 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) 253 TypeAddress = 0; 254 if (TypeAddress == 0) { 255 OS << "<all>"; 256 return; 257 } 258 if (TTypeEncoding & DW_EH_PE_indirect) { 259 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 260 assert(PointerOrErr && "failed to decode indirect address"); 261 TypeAddress = *PointerOrErr; 262 } 263 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) 264 OS << TypeSymBD->getName(); 265 else 266 OS << "0x" << Twine::utohexstr(TypeAddress); 267 }; 268 if (opts::PrintExceptions) 269 outs() << " actions: "; 270 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1; 271 int64_t ActionType; 272 int64_t ActionNext; 273 const char *Sep = ""; 274 do { 275 ActionType = Data.getSLEB128(&ActionPtr); 276 const uint32_t Self = ActionPtr; 277 ActionNext = Data.getSLEB128(&ActionPtr); 278 if (opts::PrintExceptions) 279 outs() << Sep << "(" << ActionType << ", " << ActionNext << ") "; 280 if (ActionType == 0) { 281 if (opts::PrintExceptions) 282 outs() << "cleanup"; 283 } else if (ActionType > 0) { 284 // It's an index into a type table. 285 MaxTypeIndex = 286 std::max(MaxTypeIndex, static_cast<unsigned>(ActionType)); 287 if (opts::PrintExceptions) { 288 outs() << "catch type "; 289 printType(ActionType, outs()); 290 } 291 } else { // ActionType < 0 292 if (opts::PrintExceptions) 293 outs() << "filter exception types "; 294 const char *TSep = ""; 295 // ActionType is a negative *byte* offset into *uleb128-encoded* table 296 // of indices with base 1. 297 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are 298 // encoded using uleb128 thus we cannot directly dereference them. 299 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1; 300 while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) { 301 MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index)); 302 if (opts::PrintExceptions) { 303 outs() << TSep; 304 printType(Index, outs()); 305 TSep = ", "; 306 } 307 } 308 MaxTypeIndexTableOffset = std::max( 309 MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart); 310 } 311 312 Sep = "; "; 313 314 ActionPtr = Self + ActionNext; 315 } while (ActionNext); 316 if (opts::PrintExceptions) 317 outs() << '\n'; 318 } 319 } 320 if (opts::PrintExceptions) 321 outs() << '\n'; 322 323 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <= 324 Data.getData().size() && 325 "LSDA entry has crossed section boundary"); 326 327 if (TTypeEnd) { 328 LSDAActionTable = LSDASectionData.slice( 329 ActionTableStart, TypeIndexTableStart - 330 MaxTypeIndex * TTypeEncodingSize - 331 ActionTableStart); 332 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) { 333 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 334 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 335 uint64_t TypeAddress = 336 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 337 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) 338 TypeAddress = 0; 339 if (TTypeEncoding & DW_EH_PE_indirect) { 340 LSDATypeAddressTable.emplace_back(TypeAddress); 341 if (TypeAddress) { 342 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 343 assert(PointerOrErr && "failed to decode indirect address"); 344 TypeAddress = *PointerOrErr; 345 } 346 } 347 LSDATypeTable.emplace_back(TypeAddress); 348 } 349 LSDATypeIndexTable = 350 LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset); 351 } 352 } 353 354 void BinaryFunction::updateEHRanges() { 355 if (getSize() == 0) 356 return; 357 358 assert(CurrentState == State::CFG_Finalized && "unexpected state"); 359 360 // Build call sites table. 361 struct EHInfo { 362 const MCSymbol *LP; // landing pad 363 uint64_t Action; 364 }; 365 366 // Sites to update. 367 CallSitesList Sites; 368 369 for (FunctionFragment &FF : getLayout().fragments()) { 370 // If previous call can throw, this is its exception handler. 371 EHInfo PreviousEH = {nullptr, 0}; 372 373 // Marker for the beginning of exceptions range. 374 const MCSymbol *StartRange = nullptr; 375 376 for (BinaryBasicBlock *const BB : FF) { 377 for (auto II = BB->begin(); II != BB->end(); ++II) { 378 if (!BC.MIB->isCall(*II)) 379 continue; 380 381 // Instruction can throw an exception that should be handled. 382 const bool Throws = BC.MIB->isInvoke(*II); 383 384 // Ignore the call if it's a continuation of a no-throw gap. 385 if (!Throws && !StartRange) 386 continue; 387 388 // Extract exception handling information from the instruction. 389 const MCSymbol *LP = nullptr; 390 uint64_t Action = 0; 391 if (const std::optional<MCPlus::MCLandingPad> EHInfo = 392 BC.MIB->getEHInfo(*II)) 393 std::tie(LP, Action) = *EHInfo; 394 395 // No action if the exception handler has not changed. 396 if (Throws && StartRange && PreviousEH.LP == LP && 397 PreviousEH.Action == Action) 398 continue; 399 400 // Same symbol is used for the beginning and the end of the range. 401 const MCSymbol *EHSymbol; 402 MCInst EHLabel; 403 { 404 std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex); 405 EHSymbol = BC.Ctx->createNamedTempSymbol("EH"); 406 BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get()); 407 } 408 409 II = std::next(BB->insertPseudoInstr(II, EHLabel)); 410 411 // At this point we could be in one of the following states: 412 // 413 // I. Exception handler has changed and we need to close previous range 414 // and start a new one. 415 // 416 // II. Start a new exception range after the gap. 417 // 418 // III. Close current exception range and start a new gap. 419 const MCSymbol *EndRange; 420 if (StartRange) { 421 // I, III: 422 EndRange = EHSymbol; 423 } else { 424 // II: 425 StartRange = EHSymbol; 426 EndRange = nullptr; 427 } 428 429 // Close the previous range. 430 if (EndRange) 431 Sites.emplace_back( 432 FF.getFragmentNum(), 433 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 434 435 if (Throws) { 436 // I, II: 437 StartRange = EHSymbol; 438 PreviousEH = EHInfo{LP, Action}; 439 } else { 440 StartRange = nullptr; 441 } 442 } 443 } 444 445 // Check if we need to close the range. 446 if (StartRange) { 447 const MCSymbol *EndRange = getFunctionEndLabel(FF.getFragmentNum()); 448 Sites.emplace_back( 449 FF.getFragmentNum(), 450 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 451 } 452 } 453 454 addCallSites(Sites); 455 } 456 457 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; 458 459 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) { 460 // Prepare FDEs for fast lookup 461 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) { 462 const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry); 463 // Skip CIEs. 464 if (!CurFDE) 465 continue; 466 // There could me multiple FDEs with the same initial address, and perhaps 467 // different sizes (address ranges). Use the first entry with non-zero size. 468 auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation()); 469 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) { 470 if (CurFDE->getAddressRange()) { 471 if (FDEI->second->getAddressRange() == 0) { 472 FDEI->second = CurFDE; 473 } else if (opts::Verbosity > 0) { 474 errs() << "BOLT-WARNING: different FDEs for function at 0x" 475 << Twine::utohexstr(FDEI->first) 476 << " detected; sizes: " << FDEI->second->getAddressRange() 477 << " and " << CurFDE->getAddressRange() << '\n'; 478 } 479 } 480 } else { 481 FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE); 482 } 483 } 484 } 485 486 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { 487 uint64_t Address = Function.getAddress(); 488 auto I = FDEs.find(Address); 489 // Ignore zero-length FDE ranges. 490 if (I == FDEs.end() || !I->second->getAddressRange()) 491 return true; 492 493 const FDE &CurFDE = *I->second; 494 std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); 495 Function.setLSDAAddress(LSDA ? *LSDA : 0); 496 497 uint64_t Offset = Function.getFirstInstructionOffset(); 498 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); 499 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor(); 500 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) { 501 Function.setPersonalityFunction( 502 *CurFDE.getLinkedCIE()->getPersonalityAddress()); 503 Function.setPersonalityEncoding( 504 *CurFDE.getLinkedCIE()->getPersonalityEncoding()); 505 } 506 507 auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment, 508 DataAlignment]( 509 const CFIProgram::Instruction &Instr) { 510 uint8_t Opcode = Instr.Opcode; 511 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) 512 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; 513 switch (Instr.Opcode) { 514 case DW_CFA_nop: 515 break; 516 case DW_CFA_advance_loc4: 517 case DW_CFA_advance_loc2: 518 case DW_CFA_advance_loc1: 519 case DW_CFA_advance_loc: 520 // Advance our current address 521 Offset += CodeAlignment * int64_t(Instr.Ops[0]); 522 break; 523 case DW_CFA_offset_extended_sf: 524 Function.addCFIInstruction( 525 Offset, 526 MCCFIInstruction::createOffset( 527 nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1]))); 528 break; 529 case DW_CFA_offset_extended: 530 case DW_CFA_offset: 531 Function.addCFIInstruction( 532 Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0], 533 DataAlignment * Instr.Ops[1])); 534 break; 535 case DW_CFA_restore_extended: 536 case DW_CFA_restore: 537 Function.addCFIInstruction( 538 Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0])); 539 break; 540 case DW_CFA_set_loc: 541 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds"); 542 assert(Instr.Ops[0] <= Address + Function.getSize() && 543 "set_loc out of function bounds"); 544 Offset = Instr.Ops[0] - Address; 545 break; 546 547 case DW_CFA_undefined: 548 Function.addCFIInstruction( 549 Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0])); 550 break; 551 case DW_CFA_same_value: 552 Function.addCFIInstruction( 553 Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0])); 554 break; 555 case DW_CFA_register: 556 Function.addCFIInstruction( 557 Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0], 558 Instr.Ops[1])); 559 break; 560 case DW_CFA_remember_state: 561 Function.addCFIInstruction( 562 Offset, MCCFIInstruction::createRememberState(nullptr)); 563 break; 564 case DW_CFA_restore_state: 565 Function.addCFIInstruction(Offset, 566 MCCFIInstruction::createRestoreState(nullptr)); 567 break; 568 case DW_CFA_def_cfa: 569 Function.addCFIInstruction( 570 Offset, 571 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1])); 572 break; 573 case DW_CFA_def_cfa_sf: 574 Function.addCFIInstruction( 575 Offset, 576 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], 577 DataAlignment * int64_t(Instr.Ops[1]))); 578 break; 579 case DW_CFA_def_cfa_register: 580 Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister( 581 nullptr, Instr.Ops[0])); 582 break; 583 case DW_CFA_def_cfa_offset: 584 Function.addCFIInstruction( 585 Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0])); 586 break; 587 case DW_CFA_def_cfa_offset_sf: 588 Function.addCFIInstruction( 589 Offset, MCCFIInstruction::cfiDefCfaOffset( 590 nullptr, DataAlignment * int64_t(Instr.Ops[0]))); 591 break; 592 case DW_CFA_GNU_args_size: 593 Function.addCFIInstruction( 594 Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0])); 595 Function.setUsesGnuArgsSize(); 596 break; 597 case DW_CFA_val_offset_sf: 598 case DW_CFA_val_offset: 599 if (opts::Verbosity >= 1) { 600 errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n"; 601 } 602 return false; 603 case DW_CFA_def_cfa_expression: 604 case DW_CFA_val_expression: 605 case DW_CFA_expression: { 606 StringRef ExprBytes = Instr.Expression->getData(); 607 std::string Str; 608 raw_string_ostream OS(Str); 609 // Manually encode this instruction using CFI escape 610 OS << Opcode; 611 if (Opcode != DW_CFA_def_cfa_expression) 612 encodeULEB128(Instr.Ops[0], OS); 613 encodeULEB128(ExprBytes.size(), OS); 614 OS << ExprBytes; 615 Function.addCFIInstruction( 616 Offset, MCCFIInstruction::createEscape(nullptr, OS.str())); 617 break; 618 } 619 case DW_CFA_MIPS_advance_loc8: 620 if (opts::Verbosity >= 1) 621 errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; 622 return false; 623 case DW_CFA_GNU_window_save: 624 // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same 625 // id but mean different things. The latter is used in AArch64. 626 if (Function.getBinaryContext().isAArch64()) { 627 Function.addCFIInstruction( 628 Offset, MCCFIInstruction::createNegateRAState(nullptr)); 629 break; 630 } 631 if (opts::Verbosity >= 1) 632 errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n"; 633 return false; 634 case DW_CFA_lo_user: 635 case DW_CFA_hi_user: 636 if (opts::Verbosity >= 1) 637 errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n"; 638 return false; 639 default: 640 if (opts::Verbosity >= 1) 641 errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode 642 << '\n'; 643 return false; 644 } 645 646 return true; 647 }; 648 649 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) 650 if (!decodeFrameInstruction(Instr)) 651 return false; 652 653 for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) 654 if (!decodeFrameInstruction(Instr)) 655 return false; 656 657 return true; 658 } 659 660 std::vector<char> CFIReaderWriter::generateEHFrameHeader( 661 const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame, 662 uint64_t EHFrameHeaderAddress, 663 std::vector<uint64_t> &FailedAddresses) const { 664 // Common PC -> FDE map to be written into .eh_frame_hdr. 665 std::map<uint64_t, uint64_t> PCToFDE; 666 667 // Presort array for binary search. 668 llvm::sort(FailedAddresses); 669 670 // Initialize PCToFDE using NewEHFrame. 671 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) { 672 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 673 if (FDE == nullptr) 674 continue; 675 const uint64_t FuncAddress = FDE->getInitialLocation(); 676 const uint64_t FDEAddress = 677 NewEHFrame.getEHFrameAddress() + FDE->getOffset(); 678 679 // Ignore unused FDEs. 680 if (FuncAddress == 0) 681 continue; 682 683 // Add the address to the map unless we failed to write it. 684 if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(), 685 FuncAddress)) { 686 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x" 687 << Twine::utohexstr(FuncAddress) << " is at 0x" 688 << Twine::utohexstr(FDEAddress) << '\n'); 689 PCToFDE[FuncAddress] = FDEAddress; 690 } 691 }; 692 693 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains " 694 << llvm::size(NewEHFrame.entries()) << " entries\n"); 695 696 // Add entries from the original .eh_frame corresponding to the functions 697 // that we did not update. 698 for (const dwarf::FrameEntry &Entry : OldEHFrame) { 699 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 700 if (FDE == nullptr) 701 continue; 702 const uint64_t FuncAddress = FDE->getInitialLocation(); 703 const uint64_t FDEAddress = 704 OldEHFrame.getEHFrameAddress() + FDE->getOffset(); 705 706 // Add the address if we failed to write it. 707 if (PCToFDE.count(FuncAddress) == 0) { 708 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x" 709 << Twine::utohexstr(FuncAddress) << " is at 0x" 710 << Twine::utohexstr(FDEAddress) << '\n'); 711 PCToFDE[FuncAddress] = FDEAddress; 712 } 713 }; 714 715 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains " 716 << llvm::size(OldEHFrame.entries()) << " entries\n"); 717 718 // Generate a new .eh_frame_hdr based on the new map. 719 720 // Header plus table of entries of size 8 bytes. 721 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8); 722 723 // Version is 1. 724 EHFrameHeader[0] = 1; 725 // Encoding of the eh_frame pointer. 726 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; 727 // Encoding of the count field to follow. 728 EHFrameHeader[2] = DW_EH_PE_udata4; 729 // Encoding of the table entries - 4-byte offset from the start of the header. 730 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; 731 732 // Address of eh_frame. Use the new one. 733 support::ulittle32_t::ref(EHFrameHeader.data() + 4) = 734 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4); 735 736 // Number of entries in the table (FDE count). 737 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size(); 738 739 // Write the table at offset 12. 740 char *Ptr = EHFrameHeader.data(); 741 uint32_t Offset = 12; 742 for (const auto &PCI : PCToFDE) { 743 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress; 744 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds"); 745 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset; 746 Offset += 4; 747 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress; 748 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds"); 749 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset; 750 Offset += 4; 751 } 752 753 return EHFrameHeader; 754 } 755 756 Error EHFrameParser::parseCIE(uint64_t StartOffset) { 757 uint8_t Version = Data.getU8(&Offset); 758 const char *Augmentation = Data.getCStr(&Offset); 759 StringRef AugmentationString(Augmentation ? Augmentation : ""); 760 uint8_t AddressSize = 761 Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); 762 Data.setAddressSize(AddressSize); 763 // Skip segment descriptor size 764 if (Version >= 4) 765 Offset += 1; 766 // Skip code alignment factor 767 Data.getULEB128(&Offset); 768 // Skip data alignment 769 Data.getSLEB128(&Offset); 770 // Skip return address register 771 if (Version == 1) 772 Offset += 1; 773 else 774 Data.getULEB128(&Offset); 775 776 uint32_t FDEPointerEncoding = DW_EH_PE_absptr; 777 uint32_t LSDAPointerEncoding = DW_EH_PE_omit; 778 // Walk the augmentation string to get all the augmentation data. 779 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { 780 switch (AugmentationString[i]) { 781 default: 782 return createStringError( 783 errc::invalid_argument, 784 "unknown augmentation character in entry at 0x%" PRIx64, StartOffset); 785 case 'L': 786 LSDAPointerEncoding = Data.getU8(&Offset); 787 break; 788 case 'P': { 789 uint32_t PersonalityEncoding = Data.getU8(&Offset); 790 std::optional<uint64_t> Personality = 791 Data.getEncodedPointer(&Offset, PersonalityEncoding, 792 EHFrameAddress ? EHFrameAddress + Offset : 0); 793 // Patch personality address 794 if (Personality) 795 PatcherCallback(*Personality, Offset, PersonalityEncoding); 796 break; 797 } 798 case 'R': 799 FDEPointerEncoding = Data.getU8(&Offset); 800 break; 801 case 'z': 802 if (i) 803 return createStringError( 804 errc::invalid_argument, 805 "'z' must be the first character at 0x%" PRIx64, StartOffset); 806 // Skip augmentation length 807 Data.getULEB128(&Offset); 808 break; 809 case 'S': 810 case 'B': 811 break; 812 } 813 } 814 Entries.emplace_back(std::make_unique<CIEInfo>( 815 FDEPointerEncoding, LSDAPointerEncoding, AugmentationString)); 816 CIEs[StartOffset] = &*Entries.back(); 817 return Error::success(); 818 } 819 820 Error EHFrameParser::parseFDE(uint64_t CIEPointer, 821 uint64_t StartStructureOffset) { 822 std::optional<uint64_t> LSDAAddress; 823 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer]; 824 825 // The address size is encoded in the CIE we reference. 826 if (!Cie) 827 return createStringError(errc::invalid_argument, 828 "parsing FDE data at 0x%" PRIx64 829 " failed due to missing CIE", 830 StartStructureOffset); 831 // Patch initial location 832 if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 833 EHFrameAddress + Offset)) { 834 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding); 835 } 836 // Skip address range 837 Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0); 838 839 // Process augmentation data for this FDE. 840 StringRef AugmentationString = Cie->AugmentationString; 841 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) { 842 // Skip augmentation length 843 Data.getULEB128(&Offset); 844 LSDAAddress = 845 Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding, 846 EHFrameAddress ? Offset + EHFrameAddress : 0); 847 // Patch LSDA address 848 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding); 849 } 850 return Error::success(); 851 } 852 853 Error EHFrameParser::parse() { 854 while (Data.isValidOffset(Offset)) { 855 const uint64_t StartOffset = Offset; 856 857 uint64_t Length; 858 DwarfFormat Format; 859 std::tie(Length, Format) = Data.getInitialLength(&Offset); 860 861 // If the Length is 0, then this CIE is a terminator 862 if (Length == 0) 863 break; 864 865 const uint64_t StartStructureOffset = Offset; 866 const uint64_t EndStructureOffset = Offset + Length; 867 868 Error Err = Error::success(); 869 const uint64_t Id = Data.getRelocatedValue(4, &Offset, 870 /*SectionIndex=*/nullptr, &Err); 871 if (Err) 872 return Err; 873 874 if (!Id) { 875 if (Error Err = parseCIE(StartOffset)) 876 return Err; 877 } else { 878 if (Error Err = parseFDE(Id, StartStructureOffset)) 879 return Err; 880 } 881 Offset = EndStructureOffset; 882 } 883 884 return Error::success(); 885 } 886 887 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress, 888 PatcherCallbackTy PatcherCallback) { 889 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback); 890 return Parser.parse(); 891 } 892 893 } // namespace bolt 894 } // namespace llvm 895