1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements functions for handling C++ exception meta data. 10 // 11 // Some of the code is taken from examples/ExceptionDemo 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "bolt/Core/Exceptions.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/BinaryFormat/Dwarf.h" 20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 21 #include "llvm/Support/Casting.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/Errc.h" 25 #include "llvm/Support/LEB128.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <map> 29 30 #undef DEBUG_TYPE 31 #define DEBUG_TYPE "bolt-exceptions" 32 33 using namespace llvm::dwarf; 34 35 namespace opts { 36 37 extern llvm::cl::OptionCategory BoltCategory; 38 39 extern llvm::cl::opt<unsigned> Verbosity; 40 41 static llvm::cl::opt<bool> 42 PrintExceptions("print-exceptions", 43 llvm::cl::desc("print exception handling data"), 44 llvm::cl::Hidden, llvm::cl::cat(BoltCategory)); 45 46 } // namespace opts 47 48 namespace llvm { 49 namespace bolt { 50 51 // Read and dump the .gcc_exception_table section entry. 52 // 53 // .gcc_except_table section contains a set of Language-Specific Data Areas - 54 // a fancy name for exception handling tables. There's one LSDA entry per 55 // function. However, we can't actually tell which function LSDA refers to 56 // unless we parse .eh_frame entry that refers to the LSDA. 57 // Then inside LSDA most addresses are encoded relative to the function start, 58 // so we need the function context in order to get to real addresses. 59 // 60 // The best visual representation of the tables comprising LSDA and 61 // relationships between them is illustrated at: 62 // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf 63 // Keep in mind that GCC implementation deviates slightly from that document. 64 // 65 // To summarize, there are 4 tables in LSDA: call site table, actions table, 66 // types table, and types index table (for indirection). The main table contains 67 // call site entries. Each call site includes a PC range that can throw an 68 // exception, a handler (landing pad), and a reference to an entry in the action 69 // table. The handler and/or action could be 0. The action entry is a head 70 // of a list of actions associated with a call site. The action table contains 71 // all such lists (it could be optimized to share list tails). Each action could 72 // be either to catch an exception of a given type, to perform a cleanup, or to 73 // propagate the exception after filtering it out (e.g. to make sure function 74 // exception specification is not violated). Catch action contains a reference 75 // to an entry in the type table, and filter action refers to an entry in the 76 // type index table to encode a set of types to filter. 77 // 78 // Call site table follows LSDA header. Action table immediately follows the 79 // call site table. 80 // 81 // Both types table and type index table start at the same location, but they 82 // grow in opposite directions (types go up, indices go down). The beginning of 83 // these tables is encoded in LSDA header. Sizes for both of the tables are not 84 // included anywhere. 85 // 86 // We have to parse all of the tables to determine their sizes. Then we have 87 // to parse the call site table and associate discovered information with 88 // actual call instructions and landing pad blocks. 89 // 90 // For the purpose of rewriting exception handling tables, we can reuse action, 91 // and type index tables in their original binary format. 92 // 93 // Type table could be encoded using position-independent references, and thus 94 // may require relocation. 95 // 96 // Ideally we should be able to re-write LSDA in-place, without the need to 97 // allocate a new space for it. Sadly there's no guarantee that the new call 98 // site table will be the same size as GCC uses uleb encodings for PC offsets. 99 // 100 // Note: some functions have LSDA entries with 0 call site entries. 101 Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData, 102 uint64_t LSDASectionAddress) { 103 assert(CurrentState == State::Disassembled && "unexpected function state"); 104 105 if (!getLSDAAddress()) 106 return Error::success(); 107 108 DWARFDataExtractor Data( 109 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()), 110 LSDASectionData.size()), 111 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); 112 uint64_t Offset = getLSDAAddress() - LSDASectionAddress; 113 assert(Data.isValidOffset(Offset) && "wrong LSDA address"); 114 115 const uint8_t LPStartEncoding = Data.getU8(&Offset); 116 uint64_t LPStart = Address; 117 if (LPStartEncoding != dwarf::DW_EH_PE_omit) { 118 std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer( 119 &Offset, LPStartEncoding, Offset + LSDASectionAddress); 120 if (!MaybeLPStart) { 121 BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: " 122 << (unsigned)LPStartEncoding << '\n'; 123 return createFatalBOLTError(""); 124 } 125 LPStart = *MaybeLPStart; 126 } 127 128 const uint8_t TTypeEncoding = Data.getU8(&Offset); 129 LSDATypeEncoding = TTypeEncoding; 130 size_t TTypeEncodingSize = 0; 131 uintptr_t TTypeEnd = 0; 132 if (TTypeEncoding != DW_EH_PE_omit) { 133 TTypeEnd = Data.getULEB128(&Offset); 134 TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); 135 } 136 137 if (opts::PrintExceptions) { 138 BC.outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress()) 139 << " for function " << *this << "]:\n"; 140 BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding) 141 << '\n'; 142 BC.outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n'; 143 BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) 144 << '\n'; 145 BC.outs() << "TType End = " << TTypeEnd << '\n'; 146 } 147 148 // Table to store list of indices in type table. Entries are uleb128 values. 149 const uint64_t TypeIndexTableStart = Offset + TTypeEnd; 150 151 // Offset past the last decoded index. 152 uint64_t MaxTypeIndexTableOffset = 0; 153 154 // Max positive index used in type table. 155 unsigned MaxTypeIndex = 0; 156 157 // The actual type info table starts at the same location, but grows in 158 // opposite direction. TTypeEncoding is used to encode stored values. 159 const uint64_t TypeTableStart = Offset + TTypeEnd; 160 161 uint8_t CallSiteEncoding = Data.getU8(&Offset); 162 uint32_t CallSiteTableLength = Data.getULEB128(&Offset); 163 uint64_t CallSiteTableStart = Offset; 164 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength; 165 uint64_t CallSitePtr = CallSiteTableStart; 166 uint64_t ActionTableStart = CallSiteTableEnd; 167 168 if (opts::PrintExceptions) { 169 BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; 170 BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n'; 171 BC.outs() << '\n'; 172 } 173 174 this->HasEHRanges = CallSitePtr < CallSiteTableEnd; 175 const uint64_t RangeBase = getAddress(); 176 while (CallSitePtr < CallSiteTableEnd) { 177 uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 178 CallSitePtr + LSDASectionAddress); 179 uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 180 CallSitePtr + LSDASectionAddress); 181 uint64_t LandingPad = *Data.getEncodedPointer( 182 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress); 183 uint64_t ActionEntry = Data.getULEB128(&CallSitePtr); 184 if (LandingPad) 185 LandingPad += LPStart; 186 187 if (opts::PrintExceptions) { 188 BC.outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) 189 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) 190 << "); landing pad: 0x" << Twine::utohexstr(LandingPad) 191 << "; action entry: 0x" << Twine::utohexstr(ActionEntry) 192 << "\n"; 193 BC.outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) 194 << '\n'; 195 } 196 197 // Create a handler entry if necessary. 198 MCSymbol *LPSymbol = nullptr; 199 if (LandingPad) { 200 // Verify if landing pad code is located outside current function 201 // Support landing pad to builtin_unreachable 202 if (LandingPad < Address || LandingPad > Address + getSize()) { 203 BinaryFunction *Fragment = 204 BC.getBinaryFunctionContainingAddress(LandingPad); 205 assert(Fragment != nullptr && 206 "BOLT-ERROR: cannot find landing pad fragment"); 207 BC.addInterproceduralReference(this, Fragment->getAddress()); 208 BC.processInterproceduralReferences(); 209 assert(BC.areRelatedFragments(this, Fragment) && 210 "BOLT-ERROR: cannot have landing pads in different functions"); 211 setHasIndirectTargetToSplitFragment(true); 212 BC.addFragmentsToSkip(this); 213 return Error::success(); 214 } 215 216 const uint64_t LPOffset = LandingPad - getAddress(); 217 if (!getInstructionAtOffset(LPOffset)) { 218 if (opts::Verbosity >= 1) 219 BC.errs() << "BOLT-WARNING: landing pad " 220 << Twine::utohexstr(LPOffset) 221 << " not pointing to an instruction in function " << *this 222 << " - ignoring.\n"; 223 } else { 224 auto Label = Labels.find(LPOffset); 225 if (Label != Labels.end()) { 226 LPSymbol = Label->second; 227 } else { 228 LPSymbol = BC.Ctx->createNamedTempSymbol("LP"); 229 Labels[LPOffset] = LPSymbol; 230 } 231 } 232 } 233 234 // Mark all call instructions in the range. 235 auto II = Instructions.find(Start); 236 auto IE = Instructions.end(); 237 assert(II != IE && "exception range not pointing to an instruction"); 238 do { 239 MCInst &Instruction = II->second; 240 if (BC.MIB->isCall(Instruction) && 241 !BC.MIB->getConditionalTailCall(Instruction)) { 242 assert(!BC.MIB->isInvoke(Instruction) && 243 "overlapping exception ranges detected"); 244 // Add extra operands to a call instruction making it an invoke from 245 // now on. 246 BC.MIB->addEHInfo(Instruction, 247 MCPlus::MCLandingPad(LPSymbol, ActionEntry)); 248 } 249 ++II; 250 } while (II != IE && II->first < Start + Length); 251 252 if (ActionEntry != 0) { 253 auto printType = [&](int Index, raw_ostream &OS) { 254 assert(Index > 0 && "only positive indices are valid"); 255 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 256 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 257 uint64_t TypeAddress = 258 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 259 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) 260 TypeAddress = 0; 261 if (TypeAddress == 0) { 262 OS << "<all>"; 263 return; 264 } 265 if (TTypeEncoding & DW_EH_PE_indirect) { 266 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 267 assert(PointerOrErr && "failed to decode indirect address"); 268 TypeAddress = *PointerOrErr; 269 } 270 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) 271 OS << TypeSymBD->getName(); 272 else 273 OS << "0x" << Twine::utohexstr(TypeAddress); 274 }; 275 if (opts::PrintExceptions) 276 BC.outs() << " actions: "; 277 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1; 278 int64_t ActionType; 279 int64_t ActionNext; 280 const char *Sep = ""; 281 do { 282 ActionType = Data.getSLEB128(&ActionPtr); 283 const uint32_t Self = ActionPtr; 284 ActionNext = Data.getSLEB128(&ActionPtr); 285 if (opts::PrintExceptions) 286 BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") "; 287 if (ActionType == 0) { 288 if (opts::PrintExceptions) 289 BC.outs() << "cleanup"; 290 } else if (ActionType > 0) { 291 // It's an index into a type table. 292 MaxTypeIndex = 293 std::max(MaxTypeIndex, static_cast<unsigned>(ActionType)); 294 if (opts::PrintExceptions) { 295 BC.outs() << "catch type "; 296 printType(ActionType, BC.outs()); 297 } 298 } else { // ActionType < 0 299 if (opts::PrintExceptions) 300 BC.outs() << "filter exception types "; 301 const char *TSep = ""; 302 // ActionType is a negative *byte* offset into *uleb128-encoded* table 303 // of indices with base 1. 304 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are 305 // encoded using uleb128 thus we cannot directly dereference them. 306 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1; 307 while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) { 308 MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index)); 309 if (opts::PrintExceptions) { 310 BC.outs() << TSep; 311 printType(Index, BC.outs()); 312 TSep = ", "; 313 } 314 } 315 MaxTypeIndexTableOffset = std::max( 316 MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart); 317 } 318 319 Sep = "; "; 320 321 ActionPtr = Self + ActionNext; 322 } while (ActionNext); 323 if (opts::PrintExceptions) 324 BC.outs() << '\n'; 325 } 326 } 327 if (opts::PrintExceptions) 328 BC.outs() << '\n'; 329 330 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <= 331 Data.getData().size() && 332 "LSDA entry has crossed section boundary"); 333 334 if (TTypeEnd) { 335 LSDAActionTable = LSDASectionData.slice( 336 ActionTableStart, TypeIndexTableStart - 337 MaxTypeIndex * TTypeEncodingSize - 338 ActionTableStart); 339 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) { 340 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 341 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 342 uint64_t TypeAddress = 343 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 344 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) 345 TypeAddress = 0; 346 if (TTypeEncoding & DW_EH_PE_indirect) { 347 LSDATypeAddressTable.emplace_back(TypeAddress); 348 if (TypeAddress) { 349 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 350 assert(PointerOrErr && "failed to decode indirect address"); 351 TypeAddress = *PointerOrErr; 352 } 353 } 354 LSDATypeTable.emplace_back(TypeAddress); 355 } 356 LSDATypeIndexTable = 357 LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset); 358 } 359 return Error::success(); 360 } 361 362 void BinaryFunction::updateEHRanges() { 363 if (getSize() == 0) 364 return; 365 366 assert(CurrentState == State::CFG_Finalized && "unexpected state"); 367 368 // Build call sites table. 369 struct EHInfo { 370 const MCSymbol *LP; // landing pad 371 uint64_t Action; 372 }; 373 374 // Sites to update. 375 CallSitesList Sites; 376 377 for (FunctionFragment &FF : getLayout().fragments()) { 378 // If previous call can throw, this is its exception handler. 379 EHInfo PreviousEH = {nullptr, 0}; 380 381 // Marker for the beginning of exceptions range. 382 const MCSymbol *StartRange = nullptr; 383 384 for (BinaryBasicBlock *const BB : FF) { 385 for (MCInst &Instr : *BB) { 386 if (!BC.MIB->isCall(Instr)) 387 continue; 388 389 // Instruction can throw an exception that should be handled. 390 const bool Throws = BC.MIB->isInvoke(Instr); 391 392 // Ignore the call if it's a continuation of a no-throw gap. 393 if (!Throws && !StartRange) 394 continue; 395 396 // Extract exception handling information from the instruction. 397 const MCSymbol *LP = nullptr; 398 uint64_t Action = 0; 399 if (const std::optional<MCPlus::MCLandingPad> EHInfo = 400 BC.MIB->getEHInfo(Instr)) 401 std::tie(LP, Action) = *EHInfo; 402 403 // No action if the exception handler has not changed. 404 if (Throws && StartRange && PreviousEH.LP == LP && 405 PreviousEH.Action == Action) 406 continue; 407 408 // Same symbol is used for the beginning and the end of the range. 409 MCSymbol *EHSymbol; 410 if (MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr)) { 411 EHSymbol = InstrLabel; 412 } else { 413 std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex); 414 EHSymbol = BC.MIB->getOrCreateInstLabel(Instr, "EH", BC.Ctx.get()); 415 } 416 417 // At this point we could be in one of the following states: 418 // 419 // I. Exception handler has changed and we need to close previous range 420 // and start a new one. 421 // 422 // II. Start a new exception range after the gap. 423 // 424 // III. Close current exception range and start a new gap. 425 const MCSymbol *EndRange; 426 if (StartRange) { 427 // I, III: 428 EndRange = EHSymbol; 429 } else { 430 // II: 431 StartRange = EHSymbol; 432 EndRange = nullptr; 433 } 434 435 // Close the previous range. 436 if (EndRange) 437 Sites.emplace_back( 438 FF.getFragmentNum(), 439 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 440 441 if (Throws) { 442 // I, II: 443 StartRange = EHSymbol; 444 PreviousEH = EHInfo{LP, Action}; 445 } else { 446 StartRange = nullptr; 447 } 448 } 449 } 450 451 // Check if we need to close the range. 452 if (StartRange) { 453 const MCSymbol *EndRange = getFunctionEndLabel(FF.getFragmentNum()); 454 Sites.emplace_back( 455 FF.getFragmentNum(), 456 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 457 } 458 } 459 460 addCallSites(Sites); 461 } 462 463 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; 464 465 CFIReaderWriter::CFIReaderWriter(BinaryContext &BC, 466 const DWARFDebugFrame &EHFrame) 467 : BC(BC) { 468 // Prepare FDEs for fast lookup 469 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) { 470 const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry); 471 // Skip CIEs. 472 if (!CurFDE) 473 continue; 474 // There could me multiple FDEs with the same initial address, and perhaps 475 // different sizes (address ranges). Use the first entry with non-zero size. 476 auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation()); 477 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) { 478 if (CurFDE->getAddressRange()) { 479 if (FDEI->second->getAddressRange() == 0) { 480 FDEI->second = CurFDE; 481 } else if (opts::Verbosity > 0) { 482 BC.errs() << "BOLT-WARNING: different FDEs for function at 0x" 483 << Twine::utohexstr(FDEI->first) 484 << " detected; sizes: " << FDEI->second->getAddressRange() 485 << " and " << CurFDE->getAddressRange() << '\n'; 486 } 487 } 488 } else { 489 FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE); 490 } 491 } 492 } 493 494 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { 495 uint64_t Address = Function.getAddress(); 496 auto I = FDEs.find(Address); 497 // Ignore zero-length FDE ranges. 498 if (I == FDEs.end() || !I->second->getAddressRange()) 499 return true; 500 501 const FDE &CurFDE = *I->second; 502 std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); 503 Function.setLSDAAddress(LSDA ? *LSDA : 0); 504 505 uint64_t Offset = Function.getFirstInstructionOffset(); 506 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); 507 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor(); 508 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) { 509 Function.setPersonalityFunction( 510 *CurFDE.getLinkedCIE()->getPersonalityAddress()); 511 Function.setPersonalityEncoding( 512 *CurFDE.getLinkedCIE()->getPersonalityEncoding()); 513 } 514 515 auto decodeFrameInstruction = [this, &Function, &Offset, Address, 516 CodeAlignment, DataAlignment]( 517 const CFIProgram::Instruction &Instr) { 518 uint8_t Opcode = Instr.Opcode; 519 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) 520 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; 521 switch (Instr.Opcode) { 522 case DW_CFA_nop: 523 break; 524 case DW_CFA_advance_loc4: 525 case DW_CFA_advance_loc2: 526 case DW_CFA_advance_loc1: 527 case DW_CFA_advance_loc: 528 // Advance our current address 529 Offset += CodeAlignment * int64_t(Instr.Ops[0]); 530 break; 531 case DW_CFA_offset_extended_sf: 532 Function.addCFIInstruction( 533 Offset, 534 MCCFIInstruction::createOffset( 535 nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1]))); 536 break; 537 case DW_CFA_offset_extended: 538 case DW_CFA_offset: 539 Function.addCFIInstruction( 540 Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0], 541 DataAlignment * Instr.Ops[1])); 542 break; 543 case DW_CFA_restore_extended: 544 case DW_CFA_restore: 545 Function.addCFIInstruction( 546 Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0])); 547 break; 548 case DW_CFA_set_loc: 549 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds"); 550 assert(Instr.Ops[0] <= Address + Function.getSize() && 551 "set_loc out of function bounds"); 552 Offset = Instr.Ops[0] - Address; 553 break; 554 555 case DW_CFA_undefined: 556 Function.addCFIInstruction( 557 Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0])); 558 break; 559 case DW_CFA_same_value: 560 Function.addCFIInstruction( 561 Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0])); 562 break; 563 case DW_CFA_register: 564 Function.addCFIInstruction( 565 Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0], 566 Instr.Ops[1])); 567 break; 568 case DW_CFA_remember_state: 569 Function.addCFIInstruction( 570 Offset, MCCFIInstruction::createRememberState(nullptr)); 571 break; 572 case DW_CFA_restore_state: 573 Function.addCFIInstruction(Offset, 574 MCCFIInstruction::createRestoreState(nullptr)); 575 break; 576 case DW_CFA_def_cfa: 577 Function.addCFIInstruction( 578 Offset, 579 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1])); 580 break; 581 case DW_CFA_def_cfa_sf: 582 Function.addCFIInstruction( 583 Offset, 584 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], 585 DataAlignment * int64_t(Instr.Ops[1]))); 586 break; 587 case DW_CFA_def_cfa_register: 588 Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister( 589 nullptr, Instr.Ops[0])); 590 break; 591 case DW_CFA_def_cfa_offset: 592 Function.addCFIInstruction( 593 Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0])); 594 break; 595 case DW_CFA_def_cfa_offset_sf: 596 Function.addCFIInstruction( 597 Offset, MCCFIInstruction::cfiDefCfaOffset( 598 nullptr, DataAlignment * int64_t(Instr.Ops[0]))); 599 break; 600 case DW_CFA_GNU_args_size: 601 Function.addCFIInstruction( 602 Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0])); 603 Function.setUsesGnuArgsSize(); 604 break; 605 case DW_CFA_val_offset_sf: 606 case DW_CFA_val_offset: 607 if (opts::Verbosity >= 1) { 608 BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n"; 609 } 610 return false; 611 case DW_CFA_def_cfa_expression: 612 case DW_CFA_val_expression: 613 case DW_CFA_expression: { 614 StringRef ExprBytes = Instr.Expression->getData(); 615 std::string Str; 616 raw_string_ostream OS(Str); 617 // Manually encode this instruction using CFI escape 618 OS << Opcode; 619 if (Opcode != DW_CFA_def_cfa_expression) 620 encodeULEB128(Instr.Ops[0], OS); 621 encodeULEB128(ExprBytes.size(), OS); 622 OS << ExprBytes; 623 Function.addCFIInstruction( 624 Offset, MCCFIInstruction::createEscape(nullptr, OS.str())); 625 break; 626 } 627 case DW_CFA_MIPS_advance_loc8: 628 if (opts::Verbosity >= 1) 629 BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; 630 return false; 631 case DW_CFA_GNU_window_save: 632 // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same 633 // id but mean different things. The latter is used in AArch64. 634 if (Function.getBinaryContext().isAArch64()) { 635 Function.addCFIInstruction( 636 Offset, MCCFIInstruction::createNegateRAState(nullptr)); 637 break; 638 } 639 if (opts::Verbosity >= 1) 640 BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n"; 641 return false; 642 case DW_CFA_lo_user: 643 case DW_CFA_hi_user: 644 if (opts::Verbosity >= 1) 645 BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n"; 646 return false; 647 default: 648 if (opts::Verbosity >= 1) 649 BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: " 650 << Instr.Opcode << '\n'; 651 return false; 652 } 653 654 return true; 655 }; 656 657 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) 658 if (!decodeFrameInstruction(Instr)) 659 return false; 660 661 for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) 662 if (!decodeFrameInstruction(Instr)) 663 return false; 664 665 return true; 666 } 667 668 std::vector<char> 669 CFIReaderWriter::generateEHFrameHeader(const DWARFDebugFrame &OldEHFrame, 670 const DWARFDebugFrame &NewEHFrame, 671 uint64_t EHFrameHeaderAddress) const { 672 // Common PC -> FDE map to be written into .eh_frame_hdr. 673 std::map<uint64_t, uint64_t> PCToFDE; 674 675 // Initialize PCToFDE using NewEHFrame. 676 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) { 677 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 678 if (FDE == nullptr) 679 continue; 680 const uint64_t FuncAddress = FDE->getInitialLocation(); 681 const uint64_t FDEAddress = 682 NewEHFrame.getEHFrameAddress() + FDE->getOffset(); 683 684 // Ignore unused FDEs. 685 if (FuncAddress == 0) 686 continue; 687 688 // Add the address to the map unless we failed to write it. 689 PCToFDE[FuncAddress] = FDEAddress; 690 }; 691 692 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains " 693 << llvm::size(NewEHFrame.entries()) << " entries\n"); 694 695 // Add entries from the original .eh_frame corresponding to the functions 696 // that we did not update. 697 for (const dwarf::FrameEntry &Entry : OldEHFrame) { 698 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 699 if (FDE == nullptr) 700 continue; 701 const uint64_t FuncAddress = FDE->getInitialLocation(); 702 const uint64_t FDEAddress = 703 OldEHFrame.getEHFrameAddress() + FDE->getOffset(); 704 705 // Add the address if we failed to write it. 706 if (PCToFDE.count(FuncAddress) == 0) { 707 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x" 708 << Twine::utohexstr(FuncAddress) << " is at 0x" 709 << Twine::utohexstr(FDEAddress) << '\n'); 710 PCToFDE[FuncAddress] = FDEAddress; 711 } 712 }; 713 714 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains " 715 << llvm::size(OldEHFrame.entries()) << " entries\n"); 716 717 // Generate a new .eh_frame_hdr based on the new map. 718 719 // Header plus table of entries of size 8 bytes. 720 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8); 721 722 // Version is 1. 723 EHFrameHeader[0] = 1; 724 // Encoding of the eh_frame pointer. 725 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; 726 // Encoding of the count field to follow. 727 EHFrameHeader[2] = DW_EH_PE_udata4; 728 // Encoding of the table entries - 4-byte offset from the start of the header. 729 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; 730 731 // Address of eh_frame. Use the new one. 732 support::ulittle32_t::ref(EHFrameHeader.data() + 4) = 733 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4); 734 735 // Number of entries in the table (FDE count). 736 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size(); 737 738 // Write the table at offset 12. 739 char *Ptr = EHFrameHeader.data(); 740 uint32_t Offset = 12; 741 for (const auto &PCI : PCToFDE) { 742 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress; 743 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds"); 744 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset; 745 Offset += 4; 746 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress; 747 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds"); 748 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset; 749 Offset += 4; 750 } 751 752 return EHFrameHeader; 753 } 754 755 Error EHFrameParser::parseCIE(uint64_t StartOffset) { 756 uint8_t Version = Data.getU8(&Offset); 757 const char *Augmentation = Data.getCStr(&Offset); 758 StringRef AugmentationString(Augmentation ? Augmentation : ""); 759 uint8_t AddressSize = 760 Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); 761 Data.setAddressSize(AddressSize); 762 // Skip segment descriptor size 763 if (Version >= 4) 764 Offset += 1; 765 // Skip code alignment factor 766 Data.getULEB128(&Offset); 767 // Skip data alignment 768 Data.getSLEB128(&Offset); 769 // Skip return address register 770 if (Version == 1) 771 Offset += 1; 772 else 773 Data.getULEB128(&Offset); 774 775 uint32_t FDEPointerEncoding = DW_EH_PE_absptr; 776 uint32_t LSDAPointerEncoding = DW_EH_PE_omit; 777 // Walk the augmentation string to get all the augmentation data. 778 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { 779 switch (AugmentationString[i]) { 780 default: 781 return createStringError( 782 errc::invalid_argument, 783 "unknown augmentation character in entry at 0x%" PRIx64, StartOffset); 784 case 'L': 785 LSDAPointerEncoding = Data.getU8(&Offset); 786 break; 787 case 'P': { 788 uint32_t PersonalityEncoding = Data.getU8(&Offset); 789 std::optional<uint64_t> Personality = 790 Data.getEncodedPointer(&Offset, PersonalityEncoding, 791 EHFrameAddress ? EHFrameAddress + Offset : 0); 792 // Patch personality address 793 if (Personality) 794 PatcherCallback(*Personality, Offset, PersonalityEncoding); 795 break; 796 } 797 case 'R': 798 FDEPointerEncoding = Data.getU8(&Offset); 799 break; 800 case 'z': 801 if (i) 802 return createStringError( 803 errc::invalid_argument, 804 "'z' must be the first character at 0x%" PRIx64, StartOffset); 805 // Skip augmentation length 806 Data.getULEB128(&Offset); 807 break; 808 case 'S': 809 case 'B': 810 break; 811 } 812 } 813 Entries.emplace_back(std::make_unique<CIEInfo>( 814 FDEPointerEncoding, LSDAPointerEncoding, AugmentationString)); 815 CIEs[StartOffset] = &*Entries.back(); 816 return Error::success(); 817 } 818 819 Error EHFrameParser::parseFDE(uint64_t CIEPointer, 820 uint64_t StartStructureOffset) { 821 std::optional<uint64_t> LSDAAddress; 822 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer]; 823 824 // The address size is encoded in the CIE we reference. 825 if (!Cie) 826 return createStringError(errc::invalid_argument, 827 "parsing FDE data at 0x%" PRIx64 828 " failed due to missing CIE", 829 StartStructureOffset); 830 // Patch initial location 831 if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 832 EHFrameAddress + Offset)) { 833 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding); 834 } 835 // Skip address range 836 Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0); 837 838 // Process augmentation data for this FDE. 839 StringRef AugmentationString = Cie->AugmentationString; 840 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) { 841 // Skip augmentation length 842 Data.getULEB128(&Offset); 843 LSDAAddress = 844 Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding, 845 EHFrameAddress ? Offset + EHFrameAddress : 0); 846 // Patch LSDA address 847 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding); 848 } 849 return Error::success(); 850 } 851 852 Error EHFrameParser::parse() { 853 while (Data.isValidOffset(Offset)) { 854 const uint64_t StartOffset = Offset; 855 856 uint64_t Length; 857 DwarfFormat Format; 858 std::tie(Length, Format) = Data.getInitialLength(&Offset); 859 860 // If the Length is 0, then this CIE is a terminator 861 if (Length == 0) 862 break; 863 864 const uint64_t StartStructureOffset = Offset; 865 const uint64_t EndStructureOffset = Offset + Length; 866 867 Error Err = Error::success(); 868 const uint64_t Id = Data.getRelocatedValue(4, &Offset, 869 /*SectionIndex=*/nullptr, &Err); 870 if (Err) 871 return Err; 872 873 if (!Id) { 874 if (Error Err = parseCIE(StartOffset)) 875 return Err; 876 } else { 877 if (Error Err = parseFDE(Id, StartStructureOffset)) 878 return Err; 879 } 880 Offset = EndStructureOffset; 881 } 882 883 return Error::success(); 884 } 885 886 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress, 887 PatcherCallbackTy PatcherCallback) { 888 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback); 889 return Parser.parse(); 890 } 891 892 } // namespace bolt 893 } // namespace llvm 894