1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements functions for handling C++ exception meta data. 10 // 11 // Some of the code is taken from examples/ExceptionDemo 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "bolt/Core/Exceptions.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/BinaryFormat/Dwarf.h" 20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 21 #include "llvm/Support/Casting.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/Errc.h" 25 #include "llvm/Support/LEB128.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <map> 29 30 #undef DEBUG_TYPE 31 #define DEBUG_TYPE "bolt-exceptions" 32 33 using namespace llvm::dwarf; 34 35 namespace opts { 36 37 extern llvm::cl::OptionCategory BoltCategory; 38 39 extern llvm::cl::opt<unsigned> Verbosity; 40 41 static llvm::cl::opt<bool> 42 PrintExceptions("print-exceptions", 43 llvm::cl::desc("print exception handling data"), 44 llvm::cl::Hidden, llvm::cl::cat(BoltCategory)); 45 46 } // namespace opts 47 48 namespace llvm { 49 namespace bolt { 50 51 // Read and dump the .gcc_exception_table section entry. 52 // 53 // .gcc_except_table section contains a set of Language-Specific Data Areas - 54 // a fancy name for exception handling tables. There's one LSDA entry per 55 // function. However, we can't actually tell which function LSDA refers to 56 // unless we parse .eh_frame entry that refers to the LSDA. 57 // Then inside LSDA most addresses are encoded relative to the function start, 58 // so we need the function context in order to get to real addresses. 59 // 60 // The best visual representation of the tables comprising LSDA and 61 // relationships between them is illustrated at: 62 // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf 63 // Keep in mind that GCC implementation deviates slightly from that document. 64 // 65 // To summarize, there are 4 tables in LSDA: call site table, actions table, 66 // types table, and types index table (for indirection). The main table contains 67 // call site entries. Each call site includes a PC range that can throw an 68 // exception, a handler (landing pad), and a reference to an entry in the action 69 // table. The handler and/or action could be 0. The action entry is a head 70 // of a list of actions associated with a call site. The action table contains 71 // all such lists (it could be optimized to share list tails). Each action could 72 // be either to catch an exception of a given type, to perform a cleanup, or to 73 // propagate the exception after filtering it out (e.g. to make sure function 74 // exception specification is not violated). Catch action contains a reference 75 // to an entry in the type table, and filter action refers to an entry in the 76 // type index table to encode a set of types to filter. 77 // 78 // Call site table follows LSDA header. Action table immediately follows the 79 // call site table. 80 // 81 // Both types table and type index table start at the same location, but they 82 // grow in opposite directions (types go up, indices go down). The beginning of 83 // these tables is encoded in LSDA header. Sizes for both of the tables are not 84 // included anywhere. 85 // 86 // We have to parse all of the tables to determine their sizes. Then we have 87 // to parse the call site table and associate discovered information with 88 // actual call instructions and landing pad blocks. 89 // 90 // For the purpose of rewriting exception handling tables, we can reuse action, 91 // and type index tables in their original binary format. 92 // 93 // Type table could be encoded using position-independent references, and thus 94 // may require relocation. 95 // 96 // Ideally we should be able to re-write LSDA in-place, without the need to 97 // allocate a new space for it. Sadly there's no guarantee that the new call 98 // site table will be the same size as GCC uses uleb encodings for PC offsets. 99 // 100 // Note: some functions have LSDA entries with 0 call site entries. 101 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData, 102 uint64_t LSDASectionAddress) { 103 assert(CurrentState == State::Disassembled && "unexpected function state"); 104 105 if (!getLSDAAddress()) 106 return; 107 108 DWARFDataExtractor Data( 109 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()), 110 LSDASectionData.size()), 111 BC.DwCtx->getDWARFObj().isLittleEndian(), 8); 112 uint64_t Offset = getLSDAAddress() - LSDASectionAddress; 113 assert(Data.isValidOffset(Offset) && "wrong LSDA address"); 114 115 uint8_t LPStartEncoding = Data.getU8(&Offset); 116 uint64_t LPStart = 0; 117 // Convert to offset if LPStartEncoding is typed absptr DW_EH_PE_absptr 118 if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer( 119 &Offset, LPStartEncoding, Offset + LSDASectionAddress)) 120 LPStart = (LPStartEncoding && 0xFF == 0) ? *MaybeLPStart 121 : *MaybeLPStart - Address; 122 123 const uint8_t TTypeEncoding = Data.getU8(&Offset); 124 size_t TTypeEncodingSize = 0; 125 uintptr_t TTypeEnd = 0; 126 if (TTypeEncoding != DW_EH_PE_omit) { 127 TTypeEnd = Data.getULEB128(&Offset); 128 TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); 129 } 130 131 if (opts::PrintExceptions) { 132 outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress()) 133 << " for function " << *this << "]:\n"; 134 outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding) 135 << '\n'; 136 outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n'; 137 outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n'; 138 outs() << "TType End = " << TTypeEnd << '\n'; 139 } 140 141 // Table to store list of indices in type table. Entries are uleb128 values. 142 const uint64_t TypeIndexTableStart = Offset + TTypeEnd; 143 144 // Offset past the last decoded index. 145 uint64_t MaxTypeIndexTableOffset = 0; 146 147 // Max positive index used in type table. 148 unsigned MaxTypeIndex = 0; 149 150 // The actual type info table starts at the same location, but grows in 151 // opposite direction. TTypeEncoding is used to encode stored values. 152 const uint64_t TypeTableStart = Offset + TTypeEnd; 153 154 uint8_t CallSiteEncoding = Data.getU8(&Offset); 155 uint32_t CallSiteTableLength = Data.getULEB128(&Offset); 156 uint64_t CallSiteTableStart = Offset; 157 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength; 158 uint64_t CallSitePtr = CallSiteTableStart; 159 uint64_t ActionTableStart = CallSiteTableEnd; 160 161 if (opts::PrintExceptions) { 162 outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; 163 outs() << "CallSite table length = " << CallSiteTableLength << '\n'; 164 outs() << '\n'; 165 } 166 167 this->HasEHRanges = CallSitePtr < CallSiteTableEnd; 168 const uint64_t RangeBase = getAddress(); 169 while (CallSitePtr < CallSiteTableEnd) { 170 uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 171 CallSitePtr + LSDASectionAddress); 172 uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 173 CallSitePtr + LSDASectionAddress); 174 uint64_t LandingPad = *Data.getEncodedPointer( 175 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress); 176 uint64_t ActionEntry = Data.getULEB128(&CallSitePtr); 177 178 uint64_t LPOffset = LPStart + LandingPad; 179 uint64_t LPAddress = Address + LPOffset; 180 181 // Verify if landing pad code is located outside current function 182 // Support landing pad to builtin_unreachable 183 if (LPAddress < Address || LPAddress > Address + getSize()) { 184 BinaryFunction *Fragment = 185 BC.getBinaryFunctionContainingAddress(LPAddress); 186 assert(Fragment != nullptr && 187 "BOLT-ERROR: cannot find landing pad fragment"); 188 BC.addInterproceduralReference(this, Fragment->getAddress()); 189 BC.processInterproceduralReferences(); 190 auto isFragmentOf = [](BinaryFunction *Fragment, 191 BinaryFunction *Parent) -> bool { 192 return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); 193 }; 194 assert((isFragmentOf(this, Fragment) || isFragmentOf(Fragment, this)) && 195 "BOLT-ERROR: cannot have landing pads in different " 196 "functions"); 197 setHasIndirectTargetToSplitFragment(true); 198 BC.addFragmentsToSkip(this); 199 return; 200 } 201 202 if (opts::PrintExceptions) { 203 outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) 204 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) 205 << "); landing pad: 0x" << Twine::utohexstr(LPOffset) 206 << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n"; 207 outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) 208 << '\n'; 209 } 210 211 // Create a handler entry if necessary. 212 MCSymbol *LPSymbol = nullptr; 213 if (LPOffset) { 214 if (!getInstructionAtOffset(LPOffset)) { 215 if (opts::Verbosity >= 1) 216 errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LPOffset) 217 << " not pointing to an instruction in function " << *this 218 << " - ignoring.\n"; 219 } else { 220 auto Label = Labels.find(LPOffset); 221 if (Label != Labels.end()) { 222 LPSymbol = Label->second; 223 } else { 224 LPSymbol = BC.Ctx->createNamedTempSymbol("LP"); 225 Labels[LPOffset] = LPSymbol; 226 } 227 } 228 } 229 230 // Mark all call instructions in the range. 231 auto II = Instructions.find(Start); 232 auto IE = Instructions.end(); 233 assert(II != IE && "exception range not pointing to an instruction"); 234 do { 235 MCInst &Instruction = II->second; 236 if (BC.MIB->isCall(Instruction) && 237 !BC.MIB->getConditionalTailCall(Instruction)) { 238 assert(!BC.MIB->isInvoke(Instruction) && 239 "overlapping exception ranges detected"); 240 // Add extra operands to a call instruction making it an invoke from 241 // now on. 242 BC.MIB->addEHInfo(Instruction, 243 MCPlus::MCLandingPad(LPSymbol, ActionEntry)); 244 } 245 ++II; 246 } while (II != IE && II->first < Start + Length); 247 248 if (ActionEntry != 0) { 249 auto printType = [&](int Index, raw_ostream &OS) { 250 assert(Index > 0 && "only positive indices are valid"); 251 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 252 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 253 uint64_t TypeAddress = 254 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 255 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) 256 TypeAddress = 0; 257 if (TypeAddress == 0) { 258 OS << "<all>"; 259 return; 260 } 261 if (TTypeEncoding & DW_EH_PE_indirect) { 262 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 263 assert(PointerOrErr && "failed to decode indirect address"); 264 TypeAddress = *PointerOrErr; 265 } 266 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) 267 OS << TypeSymBD->getName(); 268 else 269 OS << "0x" << Twine::utohexstr(TypeAddress); 270 }; 271 if (opts::PrintExceptions) 272 outs() << " actions: "; 273 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1; 274 int64_t ActionType; 275 int64_t ActionNext; 276 const char *Sep = ""; 277 do { 278 ActionType = Data.getSLEB128(&ActionPtr); 279 const uint32_t Self = ActionPtr; 280 ActionNext = Data.getSLEB128(&ActionPtr); 281 if (opts::PrintExceptions) 282 outs() << Sep << "(" << ActionType << ", " << ActionNext << ") "; 283 if (ActionType == 0) { 284 if (opts::PrintExceptions) 285 outs() << "cleanup"; 286 } else if (ActionType > 0) { 287 // It's an index into a type table. 288 MaxTypeIndex = 289 std::max(MaxTypeIndex, static_cast<unsigned>(ActionType)); 290 if (opts::PrintExceptions) { 291 outs() << "catch type "; 292 printType(ActionType, outs()); 293 } 294 } else { // ActionType < 0 295 if (opts::PrintExceptions) 296 outs() << "filter exception types "; 297 const char *TSep = ""; 298 // ActionType is a negative *byte* offset into *uleb128-encoded* table 299 // of indices with base 1. 300 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are 301 // encoded using uleb128 thus we cannot directly dereference them. 302 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1; 303 while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) { 304 MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index)); 305 if (opts::PrintExceptions) { 306 outs() << TSep; 307 printType(Index, outs()); 308 TSep = ", "; 309 } 310 } 311 MaxTypeIndexTableOffset = std::max( 312 MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart); 313 } 314 315 Sep = "; "; 316 317 ActionPtr = Self + ActionNext; 318 } while (ActionNext); 319 if (opts::PrintExceptions) 320 outs() << '\n'; 321 } 322 } 323 if (opts::PrintExceptions) 324 outs() << '\n'; 325 326 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <= 327 Data.getData().size() && 328 "LSDA entry has crossed section boundary"); 329 330 if (TTypeEnd) { 331 LSDAActionTable = LSDASectionData.slice( 332 ActionTableStart, TypeIndexTableStart - 333 MaxTypeIndex * TTypeEncodingSize - 334 ActionTableStart); 335 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) { 336 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 337 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 338 uint64_t TypeAddress = 339 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 340 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) 341 TypeAddress = 0; 342 if (TTypeEncoding & DW_EH_PE_indirect) { 343 LSDATypeAddressTable.emplace_back(TypeAddress); 344 if (TypeAddress) { 345 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 346 assert(PointerOrErr && "failed to decode indirect address"); 347 TypeAddress = *PointerOrErr; 348 } 349 } 350 LSDATypeTable.emplace_back(TypeAddress); 351 } 352 LSDATypeIndexTable = 353 LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset); 354 } 355 } 356 357 void BinaryFunction::updateEHRanges() { 358 if (getSize() == 0) 359 return; 360 361 assert(CurrentState == State::CFG_Finalized && "unexpected state"); 362 363 // Build call sites table. 364 struct EHInfo { 365 const MCSymbol *LP; // landing pad 366 uint64_t Action; 367 }; 368 369 // If previous call can throw, this is its exception handler. 370 EHInfo PreviousEH = {nullptr, 0}; 371 372 // Marker for the beginning of exceptions range. 373 const MCSymbol *StartRange = nullptr; 374 375 // Indicates whether the start range is located in a cold part. 376 bool IsStartInCold = false; 377 378 // Have we crossed hot/cold border for split functions? 379 bool SeenCold = false; 380 381 // Sites to update - either regular or cold. 382 CallSitesType *Sites = &CallSites; 383 384 for (BinaryBasicBlock *BB : getLayout().blocks()) { 385 386 if (BB->isCold() && !SeenCold) { 387 SeenCold = true; 388 389 // Close the range (if any) and change the target call sites. 390 if (StartRange) { 391 Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(), 392 PreviousEH.LP, PreviousEH.Action}); 393 } 394 Sites = &ColdCallSites; 395 396 // Reset the range. 397 StartRange = nullptr; 398 PreviousEH = {nullptr, 0}; 399 } 400 401 for (auto II = BB->begin(); II != BB->end(); ++II) { 402 if (!BC.MIB->isCall(*II)) 403 continue; 404 405 // Instruction can throw an exception that should be handled. 406 const bool Throws = BC.MIB->isInvoke(*II); 407 408 // Ignore the call if it's a continuation of a no-throw gap. 409 if (!Throws && !StartRange) 410 continue; 411 412 // Extract exception handling information from the instruction. 413 const MCSymbol *LP = nullptr; 414 uint64_t Action = 0; 415 if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II)) 416 std::tie(LP, Action) = *EHInfo; 417 418 // No action if the exception handler has not changed. 419 if (Throws && StartRange && PreviousEH.LP == LP && 420 PreviousEH.Action == Action) 421 continue; 422 423 // Same symbol is used for the beginning and the end of the range. 424 const MCSymbol *EHSymbol; 425 MCInst EHLabel; 426 { 427 std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex); 428 EHSymbol = BC.Ctx->createNamedTempSymbol("EH"); 429 BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get()); 430 } 431 432 II = std::next(BB->insertPseudoInstr(II, EHLabel)); 433 434 // At this point we could be in one of the following states: 435 // 436 // I. Exception handler has changed and we need to close previous range 437 // and start a new one. 438 // 439 // II. Start a new exception range after the gap. 440 // 441 // III. Close current exception range and start a new gap. 442 const MCSymbol *EndRange; 443 if (StartRange) { 444 // I, III: 445 EndRange = EHSymbol; 446 } else { 447 // II: 448 StartRange = EHSymbol; 449 IsStartInCold = SeenCold; 450 EndRange = nullptr; 451 } 452 453 // Close the previous range. 454 if (EndRange) { 455 Sites->emplace_back( 456 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 457 } 458 459 if (Throws) { 460 // I, II: 461 StartRange = EHSymbol; 462 IsStartInCold = SeenCold; 463 PreviousEH = EHInfo{LP, Action}; 464 } else { 465 StartRange = nullptr; 466 } 467 } 468 } 469 470 // Check if we need to close the range. 471 if (StartRange) { 472 assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch"); 473 const MCSymbol *EndRange = 474 IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel(); 475 Sites->emplace_back( 476 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 477 } 478 } 479 480 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; 481 482 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) { 483 // Prepare FDEs for fast lookup 484 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) { 485 const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry); 486 // Skip CIEs. 487 if (!CurFDE) 488 continue; 489 // There could me multiple FDEs with the same initial address, and perhaps 490 // different sizes (address ranges). Use the first entry with non-zero size. 491 auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation()); 492 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) { 493 if (CurFDE->getAddressRange()) { 494 if (FDEI->second->getAddressRange() == 0) { 495 FDEI->second = CurFDE; 496 } else if (opts::Verbosity > 0) { 497 errs() << "BOLT-WARNING: different FDEs for function at 0x" 498 << Twine::utohexstr(FDEI->first) 499 << " detected; sizes: " << FDEI->second->getAddressRange() 500 << " and " << CurFDE->getAddressRange() << '\n'; 501 } 502 } 503 } else { 504 FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE); 505 } 506 } 507 } 508 509 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { 510 uint64_t Address = Function.getAddress(); 511 auto I = FDEs.find(Address); 512 // Ignore zero-length FDE ranges. 513 if (I == FDEs.end() || !I->second->getAddressRange()) 514 return true; 515 516 const FDE &CurFDE = *I->second; 517 Optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); 518 Function.setLSDAAddress(LSDA ? *LSDA : 0); 519 520 uint64_t Offset = Function.getFirstInstructionOffset(); 521 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); 522 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor(); 523 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) { 524 Function.setPersonalityFunction( 525 *CurFDE.getLinkedCIE()->getPersonalityAddress()); 526 Function.setPersonalityEncoding( 527 *CurFDE.getLinkedCIE()->getPersonalityEncoding()); 528 } 529 530 auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment, 531 DataAlignment]( 532 const CFIProgram::Instruction &Instr) { 533 uint8_t Opcode = Instr.Opcode; 534 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) 535 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; 536 switch (Instr.Opcode) { 537 case DW_CFA_nop: 538 break; 539 case DW_CFA_advance_loc4: 540 case DW_CFA_advance_loc2: 541 case DW_CFA_advance_loc1: 542 case DW_CFA_advance_loc: 543 // Advance our current address 544 Offset += CodeAlignment * int64_t(Instr.Ops[0]); 545 break; 546 case DW_CFA_offset_extended_sf: 547 Function.addCFIInstruction( 548 Offset, 549 MCCFIInstruction::createOffset( 550 nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1]))); 551 break; 552 case DW_CFA_offset_extended: 553 case DW_CFA_offset: 554 Function.addCFIInstruction( 555 Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0], 556 DataAlignment * Instr.Ops[1])); 557 break; 558 case DW_CFA_restore_extended: 559 case DW_CFA_restore: 560 Function.addCFIInstruction( 561 Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0])); 562 break; 563 case DW_CFA_set_loc: 564 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds"); 565 assert(Instr.Ops[0] <= Address + Function.getSize() && 566 "set_loc out of function bounds"); 567 Offset = Instr.Ops[0] - Address; 568 break; 569 570 case DW_CFA_undefined: 571 Function.addCFIInstruction( 572 Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0])); 573 break; 574 case DW_CFA_same_value: 575 Function.addCFIInstruction( 576 Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0])); 577 break; 578 case DW_CFA_register: 579 Function.addCFIInstruction( 580 Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0], 581 Instr.Ops[1])); 582 break; 583 case DW_CFA_remember_state: 584 Function.addCFIInstruction( 585 Offset, MCCFIInstruction::createRememberState(nullptr)); 586 break; 587 case DW_CFA_restore_state: 588 Function.addCFIInstruction(Offset, 589 MCCFIInstruction::createRestoreState(nullptr)); 590 break; 591 case DW_CFA_def_cfa: 592 Function.addCFIInstruction( 593 Offset, 594 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1])); 595 break; 596 case DW_CFA_def_cfa_sf: 597 Function.addCFIInstruction( 598 Offset, 599 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], 600 DataAlignment * int64_t(Instr.Ops[1]))); 601 break; 602 case DW_CFA_def_cfa_register: 603 Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister( 604 nullptr, Instr.Ops[0])); 605 break; 606 case DW_CFA_def_cfa_offset: 607 Function.addCFIInstruction( 608 Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0])); 609 break; 610 case DW_CFA_def_cfa_offset_sf: 611 Function.addCFIInstruction( 612 Offset, MCCFIInstruction::cfiDefCfaOffset( 613 nullptr, DataAlignment * int64_t(Instr.Ops[0]))); 614 break; 615 case DW_CFA_GNU_args_size: 616 Function.addCFIInstruction( 617 Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0])); 618 Function.setUsesGnuArgsSize(); 619 break; 620 case DW_CFA_val_offset_sf: 621 case DW_CFA_val_offset: 622 if (opts::Verbosity >= 1) { 623 errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n"; 624 } 625 return false; 626 case DW_CFA_def_cfa_expression: 627 case DW_CFA_val_expression: 628 case DW_CFA_expression: { 629 StringRef ExprBytes = Instr.Expression->getData(); 630 std::string Str; 631 raw_string_ostream OS(Str); 632 // Manually encode this instruction using CFI escape 633 OS << Opcode; 634 if (Opcode != DW_CFA_def_cfa_expression) 635 encodeULEB128(Instr.Ops[0], OS); 636 encodeULEB128(ExprBytes.size(), OS); 637 OS << ExprBytes; 638 Function.addCFIInstruction( 639 Offset, MCCFIInstruction::createEscape(nullptr, OS.str())); 640 break; 641 } 642 case DW_CFA_MIPS_advance_loc8: 643 if (opts::Verbosity >= 1) 644 errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; 645 return false; 646 case DW_CFA_GNU_window_save: 647 case DW_CFA_lo_user: 648 case DW_CFA_hi_user: 649 if (opts::Verbosity >= 1) { 650 errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user " 651 "unimplemented\n"; 652 } 653 return false; 654 default: 655 if (opts::Verbosity >= 1) { 656 errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode 657 << '\n'; 658 } 659 return false; 660 } 661 662 return true; 663 }; 664 665 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) 666 if (!decodeFrameInstruction(Instr)) 667 return false; 668 669 for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) 670 if (!decodeFrameInstruction(Instr)) 671 return false; 672 673 return true; 674 } 675 676 std::vector<char> CFIReaderWriter::generateEHFrameHeader( 677 const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame, 678 uint64_t EHFrameHeaderAddress, 679 std::vector<uint64_t> &FailedAddresses) const { 680 // Common PC -> FDE map to be written into .eh_frame_hdr. 681 std::map<uint64_t, uint64_t> PCToFDE; 682 683 // Presort array for binary search. 684 llvm::sort(FailedAddresses); 685 686 // Initialize PCToFDE using NewEHFrame. 687 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) { 688 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 689 if (FDE == nullptr) 690 continue; 691 const uint64_t FuncAddress = FDE->getInitialLocation(); 692 const uint64_t FDEAddress = 693 NewEHFrame.getEHFrameAddress() + FDE->getOffset(); 694 695 // Ignore unused FDEs. 696 if (FuncAddress == 0) 697 continue; 698 699 // Add the address to the map unless we failed to write it. 700 if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(), 701 FuncAddress)) { 702 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x" 703 << Twine::utohexstr(FuncAddress) << " is at 0x" 704 << Twine::utohexstr(FDEAddress) << '\n'); 705 PCToFDE[FuncAddress] = FDEAddress; 706 } 707 }; 708 709 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains " 710 << llvm::size(NewEHFrame.entries()) << " entries\n"); 711 712 // Add entries from the original .eh_frame corresponding to the functions 713 // that we did not update. 714 for (const dwarf::FrameEntry &Entry : OldEHFrame) { 715 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 716 if (FDE == nullptr) 717 continue; 718 const uint64_t FuncAddress = FDE->getInitialLocation(); 719 const uint64_t FDEAddress = 720 OldEHFrame.getEHFrameAddress() + FDE->getOffset(); 721 722 // Add the address if we failed to write it. 723 if (PCToFDE.count(FuncAddress) == 0) { 724 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x" 725 << Twine::utohexstr(FuncAddress) << " is at 0x" 726 << Twine::utohexstr(FDEAddress) << '\n'); 727 PCToFDE[FuncAddress] = FDEAddress; 728 } 729 }; 730 731 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains " 732 << llvm::size(OldEHFrame.entries()) << " entries\n"); 733 734 // Generate a new .eh_frame_hdr based on the new map. 735 736 // Header plus table of entries of size 8 bytes. 737 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8); 738 739 // Version is 1. 740 EHFrameHeader[0] = 1; 741 // Encoding of the eh_frame pointer. 742 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; 743 // Encoding of the count field to follow. 744 EHFrameHeader[2] = DW_EH_PE_udata4; 745 // Encoding of the table entries - 4-byte offset from the start of the header. 746 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; 747 748 // Address of eh_frame. Use the new one. 749 support::ulittle32_t::ref(EHFrameHeader.data() + 4) = 750 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4); 751 752 // Number of entries in the table (FDE count). 753 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size(); 754 755 // Write the table at offset 12. 756 char *Ptr = EHFrameHeader.data(); 757 uint32_t Offset = 12; 758 for (const auto &PCI : PCToFDE) { 759 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress; 760 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds"); 761 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset; 762 Offset += 4; 763 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress; 764 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds"); 765 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset; 766 Offset += 4; 767 } 768 769 return EHFrameHeader; 770 } 771 772 Error EHFrameParser::parseCIE(uint64_t StartOffset) { 773 uint8_t Version = Data.getU8(&Offset); 774 const char *Augmentation = Data.getCStr(&Offset); 775 StringRef AugmentationString(Augmentation ? Augmentation : ""); 776 uint8_t AddressSize = 777 Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); 778 Data.setAddressSize(AddressSize); 779 // Skip segment descriptor size 780 if (Version >= 4) 781 Offset += 1; 782 // Skip code alignment factor 783 Data.getULEB128(&Offset); 784 // Skip data alignment 785 Data.getSLEB128(&Offset); 786 // Skip return address register 787 if (Version == 1) 788 Offset += 1; 789 else 790 Data.getULEB128(&Offset); 791 792 uint32_t FDEPointerEncoding = DW_EH_PE_absptr; 793 uint32_t LSDAPointerEncoding = DW_EH_PE_omit; 794 // Walk the augmentation string to get all the augmentation data. 795 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { 796 switch (AugmentationString[i]) { 797 default: 798 return createStringError( 799 errc::invalid_argument, 800 "unknown augmentation character in entry at 0x%" PRIx64, StartOffset); 801 case 'L': 802 LSDAPointerEncoding = Data.getU8(&Offset); 803 break; 804 case 'P': { 805 uint32_t PersonalityEncoding = Data.getU8(&Offset); 806 Optional<uint64_t> Personality = 807 Data.getEncodedPointer(&Offset, PersonalityEncoding, 808 EHFrameAddress ? EHFrameAddress + Offset : 0); 809 // Patch personality address 810 if (Personality) 811 PatcherCallback(*Personality, Offset, PersonalityEncoding); 812 break; 813 } 814 case 'R': 815 FDEPointerEncoding = Data.getU8(&Offset); 816 break; 817 case 'z': 818 if (i) 819 return createStringError( 820 errc::invalid_argument, 821 "'z' must be the first character at 0x%" PRIx64, StartOffset); 822 // Skip augmentation length 823 Data.getULEB128(&Offset); 824 break; 825 case 'S': 826 case 'B': 827 break; 828 } 829 } 830 Entries.emplace_back(std::make_unique<CIEInfo>( 831 FDEPointerEncoding, LSDAPointerEncoding, AugmentationString)); 832 CIEs[StartOffset] = &*Entries.back(); 833 return Error::success(); 834 } 835 836 Error EHFrameParser::parseFDE(uint64_t CIEPointer, 837 uint64_t StartStructureOffset) { 838 Optional<uint64_t> LSDAAddress; 839 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer]; 840 841 // The address size is encoded in the CIE we reference. 842 if (!Cie) 843 return createStringError(errc::invalid_argument, 844 "parsing FDE data at 0x%" PRIx64 845 " failed due to missing CIE", 846 StartStructureOffset); 847 // Patch initial location 848 if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 849 EHFrameAddress + Offset)) { 850 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding); 851 } 852 // Skip address range 853 Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0); 854 855 // Process augmentation data for this FDE. 856 StringRef AugmentationString = Cie->AugmentationString; 857 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) { 858 // Skip augmentation length 859 Data.getULEB128(&Offset); 860 LSDAAddress = 861 Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding, 862 EHFrameAddress ? Offset + EHFrameAddress : 0); 863 // Patch LSDA address 864 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding); 865 } 866 return Error::success(); 867 } 868 869 Error EHFrameParser::parse() { 870 while (Data.isValidOffset(Offset)) { 871 const uint64_t StartOffset = Offset; 872 873 uint64_t Length; 874 DwarfFormat Format; 875 std::tie(Length, Format) = Data.getInitialLength(&Offset); 876 877 // If the Length is 0, then this CIE is a terminator 878 if (Length == 0) 879 break; 880 881 const uint64_t StartStructureOffset = Offset; 882 const uint64_t EndStructureOffset = Offset + Length; 883 884 Error Err = Error::success(); 885 const uint64_t Id = Data.getRelocatedValue(4, &Offset, 886 /*SectionIndex=*/nullptr, &Err); 887 if (Err) 888 return Err; 889 890 if (!Id) { 891 if (Error Err = parseCIE(StartOffset)) 892 return Err; 893 } else { 894 if (Error Err = parseFDE(Id, StartStructureOffset)) 895 return Err; 896 } 897 Offset = EndStructureOffset; 898 } 899 900 return Error::success(); 901 } 902 903 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress, 904 PatcherCallbackTy PatcherCallback) { 905 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback); 906 return Parser.parse(); 907 } 908 909 } // namespace bolt 910 } // namespace llvm 911