1 //===-- Exceptions.cpp - Helpers for processing C++ exceptions ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Some of the code is taken from examples/ExceptionDemo 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/Exceptions.h" 14 #include "bolt/Core/BinaryFunction.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/Twine.h" 17 #include "llvm/BinaryFormat/Dwarf.h" 18 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 19 #include "llvm/Support/Casting.h" 20 #include "llvm/Support/CommandLine.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/LEB128.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <map> 26 27 #undef DEBUG_TYPE 28 #define DEBUG_TYPE "bolt-exceptions" 29 30 using namespace llvm::dwarf; 31 32 namespace opts { 33 34 extern llvm::cl::OptionCategory BoltCategory; 35 36 extern llvm::cl::opt<unsigned> Verbosity; 37 38 static llvm::cl::opt<bool> 39 PrintExceptions("print-exceptions", 40 llvm::cl::desc("print exception handling data"), 41 llvm::cl::ZeroOrMore, 42 llvm::cl::Hidden, 43 llvm::cl::cat(BoltCategory)); 44 45 } // namespace opts 46 47 namespace llvm { 48 namespace bolt { 49 50 // Read and dump the .gcc_exception_table section entry. 51 // 52 // .gcc_except_table section contains a set of Language-Specific Data Areas - 53 // a fancy name for exception handling tables. There's one LSDA entry per 54 // function. However, we can't actually tell which function LSDA refers to 55 // unless we parse .eh_frame entry that refers to the LSDA. 56 // Then inside LSDA most addresses are encoded relative to the function start, 57 // so we need the function context in order to get to real addresses. 58 // 59 // The best visual representation of the tables comprising LSDA and 60 // relationships between them is illustrated at: 61 // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf 62 // Keep in mind that GCC implementation deviates slightly from that document. 63 // 64 // To summarize, there are 4 tables in LSDA: call site table, actions table, 65 // types table, and types index table (for indirection). The main table contains 66 // call site entries. Each call site includes a PC range that can throw an 67 // exception, a handler (landing pad), and a reference to an entry in the action 68 // table. The handler and/or action could be 0. The action entry is a head 69 // of a list of actions associated with a call site. The action table contains 70 // all such lists (it could be optimized to share list tails). Each action could 71 // be either to catch an exception of a given type, to perform a cleanup, or to 72 // propagate the exception after filtering it out (e.g. to make sure function 73 // exception specification is not violated). Catch action contains a reference 74 // to an entry in the type table, and filter action refers to an entry in the 75 // type index table to encode a set of types to filter. 76 // 77 // Call site table follows LSDA header. Action table immediately follows the 78 // call site table. 79 // 80 // Both types table and type index table start at the same location, but they 81 // grow in opposite directions (types go up, indices go down). The beginning of 82 // these tables is encoded in LSDA header. Sizes for both of the tables are not 83 // included anywhere. 84 // 85 // We have to parse all of the tables to determine their sizes. Then we have 86 // to parse the call site table and associate discovered information with 87 // actual call instructions and landing pad blocks. 88 // 89 // For the purpose of rewriting exception handling tables, we can reuse action, 90 // and type index tables in their original binary format. 91 // 92 // Type table could be encoded using position-independent references, and thus 93 // may require relocation. 94 // 95 // Ideally we should be able to re-write LSDA in-place, without the need to 96 // allocate a new space for it. Sadly there's no guarantee that the new call 97 // site table will be the same size as GCC uses uleb encodings for PC offsets. 98 // 99 // Note: some functions have LSDA entries with 0 call site entries. 100 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData, 101 uint64_t LSDASectionAddress) { 102 assert(CurrentState == State::Disassembled && "unexpected function state"); 103 104 if (!getLSDAAddress()) 105 return; 106 107 DWARFDataExtractor Data( 108 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()), 109 LSDASectionData.size()), 110 BC.DwCtx->getDWARFObj().isLittleEndian(), 8); 111 uint64_t Offset = getLSDAAddress() - LSDASectionAddress; 112 assert(Data.isValidOffset(Offset) && "wrong LSDA address"); 113 114 uint8_t LPStartEncoding = Data.getU8(&Offset); 115 uint64_t LPStart = 0; 116 if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer( 117 &Offset, LPStartEncoding, Offset + LSDASectionAddress)) 118 LPStart = *MaybeLPStart; 119 120 assert(LPStart == 0 && "support for split functions not implemented"); 121 122 const uint8_t TTypeEncoding = Data.getU8(&Offset); 123 size_t TTypeEncodingSize = 0; 124 uintptr_t TTypeEnd = 0; 125 if (TTypeEncoding != DW_EH_PE_omit) { 126 TTypeEnd = Data.getULEB128(&Offset); 127 TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); 128 } 129 130 if (opts::PrintExceptions) { 131 outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress()) 132 << " for function " << *this << "]:\n"; 133 outs() << "LPStart Encoding = 0x" 134 << Twine::utohexstr(LPStartEncoding) << '\n'; 135 outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n'; 136 outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n'; 137 outs() << "TType End = " << TTypeEnd << '\n'; 138 } 139 140 // Table to store list of indices in type table. Entries are uleb128 values. 141 const uint64_t TypeIndexTableStart = Offset + TTypeEnd; 142 143 // Offset past the last decoded index. 144 uint64_t MaxTypeIndexTableOffset = 0; 145 146 // Max positive index used in type table. 147 unsigned MaxTypeIndex = 0; 148 149 // The actual type info table starts at the same location, but grows in 150 // opposite direction. TTypeEncoding is used to encode stored values. 151 const uint64_t TypeTableStart = Offset + TTypeEnd; 152 153 uint8_t CallSiteEncoding = Data.getU8(&Offset); 154 uint32_t CallSiteTableLength = Data.getULEB128(&Offset); 155 uint64_t CallSiteTableStart = Offset; 156 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength; 157 uint64_t CallSitePtr = CallSiteTableStart; 158 uint64_t ActionTableStart = CallSiteTableEnd; 159 160 if (opts::PrintExceptions) { 161 outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; 162 outs() << "CallSite table length = " << CallSiteTableLength << '\n'; 163 outs() << '\n'; 164 } 165 166 this->HasEHRanges = CallSitePtr < CallSiteTableEnd; 167 const uint64_t RangeBase = getAddress(); 168 while (CallSitePtr < CallSiteTableEnd) { 169 uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 170 CallSitePtr + LSDASectionAddress); 171 uint64_t Length = *Data.getEncodedPointer( 172 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress); 173 uint64_t LandingPad = *Data.getEncodedPointer( 174 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress); 175 uint64_t ActionEntry = Data.getULEB128(&CallSitePtr); 176 177 if (opts::PrintExceptions) { 178 outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) 179 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) 180 << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad) 181 << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n"; 182 outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) 183 << '\n'; 184 } 185 186 // Create a handler entry if necessary. 187 MCSymbol *LPSymbol = nullptr; 188 if (LandingPad) { 189 if (Instructions.find(LandingPad) == Instructions.end()) { 190 if (opts::Verbosity >= 1) { 191 errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad) 192 << " not pointing to an instruction in function " 193 << *this << " - ignoring.\n"; 194 } 195 } else { 196 auto Label = Labels.find(LandingPad); 197 if (Label != Labels.end()) { 198 LPSymbol = Label->second; 199 } else { 200 LPSymbol = BC.Ctx->createNamedTempSymbol("LP"); 201 Labels[LandingPad] = LPSymbol; 202 } 203 } 204 } 205 206 // Mark all call instructions in the range. 207 auto II = Instructions.find(Start); 208 auto IE = Instructions.end(); 209 assert(II != IE && "exception range not pointing to an instruction"); 210 do { 211 MCInst &Instruction = II->second; 212 if (BC.MIB->isCall(Instruction) && 213 !BC.MIB->getConditionalTailCall(Instruction)) { 214 assert(!BC.MIB->isInvoke(Instruction) && 215 "overlapping exception ranges detected"); 216 // Add extra operands to a call instruction making it an invoke from 217 // now on. 218 BC.MIB->addEHInfo(Instruction, 219 MCPlus::MCLandingPad(LPSymbol, ActionEntry)); 220 } 221 ++II; 222 } while (II != IE && II->first < Start + Length); 223 224 if (ActionEntry != 0) { 225 auto printType = [&](int Index, raw_ostream &OS) { 226 assert(Index > 0 && "only positive indices are valid"); 227 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 228 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 229 uint64_t TypeAddress = 230 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 231 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) { 232 TypeAddress = 0; 233 } 234 if (TypeAddress == 0) { 235 OS << "<all>"; 236 return; 237 } 238 if (TTypeEncoding & DW_EH_PE_indirect) { 239 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 240 assert(PointerOrErr && "failed to decode indirect address"); 241 TypeAddress = *PointerOrErr; 242 } 243 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) { 244 OS << TypeSymBD->getName(); 245 } else { 246 OS << "0x" << Twine::utohexstr(TypeAddress); 247 } 248 }; 249 if (opts::PrintExceptions) 250 outs() << " actions: "; 251 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1; 252 int64_t ActionType; 253 int64_t ActionNext; 254 const char *Sep = ""; 255 do { 256 ActionType = Data.getSLEB128(&ActionPtr); 257 const uint32_t Self = ActionPtr; 258 ActionNext = Data.getSLEB128(&ActionPtr); 259 if (opts::PrintExceptions) 260 outs() << Sep << "(" << ActionType << ", " << ActionNext << ") "; 261 if (ActionType == 0) { 262 if (opts::PrintExceptions) 263 outs() << "cleanup"; 264 } else if (ActionType > 0) { 265 // It's an index into a type table. 266 MaxTypeIndex = std::max(MaxTypeIndex, 267 static_cast<unsigned>(ActionType)); 268 if (opts::PrintExceptions) { 269 outs() << "catch type "; 270 printType(ActionType, outs()); 271 } 272 } else { // ActionType < 0 273 if (opts::PrintExceptions) 274 outs() << "filter exception types "; 275 const char *TSep = ""; 276 // ActionType is a negative *byte* offset into *uleb128-encoded* table 277 // of indices with base 1. 278 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are 279 // encoded using uleb128 thus we cannot directly dereference them. 280 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1; 281 while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) { 282 MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index)); 283 if (opts::PrintExceptions) { 284 outs() << TSep; 285 printType(Index, outs()); 286 TSep = ", "; 287 } 288 } 289 MaxTypeIndexTableOffset = 290 std::max(MaxTypeIndexTableOffset, 291 TypeIndexTablePtr - TypeIndexTableStart); 292 } 293 294 Sep = "; "; 295 296 ActionPtr = Self + ActionNext; 297 } while (ActionNext); 298 if (opts::PrintExceptions) 299 outs() << '\n'; 300 } 301 } 302 if (opts::PrintExceptions) 303 outs() << '\n'; 304 305 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <= 306 Data.getData().size() && 307 "LSDA entry has crossed section boundary"); 308 309 if (TTypeEnd) { 310 LSDAActionTable = LSDASectionData.slice( 311 ActionTableStart, TypeIndexTableStart - 312 MaxTypeIndex * TTypeEncodingSize - 313 ActionTableStart); 314 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) { 315 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 316 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 317 uint64_t TypeAddress = 318 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 319 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) 320 TypeAddress = 0; 321 if (TTypeEncoding & DW_EH_PE_indirect) { 322 LSDATypeAddressTable.emplace_back(TypeAddress); 323 if (TypeAddress) { 324 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 325 assert(PointerOrErr && "failed to decode indirect address"); 326 TypeAddress = *PointerOrErr; 327 } 328 } 329 LSDATypeTable.emplace_back(TypeAddress); 330 } 331 LSDATypeIndexTable = 332 LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset); 333 } 334 } 335 336 void BinaryFunction::updateEHRanges() { 337 if (getSize() == 0) 338 return; 339 340 assert(CurrentState == State::CFG_Finalized && "unexpected state"); 341 342 // Build call sites table. 343 struct EHInfo { 344 const MCSymbol *LP; // landing pad 345 uint64_t Action; 346 }; 347 348 // If previous call can throw, this is its exception handler. 349 EHInfo PreviousEH = {nullptr, 0}; 350 351 // Marker for the beginning of exceptions range. 352 const MCSymbol *StartRange = nullptr; 353 354 // Indicates whether the start range is located in a cold part. 355 bool IsStartInCold = false; 356 357 // Have we crossed hot/cold border for split functions? 358 bool SeenCold = false; 359 360 // Sites to update - either regular or cold. 361 CallSitesType *Sites = &CallSites; 362 363 for (BinaryBasicBlock *&BB : BasicBlocksLayout) { 364 365 if (BB->isCold() && !SeenCold) { 366 SeenCold = true; 367 368 // Close the range (if any) and change the target call sites. 369 if (StartRange) { 370 Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(), 371 PreviousEH.LP, PreviousEH.Action}); 372 } 373 Sites = &ColdCallSites; 374 375 // Reset the range. 376 StartRange = nullptr; 377 PreviousEH = {nullptr, 0}; 378 } 379 380 for (auto II = BB->begin(); II != BB->end(); ++II) { 381 if (!BC.MIB->isCall(*II)) 382 continue; 383 384 // Instruction can throw an exception that should be handled. 385 const bool Throws = BC.MIB->isInvoke(*II); 386 387 // Ignore the call if it's a continuation of a no-throw gap. 388 if (!Throws && !StartRange) 389 continue; 390 391 // Extract exception handling information from the instruction. 392 const MCSymbol *LP = nullptr; 393 uint64_t Action = 0; 394 if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II)) 395 std::tie(LP, Action) = *EHInfo; 396 397 // No action if the exception handler has not changed. 398 if (Throws && 399 StartRange && 400 PreviousEH.LP == LP && 401 PreviousEH.Action == Action) 402 continue; 403 404 // Same symbol is used for the beginning and the end of the range. 405 const MCSymbol *EHSymbol; 406 MCInst EHLabel; 407 { 408 std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex); 409 EHSymbol = BC.Ctx->createNamedTempSymbol("EH"); 410 BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get()); 411 } 412 413 II = std::next(BB->insertPseudoInstr(II, EHLabel)); 414 415 // At this point we could be in one of the following states: 416 // 417 // I. Exception handler has changed and we need to close previous range 418 // and start a new one. 419 // 420 // II. Start a new exception range after the gap. 421 // 422 // III. Close current exception range and start a new gap. 423 const MCSymbol *EndRange; 424 if (StartRange) { 425 // I, III: 426 EndRange = EHSymbol; 427 } else { 428 // II: 429 StartRange = EHSymbol; 430 IsStartInCold = SeenCold; 431 EndRange = nullptr; 432 } 433 434 // Close the previous range. 435 if (EndRange) { 436 Sites->emplace_back(CallSite{StartRange, EndRange, 437 PreviousEH.LP, PreviousEH.Action}); 438 } 439 440 if (Throws) { 441 // I, II: 442 StartRange = EHSymbol; 443 IsStartInCold = SeenCold; 444 PreviousEH = EHInfo{LP, Action}; 445 } else { 446 StartRange = nullptr; 447 } 448 } 449 } 450 451 // Check if we need to close the range. 452 if (StartRange) { 453 assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch"); 454 const MCSymbol *EndRange = 455 IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel(); 456 Sites->emplace_back(CallSite{StartRange, EndRange, 457 PreviousEH.LP, PreviousEH.Action}); 458 } 459 } 460 461 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; 462 463 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) { 464 // Prepare FDEs for fast lookup 465 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) { 466 const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry); 467 // Skip CIEs. 468 if (!CurFDE) 469 continue; 470 // There could me multiple FDEs with the same initial address, and perhaps 471 // different sizes (address ranges). Use the first entry with non-zero size. 472 auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation()); 473 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) { 474 if (CurFDE->getAddressRange()) { 475 if (FDEI->second->getAddressRange() == 0) { 476 FDEI->second = CurFDE; 477 } else if (opts::Verbosity > 0) { 478 errs() << "BOLT-WARNING: different FDEs for function at 0x" 479 << Twine::utohexstr(FDEI->first) 480 << " detected; sizes: " 481 << FDEI->second->getAddressRange() << " and " 482 << CurFDE->getAddressRange() << '\n'; 483 } 484 } 485 } else { 486 FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE); 487 } 488 } 489 } 490 491 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { 492 uint64_t Address = Function.getAddress(); 493 auto I = FDEs.find(Address); 494 // Ignore zero-length FDE ranges. 495 if (I == FDEs.end() || !I->second->getAddressRange()) 496 return true; 497 498 const FDE &CurFDE = *I->second; 499 Optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); 500 Function.setLSDAAddress(LSDA ? *LSDA : 0); 501 502 uint64_t Offset = 0; 503 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); 504 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor(); 505 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) { 506 Function.setPersonalityFunction( 507 *CurFDE.getLinkedCIE()->getPersonalityAddress()); 508 Function.setPersonalityEncoding( 509 *CurFDE.getLinkedCIE()->getPersonalityEncoding()); 510 } 511 512 auto decodeFrameInstruction = 513 [&Function, &Offset, Address, CodeAlignment, DataAlignment]( 514 const CFIProgram::Instruction &Instr) { 515 uint8_t Opcode = Instr.Opcode; 516 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) 517 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; 518 switch (Instr.Opcode) { 519 case DW_CFA_nop: 520 break; 521 case DW_CFA_advance_loc4: 522 case DW_CFA_advance_loc2: 523 case DW_CFA_advance_loc1: 524 case DW_CFA_advance_loc: 525 // Advance our current address 526 Offset += CodeAlignment * int64_t(Instr.Ops[0]); 527 break; 528 case DW_CFA_offset_extended_sf: 529 Function.addCFIInstruction( 530 Offset, MCCFIInstruction::createOffset( 531 nullptr, Instr.Ops[0], 532 DataAlignment * int64_t(Instr.Ops[1]))); 533 break; 534 case DW_CFA_offset_extended: 535 case DW_CFA_offset: 536 Function.addCFIInstruction( 537 Offset, MCCFIInstruction::createOffset( 538 nullptr, Instr.Ops[0], DataAlignment * Instr.Ops[1])); 539 break; 540 case DW_CFA_restore_extended: 541 case DW_CFA_restore: 542 Function.addCFIInstruction( 543 Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0])); 544 break; 545 case DW_CFA_set_loc: 546 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds"); 547 assert(Instr.Ops[0] <= Address + Function.getSize() && 548 "set_loc out of function bounds"); 549 Offset = Instr.Ops[0] - Address; 550 break; 551 552 case DW_CFA_undefined: 553 Function.addCFIInstruction( 554 Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0])); 555 break; 556 case DW_CFA_same_value: 557 Function.addCFIInstruction( 558 Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0])); 559 break; 560 case DW_CFA_register: 561 Function.addCFIInstruction( 562 Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0], 563 Instr.Ops[1])); 564 break; 565 case DW_CFA_remember_state: 566 Function.addCFIInstruction( 567 Offset, MCCFIInstruction::createRememberState(nullptr)); 568 break; 569 case DW_CFA_restore_state: 570 Function.addCFIInstruction( 571 Offset, MCCFIInstruction::createRestoreState(nullptr)); 572 break; 573 case DW_CFA_def_cfa: 574 Function.addCFIInstruction( 575 Offset, MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], 576 Instr.Ops[1])); 577 break; 578 case DW_CFA_def_cfa_sf: 579 Function.addCFIInstruction( 580 Offset, MCCFIInstruction::cfiDefCfa( 581 nullptr, Instr.Ops[0], 582 DataAlignment * int64_t(Instr.Ops[1]))); 583 break; 584 case DW_CFA_def_cfa_register: 585 Function.addCFIInstruction( 586 Offset, 587 MCCFIInstruction::createDefCfaRegister(nullptr, Instr.Ops[0])); 588 break; 589 case DW_CFA_def_cfa_offset: 590 Function.addCFIInstruction( 591 Offset, 592 MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0])); 593 break; 594 case DW_CFA_def_cfa_offset_sf: 595 Function.addCFIInstruction( 596 Offset, MCCFIInstruction::cfiDefCfaOffset( 597 nullptr, DataAlignment * int64_t(Instr.Ops[0]))); 598 break; 599 case DW_CFA_GNU_args_size: 600 Function.addCFIInstruction( 601 Offset, 602 MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0])); 603 Function.setUsesGnuArgsSize(); 604 break; 605 case DW_CFA_val_offset_sf: 606 case DW_CFA_val_offset: 607 if (opts::Verbosity >= 1) { 608 errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n"; 609 } 610 return false; 611 case DW_CFA_def_cfa_expression: 612 case DW_CFA_val_expression: 613 case DW_CFA_expression: { 614 StringRef ExprBytes = Instr.Expression->getData(); 615 std::string Str; 616 raw_string_ostream OS(Str); 617 // Manually encode this instruction using CFI escape 618 OS << Opcode; 619 if (Opcode != DW_CFA_def_cfa_expression) { 620 encodeULEB128(Instr.Ops[0], OS); 621 } 622 encodeULEB128(ExprBytes.size(), OS); 623 OS << ExprBytes; 624 Function.addCFIInstruction( 625 Offset, MCCFIInstruction::createEscape(nullptr, OS.str())); 626 break; 627 } 628 case DW_CFA_MIPS_advance_loc8: 629 if (opts::Verbosity >= 1) { 630 errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; 631 } 632 return false; 633 case DW_CFA_GNU_window_save: 634 case DW_CFA_lo_user: 635 case DW_CFA_hi_user: 636 if (opts::Verbosity >= 1) { 637 errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user " 638 "unimplemented\n"; 639 } 640 return false; 641 default: 642 if (opts::Verbosity >= 1) { 643 errs() << "BOLT-WARNING: Unrecognized CFI instruction: " 644 << Instr.Opcode << '\n'; 645 } 646 return false; 647 } 648 649 return true; 650 }; 651 652 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) { 653 if (!decodeFrameInstruction(Instr)) 654 return false; 655 } 656 657 for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) { 658 if (!decodeFrameInstruction(Instr)) 659 return false; 660 } 661 662 return true; 663 } 664 665 std::vector<char> CFIReaderWriter::generateEHFrameHeader( 666 const DWARFDebugFrame &OldEHFrame, 667 const DWARFDebugFrame &NewEHFrame, 668 uint64_t EHFrameHeaderAddress, 669 std::vector<uint64_t> &FailedAddresses) const { 670 // Common PC -> FDE map to be written into .eh_frame_hdr. 671 std::map<uint64_t, uint64_t> PCToFDE; 672 673 // Presort array for binary search. 674 std::sort(FailedAddresses.begin(), FailedAddresses.end()); 675 676 // Initialize PCToFDE using NewEHFrame. 677 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) { 678 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 679 if (FDE == nullptr) 680 continue; 681 const uint64_t FuncAddress = FDE->getInitialLocation(); 682 const uint64_t FDEAddress = 683 NewEHFrame.getEHFrameAddress() + FDE->getOffset(); 684 685 // Ignore unused FDEs. 686 if (FuncAddress == 0) 687 continue; 688 689 // Add the address to the map unless we failed to write it. 690 if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(), 691 FuncAddress)) { 692 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x" 693 << Twine::utohexstr(FuncAddress) << " is at 0x" 694 << Twine::utohexstr(FDEAddress) << '\n'); 695 PCToFDE[FuncAddress] = FDEAddress; 696 } 697 }; 698 699 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains " 700 << std::distance(NewEHFrame.entries().begin(), 701 NewEHFrame.entries().end()) 702 << " entries\n"); 703 704 // Add entries from the original .eh_frame corresponding to the functions 705 // that we did not update. 706 for (const dwarf::FrameEntry &Entry : OldEHFrame) { 707 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 708 if (FDE == nullptr) 709 continue; 710 const uint64_t FuncAddress = FDE->getInitialLocation(); 711 const uint64_t FDEAddress = 712 OldEHFrame.getEHFrameAddress() + FDE->getOffset(); 713 714 // Add the address if we failed to write it. 715 if (PCToFDE.count(FuncAddress) == 0) { 716 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x" 717 << Twine::utohexstr(FuncAddress) << " is at 0x" 718 << Twine::utohexstr(FDEAddress) << '\n'); 719 PCToFDE[FuncAddress] = FDEAddress; 720 } 721 }; 722 723 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains " 724 << std::distance(OldEHFrame.entries().begin(), 725 OldEHFrame.entries().end()) 726 << " entries\n"); 727 728 // Generate a new .eh_frame_hdr based on the new map. 729 730 // Header plus table of entries of size 8 bytes. 731 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8); 732 733 // Version is 1. 734 EHFrameHeader[0] = 1; 735 // Encoding of the eh_frame pointer. 736 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; 737 // Encoding of the count field to follow. 738 EHFrameHeader[2] = DW_EH_PE_udata4; 739 // Encoding of the table entries - 4-byte offset from the start of the header. 740 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; 741 742 // Address of eh_frame. Use the new one. 743 support::ulittle32_t::ref(EHFrameHeader.data() + 4) = 744 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4); 745 746 // Number of entries in the table (FDE count). 747 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size(); 748 749 // Write the table at offset 12. 750 char *Ptr = EHFrameHeader.data(); 751 uint32_t Offset = 12; 752 for (const auto &PCI : PCToFDE) { 753 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress; 754 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds"); 755 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset; 756 Offset += 4; 757 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress; 758 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds"); 759 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset; 760 Offset += 4; 761 } 762 763 return EHFrameHeader; 764 } 765 766 Error EHFrameParser::parseCIE(uint64_t StartOffset) { 767 uint8_t Version = Data.getU8(&Offset); 768 const char *Augmentation = Data.getCStr(&Offset); 769 StringRef AugmentationString(Augmentation ? Augmentation : ""); 770 uint8_t AddressSize = 771 Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); 772 Data.setAddressSize(AddressSize); 773 // Skip segment descriptor size 774 if (Version >= 4) 775 Offset += 1; 776 // Skip code alignment factor 777 Data.getULEB128(&Offset); 778 // Skip data alignment 779 Data.getSLEB128(&Offset); 780 // Skip return address register 781 if (Version == 1) { 782 Offset += 1; 783 } else { 784 Data.getULEB128(&Offset); 785 } 786 787 uint32_t FDEPointerEncoding = DW_EH_PE_absptr; 788 uint32_t LSDAPointerEncoding = DW_EH_PE_omit; 789 // Walk the augmentation string to get all the augmentation data. 790 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { 791 switch (AugmentationString[i]) { 792 default: 793 return createStringError( 794 errc::invalid_argument, 795 "unknown augmentation character in entry at 0x%" PRIx64, StartOffset); 796 case 'L': 797 LSDAPointerEncoding = Data.getU8(&Offset); 798 break; 799 case 'P': { 800 uint32_t PersonalityEncoding = Data.getU8(&Offset); 801 Optional<uint64_t> Personality = 802 Data.getEncodedPointer(&Offset, PersonalityEncoding, 803 EHFrameAddress ? EHFrameAddress + Offset : 0); 804 // Patch personality address 805 if (Personality) 806 PatcherCallback(*Personality, Offset, PersonalityEncoding); 807 break; 808 } 809 case 'R': 810 FDEPointerEncoding = Data.getU8(&Offset); 811 break; 812 case 'z': 813 if (i) 814 return createStringError( 815 errc::invalid_argument, 816 "'z' must be the first character at 0x%" PRIx64, StartOffset); 817 // Skip augmentation length 818 Data.getULEB128(&Offset); 819 break; 820 case 'S': 821 case 'B': 822 break; 823 } 824 } 825 Entries.emplace_back(std::make_unique<CIEInfo>( 826 FDEPointerEncoding, LSDAPointerEncoding, AugmentationString)); 827 CIEs[StartOffset] = &*Entries.back(); 828 return Error::success(); 829 } 830 831 Error EHFrameParser::parseFDE(uint64_t CIEPointer, 832 uint64_t StartStructureOffset) { 833 Optional<uint64_t> LSDAAddress; 834 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer]; 835 836 // The address size is encoded in the CIE we reference. 837 if (!Cie) 838 return createStringError(errc::invalid_argument, 839 "parsing FDE data at 0x%" PRIx64 840 " failed due to missing CIE", 841 StartStructureOffset); 842 // Patch initial location 843 if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 844 EHFrameAddress + Offset)) { 845 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding); 846 } 847 // Skip address range 848 Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0); 849 850 // Process augmentation data for this FDE. 851 StringRef AugmentationString = Cie->AugmentationString; 852 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) { 853 // Skip augmentation length 854 Data.getULEB128(&Offset); 855 LSDAAddress = 856 Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding, 857 EHFrameAddress ? Offset + EHFrameAddress : 0); 858 // Patch LSDA address 859 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding); 860 } 861 return Error::success(); 862 } 863 864 Error EHFrameParser::parse() { 865 while (Data.isValidOffset(Offset)) { 866 const uint64_t StartOffset = Offset; 867 868 uint64_t Length; 869 DwarfFormat Format; 870 std::tie(Length, Format) = Data.getInitialLength(&Offset); 871 872 // If the Length is 0, then this CIE is a terminator 873 if (Length == 0) 874 break; 875 876 const uint64_t StartStructureOffset = Offset; 877 const uint64_t EndStructureOffset = Offset + Length; 878 879 Error Err = Error::success(); 880 const uint64_t Id = Data.getRelocatedValue(4, &Offset, 881 /*SectionIndex=*/nullptr, &Err); 882 if (Err) 883 return Err; 884 885 if (!Id) { 886 if (Error Err = parseCIE(StartOffset)) 887 return Err; 888 } else { 889 if (Error Err = parseFDE(Id, StartStructureOffset)) 890 return Err; 891 } 892 Offset = EndStructureOffset; 893 } 894 895 return Error::success(); 896 } 897 898 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress, 899 PatcherCallbackTy PatcherCallback) { 900 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback); 901 return Parser.parse(); 902 } 903 904 } // namespace bolt 905 } // namespace llvm 906