1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements functions for handling C++ exception meta data. 10 // 11 // Some of the code is taken from examples/ExceptionDemo 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "bolt/Core/Exceptions.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/BinaryFormat/Dwarf.h" 20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 21 #include "llvm/Support/Casting.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/Errc.h" 25 #include "llvm/Support/LEB128.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <map> 29 30 #undef DEBUG_TYPE 31 #define DEBUG_TYPE "bolt-exceptions" 32 33 using namespace llvm::dwarf; 34 35 namespace opts { 36 37 extern llvm::cl::OptionCategory BoltCategory; 38 39 extern llvm::cl::opt<unsigned> Verbosity; 40 41 static llvm::cl::opt<bool> 42 PrintExceptions("print-exceptions", 43 llvm::cl::desc("print exception handling data"), 44 llvm::cl::ZeroOrMore, 45 llvm::cl::Hidden, 46 llvm::cl::cat(BoltCategory)); 47 48 } // namespace opts 49 50 namespace llvm { 51 namespace bolt { 52 53 // Read and dump the .gcc_exception_table section entry. 54 // 55 // .gcc_except_table section contains a set of Language-Specific Data Areas - 56 // a fancy name for exception handling tables. There's one LSDA entry per 57 // function. However, we can't actually tell which function LSDA refers to 58 // unless we parse .eh_frame entry that refers to the LSDA. 59 // Then inside LSDA most addresses are encoded relative to the function start, 60 // so we need the function context in order to get to real addresses. 61 // 62 // The best visual representation of the tables comprising LSDA and 63 // relationships between them is illustrated at: 64 // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf 65 // Keep in mind that GCC implementation deviates slightly from that document. 66 // 67 // To summarize, there are 4 tables in LSDA: call site table, actions table, 68 // types table, and types index table (for indirection). The main table contains 69 // call site entries. Each call site includes a PC range that can throw an 70 // exception, a handler (landing pad), and a reference to an entry in the action 71 // table. The handler and/or action could be 0. The action entry is a head 72 // of a list of actions associated with a call site. The action table contains 73 // all such lists (it could be optimized to share list tails). Each action could 74 // be either to catch an exception of a given type, to perform a cleanup, or to 75 // propagate the exception after filtering it out (e.g. to make sure function 76 // exception specification is not violated). Catch action contains a reference 77 // to an entry in the type table, and filter action refers to an entry in the 78 // type index table to encode a set of types to filter. 79 // 80 // Call site table follows LSDA header. Action table immediately follows the 81 // call site table. 82 // 83 // Both types table and type index table start at the same location, but they 84 // grow in opposite directions (types go up, indices go down). The beginning of 85 // these tables is encoded in LSDA header. Sizes for both of the tables are not 86 // included anywhere. 87 // 88 // We have to parse all of the tables to determine their sizes. Then we have 89 // to parse the call site table and associate discovered information with 90 // actual call instructions and landing pad blocks. 91 // 92 // For the purpose of rewriting exception handling tables, we can reuse action, 93 // and type index tables in their original binary format. 94 // 95 // Type table could be encoded using position-independent references, and thus 96 // may require relocation. 97 // 98 // Ideally we should be able to re-write LSDA in-place, without the need to 99 // allocate a new space for it. Sadly there's no guarantee that the new call 100 // site table will be the same size as GCC uses uleb encodings for PC offsets. 101 // 102 // Note: some functions have LSDA entries with 0 call site entries. 103 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData, 104 uint64_t LSDASectionAddress) { 105 assert(CurrentState == State::Disassembled && "unexpected function state"); 106 107 if (!getLSDAAddress()) 108 return; 109 110 DWARFDataExtractor Data( 111 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()), 112 LSDASectionData.size()), 113 BC.DwCtx->getDWARFObj().isLittleEndian(), 8); 114 uint64_t Offset = getLSDAAddress() - LSDASectionAddress; 115 assert(Data.isValidOffset(Offset) && "wrong LSDA address"); 116 117 uint8_t LPStartEncoding = Data.getU8(&Offset); 118 uint64_t LPStart = 0; 119 if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer( 120 &Offset, LPStartEncoding, Offset + LSDASectionAddress)) 121 LPStart = *MaybeLPStart; 122 123 assert(LPStart == 0 && "support for split functions not implemented"); 124 125 const uint8_t TTypeEncoding = Data.getU8(&Offset); 126 size_t TTypeEncodingSize = 0; 127 uintptr_t TTypeEnd = 0; 128 if (TTypeEncoding != DW_EH_PE_omit) { 129 TTypeEnd = Data.getULEB128(&Offset); 130 TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); 131 } 132 133 if (opts::PrintExceptions) { 134 outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress()) 135 << " for function " << *this << "]:\n"; 136 outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding) 137 << '\n'; 138 outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n'; 139 outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n'; 140 outs() << "TType End = " << TTypeEnd << '\n'; 141 } 142 143 // Table to store list of indices in type table. Entries are uleb128 values. 144 const uint64_t TypeIndexTableStart = Offset + TTypeEnd; 145 146 // Offset past the last decoded index. 147 uint64_t MaxTypeIndexTableOffset = 0; 148 149 // Max positive index used in type table. 150 unsigned MaxTypeIndex = 0; 151 152 // The actual type info table starts at the same location, but grows in 153 // opposite direction. TTypeEncoding is used to encode stored values. 154 const uint64_t TypeTableStart = Offset + TTypeEnd; 155 156 uint8_t CallSiteEncoding = Data.getU8(&Offset); 157 uint32_t CallSiteTableLength = Data.getULEB128(&Offset); 158 uint64_t CallSiteTableStart = Offset; 159 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength; 160 uint64_t CallSitePtr = CallSiteTableStart; 161 uint64_t ActionTableStart = CallSiteTableEnd; 162 163 if (opts::PrintExceptions) { 164 outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; 165 outs() << "CallSite table length = " << CallSiteTableLength << '\n'; 166 outs() << '\n'; 167 } 168 169 this->HasEHRanges = CallSitePtr < CallSiteTableEnd; 170 const uint64_t RangeBase = getAddress(); 171 while (CallSitePtr < CallSiteTableEnd) { 172 uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 173 CallSitePtr + LSDASectionAddress); 174 uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 175 CallSitePtr + LSDASectionAddress); 176 uint64_t LandingPad = *Data.getEncodedPointer( 177 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress); 178 uint64_t ActionEntry = Data.getULEB128(&CallSitePtr); 179 180 if (opts::PrintExceptions) { 181 outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) 182 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) 183 << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad) 184 << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n"; 185 outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) 186 << '\n'; 187 } 188 189 // Create a handler entry if necessary. 190 MCSymbol *LPSymbol = nullptr; 191 if (LandingPad) { 192 if (!getInstructionAtOffset(LandingPad)) { 193 if (opts::Verbosity >= 1) 194 errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad) 195 << " not pointing to an instruction in function " << *this 196 << " - ignoring.\n"; 197 } else { 198 auto Label = Labels.find(LandingPad); 199 if (Label != Labels.end()) { 200 LPSymbol = Label->second; 201 } else { 202 LPSymbol = BC.Ctx->createNamedTempSymbol("LP"); 203 Labels[LandingPad] = LPSymbol; 204 } 205 } 206 } 207 208 // Mark all call instructions in the range. 209 auto II = Instructions.find(Start); 210 auto IE = Instructions.end(); 211 assert(II != IE && "exception range not pointing to an instruction"); 212 do { 213 MCInst &Instruction = II->second; 214 if (BC.MIB->isCall(Instruction) && 215 !BC.MIB->getConditionalTailCall(Instruction)) { 216 assert(!BC.MIB->isInvoke(Instruction) && 217 "overlapping exception ranges detected"); 218 // Add extra operands to a call instruction making it an invoke from 219 // now on. 220 BC.MIB->addEHInfo(Instruction, 221 MCPlus::MCLandingPad(LPSymbol, ActionEntry)); 222 } 223 ++II; 224 } while (II != IE && II->first < Start + Length); 225 226 if (ActionEntry != 0) { 227 auto printType = [&](int Index, raw_ostream &OS) { 228 assert(Index > 0 && "only positive indices are valid"); 229 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 230 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 231 uint64_t TypeAddress = 232 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 233 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) 234 TypeAddress = 0; 235 if (TypeAddress == 0) { 236 OS << "<all>"; 237 return; 238 } 239 if (TTypeEncoding & DW_EH_PE_indirect) { 240 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 241 assert(PointerOrErr && "failed to decode indirect address"); 242 TypeAddress = *PointerOrErr; 243 } 244 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) 245 OS << TypeSymBD->getName(); 246 else 247 OS << "0x" << Twine::utohexstr(TypeAddress); 248 }; 249 if (opts::PrintExceptions) 250 outs() << " actions: "; 251 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1; 252 int64_t ActionType; 253 int64_t ActionNext; 254 const char *Sep = ""; 255 do { 256 ActionType = Data.getSLEB128(&ActionPtr); 257 const uint32_t Self = ActionPtr; 258 ActionNext = Data.getSLEB128(&ActionPtr); 259 if (opts::PrintExceptions) 260 outs() << Sep << "(" << ActionType << ", " << ActionNext << ") "; 261 if (ActionType == 0) { 262 if (opts::PrintExceptions) 263 outs() << "cleanup"; 264 } else if (ActionType > 0) { 265 // It's an index into a type table. 266 MaxTypeIndex = 267 std::max(MaxTypeIndex, static_cast<unsigned>(ActionType)); 268 if (opts::PrintExceptions) { 269 outs() << "catch type "; 270 printType(ActionType, outs()); 271 } 272 } else { // ActionType < 0 273 if (opts::PrintExceptions) 274 outs() << "filter exception types "; 275 const char *TSep = ""; 276 // ActionType is a negative *byte* offset into *uleb128-encoded* table 277 // of indices with base 1. 278 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are 279 // encoded using uleb128 thus we cannot directly dereference them. 280 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1; 281 while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) { 282 MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index)); 283 if (opts::PrintExceptions) { 284 outs() << TSep; 285 printType(Index, outs()); 286 TSep = ", "; 287 } 288 } 289 MaxTypeIndexTableOffset = std::max( 290 MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart); 291 } 292 293 Sep = "; "; 294 295 ActionPtr = Self + ActionNext; 296 } while (ActionNext); 297 if (opts::PrintExceptions) 298 outs() << '\n'; 299 } 300 } 301 if (opts::PrintExceptions) 302 outs() << '\n'; 303 304 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <= 305 Data.getData().size() && 306 "LSDA entry has crossed section boundary"); 307 308 if (TTypeEnd) { 309 LSDAActionTable = LSDASectionData.slice( 310 ActionTableStart, TypeIndexTableStart - 311 MaxTypeIndex * TTypeEncodingSize - 312 ActionTableStart); 313 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) { 314 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 315 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 316 uint64_t TypeAddress = 317 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 318 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) 319 TypeAddress = 0; 320 if (TTypeEncoding & DW_EH_PE_indirect) { 321 LSDATypeAddressTable.emplace_back(TypeAddress); 322 if (TypeAddress) { 323 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 324 assert(PointerOrErr && "failed to decode indirect address"); 325 TypeAddress = *PointerOrErr; 326 } 327 } 328 LSDATypeTable.emplace_back(TypeAddress); 329 } 330 LSDATypeIndexTable = 331 LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset); 332 } 333 } 334 335 void BinaryFunction::updateEHRanges() { 336 if (getSize() == 0) 337 return; 338 339 assert(CurrentState == State::CFG_Finalized && "unexpected state"); 340 341 // Build call sites table. 342 struct EHInfo { 343 const MCSymbol *LP; // landing pad 344 uint64_t Action; 345 }; 346 347 // If previous call can throw, this is its exception handler. 348 EHInfo PreviousEH = {nullptr, 0}; 349 350 // Marker for the beginning of exceptions range. 351 const MCSymbol *StartRange = nullptr; 352 353 // Indicates whether the start range is located in a cold part. 354 bool IsStartInCold = false; 355 356 // Have we crossed hot/cold border for split functions? 357 bool SeenCold = false; 358 359 // Sites to update - either regular or cold. 360 CallSitesType *Sites = &CallSites; 361 362 for (BinaryBasicBlock *&BB : BasicBlocksLayout) { 363 364 if (BB->isCold() && !SeenCold) { 365 SeenCold = true; 366 367 // Close the range (if any) and change the target call sites. 368 if (StartRange) { 369 Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(), 370 PreviousEH.LP, PreviousEH.Action}); 371 } 372 Sites = &ColdCallSites; 373 374 // Reset the range. 375 StartRange = nullptr; 376 PreviousEH = {nullptr, 0}; 377 } 378 379 for (auto II = BB->begin(); II != BB->end(); ++II) { 380 if (!BC.MIB->isCall(*II)) 381 continue; 382 383 // Instruction can throw an exception that should be handled. 384 const bool Throws = BC.MIB->isInvoke(*II); 385 386 // Ignore the call if it's a continuation of a no-throw gap. 387 if (!Throws && !StartRange) 388 continue; 389 390 // Extract exception handling information from the instruction. 391 const MCSymbol *LP = nullptr; 392 uint64_t Action = 0; 393 if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II)) 394 std::tie(LP, Action) = *EHInfo; 395 396 // No action if the exception handler has not changed. 397 if (Throws && StartRange && PreviousEH.LP == LP && 398 PreviousEH.Action == Action) 399 continue; 400 401 // Same symbol is used for the beginning and the end of the range. 402 const MCSymbol *EHSymbol; 403 MCInst EHLabel; 404 { 405 std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex); 406 EHSymbol = BC.Ctx->createNamedTempSymbol("EH"); 407 BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get()); 408 } 409 410 II = std::next(BB->insertPseudoInstr(II, EHLabel)); 411 412 // At this point we could be in one of the following states: 413 // 414 // I. Exception handler has changed and we need to close previous range 415 // and start a new one. 416 // 417 // II. Start a new exception range after the gap. 418 // 419 // III. Close current exception range and start a new gap. 420 const MCSymbol *EndRange; 421 if (StartRange) { 422 // I, III: 423 EndRange = EHSymbol; 424 } else { 425 // II: 426 StartRange = EHSymbol; 427 IsStartInCold = SeenCold; 428 EndRange = nullptr; 429 } 430 431 // Close the previous range. 432 if (EndRange) { 433 Sites->emplace_back( 434 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 435 } 436 437 if (Throws) { 438 // I, II: 439 StartRange = EHSymbol; 440 IsStartInCold = SeenCold; 441 PreviousEH = EHInfo{LP, Action}; 442 } else { 443 StartRange = nullptr; 444 } 445 } 446 } 447 448 // Check if we need to close the range. 449 if (StartRange) { 450 assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch"); 451 const MCSymbol *EndRange = 452 IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel(); 453 Sites->emplace_back( 454 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 455 } 456 } 457 458 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; 459 460 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) { 461 // Prepare FDEs for fast lookup 462 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) { 463 const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry); 464 // Skip CIEs. 465 if (!CurFDE) 466 continue; 467 // There could me multiple FDEs with the same initial address, and perhaps 468 // different sizes (address ranges). Use the first entry with non-zero size. 469 auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation()); 470 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) { 471 if (CurFDE->getAddressRange()) { 472 if (FDEI->second->getAddressRange() == 0) { 473 FDEI->second = CurFDE; 474 } else if (opts::Verbosity > 0) { 475 errs() << "BOLT-WARNING: different FDEs for function at 0x" 476 << Twine::utohexstr(FDEI->first) 477 << " detected; sizes: " << FDEI->second->getAddressRange() 478 << " and " << CurFDE->getAddressRange() << '\n'; 479 } 480 } 481 } else { 482 FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE); 483 } 484 } 485 } 486 487 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { 488 uint64_t Address = Function.getAddress(); 489 auto I = FDEs.find(Address); 490 // Ignore zero-length FDE ranges. 491 if (I == FDEs.end() || !I->second->getAddressRange()) 492 return true; 493 494 const FDE &CurFDE = *I->second; 495 Optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); 496 Function.setLSDAAddress(LSDA ? *LSDA : 0); 497 498 uint64_t Offset = 0; 499 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); 500 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor(); 501 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) { 502 Function.setPersonalityFunction( 503 *CurFDE.getLinkedCIE()->getPersonalityAddress()); 504 Function.setPersonalityEncoding( 505 *CurFDE.getLinkedCIE()->getPersonalityEncoding()); 506 } 507 508 auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment, 509 DataAlignment]( 510 const CFIProgram::Instruction &Instr) { 511 uint8_t Opcode = Instr.Opcode; 512 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) 513 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; 514 switch (Instr.Opcode) { 515 case DW_CFA_nop: 516 break; 517 case DW_CFA_advance_loc4: 518 case DW_CFA_advance_loc2: 519 case DW_CFA_advance_loc1: 520 case DW_CFA_advance_loc: 521 // Advance our current address 522 Offset += CodeAlignment * int64_t(Instr.Ops[0]); 523 break; 524 case DW_CFA_offset_extended_sf: 525 Function.addCFIInstruction( 526 Offset, 527 MCCFIInstruction::createOffset( 528 nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1]))); 529 break; 530 case DW_CFA_offset_extended: 531 case DW_CFA_offset: 532 Function.addCFIInstruction( 533 Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0], 534 DataAlignment * Instr.Ops[1])); 535 break; 536 case DW_CFA_restore_extended: 537 case DW_CFA_restore: 538 Function.addCFIInstruction( 539 Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0])); 540 break; 541 case DW_CFA_set_loc: 542 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds"); 543 assert(Instr.Ops[0] <= Address + Function.getSize() && 544 "set_loc out of function bounds"); 545 Offset = Instr.Ops[0] - Address; 546 break; 547 548 case DW_CFA_undefined: 549 Function.addCFIInstruction( 550 Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0])); 551 break; 552 case DW_CFA_same_value: 553 Function.addCFIInstruction( 554 Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0])); 555 break; 556 case DW_CFA_register: 557 Function.addCFIInstruction( 558 Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0], 559 Instr.Ops[1])); 560 break; 561 case DW_CFA_remember_state: 562 Function.addCFIInstruction( 563 Offset, MCCFIInstruction::createRememberState(nullptr)); 564 break; 565 case DW_CFA_restore_state: 566 Function.addCFIInstruction(Offset, 567 MCCFIInstruction::createRestoreState(nullptr)); 568 break; 569 case DW_CFA_def_cfa: 570 Function.addCFIInstruction( 571 Offset, 572 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1])); 573 break; 574 case DW_CFA_def_cfa_sf: 575 Function.addCFIInstruction( 576 Offset, 577 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], 578 DataAlignment * int64_t(Instr.Ops[1]))); 579 break; 580 case DW_CFA_def_cfa_register: 581 Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister( 582 nullptr, Instr.Ops[0])); 583 break; 584 case DW_CFA_def_cfa_offset: 585 Function.addCFIInstruction( 586 Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0])); 587 break; 588 case DW_CFA_def_cfa_offset_sf: 589 Function.addCFIInstruction( 590 Offset, MCCFIInstruction::cfiDefCfaOffset( 591 nullptr, DataAlignment * int64_t(Instr.Ops[0]))); 592 break; 593 case DW_CFA_GNU_args_size: 594 Function.addCFIInstruction( 595 Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0])); 596 Function.setUsesGnuArgsSize(); 597 break; 598 case DW_CFA_val_offset_sf: 599 case DW_CFA_val_offset: 600 if (opts::Verbosity >= 1) { 601 errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n"; 602 } 603 return false; 604 case DW_CFA_def_cfa_expression: 605 case DW_CFA_val_expression: 606 case DW_CFA_expression: { 607 StringRef ExprBytes = Instr.Expression->getData(); 608 std::string Str; 609 raw_string_ostream OS(Str); 610 // Manually encode this instruction using CFI escape 611 OS << Opcode; 612 if (Opcode != DW_CFA_def_cfa_expression) 613 encodeULEB128(Instr.Ops[0], OS); 614 encodeULEB128(ExprBytes.size(), OS); 615 OS << ExprBytes; 616 Function.addCFIInstruction( 617 Offset, MCCFIInstruction::createEscape(nullptr, OS.str())); 618 break; 619 } 620 case DW_CFA_MIPS_advance_loc8: 621 if (opts::Verbosity >= 1) 622 errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; 623 return false; 624 case DW_CFA_GNU_window_save: 625 case DW_CFA_lo_user: 626 case DW_CFA_hi_user: 627 if (opts::Verbosity >= 1) { 628 errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user " 629 "unimplemented\n"; 630 } 631 return false; 632 default: 633 if (opts::Verbosity >= 1) { 634 errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode 635 << '\n'; 636 } 637 return false; 638 } 639 640 return true; 641 }; 642 643 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) 644 if (!decodeFrameInstruction(Instr)) 645 return false; 646 647 for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) 648 if (!decodeFrameInstruction(Instr)) 649 return false; 650 651 return true; 652 } 653 654 std::vector<char> CFIReaderWriter::generateEHFrameHeader( 655 const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame, 656 uint64_t EHFrameHeaderAddress, 657 std::vector<uint64_t> &FailedAddresses) const { 658 // Common PC -> FDE map to be written into .eh_frame_hdr. 659 std::map<uint64_t, uint64_t> PCToFDE; 660 661 // Presort array for binary search. 662 std::sort(FailedAddresses.begin(), FailedAddresses.end()); 663 664 // Initialize PCToFDE using NewEHFrame. 665 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) { 666 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 667 if (FDE == nullptr) 668 continue; 669 const uint64_t FuncAddress = FDE->getInitialLocation(); 670 const uint64_t FDEAddress = 671 NewEHFrame.getEHFrameAddress() + FDE->getOffset(); 672 673 // Ignore unused FDEs. 674 if (FuncAddress == 0) 675 continue; 676 677 // Add the address to the map unless we failed to write it. 678 if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(), 679 FuncAddress)) { 680 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x" 681 << Twine::utohexstr(FuncAddress) << " is at 0x" 682 << Twine::utohexstr(FDEAddress) << '\n'); 683 PCToFDE[FuncAddress] = FDEAddress; 684 } 685 }; 686 687 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains " 688 << std::distance(NewEHFrame.entries().begin(), 689 NewEHFrame.entries().end()) 690 << " entries\n"); 691 692 // Add entries from the original .eh_frame corresponding to the functions 693 // that we did not update. 694 for (const dwarf::FrameEntry &Entry : OldEHFrame) { 695 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 696 if (FDE == nullptr) 697 continue; 698 const uint64_t FuncAddress = FDE->getInitialLocation(); 699 const uint64_t FDEAddress = 700 OldEHFrame.getEHFrameAddress() + FDE->getOffset(); 701 702 // Add the address if we failed to write it. 703 if (PCToFDE.count(FuncAddress) == 0) { 704 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x" 705 << Twine::utohexstr(FuncAddress) << " is at 0x" 706 << Twine::utohexstr(FDEAddress) << '\n'); 707 PCToFDE[FuncAddress] = FDEAddress; 708 } 709 }; 710 711 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains " 712 << std::distance(OldEHFrame.entries().begin(), 713 OldEHFrame.entries().end()) 714 << " entries\n"); 715 716 // Generate a new .eh_frame_hdr based on the new map. 717 718 // Header plus table of entries of size 8 bytes. 719 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8); 720 721 // Version is 1. 722 EHFrameHeader[0] = 1; 723 // Encoding of the eh_frame pointer. 724 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; 725 // Encoding of the count field to follow. 726 EHFrameHeader[2] = DW_EH_PE_udata4; 727 // Encoding of the table entries - 4-byte offset from the start of the header. 728 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; 729 730 // Address of eh_frame. Use the new one. 731 support::ulittle32_t::ref(EHFrameHeader.data() + 4) = 732 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4); 733 734 // Number of entries in the table (FDE count). 735 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size(); 736 737 // Write the table at offset 12. 738 char *Ptr = EHFrameHeader.data(); 739 uint32_t Offset = 12; 740 for (const auto &PCI : PCToFDE) { 741 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress; 742 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds"); 743 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset; 744 Offset += 4; 745 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress; 746 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds"); 747 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset; 748 Offset += 4; 749 } 750 751 return EHFrameHeader; 752 } 753 754 Error EHFrameParser::parseCIE(uint64_t StartOffset) { 755 uint8_t Version = Data.getU8(&Offset); 756 const char *Augmentation = Data.getCStr(&Offset); 757 StringRef AugmentationString(Augmentation ? Augmentation : ""); 758 uint8_t AddressSize = 759 Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); 760 Data.setAddressSize(AddressSize); 761 // Skip segment descriptor size 762 if (Version >= 4) 763 Offset += 1; 764 // Skip code alignment factor 765 Data.getULEB128(&Offset); 766 // Skip data alignment 767 Data.getSLEB128(&Offset); 768 // Skip return address register 769 if (Version == 1) 770 Offset += 1; 771 else 772 Data.getULEB128(&Offset); 773 774 uint32_t FDEPointerEncoding = DW_EH_PE_absptr; 775 uint32_t LSDAPointerEncoding = DW_EH_PE_omit; 776 // Walk the augmentation string to get all the augmentation data. 777 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { 778 switch (AugmentationString[i]) { 779 default: 780 return createStringError( 781 errc::invalid_argument, 782 "unknown augmentation character in entry at 0x%" PRIx64, StartOffset); 783 case 'L': 784 LSDAPointerEncoding = Data.getU8(&Offset); 785 break; 786 case 'P': { 787 uint32_t PersonalityEncoding = Data.getU8(&Offset); 788 Optional<uint64_t> Personality = 789 Data.getEncodedPointer(&Offset, PersonalityEncoding, 790 EHFrameAddress ? EHFrameAddress + Offset : 0); 791 // Patch personality address 792 if (Personality) 793 PatcherCallback(*Personality, Offset, PersonalityEncoding); 794 break; 795 } 796 case 'R': 797 FDEPointerEncoding = Data.getU8(&Offset); 798 break; 799 case 'z': 800 if (i) 801 return createStringError( 802 errc::invalid_argument, 803 "'z' must be the first character at 0x%" PRIx64, StartOffset); 804 // Skip augmentation length 805 Data.getULEB128(&Offset); 806 break; 807 case 'S': 808 case 'B': 809 break; 810 } 811 } 812 Entries.emplace_back(std::make_unique<CIEInfo>( 813 FDEPointerEncoding, LSDAPointerEncoding, AugmentationString)); 814 CIEs[StartOffset] = &*Entries.back(); 815 return Error::success(); 816 } 817 818 Error EHFrameParser::parseFDE(uint64_t CIEPointer, 819 uint64_t StartStructureOffset) { 820 Optional<uint64_t> LSDAAddress; 821 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer]; 822 823 // The address size is encoded in the CIE we reference. 824 if (!Cie) 825 return createStringError(errc::invalid_argument, 826 "parsing FDE data at 0x%" PRIx64 827 " failed due to missing CIE", 828 StartStructureOffset); 829 // Patch initial location 830 if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 831 EHFrameAddress + Offset)) { 832 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding); 833 } 834 // Skip address range 835 Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0); 836 837 // Process augmentation data for this FDE. 838 StringRef AugmentationString = Cie->AugmentationString; 839 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) { 840 // Skip augmentation length 841 Data.getULEB128(&Offset); 842 LSDAAddress = 843 Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding, 844 EHFrameAddress ? Offset + EHFrameAddress : 0); 845 // Patch LSDA address 846 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding); 847 } 848 return Error::success(); 849 } 850 851 Error EHFrameParser::parse() { 852 while (Data.isValidOffset(Offset)) { 853 const uint64_t StartOffset = Offset; 854 855 uint64_t Length; 856 DwarfFormat Format; 857 std::tie(Length, Format) = Data.getInitialLength(&Offset); 858 859 // If the Length is 0, then this CIE is a terminator 860 if (Length == 0) 861 break; 862 863 const uint64_t StartStructureOffset = Offset; 864 const uint64_t EndStructureOffset = Offset + Length; 865 866 Error Err = Error::success(); 867 const uint64_t Id = Data.getRelocatedValue(4, &Offset, 868 /*SectionIndex=*/nullptr, &Err); 869 if (Err) 870 return Err; 871 872 if (!Id) { 873 if (Error Err = parseCIE(StartOffset)) 874 return Err; 875 } else { 876 if (Error Err = parseFDE(Id, StartStructureOffset)) 877 return Err; 878 } 879 Offset = EndStructureOffset; 880 } 881 882 return Error::success(); 883 } 884 885 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress, 886 PatcherCallbackTy PatcherCallback) { 887 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback); 888 return Parser.parse(); 889 } 890 891 } // namespace bolt 892 } // namespace llvm 893