1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements functions for handling C++ exception meta data. 10 // 11 // Some of the code is taken from examples/ExceptionDemo 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "bolt/Core/Exceptions.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/Twine.h" 19 #include "llvm/BinaryFormat/Dwarf.h" 20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 21 #include "llvm/Support/Casting.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/LEB128.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <map> 28 29 #undef DEBUG_TYPE 30 #define DEBUG_TYPE "bolt-exceptions" 31 32 using namespace llvm::dwarf; 33 34 namespace opts { 35 36 extern llvm::cl::OptionCategory BoltCategory; 37 38 extern llvm::cl::opt<unsigned> Verbosity; 39 40 static llvm::cl::opt<bool> 41 PrintExceptions("print-exceptions", 42 llvm::cl::desc("print exception handling data"), 43 llvm::cl::ZeroOrMore, 44 llvm::cl::Hidden, 45 llvm::cl::cat(BoltCategory)); 46 47 } // namespace opts 48 49 namespace llvm { 50 namespace bolt { 51 52 // Read and dump the .gcc_exception_table section entry. 53 // 54 // .gcc_except_table section contains a set of Language-Specific Data Areas - 55 // a fancy name for exception handling tables. There's one LSDA entry per 56 // function. However, we can't actually tell which function LSDA refers to 57 // unless we parse .eh_frame entry that refers to the LSDA. 58 // Then inside LSDA most addresses are encoded relative to the function start, 59 // so we need the function context in order to get to real addresses. 60 // 61 // The best visual representation of the tables comprising LSDA and 62 // relationships between them is illustrated at: 63 // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf 64 // Keep in mind that GCC implementation deviates slightly from that document. 65 // 66 // To summarize, there are 4 tables in LSDA: call site table, actions table, 67 // types table, and types index table (for indirection). The main table contains 68 // call site entries. Each call site includes a PC range that can throw an 69 // exception, a handler (landing pad), and a reference to an entry in the action 70 // table. The handler and/or action could be 0. The action entry is a head 71 // of a list of actions associated with a call site. The action table contains 72 // all such lists (it could be optimized to share list tails). Each action could 73 // be either to catch an exception of a given type, to perform a cleanup, or to 74 // propagate the exception after filtering it out (e.g. to make sure function 75 // exception specification is not violated). Catch action contains a reference 76 // to an entry in the type table, and filter action refers to an entry in the 77 // type index table to encode a set of types to filter. 78 // 79 // Call site table follows LSDA header. Action table immediately follows the 80 // call site table. 81 // 82 // Both types table and type index table start at the same location, but they 83 // grow in opposite directions (types go up, indices go down). The beginning of 84 // these tables is encoded in LSDA header. Sizes for both of the tables are not 85 // included anywhere. 86 // 87 // We have to parse all of the tables to determine their sizes. Then we have 88 // to parse the call site table and associate discovered information with 89 // actual call instructions and landing pad blocks. 90 // 91 // For the purpose of rewriting exception handling tables, we can reuse action, 92 // and type index tables in their original binary format. 93 // 94 // Type table could be encoded using position-independent references, and thus 95 // may require relocation. 96 // 97 // Ideally we should be able to re-write LSDA in-place, without the need to 98 // allocate a new space for it. Sadly there's no guarantee that the new call 99 // site table will be the same size as GCC uses uleb encodings for PC offsets. 100 // 101 // Note: some functions have LSDA entries with 0 call site entries. 102 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData, 103 uint64_t LSDASectionAddress) { 104 assert(CurrentState == State::Disassembled && "unexpected function state"); 105 106 if (!getLSDAAddress()) 107 return; 108 109 DWARFDataExtractor Data( 110 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()), 111 LSDASectionData.size()), 112 BC.DwCtx->getDWARFObj().isLittleEndian(), 8); 113 uint64_t Offset = getLSDAAddress() - LSDASectionAddress; 114 assert(Data.isValidOffset(Offset) && "wrong LSDA address"); 115 116 uint8_t LPStartEncoding = Data.getU8(&Offset); 117 uint64_t LPStart = 0; 118 if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer( 119 &Offset, LPStartEncoding, Offset + LSDASectionAddress)) 120 LPStart = *MaybeLPStart; 121 122 assert(LPStart == 0 && "support for split functions not implemented"); 123 124 const uint8_t TTypeEncoding = Data.getU8(&Offset); 125 size_t TTypeEncodingSize = 0; 126 uintptr_t TTypeEnd = 0; 127 if (TTypeEncoding != DW_EH_PE_omit) { 128 TTypeEnd = Data.getULEB128(&Offset); 129 TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); 130 } 131 132 if (opts::PrintExceptions) { 133 outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress()) 134 << " for function " << *this << "]:\n"; 135 outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding) 136 << '\n'; 137 outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n'; 138 outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n'; 139 outs() << "TType End = " << TTypeEnd << '\n'; 140 } 141 142 // Table to store list of indices in type table. Entries are uleb128 values. 143 const uint64_t TypeIndexTableStart = Offset + TTypeEnd; 144 145 // Offset past the last decoded index. 146 uint64_t MaxTypeIndexTableOffset = 0; 147 148 // Max positive index used in type table. 149 unsigned MaxTypeIndex = 0; 150 151 // The actual type info table starts at the same location, but grows in 152 // opposite direction. TTypeEncoding is used to encode stored values. 153 const uint64_t TypeTableStart = Offset + TTypeEnd; 154 155 uint8_t CallSiteEncoding = Data.getU8(&Offset); 156 uint32_t CallSiteTableLength = Data.getULEB128(&Offset); 157 uint64_t CallSiteTableStart = Offset; 158 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength; 159 uint64_t CallSitePtr = CallSiteTableStart; 160 uint64_t ActionTableStart = CallSiteTableEnd; 161 162 if (opts::PrintExceptions) { 163 outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; 164 outs() << "CallSite table length = " << CallSiteTableLength << '\n'; 165 outs() << '\n'; 166 } 167 168 this->HasEHRanges = CallSitePtr < CallSiteTableEnd; 169 const uint64_t RangeBase = getAddress(); 170 while (CallSitePtr < CallSiteTableEnd) { 171 uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 172 CallSitePtr + LSDASectionAddress); 173 uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding, 174 CallSitePtr + LSDASectionAddress); 175 uint64_t LandingPad = *Data.getEncodedPointer( 176 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress); 177 uint64_t ActionEntry = Data.getULEB128(&CallSitePtr); 178 179 if (opts::PrintExceptions) { 180 outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) 181 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) 182 << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad) 183 << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n"; 184 outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) 185 << '\n'; 186 } 187 188 // Create a handler entry if necessary. 189 MCSymbol *LPSymbol = nullptr; 190 if (LandingPad) { 191 if (!getInstructionAtOffset(LandingPad)) { 192 if (opts::Verbosity >= 1) 193 errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LandingPad) 194 << " not pointing to an instruction in function " << *this 195 << " - ignoring.\n"; 196 } else { 197 auto Label = Labels.find(LandingPad); 198 if (Label != Labels.end()) { 199 LPSymbol = Label->second; 200 } else { 201 LPSymbol = BC.Ctx->createNamedTempSymbol("LP"); 202 Labels[LandingPad] = LPSymbol; 203 } 204 } 205 } 206 207 // Mark all call instructions in the range. 208 auto II = Instructions.find(Start); 209 auto IE = Instructions.end(); 210 assert(II != IE && "exception range not pointing to an instruction"); 211 do { 212 MCInst &Instruction = II->second; 213 if (BC.MIB->isCall(Instruction) && 214 !BC.MIB->getConditionalTailCall(Instruction)) { 215 assert(!BC.MIB->isInvoke(Instruction) && 216 "overlapping exception ranges detected"); 217 // Add extra operands to a call instruction making it an invoke from 218 // now on. 219 BC.MIB->addEHInfo(Instruction, 220 MCPlus::MCLandingPad(LPSymbol, ActionEntry)); 221 } 222 ++II; 223 } while (II != IE && II->first < Start + Length); 224 225 if (ActionEntry != 0) { 226 auto printType = [&](int Index, raw_ostream &OS) { 227 assert(Index > 0 && "only positive indices are valid"); 228 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 229 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 230 uint64_t TypeAddress = 231 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 232 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress) { 233 TypeAddress = 0; 234 } 235 if (TypeAddress == 0) { 236 OS << "<all>"; 237 return; 238 } 239 if (TTypeEncoding & DW_EH_PE_indirect) { 240 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 241 assert(PointerOrErr && "failed to decode indirect address"); 242 TypeAddress = *PointerOrErr; 243 } 244 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress)) { 245 OS << TypeSymBD->getName(); 246 } else { 247 OS << "0x" << Twine::utohexstr(TypeAddress); 248 } 249 }; 250 if (opts::PrintExceptions) 251 outs() << " actions: "; 252 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1; 253 int64_t ActionType; 254 int64_t ActionNext; 255 const char *Sep = ""; 256 do { 257 ActionType = Data.getSLEB128(&ActionPtr); 258 const uint32_t Self = ActionPtr; 259 ActionNext = Data.getSLEB128(&ActionPtr); 260 if (opts::PrintExceptions) 261 outs() << Sep << "(" << ActionType << ", " << ActionNext << ") "; 262 if (ActionType == 0) { 263 if (opts::PrintExceptions) 264 outs() << "cleanup"; 265 } else if (ActionType > 0) { 266 // It's an index into a type table. 267 MaxTypeIndex = 268 std::max(MaxTypeIndex, static_cast<unsigned>(ActionType)); 269 if (opts::PrintExceptions) { 270 outs() << "catch type "; 271 printType(ActionType, outs()); 272 } 273 } else { // ActionType < 0 274 if (opts::PrintExceptions) 275 outs() << "filter exception types "; 276 const char *TSep = ""; 277 // ActionType is a negative *byte* offset into *uleb128-encoded* table 278 // of indices with base 1. 279 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are 280 // encoded using uleb128 thus we cannot directly dereference them. 281 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1; 282 while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) { 283 MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index)); 284 if (opts::PrintExceptions) { 285 outs() << TSep; 286 printType(Index, outs()); 287 TSep = ", "; 288 } 289 } 290 MaxTypeIndexTableOffset = std::max( 291 MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart); 292 } 293 294 Sep = "; "; 295 296 ActionPtr = Self + ActionNext; 297 } while (ActionNext); 298 if (opts::PrintExceptions) 299 outs() << '\n'; 300 } 301 } 302 if (opts::PrintExceptions) 303 outs() << '\n'; 304 305 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <= 306 Data.getData().size() && 307 "LSDA entry has crossed section boundary"); 308 309 if (TTypeEnd) { 310 LSDAActionTable = LSDASectionData.slice( 311 ActionTableStart, TypeIndexTableStart - 312 MaxTypeIndex * TTypeEncodingSize - 313 ActionTableStart); 314 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) { 315 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize; 316 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress; 317 uint64_t TypeAddress = 318 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress); 319 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress)) 320 TypeAddress = 0; 321 if (TTypeEncoding & DW_EH_PE_indirect) { 322 LSDATypeAddressTable.emplace_back(TypeAddress); 323 if (TypeAddress) { 324 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress); 325 assert(PointerOrErr && "failed to decode indirect address"); 326 TypeAddress = *PointerOrErr; 327 } 328 } 329 LSDATypeTable.emplace_back(TypeAddress); 330 } 331 LSDATypeIndexTable = 332 LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset); 333 } 334 } 335 336 void BinaryFunction::updateEHRanges() { 337 if (getSize() == 0) 338 return; 339 340 assert(CurrentState == State::CFG_Finalized && "unexpected state"); 341 342 // Build call sites table. 343 struct EHInfo { 344 const MCSymbol *LP; // landing pad 345 uint64_t Action; 346 }; 347 348 // If previous call can throw, this is its exception handler. 349 EHInfo PreviousEH = {nullptr, 0}; 350 351 // Marker for the beginning of exceptions range. 352 const MCSymbol *StartRange = nullptr; 353 354 // Indicates whether the start range is located in a cold part. 355 bool IsStartInCold = false; 356 357 // Have we crossed hot/cold border for split functions? 358 bool SeenCold = false; 359 360 // Sites to update - either regular or cold. 361 CallSitesType *Sites = &CallSites; 362 363 for (BinaryBasicBlock *&BB : BasicBlocksLayout) { 364 365 if (BB->isCold() && !SeenCold) { 366 SeenCold = true; 367 368 // Close the range (if any) and change the target call sites. 369 if (StartRange) { 370 Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(), 371 PreviousEH.LP, PreviousEH.Action}); 372 } 373 Sites = &ColdCallSites; 374 375 // Reset the range. 376 StartRange = nullptr; 377 PreviousEH = {nullptr, 0}; 378 } 379 380 for (auto II = BB->begin(); II != BB->end(); ++II) { 381 if (!BC.MIB->isCall(*II)) 382 continue; 383 384 // Instruction can throw an exception that should be handled. 385 const bool Throws = BC.MIB->isInvoke(*II); 386 387 // Ignore the call if it's a continuation of a no-throw gap. 388 if (!Throws && !StartRange) 389 continue; 390 391 // Extract exception handling information from the instruction. 392 const MCSymbol *LP = nullptr; 393 uint64_t Action = 0; 394 if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II)) 395 std::tie(LP, Action) = *EHInfo; 396 397 // No action if the exception handler has not changed. 398 if (Throws && StartRange && PreviousEH.LP == LP && 399 PreviousEH.Action == Action) 400 continue; 401 402 // Same symbol is used for the beginning and the end of the range. 403 const MCSymbol *EHSymbol; 404 MCInst EHLabel; 405 { 406 std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex); 407 EHSymbol = BC.Ctx->createNamedTempSymbol("EH"); 408 BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get()); 409 } 410 411 II = std::next(BB->insertPseudoInstr(II, EHLabel)); 412 413 // At this point we could be in one of the following states: 414 // 415 // I. Exception handler has changed and we need to close previous range 416 // and start a new one. 417 // 418 // II. Start a new exception range after the gap. 419 // 420 // III. Close current exception range and start a new gap. 421 const MCSymbol *EndRange; 422 if (StartRange) { 423 // I, III: 424 EndRange = EHSymbol; 425 } else { 426 // II: 427 StartRange = EHSymbol; 428 IsStartInCold = SeenCold; 429 EndRange = nullptr; 430 } 431 432 // Close the previous range. 433 if (EndRange) { 434 Sites->emplace_back( 435 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 436 } 437 438 if (Throws) { 439 // I, II: 440 StartRange = EHSymbol; 441 IsStartInCold = SeenCold; 442 PreviousEH = EHInfo{LP, Action}; 443 } else { 444 StartRange = nullptr; 445 } 446 } 447 } 448 449 // Check if we need to close the range. 450 if (StartRange) { 451 assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch"); 452 const MCSymbol *EndRange = 453 IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel(); 454 Sites->emplace_back( 455 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action}); 456 } 457 } 458 459 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; 460 461 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) { 462 // Prepare FDEs for fast lookup 463 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) { 464 const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry); 465 // Skip CIEs. 466 if (!CurFDE) 467 continue; 468 // There could me multiple FDEs with the same initial address, and perhaps 469 // different sizes (address ranges). Use the first entry with non-zero size. 470 auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation()); 471 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) { 472 if (CurFDE->getAddressRange()) { 473 if (FDEI->second->getAddressRange() == 0) { 474 FDEI->second = CurFDE; 475 } else if (opts::Verbosity > 0) { 476 errs() << "BOLT-WARNING: different FDEs for function at 0x" 477 << Twine::utohexstr(FDEI->first) 478 << " detected; sizes: " << FDEI->second->getAddressRange() 479 << " and " << CurFDE->getAddressRange() << '\n'; 480 } 481 } 482 } else { 483 FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE); 484 } 485 } 486 } 487 488 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { 489 uint64_t Address = Function.getAddress(); 490 auto I = FDEs.find(Address); 491 // Ignore zero-length FDE ranges. 492 if (I == FDEs.end() || !I->second->getAddressRange()) 493 return true; 494 495 const FDE &CurFDE = *I->second; 496 Optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); 497 Function.setLSDAAddress(LSDA ? *LSDA : 0); 498 499 uint64_t Offset = 0; 500 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); 501 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor(); 502 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) { 503 Function.setPersonalityFunction( 504 *CurFDE.getLinkedCIE()->getPersonalityAddress()); 505 Function.setPersonalityEncoding( 506 *CurFDE.getLinkedCIE()->getPersonalityEncoding()); 507 } 508 509 auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment, 510 DataAlignment]( 511 const CFIProgram::Instruction &Instr) { 512 uint8_t Opcode = Instr.Opcode; 513 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) 514 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; 515 switch (Instr.Opcode) { 516 case DW_CFA_nop: 517 break; 518 case DW_CFA_advance_loc4: 519 case DW_CFA_advance_loc2: 520 case DW_CFA_advance_loc1: 521 case DW_CFA_advance_loc: 522 // Advance our current address 523 Offset += CodeAlignment * int64_t(Instr.Ops[0]); 524 break; 525 case DW_CFA_offset_extended_sf: 526 Function.addCFIInstruction( 527 Offset, 528 MCCFIInstruction::createOffset( 529 nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1]))); 530 break; 531 case DW_CFA_offset_extended: 532 case DW_CFA_offset: 533 Function.addCFIInstruction( 534 Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0], 535 DataAlignment * Instr.Ops[1])); 536 break; 537 case DW_CFA_restore_extended: 538 case DW_CFA_restore: 539 Function.addCFIInstruction( 540 Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0])); 541 break; 542 case DW_CFA_set_loc: 543 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds"); 544 assert(Instr.Ops[0] <= Address + Function.getSize() && 545 "set_loc out of function bounds"); 546 Offset = Instr.Ops[0] - Address; 547 break; 548 549 case DW_CFA_undefined: 550 Function.addCFIInstruction( 551 Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0])); 552 break; 553 case DW_CFA_same_value: 554 Function.addCFIInstruction( 555 Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0])); 556 break; 557 case DW_CFA_register: 558 Function.addCFIInstruction( 559 Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0], 560 Instr.Ops[1])); 561 break; 562 case DW_CFA_remember_state: 563 Function.addCFIInstruction( 564 Offset, MCCFIInstruction::createRememberState(nullptr)); 565 break; 566 case DW_CFA_restore_state: 567 Function.addCFIInstruction(Offset, 568 MCCFIInstruction::createRestoreState(nullptr)); 569 break; 570 case DW_CFA_def_cfa: 571 Function.addCFIInstruction( 572 Offset, 573 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1])); 574 break; 575 case DW_CFA_def_cfa_sf: 576 Function.addCFIInstruction( 577 Offset, 578 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], 579 DataAlignment * int64_t(Instr.Ops[1]))); 580 break; 581 case DW_CFA_def_cfa_register: 582 Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister( 583 nullptr, Instr.Ops[0])); 584 break; 585 case DW_CFA_def_cfa_offset: 586 Function.addCFIInstruction( 587 Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0])); 588 break; 589 case DW_CFA_def_cfa_offset_sf: 590 Function.addCFIInstruction( 591 Offset, MCCFIInstruction::cfiDefCfaOffset( 592 nullptr, DataAlignment * int64_t(Instr.Ops[0]))); 593 break; 594 case DW_CFA_GNU_args_size: 595 Function.addCFIInstruction( 596 Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0])); 597 Function.setUsesGnuArgsSize(); 598 break; 599 case DW_CFA_val_offset_sf: 600 case DW_CFA_val_offset: 601 if (opts::Verbosity >= 1) { 602 errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n"; 603 } 604 return false; 605 case DW_CFA_def_cfa_expression: 606 case DW_CFA_val_expression: 607 case DW_CFA_expression: { 608 StringRef ExprBytes = Instr.Expression->getData(); 609 std::string Str; 610 raw_string_ostream OS(Str); 611 // Manually encode this instruction using CFI escape 612 OS << Opcode; 613 if (Opcode != DW_CFA_def_cfa_expression) { 614 encodeULEB128(Instr.Ops[0], OS); 615 } 616 encodeULEB128(ExprBytes.size(), OS); 617 OS << ExprBytes; 618 Function.addCFIInstruction( 619 Offset, MCCFIInstruction::createEscape(nullptr, OS.str())); 620 break; 621 } 622 case DW_CFA_MIPS_advance_loc8: 623 if (opts::Verbosity >= 1) { 624 errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; 625 } 626 return false; 627 case DW_CFA_GNU_window_save: 628 case DW_CFA_lo_user: 629 case DW_CFA_hi_user: 630 if (opts::Verbosity >= 1) { 631 errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user " 632 "unimplemented\n"; 633 } 634 return false; 635 default: 636 if (opts::Verbosity >= 1) { 637 errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode 638 << '\n'; 639 } 640 return false; 641 } 642 643 return true; 644 }; 645 646 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis()) { 647 if (!decodeFrameInstruction(Instr)) 648 return false; 649 } 650 651 for (const CFIProgram::Instruction &Instr : CurFDE.cfis()) { 652 if (!decodeFrameInstruction(Instr)) 653 return false; 654 } 655 656 return true; 657 } 658 659 std::vector<char> CFIReaderWriter::generateEHFrameHeader( 660 const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame, 661 uint64_t EHFrameHeaderAddress, 662 std::vector<uint64_t> &FailedAddresses) const { 663 // Common PC -> FDE map to be written into .eh_frame_hdr. 664 std::map<uint64_t, uint64_t> PCToFDE; 665 666 // Presort array for binary search. 667 std::sort(FailedAddresses.begin(), FailedAddresses.end()); 668 669 // Initialize PCToFDE using NewEHFrame. 670 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) { 671 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 672 if (FDE == nullptr) 673 continue; 674 const uint64_t FuncAddress = FDE->getInitialLocation(); 675 const uint64_t FDEAddress = 676 NewEHFrame.getEHFrameAddress() + FDE->getOffset(); 677 678 // Ignore unused FDEs. 679 if (FuncAddress == 0) 680 continue; 681 682 // Add the address to the map unless we failed to write it. 683 if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(), 684 FuncAddress)) { 685 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x" 686 << Twine::utohexstr(FuncAddress) << " is at 0x" 687 << Twine::utohexstr(FDEAddress) << '\n'); 688 PCToFDE[FuncAddress] = FDEAddress; 689 } 690 }; 691 692 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains " 693 << std::distance(NewEHFrame.entries().begin(), 694 NewEHFrame.entries().end()) 695 << " entries\n"); 696 697 // Add entries from the original .eh_frame corresponding to the functions 698 // that we did not update. 699 for (const dwarf::FrameEntry &Entry : OldEHFrame) { 700 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry); 701 if (FDE == nullptr) 702 continue; 703 const uint64_t FuncAddress = FDE->getInitialLocation(); 704 const uint64_t FDEAddress = 705 OldEHFrame.getEHFrameAddress() + FDE->getOffset(); 706 707 // Add the address if we failed to write it. 708 if (PCToFDE.count(FuncAddress) == 0) { 709 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x" 710 << Twine::utohexstr(FuncAddress) << " is at 0x" 711 << Twine::utohexstr(FDEAddress) << '\n'); 712 PCToFDE[FuncAddress] = FDEAddress; 713 } 714 }; 715 716 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains " 717 << std::distance(OldEHFrame.entries().begin(), 718 OldEHFrame.entries().end()) 719 << " entries\n"); 720 721 // Generate a new .eh_frame_hdr based on the new map. 722 723 // Header plus table of entries of size 8 bytes. 724 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8); 725 726 // Version is 1. 727 EHFrameHeader[0] = 1; 728 // Encoding of the eh_frame pointer. 729 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; 730 // Encoding of the count field to follow. 731 EHFrameHeader[2] = DW_EH_PE_udata4; 732 // Encoding of the table entries - 4-byte offset from the start of the header. 733 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; 734 735 // Address of eh_frame. Use the new one. 736 support::ulittle32_t::ref(EHFrameHeader.data() + 4) = 737 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4); 738 739 // Number of entries in the table (FDE count). 740 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size(); 741 742 // Write the table at offset 12. 743 char *Ptr = EHFrameHeader.data(); 744 uint32_t Offset = 12; 745 for (const auto &PCI : PCToFDE) { 746 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress; 747 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds"); 748 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset; 749 Offset += 4; 750 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress; 751 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds"); 752 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset; 753 Offset += 4; 754 } 755 756 return EHFrameHeader; 757 } 758 759 Error EHFrameParser::parseCIE(uint64_t StartOffset) { 760 uint8_t Version = Data.getU8(&Offset); 761 const char *Augmentation = Data.getCStr(&Offset); 762 StringRef AugmentationString(Augmentation ? Augmentation : ""); 763 uint8_t AddressSize = 764 Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); 765 Data.setAddressSize(AddressSize); 766 // Skip segment descriptor size 767 if (Version >= 4) 768 Offset += 1; 769 // Skip code alignment factor 770 Data.getULEB128(&Offset); 771 // Skip data alignment 772 Data.getSLEB128(&Offset); 773 // Skip return address register 774 if (Version == 1) { 775 Offset += 1; 776 } else { 777 Data.getULEB128(&Offset); 778 } 779 780 uint32_t FDEPointerEncoding = DW_EH_PE_absptr; 781 uint32_t LSDAPointerEncoding = DW_EH_PE_omit; 782 // Walk the augmentation string to get all the augmentation data. 783 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { 784 switch (AugmentationString[i]) { 785 default: 786 return createStringError( 787 errc::invalid_argument, 788 "unknown augmentation character in entry at 0x%" PRIx64, StartOffset); 789 case 'L': 790 LSDAPointerEncoding = Data.getU8(&Offset); 791 break; 792 case 'P': { 793 uint32_t PersonalityEncoding = Data.getU8(&Offset); 794 Optional<uint64_t> Personality = 795 Data.getEncodedPointer(&Offset, PersonalityEncoding, 796 EHFrameAddress ? EHFrameAddress + Offset : 0); 797 // Patch personality address 798 if (Personality) 799 PatcherCallback(*Personality, Offset, PersonalityEncoding); 800 break; 801 } 802 case 'R': 803 FDEPointerEncoding = Data.getU8(&Offset); 804 break; 805 case 'z': 806 if (i) 807 return createStringError( 808 errc::invalid_argument, 809 "'z' must be the first character at 0x%" PRIx64, StartOffset); 810 // Skip augmentation length 811 Data.getULEB128(&Offset); 812 break; 813 case 'S': 814 case 'B': 815 break; 816 } 817 } 818 Entries.emplace_back(std::make_unique<CIEInfo>( 819 FDEPointerEncoding, LSDAPointerEncoding, AugmentationString)); 820 CIEs[StartOffset] = &*Entries.back(); 821 return Error::success(); 822 } 823 824 Error EHFrameParser::parseFDE(uint64_t CIEPointer, 825 uint64_t StartStructureOffset) { 826 Optional<uint64_t> LSDAAddress; 827 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer]; 828 829 // The address size is encoded in the CIE we reference. 830 if (!Cie) 831 return createStringError(errc::invalid_argument, 832 "parsing FDE data at 0x%" PRIx64 833 " failed due to missing CIE", 834 StartStructureOffset); 835 // Patch initial location 836 if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 837 EHFrameAddress + Offset)) { 838 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding); 839 } 840 // Skip address range 841 Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0); 842 843 // Process augmentation data for this FDE. 844 StringRef AugmentationString = Cie->AugmentationString; 845 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) { 846 // Skip augmentation length 847 Data.getULEB128(&Offset); 848 LSDAAddress = 849 Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding, 850 EHFrameAddress ? Offset + EHFrameAddress : 0); 851 // Patch LSDA address 852 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding); 853 } 854 return Error::success(); 855 } 856 857 Error EHFrameParser::parse() { 858 while (Data.isValidOffset(Offset)) { 859 const uint64_t StartOffset = Offset; 860 861 uint64_t Length; 862 DwarfFormat Format; 863 std::tie(Length, Format) = Data.getInitialLength(&Offset); 864 865 // If the Length is 0, then this CIE is a terminator 866 if (Length == 0) 867 break; 868 869 const uint64_t StartStructureOffset = Offset; 870 const uint64_t EndStructureOffset = Offset + Length; 871 872 Error Err = Error::success(); 873 const uint64_t Id = Data.getRelocatedValue(4, &Offset, 874 /*SectionIndex=*/nullptr, &Err); 875 if (Err) 876 return Err; 877 878 if (!Id) { 879 if (Error Err = parseCIE(StartOffset)) 880 return Err; 881 } else { 882 if (Error Err = parseFDE(Id, StartStructureOffset)) 883 return Err; 884 } 885 Offset = EndStructureOffset; 886 } 887 888 return Error::success(); 889 } 890 891 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress, 892 PatcherCallbackTy PatcherCallback) { 893 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback); 894 return Parser.parse(); 895 } 896 897 } // namespace bolt 898 } // namespace llvm 899