1 //===- bolt/Core/BinaryBasicBlock.h - Low-level basic block -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Sequence of MC/MCPlus instructions. Call/invoke does not terminate the block. 10 // CFI instructions are part of the instruction list with the initial CFI state 11 // defined at the beginning of the block. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H 16 #define BOLT_CORE_BINARY_BASIC_BLOCK_H 17 18 #include "bolt/Core/FunctionLayout.h" 19 #include "bolt/Core/MCPlus.h" 20 #include "llvm/ADT/GraphTraits.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCSymbol.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <limits> 28 #include <utility> 29 30 namespace llvm { 31 class MCCodeEmitter; 32 33 namespace bolt { 34 35 class BinaryFunction; 36 class JumpTable; 37 38 class BinaryBasicBlock { 39 public: 40 /// Profile execution information for a given edge in CFG. 41 /// 42 /// If MispredictedCount equals COUNT_INFERRED, then we have a profile 43 /// data for a fall-through edge with a Count representing an inferred 44 /// execution count, i.e. the count we calculated internally, not the one 45 /// coming from profile data. 46 /// 47 /// For all other values of MispredictedCount, Count represents the number of 48 /// branch executions from a profile, and MispredictedCount is the number 49 /// of times the branch was mispredicted according to this profile. 50 struct BinaryBranchInfo { 51 uint64_t Count; 52 uint64_t MispredictedCount; /// number of branches mispredicted 53 54 bool operator<(const BinaryBranchInfo &Other) const { 55 return (Count < Other.Count) || 56 (Count == Other.Count && 57 MispredictedCount < Other.MispredictedCount); 58 } 59 }; 60 61 static constexpr uint32_t INVALID_OFFSET = 62 std::numeric_limits<uint32_t>::max(); 63 64 using BranchInfoType = SmallVector<BinaryBranchInfo, 0>; 65 66 private: 67 /// Vector of all instructions in the block. 68 InstructionListType Instructions; 69 70 /// CFG information. 71 using EdgeListType = SmallVector<BinaryBasicBlock *, 0>; 72 EdgeListType Predecessors; 73 EdgeListType Successors; 74 75 /// Each successor has a corresponding BranchInfo entry in the list. 76 BranchInfoType BranchInfo; 77 78 using ExceptionListType = SmallVector<BinaryBasicBlock *, 0>; 79 80 /// List of blocks that this landing pad is handling. 81 ExceptionListType Throwers; 82 83 /// List of blocks that can catch exceptions thrown by code in this block. 84 ExceptionListType LandingPads; 85 86 /// Function that owns this basic block. 87 BinaryFunction *Function; 88 89 /// Label associated with the block. 90 MCSymbol *Label{nullptr}; 91 92 /// [Begin, End) address range for this block in the output binary. 93 std::pair<uint32_t, uint32_t> OutputAddressRange = {0, 0}; 94 95 /// Original offset range of the basic block in the function. 96 std::pair<uint32_t, uint32_t> InputRange = {INVALID_OFFSET, INVALID_OFFSET}; 97 98 /// Map input offset (from function start) of an instruction to an output 99 /// symbol. Enables writing BOLT address translation tables used for mapping 100 /// control transfer in the output binary back to the original binary. 101 using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>; 102 std::unique_ptr<LocSymsTy> LocSyms; 103 104 /// Alignment requirements for the block. 105 uint32_t Alignment{1}; 106 107 /// Maximum number of bytes to use for alignment of the block. 108 uint32_t AlignmentMaxBytes{0}; 109 110 /// Number of times this basic block was executed. 111 uint64_t ExecutionCount{COUNT_NO_PROFILE}; 112 113 static constexpr unsigned InvalidIndex = ~0u; 114 115 /// Index to BasicBlocks vector in BinaryFunction. 116 unsigned Index{InvalidIndex}; 117 118 /// Index in the current layout. 119 unsigned LayoutIndex{InvalidIndex}; 120 121 /// Number of pseudo instructions in this block. 122 uint32_t NumPseudos{0}; 123 124 /// CFI state at the entry to this basic block. 125 int32_t CFIState{-1}; 126 127 /// In cases where the parent function has been split, FragmentNum > 0 means 128 /// this BB will be allocated in a fragment outside its parent function. 129 FragmentNum Fragment; 130 131 /// Indicates if the block could be outlined. 132 bool CanOutline{true}; 133 134 /// Flag to indicate whether this block is valid or not. Invalid 135 /// blocks may contain out of date or incorrect information. 136 bool IsValid{true}; 137 138 /// Last computed hash value. 139 mutable uint64_t Hash{0}; 140 141 private: 142 BinaryBasicBlock() = delete; 143 BinaryBasicBlock(const BinaryBasicBlock &) = delete; 144 BinaryBasicBlock(const BinaryBasicBlock &&) = delete; 145 BinaryBasicBlock &operator=(const BinaryBasicBlock &) = delete; 146 BinaryBasicBlock &operator=(const BinaryBasicBlock &&) = delete; 147 148 explicit BinaryBasicBlock(BinaryFunction *Function, MCSymbol *Label) 149 : Function(Function), Label(Label) { 150 assert(Function && "Function must be non-null"); 151 } 152 153 // Exclusively managed by BinaryFunction. 154 friend class BinaryFunction; 155 friend bool operator<(const BinaryBasicBlock &LHS, 156 const BinaryBasicBlock &RHS); 157 158 /// Assign new label to the basic block. 159 void setLabel(MCSymbol *Symbol) { Label = Symbol; } 160 161 public: 162 static constexpr uint64_t COUNT_INFERRED = 163 std::numeric_limits<uint64_t>::max(); 164 static constexpr uint64_t COUNT_NO_PROFILE = 165 std::numeric_limits<uint64_t>::max(); 166 167 // Instructions iterators. 168 using iterator = InstructionListType::iterator; 169 using const_iterator = InstructionListType::const_iterator; 170 using reverse_iterator = std::reverse_iterator<iterator>; 171 using const_reverse_iterator = std::reverse_iterator<const_iterator>; 172 173 bool empty() const { assert(hasInstructions()); 174 return Instructions.empty(); } 175 size_t size() const { assert(hasInstructions()); 176 return Instructions.size(); } 177 MCInst &front() { assert(hasInstructions()); 178 return Instructions.front(); } 179 MCInst &back() { assert(hasInstructions()); 180 return Instructions.back(); } 181 const MCInst &front() const { assert(hasInstructions()); 182 return Instructions.front(); } 183 const MCInst &back() const { assert(hasInstructions()); 184 return Instructions.back(); } 185 186 iterator begin() { assert(hasInstructions()); 187 return Instructions.begin(); } 188 const_iterator begin() const { assert(hasInstructions()); 189 return Instructions.begin(); } 190 iterator end () { assert(hasInstructions()); 191 return Instructions.end(); } 192 const_iterator end () const { assert(hasInstructions()); 193 return Instructions.end(); } 194 reverse_iterator rbegin() { assert(hasInstructions()); 195 return Instructions.rbegin(); } 196 const_reverse_iterator rbegin() const { assert(hasInstructions()); 197 return Instructions.rbegin(); } 198 reverse_iterator rend () { assert(hasInstructions()); 199 return Instructions.rend(); } 200 const_reverse_iterator rend () const { assert(hasInstructions()); 201 return Instructions.rend(); } 202 203 // CFG iterators. 204 using pred_iterator = EdgeListType::iterator; 205 using const_pred_iterator = EdgeListType::const_iterator; 206 using succ_iterator = EdgeListType::iterator; 207 using const_succ_iterator = EdgeListType::const_iterator; 208 using throw_iterator = decltype(Throwers)::iterator; 209 using const_throw_iterator = decltype(Throwers)::const_iterator; 210 using lp_iterator = decltype(LandingPads)::iterator; 211 using const_lp_iterator = decltype(LandingPads)::const_iterator; 212 213 using pred_reverse_iterator = std::reverse_iterator<pred_iterator>; 214 using const_pred_reverse_iterator = 215 std::reverse_iterator<const_pred_iterator>; 216 using succ_reverse_iterator = std::reverse_iterator<succ_iterator>; 217 using const_succ_reverse_iterator = 218 std::reverse_iterator<const_succ_iterator>; 219 220 pred_iterator pred_begin() { return Predecessors.begin(); } 221 const_pred_iterator pred_begin() const { return Predecessors.begin(); } 222 pred_iterator pred_end() { return Predecessors.end(); } 223 const_pred_iterator pred_end() const { return Predecessors.end(); } 224 pred_reverse_iterator pred_rbegin() 225 { return Predecessors.rbegin();} 226 const_pred_reverse_iterator pred_rbegin() const 227 { return Predecessors.rbegin();} 228 pred_reverse_iterator pred_rend() 229 { return Predecessors.rend(); } 230 const_pred_reverse_iterator pred_rend() const 231 { return Predecessors.rend(); } 232 size_t pred_size() const { 233 return Predecessors.size(); 234 } 235 bool pred_empty() const { return Predecessors.empty(); } 236 237 succ_iterator succ_begin() { return Successors.begin(); } 238 const_succ_iterator succ_begin() const { return Successors.begin(); } 239 succ_iterator succ_end() { return Successors.end(); } 240 const_succ_iterator succ_end() const { return Successors.end(); } 241 succ_reverse_iterator succ_rbegin() 242 { return Successors.rbegin(); } 243 const_succ_reverse_iterator succ_rbegin() const 244 { return Successors.rbegin(); } 245 succ_reverse_iterator succ_rend() 246 { return Successors.rend(); } 247 const_succ_reverse_iterator succ_rend() const 248 { return Successors.rend(); } 249 size_t succ_size() const { 250 return Successors.size(); 251 } 252 bool succ_empty() const { return Successors.empty(); } 253 254 throw_iterator throw_begin() { return Throwers.begin(); } 255 const_throw_iterator throw_begin() const { return Throwers.begin(); } 256 throw_iterator throw_end() { return Throwers.end(); } 257 const_throw_iterator throw_end() const { return Throwers.end(); } 258 size_t throw_size() const { 259 return Throwers.size(); 260 } 261 bool throw_empty() const { return Throwers.empty(); } 262 bool isLandingPad() const { return !Throwers.empty(); } 263 264 lp_iterator lp_begin() { return LandingPads.begin(); } 265 const_lp_iterator lp_begin() const { return LandingPads.begin(); } 266 lp_iterator lp_end() { return LandingPads.end(); } 267 const_lp_iterator lp_end() const { return LandingPads.end(); } 268 size_t lp_size() const { 269 return LandingPads.size(); 270 } 271 bool lp_empty() const { return LandingPads.empty(); } 272 273 inline iterator_range<iterator> instructions() { 274 assert(hasInstructions()); 275 return iterator_range<iterator>(begin(), end()); 276 } 277 inline iterator_range<const_iterator> instructions() const { 278 assert(hasInstructions()); 279 return iterator_range<const_iterator>(begin(), end()); 280 } 281 inline iterator_range<pred_iterator> predecessors() { 282 assert(hasCFG()); 283 return iterator_range<pred_iterator>(pred_begin(), pred_end()); 284 } 285 inline iterator_range<const_pred_iterator> predecessors() const { 286 assert(hasCFG()); 287 return iterator_range<const_pred_iterator>(pred_begin(), pred_end()); 288 } 289 inline iterator_range<succ_iterator> successors() { 290 assert(hasCFG()); 291 return iterator_range<succ_iterator>(succ_begin(), succ_end()); 292 } 293 inline iterator_range<const_succ_iterator> successors() const { 294 assert(hasCFG()); 295 return iterator_range<const_succ_iterator>(succ_begin(), succ_end()); 296 } 297 inline iterator_range<throw_iterator> throwers() { 298 assert(hasCFG()); 299 return iterator_range<throw_iterator>(throw_begin(), throw_end()); 300 } 301 inline iterator_range<const_throw_iterator> throwers() const { 302 assert(hasCFG()); 303 return iterator_range<const_throw_iterator>(throw_begin(), throw_end()); 304 } 305 inline iterator_range<lp_iterator> landing_pads() { 306 assert(hasCFG()); 307 return iterator_range<lp_iterator>(lp_begin(), lp_end()); 308 } 309 inline iterator_range<const_lp_iterator> landing_pads() const { 310 assert(hasCFG()); 311 return iterator_range<const_lp_iterator>(lp_begin(), lp_end()); 312 } 313 314 // BranchInfo iterators. 315 using branch_info_iterator = BranchInfoType::iterator; 316 using const_branch_info_iterator = BranchInfoType::const_iterator; 317 using branch_info_reverse_iterator = 318 std::reverse_iterator<branch_info_iterator>; 319 using const_branch_info_reverse_iterator = 320 std::reverse_iterator<const_branch_info_iterator>; 321 322 branch_info_iterator branch_info_begin() { return BranchInfo.begin(); } 323 branch_info_iterator branch_info_end() { return BranchInfo.end(); } 324 const_branch_info_iterator branch_info_begin() const { 325 return BranchInfo.begin(); 326 } 327 const_branch_info_iterator branch_info_end() const { 328 return BranchInfo.end(); 329 } 330 branch_info_reverse_iterator branch_info_rbegin() { 331 return BranchInfo.rbegin(); 332 } 333 branch_info_reverse_iterator branch_info_rend() { return BranchInfo.rend(); } 334 const_branch_info_reverse_iterator branch_info_rbegin() const { 335 return BranchInfo.rbegin(); 336 } 337 const_branch_info_reverse_iterator branch_info_rend() const { 338 return BranchInfo.rend(); 339 } 340 341 size_t branch_info_size() const { return BranchInfo.size(); } 342 bool branch_info_empty() const { return BranchInfo.empty(); } 343 344 inline iterator_range<branch_info_iterator> branch_info() { 345 return iterator_range<branch_info_iterator>(BranchInfo.begin(), 346 BranchInfo.end()); 347 } 348 inline iterator_range<const_branch_info_iterator> branch_info() const { 349 return iterator_range<const_branch_info_iterator>(BranchInfo.begin(), 350 BranchInfo.end()); 351 } 352 353 /// Get instruction at given index. 354 MCInst &getInstructionAtIndex(unsigned Index) { return Instructions[Index]; } 355 356 const MCInst &getInstructionAtIndex(unsigned Index) const { 357 return Instructions[Index]; 358 } 359 360 /// Return symbol marking the start of this basic block. 361 MCSymbol *getLabel() { return Label; } 362 363 /// Return symbol marking the start of this basic block (const version). 364 const MCSymbol *getLabel() const { return Label; } 365 366 /// Get successor with given \p Label if \p Label != nullptr. 367 /// Returns nullptr if no such successor is found. 368 /// If the \p Label == nullptr and the block has only one successor then 369 /// return the successor. 370 BinaryBasicBlock *getSuccessor(const MCSymbol *Label = nullptr) const; 371 372 /// Return the related branch info as well as the successor. 373 BinaryBasicBlock *getSuccessor(const MCSymbol *Label, 374 BinaryBranchInfo &BI) const; 375 376 /// If the basic block ends with a conditional branch (possibly followed by 377 /// an unconditional branch) and thus has 2 successors, return a successor 378 /// corresponding to a jump condition which could be true or false. 379 /// Return nullptr if the basic block does not have a conditional jump. 380 BinaryBasicBlock *getConditionalSuccessor(bool Condition) { 381 if (succ_size() != 2) 382 return nullptr; 383 return Successors[Condition == true ? 0 : 1]; 384 } 385 386 const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { 387 return const_cast<BinaryBasicBlock *>(this)->getConditionalSuccessor( 388 Condition); 389 } 390 391 /// Find the fallthrough successor for a block, or nullptr if there is 392 /// none. 393 BinaryBasicBlock *getFallthrough() { 394 if (succ_size() == 2) 395 return getConditionalSuccessor(false); 396 else 397 return getSuccessor(); 398 } 399 400 const BinaryBasicBlock *getFallthrough() const { 401 return const_cast<BinaryBasicBlock *>(this)->getFallthrough(); 402 } 403 404 /// Return branch info corresponding to a taken branch. 405 const BinaryBranchInfo &getTakenBranchInfo() const { 406 assert(BranchInfo.size() == 2 && 407 "could only be called for blocks with 2 successors"); 408 return BranchInfo[0]; 409 }; 410 411 /// Return branch info corresponding to a fall-through branch. 412 const BinaryBranchInfo &getFallthroughBranchInfo() const { 413 assert(BranchInfo.size() == 2 && 414 "could only be called for blocks with 2 successors"); 415 return BranchInfo[1]; 416 }; 417 418 /// Return branch info corresponding to an edge going to \p Succ basic block. 419 BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ); 420 421 /// Return branch info corresponding to an edge going to \p Succ basic block. 422 const BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ) const; 423 424 /// Set branch information for the outgoing edge to block \p Succ. 425 void setSuccessorBranchInfo(const BinaryBasicBlock &Succ, uint64_t Count, 426 uint64_t MispredictedCount) { 427 BinaryBranchInfo &BI = getBranchInfo(Succ); 428 BI.Count = Count; 429 BI.MispredictedCount = MispredictedCount; 430 } 431 432 /// Try to compute the taken and misprediction frequencies for the given 433 /// successor. The result is an error if no information can be found. 434 ErrorOr<std::pair<double, double>> 435 getBranchStats(const BinaryBasicBlock *Succ) const; 436 437 /// If the basic block ends with a conditional branch (possibly followed by 438 /// an unconditional branch) and thus has 2 successor, reverse the order of 439 /// its successors in CFG, update branch info, and return true. If the basic 440 /// block does not have 2 successors return false. 441 bool swapConditionalSuccessors(); 442 443 /// Add an instruction with unconditional control transfer to \p Successor 444 /// basic block to the end of this basic block. 445 void addBranchInstruction(const BinaryBasicBlock *Successor); 446 447 /// Add an instruction with tail call control transfer to \p Target 448 /// to the end of this basic block. 449 void addTailCallInstruction(const MCSymbol *Target); 450 451 /// Return the number of call instructions in this basic block. 452 uint32_t getNumCalls() const; 453 454 /// Get landing pad with given label. Returns nullptr if no such 455 /// landing pad is found. 456 BinaryBasicBlock *getLandingPad(const MCSymbol *Label) const; 457 458 /// Return local name for the block. 459 StringRef getName() const { return Label->getName(); } 460 461 /// Add instruction at the end of this basic block. 462 /// Returns iterator pointing to the inserted instruction. 463 iterator addInstruction(MCInst &&Inst) { 464 adjustNumPseudos(Inst, 1); 465 Instructions.emplace_back(Inst); 466 return std::prev(Instructions.end()); 467 } 468 469 /// Add instruction at the end of this basic block. 470 /// Returns iterator pointing to the inserted instruction. 471 iterator addInstruction(const MCInst &Inst) { 472 adjustNumPseudos(Inst, 1); 473 Instructions.push_back(Inst); 474 return std::prev(Instructions.end()); 475 } 476 477 /// Add a range of instructions to the end of this basic block. 478 template <typename Itr> void addInstructions(Itr Begin, Itr End) { 479 while (Begin != End) 480 addInstruction(*Begin++); 481 } 482 483 /// Add a range of instructions to the end of this basic block. 484 template <typename RangeTy> void addInstructions(RangeTy R) { 485 for (auto &I : R) 486 addInstruction(I); 487 } 488 489 /// Add instruction before Pos in this basic block. 490 template <typename Itr> Itr insertPseudoInstr(Itr Pos, MCInst &Instr) { 491 ++NumPseudos; 492 return Instructions.insert(Pos, Instr); 493 } 494 495 /// Return the number of pseudo instructions in the basic block. 496 uint32_t getNumPseudos() const; 497 498 /// Return the number of emitted instructions for this basic block. 499 uint32_t getNumNonPseudos() const { return size() - getNumPseudos(); } 500 501 /// Return iterator to the first non-pseudo instruction or end() 502 /// if no such instruction was found. 503 iterator getFirstNonPseudo(); 504 505 /// Return a pointer to the first non-pseudo instruction in this basic 506 /// block. Returns nullptr if none exists. 507 MCInst *getFirstNonPseudoInstr() { 508 auto II = getFirstNonPseudo(); 509 return II == Instructions.end() ? nullptr : &*II; 510 } 511 512 /// Return reverse iterator to the last non-pseudo instruction or rend() 513 /// if no such instruction was found. 514 reverse_iterator getLastNonPseudo(); 515 const_reverse_iterator getLastNonPseudo() const { 516 return const_cast<BinaryBasicBlock *>(this)->getLastNonPseudo(); 517 } 518 519 /// Return a pointer to the last non-pseudo instruction in this basic 520 /// block. Returns nullptr if none exists. 521 MCInst *getLastNonPseudoInstr() { 522 auto RII = getLastNonPseudo(); 523 return RII == Instructions.rend() ? nullptr : &*RII; 524 } 525 const MCInst *getLastNonPseudoInstr() const { 526 auto RII = getLastNonPseudo(); 527 return RII == Instructions.rend() ? nullptr : &*RII; 528 } 529 530 /// Set CFI state at entry to this basic block. 531 void setCFIState(int32_t NewCFIState) { 532 assert((CFIState == -1 || NewCFIState == CFIState) && 533 "unexpected change of CFI state for basic block"); 534 CFIState = NewCFIState; 535 } 536 537 /// Return CFI state (expected) at entry of this basic block. 538 int32_t getCFIState() const { 539 assert(CFIState >= 0 && "unknown CFI state"); 540 return CFIState; 541 } 542 543 /// Calculate and return CFI state right before instruction \p Instr in 544 /// this basic block. If \p Instr is nullptr then return the state at 545 /// the end of the basic block. 546 int32_t getCFIStateAtInstr(const MCInst *Instr) const; 547 548 /// Calculate and return CFI state after execution of this basic block. 549 /// The state depends on CFI state at entry and CFI instructions inside the 550 /// basic block. 551 int32_t getCFIStateAtExit() const { return getCFIStateAtInstr(nullptr); } 552 553 /// Set minimum alignment for the basic block. 554 void setAlignment(uint32_t Align) { Alignment = Align; } 555 556 /// Set alignment of the block based on the alignment of its offset. 557 void setDerivedAlignment() { 558 const uint64_t DerivedAlignment = getOffset() & (1 + ~getOffset()); 559 Alignment = std::min(DerivedAlignment, uint64_t(32)); 560 } 561 562 /// Return required alignment for the block. 563 Align getAlign() const { return Align(Alignment); } 564 uint32_t getAlignment() const { return Alignment; } 565 566 /// Set the maximum number of bytes to use for the block alignment. 567 void setAlignmentMaxBytes(uint32_t Value) { AlignmentMaxBytes = Value; } 568 569 /// Return the maximum number of bytes to use for the block alignment. 570 uint32_t getAlignmentMaxBytes() const { return AlignmentMaxBytes; } 571 572 /// Adds block to successor list, and also updates predecessor list for 573 /// successor block. 574 /// Set branch info for this path. 575 void addSuccessor(BinaryBasicBlock *Succ, uint64_t Count = 0, 576 uint64_t MispredictedCount = 0); 577 578 void addSuccessor(BinaryBasicBlock *Succ, const BinaryBranchInfo &BI) { 579 addSuccessor(Succ, BI.Count, BI.MispredictedCount); 580 } 581 582 /// Add a range of successors. 583 template <typename Itr> void addSuccessors(Itr Begin, Itr End) { 584 while (Begin != End) 585 addSuccessor(*Begin++); 586 } 587 588 /// Add a range of successors with branch info. 589 template <typename Itr, typename BrItr> 590 void addSuccessors(Itr Begin, Itr End, BrItr BrBegin, BrItr BrEnd) { 591 assert(std::distance(Begin, End) == std::distance(BrBegin, BrEnd)); 592 while (Begin != End) 593 addSuccessor(*Begin++, *BrBegin++); 594 } 595 596 /// Replace Succ with NewSucc. This routine is helpful for preserving 597 /// the order of conditional successors when editing the CFG. 598 void replaceSuccessor(BinaryBasicBlock *Succ, BinaryBasicBlock *NewSucc, 599 uint64_t Count = 0, uint64_t MispredictedCount = 0); 600 601 /// Move all of this block's successors to a new block, and set the 602 /// execution count of this new block with our execution count. This is 603 /// useful when splitting a block in two. 604 void moveAllSuccessorsTo(BinaryBasicBlock *New) { 605 New->addSuccessors(successors().begin(), successors().end(), 606 branch_info_begin(), branch_info_end()); 607 removeAllSuccessors(); 608 609 // Update the execution count on the new block. 610 New->setExecutionCount(getExecutionCount()); 611 } 612 613 /// Remove /p Succ basic block from the list of successors. Update the 614 /// list of predecessors of /p Succ and update branch info. 615 void removeSuccessor(BinaryBasicBlock *Succ); 616 617 /// Remove all successors of the basic block, and remove the block 618 /// from respective lists of predecessors. 619 void removeAllSuccessors(); 620 621 /// Remove useless duplicate successors. When the conditional 622 /// successor is the same as the unconditional successor, we can 623 /// remove the conditional successor and branch instruction. 624 void removeDuplicateConditionalSuccessor(MCInst *CondBranch); 625 626 /// Update successors of the basic block based on the jump table instruction. 627 /// The block must end with a jump table instruction. 628 void updateJumpTableSuccessors(); 629 630 /// Test if BB is a predecessor of this block. 631 bool isPredecessor(const BinaryBasicBlock *BB) const { 632 return llvm::is_contained(Predecessors, BB); 633 } 634 635 /// Test if BB is a successor of this block. 636 bool isSuccessor(const BinaryBasicBlock *BB) const { 637 return llvm::is_contained(Successors, BB); 638 } 639 640 /// Test if this BB has a valid execution count. 641 bool hasProfile() const { return ExecutionCount != COUNT_NO_PROFILE; } 642 643 /// Return the information about the number of times this basic block was 644 /// executed. 645 /// 646 /// Return COUNT_NO_PROFILE if there's no profile info. 647 uint64_t getExecutionCount() const { return ExecutionCount; } 648 649 /// Return the execution count for blocks with known profile. 650 /// Return 0 if the block has no profile. 651 uint64_t getKnownExecutionCount() const { 652 return !hasProfile() ? 0 : ExecutionCount; 653 } 654 655 /// Set the execution count for this block. 656 void setExecutionCount(uint64_t Count) { ExecutionCount = Count; } 657 658 /// Apply a given \p Ratio to the profile information of this basic block. 659 void adjustExecutionCount(double Ratio); 660 661 /// Return true if the basic block is an entry point into the function 662 /// (either primary or secondary). 663 bool isEntryPoint() const; 664 665 bool isValid() const { return IsValid; } 666 667 void markValid(const bool Valid) { IsValid = Valid; } 668 669 FragmentNum getFragmentNum() const { return Fragment; } 670 671 void setFragmentNum(const FragmentNum Value) { Fragment = Value; } 672 673 bool isSplit() const { return Fragment != FragmentNum::main(); } 674 675 bool isCold() const { 676 assert(Fragment.get() < 2 && 677 "Function is split into more than two (hot/cold)-fragments"); 678 return isSplit(); 679 } 680 681 void setIsCold(const bool Flag) { 682 Fragment = Flag ? FragmentNum::cold() : FragmentNum::main(); 683 } 684 685 /// Return true if the block can be outlined. At the moment we disallow 686 /// outlining of blocks that can potentially throw exceptions or are 687 /// the beginning of a landing pad. The entry basic block also can 688 /// never be outlined. 689 bool canOutline() const { return CanOutline; } 690 691 void setCanOutline(const bool Flag) { CanOutline = Flag; } 692 693 /// Erase pseudo instruction at a given iterator. 694 /// Return iterator following the removed instruction. 695 iterator erasePseudoInstruction(iterator II) { 696 --NumPseudos; 697 return Instructions.erase(II); 698 } 699 700 /// Erase non-pseudo instruction at a given iterator \p II. 701 /// Return iterator following the removed instruction. 702 iterator eraseInstruction(iterator II) { 703 adjustNumPseudos(*II, -1); 704 return Instructions.erase(II); 705 } 706 707 /// Erase non-pseudo instruction at a given \p Index 708 void eraseInstructionAtIndex(unsigned Index) { 709 eraseInstruction(Instructions.begin() + Index); 710 } 711 712 /// Erase instructions in the specified range. 713 template <typename ItrType> 714 void eraseInstructions(ItrType Begin, ItrType End) { 715 while (End > Begin) 716 eraseInstruction(findInstruction(*--End)); 717 } 718 719 /// Erase all instructions. 720 void clear() { 721 Instructions.clear(); 722 NumPseudos = 0; 723 } 724 725 /// Retrieve iterator for \p Inst or return end iterator if instruction is not 726 /// from this basic block. 727 decltype(Instructions)::iterator findInstruction(const MCInst *Inst) { 728 if (Instructions.empty()) 729 return Instructions.end(); 730 size_t Index = Inst - &Instructions[0]; 731 return Index >= Instructions.size() ? Instructions.end() 732 : Instructions.begin() + Index; 733 } 734 735 /// Replace instruction referenced by iterator \II with a sequence of 736 /// instructions defined by [\p Begin, \p End] range. 737 /// 738 /// Return iterator pointing to the first inserted instruction. 739 template <typename Itr> 740 iterator replaceInstruction(iterator II, Itr Begin, Itr End) { 741 adjustNumPseudos(*II, -1); 742 adjustNumPseudos(Begin, End, 1); 743 744 auto I = II - Instructions.begin(); 745 Instructions.insert(Instructions.erase(II), Begin, End); 746 return I + Instructions.begin(); 747 } 748 749 iterator replaceInstruction(iterator II, 750 const InstructionListType &Replacement) { 751 return replaceInstruction(II, Replacement.begin(), Replacement.end()); 752 } 753 754 /// Insert \p NewInst before \p At, which must be an existing instruction in 755 /// this BB. Return iterator pointing to the newly inserted instruction. 756 iterator insertInstruction(iterator At, MCInst &&NewInst) { 757 adjustNumPseudos(NewInst, 1); 758 return Instructions.emplace(At, std::move(NewInst)); 759 } 760 761 iterator insertInstruction(iterator At, MCInst &NewInst) { 762 adjustNumPseudos(NewInst, 1); 763 return Instructions.insert(At, NewInst); 764 } 765 766 /// Helper to retrieve any terminators in \p BB before \p Pos. This is used 767 /// to skip CFI instructions and to retrieve the first terminator instruction 768 /// in basic blocks with two terminators (conditional jump and unconditional 769 /// jump). 770 MCInst *getTerminatorBefore(MCInst *Pos); 771 772 /// Used to identify whether an instruction is before a terminator and whether 773 /// moving it to the end of the BB would render it dead code. 774 bool hasTerminatorAfter(MCInst *Pos); 775 776 /// Split apart the instructions in this basic block starting at Inst. 777 /// The instructions following Inst are removed and returned in a vector. 778 InstructionListType splitInstructions(const MCInst *Inst) { 779 InstructionListType SplitInst; 780 781 assert(!Instructions.empty()); 782 while (&Instructions.back() != Inst) { 783 SplitInst.push_back(Instructions.back()); 784 Instructions.pop_back(); 785 } 786 std::reverse(SplitInst.begin(), SplitInst.end()); 787 NumPseudos = 0; 788 adjustNumPseudos(Instructions.begin(), Instructions.end(), 1); 789 return SplitInst; 790 } 791 792 /// Split basic block at the instruction pointed to by II. 793 /// All iterators pointing after II get invalidated. 794 /// 795 /// Return the new basic block that starts with the instruction 796 /// at the split point. 797 BinaryBasicBlock *splitAt(iterator II); 798 799 /// Set start offset of this basic block in the input binary. 800 void setOffset(uint32_t Offset) { InputRange.first = Offset; }; 801 802 /// Sets address of the basic block in the output. 803 void setOutputStartAddress(uint64_t Address) { 804 OutputAddressRange.first = Address; 805 } 806 807 /// Sets address past the end of the basic block in the output. 808 void setOutputEndAddress(uint64_t Address) { 809 OutputAddressRange.second = Address; 810 } 811 812 /// Gets the memory address range of this BB in the input binary. 813 std::pair<uint64_t, uint64_t> getInputAddressRange() const { 814 return InputRange; 815 } 816 817 /// Gets the memory address range of this BB in the output binary. 818 std::pair<uint64_t, uint64_t> getOutputAddressRange() const { 819 return OutputAddressRange; 820 } 821 822 uint64_t getOutputStartAddress() const { return OutputAddressRange.first; } 823 uint64_t getOutputEndAddress() const { return OutputAddressRange.second; } 824 825 bool hasLocSyms() const { return LocSyms != nullptr; } 826 827 /// Return mapping of input offsets to symbols in the output. 828 LocSymsTy &getLocSyms() { 829 return LocSyms ? *LocSyms : *(LocSyms = std::make_unique<LocSymsTy>()); 830 } 831 832 /// Return mapping of input offsets to symbols in the output. 833 const LocSymsTy &getLocSyms() const { 834 return const_cast<BinaryBasicBlock *>(this)->getLocSyms(); 835 } 836 837 /// Return size of the basic block in the output binary. 838 uint64_t getOutputSize() const { 839 return OutputAddressRange.second - OutputAddressRange.first; 840 } 841 842 BinaryFunction *getFunction() const { return Function; } 843 844 /// Analyze and interpret the terminators of this basic block. TBB must be 845 /// initialized with the original fall-through for this BB. 846 bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB, 847 MCInst *&CondBranch, MCInst *&UncondBranch); 848 849 /// Printer required for printing dominator trees. 850 void printAsOperand(raw_ostream &OS, bool PrintType = true) { 851 if (PrintType) 852 OS << "basic block "; 853 OS << getName(); 854 } 855 856 /// A simple dump function for debugging. 857 void dump() const; 858 859 /// Validate successor invariants for this BB. 860 bool validateSuccessorInvariants(); 861 862 /// Return offset of the basic block from the function start on input. 863 uint32_t getInputOffset() const { return InputRange.first; } 864 865 /// Return offset from the function start to location immediately past 866 /// the end of the basic block. 867 uint32_t getEndOffset() const { return InputRange.second; } 868 869 /// Return size of the basic block on input. 870 uint32_t getOriginalSize() const { 871 return InputRange.second - InputRange.first; 872 } 873 874 /// Returns an estimate of size of basic block during run time optionally 875 /// using a user-supplied emitter for lock-free multi-thread work. 876 /// MCCodeEmitter is not thread safe and each thread should operate with its 877 /// own copy of it. 878 uint64_t estimateSize(const MCCodeEmitter *Emitter = nullptr) const; 879 880 /// Return index in the current layout. The user is responsible for 881 /// making sure the indices are up to date, 882 /// e.g. by calling BinaryFunction::updateLayoutIndices(); 883 unsigned getLayoutIndex() const { 884 assert(isValid()); 885 return LayoutIndex; 886 } 887 888 /// Set layout index. To be used by BinaryFunction. 889 void setLayoutIndex(unsigned Index) { LayoutIndex = Index; } 890 891 /// Needed by graph traits. 892 BinaryFunction *getParent() const { return getFunction(); } 893 894 /// Return true if the containing function is in CFG state. 895 bool hasCFG() const; 896 897 /// Return true if the containing function is in a state with instructions. 898 bool hasInstructions() const; 899 900 /// Return offset of the basic block from the function start. 901 uint32_t getOffset() const { return InputRange.first; } 902 903 /// Get the index of this basic block. 904 unsigned getIndex() const { 905 assert(isValid()); 906 return Index; 907 } 908 909 /// Return jump table if the block contains a jump table instruction or 910 /// nullptr otherwise. 911 const JumpTable *getJumpTable() const; 912 913 /// Check if the block has a jump table instruction. 914 bool hasJumpTable() const { return getJumpTable() != nullptr; } 915 916 /// Returns the last computed hash value of the block. 917 uint64_t getHash() const { return Hash; } 918 919 private: 920 void adjustNumPseudos(const MCInst &Inst, int Sign); 921 922 template <typename Itr> void adjustNumPseudos(Itr Begin, Itr End, int Sign) { 923 while (Begin != End) 924 adjustNumPseudos(*Begin++, Sign); 925 } 926 927 /// Adds predecessor to the BB. Most likely you don't need to call this. 928 void addPredecessor(BinaryBasicBlock *Pred); 929 930 /// Remove predecessor of the basic block. Don't use directly, instead 931 /// use removeSuccessor() function. 932 /// If \p Multiple is set to true, it will remove all predecessors that 933 /// are equal to \p Pred. Otherwise, the first instance of \p Pred found 934 /// will be removed. This only matters in awkward, redundant CFGs. 935 void removePredecessor(BinaryBasicBlock *Pred, bool Multiple = true); 936 937 /// Set end offset of this basic block. 938 void setEndOffset(uint32_t Offset) { InputRange.second = Offset; } 939 940 /// Set the index of this basic block. 941 void setIndex(unsigned I) { Index = I; } 942 943 /// Sets the hash value of the basic block. 944 void setHash(uint64_t Value) const { Hash = Value; } 945 946 template <typename T> void clearList(T &List) { 947 T TempList; 948 TempList.swap(List); 949 } 950 951 /// Release memory taken by CFG edges and instructions. 952 void releaseCFG() { 953 clearList(Predecessors); 954 clearList(Successors); 955 clearList(Throwers); 956 clearList(LandingPads); 957 clearList(BranchInfo); 958 clearList(Instructions); 959 } 960 }; 961 962 #if defined(LLVM_ON_UNIX) 963 /// Keep the size of the BinaryBasicBlock within a reasonable size class 964 /// (jemalloc bucket) on Linux 965 static_assert(sizeof(BinaryBasicBlock) <= 256); 966 #endif 967 968 bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); 969 970 } // namespace bolt 971 972 // GraphTraits specializations for basic block graphs (CFGs) 973 template <> struct GraphTraits<bolt::BinaryBasicBlock *> { 974 using NodeRef = bolt::BinaryBasicBlock *; 975 using ChildIteratorType = bolt::BinaryBasicBlock::succ_iterator; 976 977 static NodeRef getEntryNode(bolt::BinaryBasicBlock *BB) { return BB; } 978 static inline ChildIteratorType child_begin(NodeRef N) { 979 return N->succ_begin(); 980 } 981 static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } 982 }; 983 984 template <> struct GraphTraits<const bolt::BinaryBasicBlock *> { 985 using NodeRef = const bolt::BinaryBasicBlock *; 986 using ChildIteratorType = bolt::BinaryBasicBlock::const_succ_iterator; 987 988 static NodeRef getEntryNode(const bolt::BinaryBasicBlock *BB) { return BB; } 989 static inline ChildIteratorType child_begin(NodeRef N) { 990 return N->succ_begin(); 991 } 992 static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } 993 }; 994 995 template <> struct GraphTraits<Inverse<bolt::BinaryBasicBlock *>> { 996 using NodeRef = bolt::BinaryBasicBlock *; 997 using ChildIteratorType = bolt::BinaryBasicBlock::pred_iterator; 998 static NodeRef getEntryNode(Inverse<bolt::BinaryBasicBlock *> G) { 999 return G.Graph; 1000 } 1001 static inline ChildIteratorType child_begin(NodeRef N) { 1002 return N->pred_begin(); 1003 } 1004 static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } 1005 }; 1006 1007 template <> struct GraphTraits<Inverse<const bolt::BinaryBasicBlock *>> { 1008 using NodeRef = const bolt::BinaryBasicBlock *; 1009 using ChildIteratorType = bolt::BinaryBasicBlock::const_pred_iterator; 1010 static NodeRef getEntryNode(Inverse<const bolt::BinaryBasicBlock *> G) { 1011 return G.Graph; 1012 } 1013 static inline ChildIteratorType child_begin(NodeRef N) { 1014 return N->pred_begin(); 1015 } 1016 static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } 1017 }; 1018 1019 } // namespace llvm 1020 1021 #endif 1022