xref: /openbsd-src/gnu/llvm/llvm/lib/CodeGen/MachineBlockPlacement.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file implements basic block placement transformations using the CFG
1009467b48Spatrick // structure and branch probability estimates.
1109467b48Spatrick //
1209467b48Spatrick // The pass strives to preserve the structure of the CFG (that is, retain
1309467b48Spatrick // a topological ordering of basic blocks) in the absence of a *strong* signal
1409467b48Spatrick // to the contrary from probabilities. However, within the CFG structure, it
1509467b48Spatrick // attempts to choose an ordering which favors placing more likely sequences of
1609467b48Spatrick // blocks adjacent to each other.
1709467b48Spatrick //
1809467b48Spatrick // The algorithm works from the inner-most loop within a function outward, and
1909467b48Spatrick // at each stage walks through the basic blocks, trying to coalesce them into
2009467b48Spatrick // sequential chains where allowed by the CFG (or demanded by heavy
2109467b48Spatrick // probabilities). Finally, it walks the blocks in topological order, and the
2209467b48Spatrick // first time it reaches a chain of basic blocks, it schedules them in the
2309467b48Spatrick // function in-order.
2409467b48Spatrick //
2509467b48Spatrick //===----------------------------------------------------------------------===//
2609467b48Spatrick 
2709467b48Spatrick #include "BranchFolding.h"
2809467b48Spatrick #include "llvm/ADT/ArrayRef.h"
2909467b48Spatrick #include "llvm/ADT/DenseMap.h"
3009467b48Spatrick #include "llvm/ADT/STLExtras.h"
3109467b48Spatrick #include "llvm/ADT/SetVector.h"
3209467b48Spatrick #include "llvm/ADT/SmallPtrSet.h"
3309467b48Spatrick #include "llvm/ADT/SmallVector.h"
3409467b48Spatrick #include "llvm/ADT/Statistic.h"
3509467b48Spatrick #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
3609467b48Spatrick #include "llvm/Analysis/ProfileSummaryInfo.h"
37*d415bd75Srobert #include "llvm/CodeGen/MBFIWrapper.h"
3809467b48Spatrick #include "llvm/CodeGen/MachineBasicBlock.h"
3909467b48Spatrick #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
4009467b48Spatrick #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
4109467b48Spatrick #include "llvm/CodeGen/MachineFunction.h"
4209467b48Spatrick #include "llvm/CodeGen/MachineFunctionPass.h"
4309467b48Spatrick #include "llvm/CodeGen/MachineLoopInfo.h"
4409467b48Spatrick #include "llvm/CodeGen/MachinePostDominators.h"
4509467b48Spatrick #include "llvm/CodeGen/MachineSizeOpts.h"
4609467b48Spatrick #include "llvm/CodeGen/TailDuplicator.h"
4709467b48Spatrick #include "llvm/CodeGen/TargetInstrInfo.h"
4809467b48Spatrick #include "llvm/CodeGen/TargetLowering.h"
4909467b48Spatrick #include "llvm/CodeGen/TargetPassConfig.h"
5009467b48Spatrick #include "llvm/CodeGen/TargetSubtargetInfo.h"
5109467b48Spatrick #include "llvm/IR/DebugLoc.h"
5209467b48Spatrick #include "llvm/IR/Function.h"
53*d415bd75Srobert #include "llvm/IR/PrintPasses.h"
5409467b48Spatrick #include "llvm/InitializePasses.h"
5509467b48Spatrick #include "llvm/Pass.h"
5609467b48Spatrick #include "llvm/Support/Allocator.h"
5709467b48Spatrick #include "llvm/Support/BlockFrequency.h"
5809467b48Spatrick #include "llvm/Support/BranchProbability.h"
5909467b48Spatrick #include "llvm/Support/CodeGen.h"
6009467b48Spatrick #include "llvm/Support/CommandLine.h"
6109467b48Spatrick #include "llvm/Support/Compiler.h"
6209467b48Spatrick #include "llvm/Support/Debug.h"
6309467b48Spatrick #include "llvm/Support/raw_ostream.h"
6409467b48Spatrick #include "llvm/Target/TargetMachine.h"
65*d415bd75Srobert #include "llvm/Transforms/Utils/CodeLayout.h"
6609467b48Spatrick #include <algorithm>
6709467b48Spatrick #include <cassert>
6809467b48Spatrick #include <cstdint>
6909467b48Spatrick #include <iterator>
7009467b48Spatrick #include <memory>
7109467b48Spatrick #include <string>
7209467b48Spatrick #include <tuple>
7309467b48Spatrick #include <utility>
7409467b48Spatrick #include <vector>
7509467b48Spatrick 
7609467b48Spatrick using namespace llvm;
7709467b48Spatrick 
7809467b48Spatrick #define DEBUG_TYPE "block-placement"
7909467b48Spatrick 
8009467b48Spatrick STATISTIC(NumCondBranches, "Number of conditional branches");
8109467b48Spatrick STATISTIC(NumUncondBranches, "Number of unconditional branches");
8209467b48Spatrick STATISTIC(CondBranchTakenFreq,
8309467b48Spatrick           "Potential frequency of taking conditional branches");
8409467b48Spatrick STATISTIC(UncondBranchTakenFreq,
8509467b48Spatrick           "Potential frequency of taking unconditional branches");
8609467b48Spatrick 
8709467b48Spatrick static cl::opt<unsigned> AlignAllBlock(
8809467b48Spatrick     "align-all-blocks",
8909467b48Spatrick     cl::desc("Force the alignment of all blocks in the function in log2 format "
9009467b48Spatrick              "(e.g 4 means align on 16B boundaries)."),
9109467b48Spatrick     cl::init(0), cl::Hidden);
9209467b48Spatrick 
9309467b48Spatrick static cl::opt<unsigned> AlignAllNonFallThruBlocks(
9409467b48Spatrick     "align-all-nofallthru-blocks",
9509467b48Spatrick     cl::desc("Force the alignment of all blocks that have no fall-through "
9609467b48Spatrick              "predecessors (i.e. don't add nops that are executed). In log2 "
9709467b48Spatrick              "format (e.g 4 means align on 16B boundaries)."),
9809467b48Spatrick     cl::init(0), cl::Hidden);
9909467b48Spatrick 
100*d415bd75Srobert static cl::opt<unsigned> MaxBytesForAlignmentOverride(
101*d415bd75Srobert     "max-bytes-for-alignment",
102*d415bd75Srobert     cl::desc("Forces the maximum bytes allowed to be emitted when padding for "
103*d415bd75Srobert              "alignment"),
104*d415bd75Srobert     cl::init(0), cl::Hidden);
105*d415bd75Srobert 
10609467b48Spatrick // FIXME: Find a good default for this flag and remove the flag.
10709467b48Spatrick static cl::opt<unsigned> ExitBlockBias(
10809467b48Spatrick     "block-placement-exit-block-bias",
10909467b48Spatrick     cl::desc("Block frequency percentage a loop exit block needs "
11009467b48Spatrick              "over the original exit to be considered the new exit."),
11109467b48Spatrick     cl::init(0), cl::Hidden);
11209467b48Spatrick 
11309467b48Spatrick // Definition:
11409467b48Spatrick // - Outlining: placement of a basic block outside the chain or hot path.
11509467b48Spatrick 
11609467b48Spatrick static cl::opt<unsigned> LoopToColdBlockRatio(
11709467b48Spatrick     "loop-to-cold-block-ratio",
11809467b48Spatrick     cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
11909467b48Spatrick              "(frequency of block) is greater than this ratio"),
12009467b48Spatrick     cl::init(5), cl::Hidden);
12109467b48Spatrick 
12209467b48Spatrick static cl::opt<bool> ForceLoopColdBlock(
12309467b48Spatrick     "force-loop-cold-block",
12409467b48Spatrick     cl::desc("Force outlining cold blocks from loops."),
12509467b48Spatrick     cl::init(false), cl::Hidden);
12609467b48Spatrick 
12709467b48Spatrick static cl::opt<bool>
12809467b48Spatrick     PreciseRotationCost("precise-rotation-cost",
12909467b48Spatrick                         cl::desc("Model the cost of loop rotation more "
13009467b48Spatrick                                  "precisely by using profile data."),
13109467b48Spatrick                         cl::init(false), cl::Hidden);
13209467b48Spatrick 
13309467b48Spatrick static cl::opt<bool>
13409467b48Spatrick     ForcePreciseRotationCost("force-precise-rotation-cost",
13509467b48Spatrick                              cl::desc("Force the use of precise cost "
13609467b48Spatrick                                       "loop rotation strategy."),
13709467b48Spatrick                              cl::init(false), cl::Hidden);
13809467b48Spatrick 
13909467b48Spatrick static cl::opt<unsigned> MisfetchCost(
14009467b48Spatrick     "misfetch-cost",
14109467b48Spatrick     cl::desc("Cost that models the probabilistic risk of an instruction "
14209467b48Spatrick              "misfetch due to a jump comparing to falling through, whose cost "
14309467b48Spatrick              "is zero."),
14409467b48Spatrick     cl::init(1), cl::Hidden);
14509467b48Spatrick 
14609467b48Spatrick static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
14709467b48Spatrick                                       cl::desc("Cost of jump instructions."),
14809467b48Spatrick                                       cl::init(1), cl::Hidden);
14909467b48Spatrick static cl::opt<bool>
15009467b48Spatrick TailDupPlacement("tail-dup-placement",
15109467b48Spatrick               cl::desc("Perform tail duplication during placement. "
15209467b48Spatrick                        "Creates more fallthrough opportunites in "
15309467b48Spatrick                        "outline branches."),
15409467b48Spatrick               cl::init(true), cl::Hidden);
15509467b48Spatrick 
15609467b48Spatrick static cl::opt<bool>
15709467b48Spatrick BranchFoldPlacement("branch-fold-placement",
15809467b48Spatrick               cl::desc("Perform branch folding during placement. "
15909467b48Spatrick                        "Reduces code size."),
16009467b48Spatrick               cl::init(true), cl::Hidden);
16109467b48Spatrick 
16209467b48Spatrick // Heuristic for tail duplication.
16309467b48Spatrick static cl::opt<unsigned> TailDupPlacementThreshold(
16409467b48Spatrick     "tail-dup-placement-threshold",
16509467b48Spatrick     cl::desc("Instruction cutoff for tail duplication during layout. "
16609467b48Spatrick              "Tail merging during layout is forced to have a threshold "
16709467b48Spatrick              "that won't conflict."), cl::init(2),
16809467b48Spatrick     cl::Hidden);
16909467b48Spatrick 
17009467b48Spatrick // Heuristic for aggressive tail duplication.
17109467b48Spatrick static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
17209467b48Spatrick     "tail-dup-placement-aggressive-threshold",
17309467b48Spatrick     cl::desc("Instruction cutoff for aggressive tail duplication during "
17409467b48Spatrick              "layout. Used at -O3. Tail merging during layout is forced to "
17509467b48Spatrick              "have a threshold that won't conflict."), cl::init(4),
17609467b48Spatrick     cl::Hidden);
17709467b48Spatrick 
17809467b48Spatrick // Heuristic for tail duplication.
17909467b48Spatrick static cl::opt<unsigned> TailDupPlacementPenalty(
18009467b48Spatrick     "tail-dup-placement-penalty",
18109467b48Spatrick     cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. "
18209467b48Spatrick              "Copying can increase fallthrough, but it also increases icache "
18309467b48Spatrick              "pressure. This parameter controls the penalty to account for that. "
18409467b48Spatrick              "Percent as integer."),
18509467b48Spatrick     cl::init(2),
18609467b48Spatrick     cl::Hidden);
18709467b48Spatrick 
18873471bf0Spatrick // Heuristic for tail duplication if profile count is used in cost model.
18973471bf0Spatrick static cl::opt<unsigned> TailDupProfilePercentThreshold(
19073471bf0Spatrick     "tail-dup-profile-percent-threshold",
19173471bf0Spatrick     cl::desc("If profile count information is used in tail duplication cost "
19273471bf0Spatrick              "model, the gained fall through number from tail duplication "
19373471bf0Spatrick              "should be at least this percent of hot count."),
19473471bf0Spatrick     cl::init(50), cl::Hidden);
19573471bf0Spatrick 
19609467b48Spatrick // Heuristic for triangle chains.
19709467b48Spatrick static cl::opt<unsigned> TriangleChainCount(
19809467b48Spatrick     "triangle-chain-count",
19909467b48Spatrick     cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "
20009467b48Spatrick              "triangle tail duplication heuristic to kick in. 0 to disable."),
20109467b48Spatrick     cl::init(2),
20209467b48Spatrick     cl::Hidden);
20309467b48Spatrick 
204*d415bd75Srobert // Use case: When block layout is visualized after MBP pass, the basic blocks
205*d415bd75Srobert // are labeled in layout order; meanwhile blocks could be numbered in a
206*d415bd75Srobert // different order. It's hard to map between the graph and pass output.
207*d415bd75Srobert // With this option on, the basic blocks are renumbered in function layout
208*d415bd75Srobert // order. For debugging only.
209*d415bd75Srobert static cl::opt<bool> RenumberBlocksBeforeView(
210*d415bd75Srobert     "renumber-blocks-before-view",
211*d415bd75Srobert     cl::desc(
212*d415bd75Srobert         "If true, basic blocks are re-numbered before MBP layout is printed "
213*d415bd75Srobert         "into a dot graph. Only used when a function is being printed."),
214*d415bd75Srobert     cl::init(false), cl::Hidden);
215*d415bd75Srobert 
216*d415bd75Srobert extern cl::opt<bool> EnableExtTspBlockPlacement;
217*d415bd75Srobert extern cl::opt<bool> ApplyExtTspWithoutProfile;
218*d415bd75Srobert 
21973471bf0Spatrick namespace llvm {
22009467b48Spatrick extern cl::opt<unsigned> StaticLikelyProb;
22109467b48Spatrick extern cl::opt<unsigned> ProfileLikelyProb;
22209467b48Spatrick 
22309467b48Spatrick // Internal option used to control BFI display only after MBP pass.
22409467b48Spatrick // Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
22509467b48Spatrick // -view-block-layout-with-bfi=
22609467b48Spatrick extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
22709467b48Spatrick 
22809467b48Spatrick // Command line option to specify the name of the function for CFG dump
22909467b48Spatrick // Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
23009467b48Spatrick extern cl::opt<std::string> ViewBlockFreqFuncName;
23173471bf0Spatrick } // namespace llvm
23209467b48Spatrick 
23309467b48Spatrick namespace {
23409467b48Spatrick 
23509467b48Spatrick class BlockChain;
23609467b48Spatrick 
23709467b48Spatrick /// Type for our function-wide basic block -> block chain mapping.
23809467b48Spatrick using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>;
23909467b48Spatrick 
24009467b48Spatrick /// A chain of blocks which will be laid out contiguously.
24109467b48Spatrick ///
24209467b48Spatrick /// This is the datastructure representing a chain of consecutive blocks that
24309467b48Spatrick /// are profitable to layout together in order to maximize fallthrough
24409467b48Spatrick /// probabilities and code locality. We also can use a block chain to represent
24509467b48Spatrick /// a sequence of basic blocks which have some external (correctness)
24609467b48Spatrick /// requirement for sequential layout.
24709467b48Spatrick ///
24809467b48Spatrick /// Chains can be built around a single basic block and can be merged to grow
24909467b48Spatrick /// them. They participate in a block-to-chain mapping, which is updated
25009467b48Spatrick /// automatically as chains are merged together.
25109467b48Spatrick class BlockChain {
25209467b48Spatrick   /// The sequence of blocks belonging to this chain.
25309467b48Spatrick   ///
25409467b48Spatrick   /// This is the sequence of blocks for a particular chain. These will be laid
25509467b48Spatrick   /// out in-order within the function.
25609467b48Spatrick   SmallVector<MachineBasicBlock *, 4> Blocks;
25709467b48Spatrick 
25809467b48Spatrick   /// A handle to the function-wide basic block to block chain mapping.
25909467b48Spatrick   ///
26009467b48Spatrick   /// This is retained in each block chain to simplify the computation of child
26109467b48Spatrick   /// block chains for SCC-formation and iteration. We store the edges to child
26209467b48Spatrick   /// basic blocks, and map them back to their associated chains using this
26309467b48Spatrick   /// structure.
26409467b48Spatrick   BlockToChainMapType &BlockToChain;
26509467b48Spatrick 
26609467b48Spatrick public:
26709467b48Spatrick   /// Construct a new BlockChain.
26809467b48Spatrick   ///
26909467b48Spatrick   /// This builds a new block chain representing a single basic block in the
27009467b48Spatrick   /// function. It also registers itself as the chain that block participates
27109467b48Spatrick   /// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType & BlockToChain,MachineBasicBlock * BB)27209467b48Spatrick   BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
27309467b48Spatrick       : Blocks(1, BB), BlockToChain(BlockToChain) {
27409467b48Spatrick     assert(BB && "Cannot create a chain with a null basic block");
27509467b48Spatrick     BlockToChain[BB] = this;
27609467b48Spatrick   }
27709467b48Spatrick 
27809467b48Spatrick   /// Iterator over blocks within the chain.
27909467b48Spatrick   using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator;
28009467b48Spatrick   using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator;
28109467b48Spatrick 
28209467b48Spatrick   /// Beginning of blocks within the chain.
begin()28309467b48Spatrick   iterator begin() { return Blocks.begin(); }
begin() const28409467b48Spatrick   const_iterator begin() const { return Blocks.begin(); }
28509467b48Spatrick 
28609467b48Spatrick   /// End of blocks within the chain.
end()28709467b48Spatrick   iterator end() { return Blocks.end(); }
end() const28809467b48Spatrick   const_iterator end() const { return Blocks.end(); }
28909467b48Spatrick 
remove(MachineBasicBlock * BB)29009467b48Spatrick   bool remove(MachineBasicBlock* BB) {
29109467b48Spatrick     for(iterator i = begin(); i != end(); ++i) {
29209467b48Spatrick       if (*i == BB) {
29309467b48Spatrick         Blocks.erase(i);
29409467b48Spatrick         return true;
29509467b48Spatrick       }
29609467b48Spatrick     }
29709467b48Spatrick     return false;
29809467b48Spatrick   }
29909467b48Spatrick 
30009467b48Spatrick   /// Merge a block chain into this one.
30109467b48Spatrick   ///
30209467b48Spatrick   /// This routine merges a block chain into this one. It takes care of forming
30309467b48Spatrick   /// a contiguous sequence of basic blocks, updating the edge list, and
30409467b48Spatrick   /// updating the block -> chain mapping. It does not free or tear down the
30509467b48Spatrick   /// old chain, but the old chain's block list is no longer valid.
merge(MachineBasicBlock * BB,BlockChain * Chain)30609467b48Spatrick   void merge(MachineBasicBlock *BB, BlockChain *Chain) {
30709467b48Spatrick     assert(BB && "Can't merge a null block.");
30809467b48Spatrick     assert(!Blocks.empty() && "Can't merge into an empty chain.");
30909467b48Spatrick 
31009467b48Spatrick     // Fast path in case we don't have a chain already.
31109467b48Spatrick     if (!Chain) {
31209467b48Spatrick       assert(!BlockToChain[BB] &&
31309467b48Spatrick              "Passed chain is null, but BB has entry in BlockToChain.");
31409467b48Spatrick       Blocks.push_back(BB);
31509467b48Spatrick       BlockToChain[BB] = this;
31609467b48Spatrick       return;
31709467b48Spatrick     }
31809467b48Spatrick 
31909467b48Spatrick     assert(BB == *Chain->begin() && "Passed BB is not head of Chain.");
32009467b48Spatrick     assert(Chain->begin() != Chain->end());
32109467b48Spatrick 
32209467b48Spatrick     // Update the incoming blocks to point to this chain, and add them to the
32309467b48Spatrick     // chain structure.
32409467b48Spatrick     for (MachineBasicBlock *ChainBB : *Chain) {
32509467b48Spatrick       Blocks.push_back(ChainBB);
32609467b48Spatrick       assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain.");
32709467b48Spatrick       BlockToChain[ChainBB] = this;
32809467b48Spatrick     }
32909467b48Spatrick   }
33009467b48Spatrick 
33109467b48Spatrick #ifndef NDEBUG
33209467b48Spatrick   /// Dump the blocks in this chain.
dump()33309467b48Spatrick   LLVM_DUMP_METHOD void dump() {
33409467b48Spatrick     for (MachineBasicBlock *MBB : *this)
33509467b48Spatrick       MBB->dump();
33609467b48Spatrick   }
33709467b48Spatrick #endif // NDEBUG
33809467b48Spatrick 
33909467b48Spatrick   /// Count of predecessors of any block within the chain which have not
34009467b48Spatrick   /// yet been scheduled.  In general, we will delay scheduling this chain
34109467b48Spatrick   /// until those predecessors are scheduled (or we find a sufficiently good
34209467b48Spatrick   /// reason to override this heuristic.)  Note that when forming loop chains,
34309467b48Spatrick   /// blocks outside the loop are ignored and treated as if they were already
34409467b48Spatrick   /// scheduled.
34509467b48Spatrick   ///
34609467b48Spatrick   /// Note: This field is reinitialized multiple times - once for each loop,
34709467b48Spatrick   /// and then once for the function as a whole.
34809467b48Spatrick   unsigned UnscheduledPredecessors = 0;
34909467b48Spatrick };
35009467b48Spatrick 
35109467b48Spatrick class MachineBlockPlacement : public MachineFunctionPass {
35209467b48Spatrick   /// A type for a block filter set.
35309467b48Spatrick   using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
35409467b48Spatrick 
35509467b48Spatrick   /// Pair struct containing basic block and taildup profitability
35609467b48Spatrick   struct BlockAndTailDupResult {
35709467b48Spatrick     MachineBasicBlock *BB;
35809467b48Spatrick     bool ShouldTailDup;
35909467b48Spatrick   };
36009467b48Spatrick 
36109467b48Spatrick   /// Triple struct containing edge weight and the edge.
36209467b48Spatrick   struct WeightedEdge {
36309467b48Spatrick     BlockFrequency Weight;
36409467b48Spatrick     MachineBasicBlock *Src;
36509467b48Spatrick     MachineBasicBlock *Dest;
36609467b48Spatrick   };
36709467b48Spatrick 
36809467b48Spatrick   /// work lists of blocks that are ready to be laid out
36909467b48Spatrick   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
37009467b48Spatrick   SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
37109467b48Spatrick 
37209467b48Spatrick   /// Edges that have already been computed as optimal.
37309467b48Spatrick   DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
37409467b48Spatrick 
37509467b48Spatrick   /// Machine Function
37609467b48Spatrick   MachineFunction *F;
37709467b48Spatrick 
37809467b48Spatrick   /// A handle to the branch probability pass.
37909467b48Spatrick   const MachineBranchProbabilityInfo *MBPI;
38009467b48Spatrick 
38109467b48Spatrick   /// A handle to the function-wide block frequency pass.
382097a140dSpatrick   std::unique_ptr<MBFIWrapper> MBFI;
38309467b48Spatrick 
38409467b48Spatrick   /// A handle to the loop info.
38509467b48Spatrick   MachineLoopInfo *MLI;
38609467b48Spatrick 
38709467b48Spatrick   /// Preferred loop exit.
38809467b48Spatrick   /// Member variable for convenience. It may be removed by duplication deep
38909467b48Spatrick   /// in the call stack.
39009467b48Spatrick   MachineBasicBlock *PreferredLoopExit;
39109467b48Spatrick 
39209467b48Spatrick   /// A handle to the target's instruction info.
39309467b48Spatrick   const TargetInstrInfo *TII;
39409467b48Spatrick 
39509467b48Spatrick   /// A handle to the target's lowering info.
39609467b48Spatrick   const TargetLoweringBase *TLI;
39709467b48Spatrick 
39809467b48Spatrick   /// A handle to the post dominator tree.
39909467b48Spatrick   MachinePostDominatorTree *MPDT;
40009467b48Spatrick 
40109467b48Spatrick   ProfileSummaryInfo *PSI;
40209467b48Spatrick 
40309467b48Spatrick   /// Duplicator used to duplicate tails during placement.
40409467b48Spatrick   ///
40509467b48Spatrick   /// Placement decisions can open up new tail duplication opportunities, but
40609467b48Spatrick   /// since tail duplication affects placement decisions of later blocks, it
40709467b48Spatrick   /// must be done inline.
40809467b48Spatrick   TailDuplicator TailDup;
40909467b48Spatrick 
410097a140dSpatrick   /// Partial tail duplication threshold.
411097a140dSpatrick   BlockFrequency DupThreshold;
412097a140dSpatrick 
41373471bf0Spatrick   /// True:  use block profile count to compute tail duplication cost.
41473471bf0Spatrick   /// False: use block frequency to compute tail duplication cost.
41573471bf0Spatrick   bool UseProfileCount;
41673471bf0Spatrick 
41709467b48Spatrick   /// Allocator and owner of BlockChain structures.
41809467b48Spatrick   ///
41909467b48Spatrick   /// We build BlockChains lazily while processing the loop structure of
42009467b48Spatrick   /// a function. To reduce malloc traffic, we allocate them using this
42109467b48Spatrick   /// slab-like allocator, and destroy them after the pass completes. An
42209467b48Spatrick   /// important guarantee is that this allocator produces stable pointers to
42309467b48Spatrick   /// the chains.
42409467b48Spatrick   SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
42509467b48Spatrick 
42609467b48Spatrick   /// Function wide BasicBlock to BlockChain mapping.
42709467b48Spatrick   ///
42809467b48Spatrick   /// This mapping allows efficiently moving from any given basic block to the
42909467b48Spatrick   /// BlockChain it participates in, if any. We use it to, among other things,
43009467b48Spatrick   /// allow implicitly defining edges between chains as the existing edges
43109467b48Spatrick   /// between basic blocks.
43209467b48Spatrick   DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
43309467b48Spatrick 
43409467b48Spatrick #ifndef NDEBUG
43509467b48Spatrick   /// The set of basic blocks that have terminators that cannot be fully
43609467b48Spatrick   /// analyzed.  These basic blocks cannot be re-ordered safely by
43709467b48Spatrick   /// MachineBlockPlacement, and we must preserve physical layout of these
43809467b48Spatrick   /// blocks and their successors through the pass.
43909467b48Spatrick   SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
44009467b48Spatrick #endif
44109467b48Spatrick 
44273471bf0Spatrick   /// Get block profile count or frequency according to UseProfileCount.
44373471bf0Spatrick   /// The return value is used to model tail duplication cost.
getBlockCountOrFrequency(const MachineBasicBlock * BB)44473471bf0Spatrick   BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) {
44573471bf0Spatrick     if (UseProfileCount) {
44673471bf0Spatrick       auto Count = MBFI->getBlockProfileCount(BB);
44773471bf0Spatrick       if (Count)
44873471bf0Spatrick         return *Count;
44973471bf0Spatrick       else
45073471bf0Spatrick         return 0;
45173471bf0Spatrick     } else
45273471bf0Spatrick       return MBFI->getBlockFreq(BB);
45373471bf0Spatrick   }
45473471bf0Spatrick 
455097a140dSpatrick   /// Scale the DupThreshold according to basic block size.
456097a140dSpatrick   BlockFrequency scaleThreshold(MachineBasicBlock *BB);
457097a140dSpatrick   void initDupThreshold();
458097a140dSpatrick 
45909467b48Spatrick   /// Decrease the UnscheduledPredecessors count for all blocks in chain, and
46009467b48Spatrick   /// if the count goes to 0, add them to the appropriate work list.
46109467b48Spatrick   void markChainSuccessors(
46209467b48Spatrick       const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
46309467b48Spatrick       const BlockFilterSet *BlockFilter = nullptr);
46409467b48Spatrick 
46509467b48Spatrick   /// Decrease the UnscheduledPredecessors count for a single block, and
46609467b48Spatrick   /// if the count goes to 0, add them to the appropriate work list.
46709467b48Spatrick   void markBlockSuccessors(
46809467b48Spatrick       const BlockChain &Chain, const MachineBasicBlock *BB,
46909467b48Spatrick       const MachineBasicBlock *LoopHeaderBB,
47009467b48Spatrick       const BlockFilterSet *BlockFilter = nullptr);
47109467b48Spatrick 
47209467b48Spatrick   BranchProbability
47309467b48Spatrick   collectViableSuccessors(
47409467b48Spatrick       const MachineBasicBlock *BB, const BlockChain &Chain,
47509467b48Spatrick       const BlockFilterSet *BlockFilter,
47609467b48Spatrick       SmallVector<MachineBasicBlock *, 4> &Successors);
477097a140dSpatrick   bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
478097a140dSpatrick                        BlockFilterSet *BlockFilter);
479097a140dSpatrick   void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
480097a140dSpatrick                                MachineBasicBlock *BB,
481097a140dSpatrick                                BlockFilterSet *BlockFilter);
48209467b48Spatrick   bool repeatedlyTailDuplicateBlock(
48309467b48Spatrick       MachineBasicBlock *BB, MachineBasicBlock *&LPred,
48409467b48Spatrick       const MachineBasicBlock *LoopHeaderBB,
48509467b48Spatrick       BlockChain &Chain, BlockFilterSet *BlockFilter,
48609467b48Spatrick       MachineFunction::iterator &PrevUnplacedBlockIt);
48709467b48Spatrick   bool maybeTailDuplicateBlock(
48809467b48Spatrick       MachineBasicBlock *BB, MachineBasicBlock *LPred,
48909467b48Spatrick       BlockChain &Chain, BlockFilterSet *BlockFilter,
49009467b48Spatrick       MachineFunction::iterator &PrevUnplacedBlockIt,
49109467b48Spatrick       bool &DuplicatedToLPred);
49209467b48Spatrick   bool hasBetterLayoutPredecessor(
49309467b48Spatrick       const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
49409467b48Spatrick       const BlockChain &SuccChain, BranchProbability SuccProb,
49509467b48Spatrick       BranchProbability RealSuccProb, const BlockChain &Chain,
49609467b48Spatrick       const BlockFilterSet *BlockFilter);
49709467b48Spatrick   BlockAndTailDupResult selectBestSuccessor(
49809467b48Spatrick       const MachineBasicBlock *BB, const BlockChain &Chain,
49909467b48Spatrick       const BlockFilterSet *BlockFilter);
50009467b48Spatrick   MachineBasicBlock *selectBestCandidateBlock(
50109467b48Spatrick       const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList);
50209467b48Spatrick   MachineBasicBlock *getFirstUnplacedBlock(
50309467b48Spatrick       const BlockChain &PlacedChain,
50409467b48Spatrick       MachineFunction::iterator &PrevUnplacedBlockIt,
50509467b48Spatrick       const BlockFilterSet *BlockFilter);
50609467b48Spatrick 
50709467b48Spatrick   /// Add a basic block to the work list if it is appropriate.
50809467b48Spatrick   ///
50909467b48Spatrick   /// If the optional parameter BlockFilter is provided, only MBB
51009467b48Spatrick   /// present in the set will be added to the worklist. If nullptr
51109467b48Spatrick   /// is provided, no filtering occurs.
51209467b48Spatrick   void fillWorkLists(const MachineBasicBlock *MBB,
51309467b48Spatrick                      SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
51409467b48Spatrick                      const BlockFilterSet *BlockFilter);
51509467b48Spatrick 
51609467b48Spatrick   void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
51709467b48Spatrick                   BlockFilterSet *BlockFilter = nullptr);
51809467b48Spatrick   bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
51909467b48Spatrick                                const MachineBasicBlock *OldTop);
52009467b48Spatrick   bool hasViableTopFallthrough(const MachineBasicBlock *Top,
52109467b48Spatrick                                const BlockFilterSet &LoopBlockSet);
52209467b48Spatrick   BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,
52309467b48Spatrick                                     const BlockFilterSet &LoopBlockSet);
52409467b48Spatrick   BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,
52509467b48Spatrick                                   const MachineBasicBlock *OldTop,
52609467b48Spatrick                                   const MachineBasicBlock *ExitBB,
52709467b48Spatrick                                   const BlockFilterSet &LoopBlockSet);
52809467b48Spatrick   MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
52909467b48Spatrick       const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
53009467b48Spatrick   MachineBasicBlock *findBestLoopTop(
53109467b48Spatrick       const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
53209467b48Spatrick   MachineBasicBlock *findBestLoopExit(
53309467b48Spatrick       const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
53409467b48Spatrick       BlockFrequency &ExitFreq);
53509467b48Spatrick   BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
53609467b48Spatrick   void buildLoopChains(const MachineLoop &L);
53709467b48Spatrick   void rotateLoop(
53809467b48Spatrick       BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
53909467b48Spatrick       BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
54009467b48Spatrick   void rotateLoopWithProfile(
54109467b48Spatrick       BlockChain &LoopChain, const MachineLoop &L,
54209467b48Spatrick       const BlockFilterSet &LoopBlockSet);
54309467b48Spatrick   void buildCFGChains();
54409467b48Spatrick   void optimizeBranches();
54509467b48Spatrick   void alignBlocks();
54609467b48Spatrick   /// Returns true if a block should be tail-duplicated to increase fallthrough
54709467b48Spatrick   /// opportunities.
54809467b48Spatrick   bool shouldTailDuplicate(MachineBasicBlock *BB);
54909467b48Spatrick   /// Check the edge frequencies to see if tail duplication will increase
55009467b48Spatrick   /// fallthroughs.
55109467b48Spatrick   bool isProfitableToTailDup(
55209467b48Spatrick     const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
55309467b48Spatrick     BranchProbability QProb,
55409467b48Spatrick     const BlockChain &Chain, const BlockFilterSet *BlockFilter);
55509467b48Spatrick 
55609467b48Spatrick   /// Check for a trellis layout.
55709467b48Spatrick   bool isTrellis(const MachineBasicBlock *BB,
55809467b48Spatrick                  const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
55909467b48Spatrick                  const BlockChain &Chain, const BlockFilterSet *BlockFilter);
56009467b48Spatrick 
56109467b48Spatrick   /// Get the best successor given a trellis layout.
56209467b48Spatrick   BlockAndTailDupResult getBestTrellisSuccessor(
56309467b48Spatrick       const MachineBasicBlock *BB,
56409467b48Spatrick       const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
56509467b48Spatrick       BranchProbability AdjustedSumProb, const BlockChain &Chain,
56609467b48Spatrick       const BlockFilterSet *BlockFilter);
56709467b48Spatrick 
56809467b48Spatrick   /// Get the best pair of non-conflicting edges.
56909467b48Spatrick   static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
57009467b48Spatrick       const MachineBasicBlock *BB,
57109467b48Spatrick       MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges);
57209467b48Spatrick 
57309467b48Spatrick   /// Returns true if a block can tail duplicate into all unplaced
57409467b48Spatrick   /// predecessors. Filters based on loop.
57509467b48Spatrick   bool canTailDuplicateUnplacedPreds(
57609467b48Spatrick       const MachineBasicBlock *BB, MachineBasicBlock *Succ,
57709467b48Spatrick       const BlockChain &Chain, const BlockFilterSet *BlockFilter);
57809467b48Spatrick 
57909467b48Spatrick   /// Find chains of triangles to tail-duplicate where a global analysis works,
58009467b48Spatrick   /// but a local analysis would not find them.
58109467b48Spatrick   void precomputeTriangleChains();
58209467b48Spatrick 
583*d415bd75Srobert   /// Apply a post-processing step optimizing block placement.
584*d415bd75Srobert   void applyExtTsp();
585*d415bd75Srobert 
586*d415bd75Srobert   /// Modify the existing block placement in the function and adjust all jumps.
587*d415bd75Srobert   void assignBlockOrder(const std::vector<const MachineBasicBlock *> &NewOrder);
588*d415bd75Srobert 
589*d415bd75Srobert   /// Create a single CFG chain from the current block order.
590*d415bd75Srobert   void createCFGChainExtTsp();
591*d415bd75Srobert 
59209467b48Spatrick public:
59309467b48Spatrick   static char ID; // Pass identification, replacement for typeid
59409467b48Spatrick 
MachineBlockPlacement()59509467b48Spatrick   MachineBlockPlacement() : MachineFunctionPass(ID) {
59609467b48Spatrick     initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
59709467b48Spatrick   }
59809467b48Spatrick 
59909467b48Spatrick   bool runOnMachineFunction(MachineFunction &F) override;
60009467b48Spatrick 
allowTailDupPlacement() const60109467b48Spatrick   bool allowTailDupPlacement() const {
60209467b48Spatrick     assert(F);
60309467b48Spatrick     return TailDupPlacement && !F->getTarget().requiresStructuredCFG();
60409467b48Spatrick   }
60509467b48Spatrick 
getAnalysisUsage(AnalysisUsage & AU) const60609467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
60709467b48Spatrick     AU.addRequired<MachineBranchProbabilityInfo>();
60809467b48Spatrick     AU.addRequired<MachineBlockFrequencyInfo>();
60909467b48Spatrick     if (TailDupPlacement)
61009467b48Spatrick       AU.addRequired<MachinePostDominatorTree>();
61109467b48Spatrick     AU.addRequired<MachineLoopInfo>();
61209467b48Spatrick     AU.addRequired<ProfileSummaryInfoWrapperPass>();
61309467b48Spatrick     AU.addRequired<TargetPassConfig>();
61409467b48Spatrick     MachineFunctionPass::getAnalysisUsage(AU);
61509467b48Spatrick   }
61609467b48Spatrick };
61709467b48Spatrick 
61809467b48Spatrick } // end anonymous namespace
61909467b48Spatrick 
62009467b48Spatrick char MachineBlockPlacement::ID = 0;
62109467b48Spatrick 
62209467b48Spatrick char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
62309467b48Spatrick 
62409467b48Spatrick INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
62509467b48Spatrick                       "Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)62609467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
62709467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
62809467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
62909467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
63009467b48Spatrick INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
63109467b48Spatrick INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
63209467b48Spatrick                     "Branch Probability Basic Block Placement", false, false)
63309467b48Spatrick 
63409467b48Spatrick #ifndef NDEBUG
63509467b48Spatrick /// Helper to print the name of a MBB.
63609467b48Spatrick ///
63709467b48Spatrick /// Only used by debug logging.
63809467b48Spatrick static std::string getBlockName(const MachineBasicBlock *BB) {
63909467b48Spatrick   std::string Result;
64009467b48Spatrick   raw_string_ostream OS(Result);
64109467b48Spatrick   OS << printMBBReference(*BB);
64209467b48Spatrick   OS << " ('" << BB->getName() << "')";
64309467b48Spatrick   OS.flush();
64409467b48Spatrick   return Result;
64509467b48Spatrick }
64609467b48Spatrick #endif
64709467b48Spatrick 
64809467b48Spatrick /// Mark a chain's successors as having one fewer preds.
64909467b48Spatrick ///
65009467b48Spatrick /// When a chain is being merged into the "placed" chain, this routine will
65109467b48Spatrick /// quickly walk the successors of each block in the chain and mark them as
65209467b48Spatrick /// having one fewer active predecessor. It also adds any successors of this
65309467b48Spatrick /// chain which reach the zero-predecessor state to the appropriate worklist.
markChainSuccessors(const BlockChain & Chain,const MachineBasicBlock * LoopHeaderBB,const BlockFilterSet * BlockFilter)65409467b48Spatrick void MachineBlockPlacement::markChainSuccessors(
65509467b48Spatrick     const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
65609467b48Spatrick     const BlockFilterSet *BlockFilter) {
65709467b48Spatrick   // Walk all the blocks in this chain, marking their successors as having
65809467b48Spatrick   // a predecessor placed.
65909467b48Spatrick   for (MachineBasicBlock *MBB : Chain) {
66009467b48Spatrick     markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
66109467b48Spatrick   }
66209467b48Spatrick }
66309467b48Spatrick 
66409467b48Spatrick /// Mark a single block's successors as having one fewer preds.
66509467b48Spatrick ///
66609467b48Spatrick /// Under normal circumstances, this is only called by markChainSuccessors,
66709467b48Spatrick /// but if a block that was to be placed is completely tail-duplicated away,
66809467b48Spatrick /// and was duplicated into the chain end, we need to redo markBlockSuccessors
66909467b48Spatrick /// for just that block.
markBlockSuccessors(const BlockChain & Chain,const MachineBasicBlock * MBB,const MachineBasicBlock * LoopHeaderBB,const BlockFilterSet * BlockFilter)67009467b48Spatrick void MachineBlockPlacement::markBlockSuccessors(
67109467b48Spatrick     const BlockChain &Chain, const MachineBasicBlock *MBB,
67209467b48Spatrick     const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {
67309467b48Spatrick   // Add any successors for which this is the only un-placed in-loop
67409467b48Spatrick   // predecessor to the worklist as a viable candidate for CFG-neutral
67509467b48Spatrick   // placement. No subsequent placement of this block will violate the CFG
67609467b48Spatrick   // shape, so we get to use heuristics to choose a favorable placement.
67709467b48Spatrick   for (MachineBasicBlock *Succ : MBB->successors()) {
67809467b48Spatrick     if (BlockFilter && !BlockFilter->count(Succ))
67909467b48Spatrick       continue;
68009467b48Spatrick     BlockChain &SuccChain = *BlockToChain[Succ];
68109467b48Spatrick     // Disregard edges within a fixed chain, or edges to the loop header.
68209467b48Spatrick     if (&Chain == &SuccChain || Succ == LoopHeaderBB)
68309467b48Spatrick       continue;
68409467b48Spatrick 
68509467b48Spatrick     // This is a cross-chain edge that is within the loop, so decrement the
68609467b48Spatrick     // loop predecessor count of the destination chain.
68709467b48Spatrick     if (SuccChain.UnscheduledPredecessors == 0 ||
68809467b48Spatrick         --SuccChain.UnscheduledPredecessors > 0)
68909467b48Spatrick       continue;
69009467b48Spatrick 
69109467b48Spatrick     auto *NewBB = *SuccChain.begin();
69209467b48Spatrick     if (NewBB->isEHPad())
69309467b48Spatrick       EHPadWorkList.push_back(NewBB);
69409467b48Spatrick     else
69509467b48Spatrick       BlockWorkList.push_back(NewBB);
69609467b48Spatrick   }
69709467b48Spatrick }
69809467b48Spatrick 
69909467b48Spatrick /// This helper function collects the set of successors of block
70009467b48Spatrick /// \p BB that are allowed to be its layout successors, and return
70109467b48Spatrick /// the total branch probability of edges from \p BB to those
70209467b48Spatrick /// blocks.
collectViableSuccessors(const MachineBasicBlock * BB,const BlockChain & Chain,const BlockFilterSet * BlockFilter,SmallVector<MachineBasicBlock *,4> & Successors)70309467b48Spatrick BranchProbability MachineBlockPlacement::collectViableSuccessors(
70409467b48Spatrick     const MachineBasicBlock *BB, const BlockChain &Chain,
70509467b48Spatrick     const BlockFilterSet *BlockFilter,
70609467b48Spatrick     SmallVector<MachineBasicBlock *, 4> &Successors) {
70709467b48Spatrick   // Adjust edge probabilities by excluding edges pointing to blocks that is
70809467b48Spatrick   // either not in BlockFilter or is already in the current chain. Consider the
70909467b48Spatrick   // following CFG:
71009467b48Spatrick   //
71109467b48Spatrick   //     --->A
71209467b48Spatrick   //     |  / \
71309467b48Spatrick   //     | B   C
71409467b48Spatrick   //     |  \ / \
71509467b48Spatrick   //     ----D   E
71609467b48Spatrick   //
71709467b48Spatrick   // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
71809467b48Spatrick   // A->C is chosen as a fall-through, D won't be selected as a successor of C
71909467b48Spatrick   // due to CFG constraint (the probability of C->D is not greater than
72009467b48Spatrick   // HotProb to break topo-order). If we exclude E that is not in BlockFilter
72109467b48Spatrick   // when calculating the probability of C->D, D will be selected and we
72209467b48Spatrick   // will get A C D B as the layout of this loop.
72309467b48Spatrick   auto AdjustedSumProb = BranchProbability::getOne();
72409467b48Spatrick   for (MachineBasicBlock *Succ : BB->successors()) {
72509467b48Spatrick     bool SkipSucc = false;
72609467b48Spatrick     if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
72709467b48Spatrick       SkipSucc = true;
72809467b48Spatrick     } else {
72909467b48Spatrick       BlockChain *SuccChain = BlockToChain[Succ];
73009467b48Spatrick       if (SuccChain == &Chain) {
73109467b48Spatrick         SkipSucc = true;
73209467b48Spatrick       } else if (Succ != *SuccChain->begin()) {
73309467b48Spatrick         LLVM_DEBUG(dbgs() << "    " << getBlockName(Succ)
73409467b48Spatrick                           << " -> Mid chain!\n");
73509467b48Spatrick         continue;
73609467b48Spatrick       }
73709467b48Spatrick     }
73809467b48Spatrick     if (SkipSucc)
73909467b48Spatrick       AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ);
74009467b48Spatrick     else
74109467b48Spatrick       Successors.push_back(Succ);
74209467b48Spatrick   }
74309467b48Spatrick 
74409467b48Spatrick   return AdjustedSumProb;
74509467b48Spatrick }
74609467b48Spatrick 
74709467b48Spatrick /// The helper function returns the branch probability that is adjusted
74809467b48Spatrick /// or normalized over the new total \p AdjustedSumProb.
74909467b48Spatrick static BranchProbability
getAdjustedProbability(BranchProbability OrigProb,BranchProbability AdjustedSumProb)75009467b48Spatrick getAdjustedProbability(BranchProbability OrigProb,
75109467b48Spatrick                        BranchProbability AdjustedSumProb) {
75209467b48Spatrick   BranchProbability SuccProb;
75309467b48Spatrick   uint32_t SuccProbN = OrigProb.getNumerator();
75409467b48Spatrick   uint32_t SuccProbD = AdjustedSumProb.getNumerator();
75509467b48Spatrick   if (SuccProbN >= SuccProbD)
75609467b48Spatrick     SuccProb = BranchProbability::getOne();
75709467b48Spatrick   else
75809467b48Spatrick     SuccProb = BranchProbability(SuccProbN, SuccProbD);
75909467b48Spatrick 
76009467b48Spatrick   return SuccProb;
76109467b48Spatrick }
76209467b48Spatrick 
76309467b48Spatrick /// Check if \p BB has exactly the successors in \p Successors.
76409467b48Spatrick static bool
hasSameSuccessors(MachineBasicBlock & BB,SmallPtrSetImpl<const MachineBasicBlock * > & Successors)76509467b48Spatrick hasSameSuccessors(MachineBasicBlock &BB,
76609467b48Spatrick                   SmallPtrSetImpl<const MachineBasicBlock *> &Successors) {
76709467b48Spatrick   if (BB.succ_size() != Successors.size())
76809467b48Spatrick     return false;
76909467b48Spatrick   // We don't want to count self-loops
77009467b48Spatrick   if (Successors.count(&BB))
77109467b48Spatrick     return false;
77209467b48Spatrick   for (MachineBasicBlock *Succ : BB.successors())
77309467b48Spatrick     if (!Successors.count(Succ))
77409467b48Spatrick       return false;
77509467b48Spatrick   return true;
77609467b48Spatrick }
77709467b48Spatrick 
77809467b48Spatrick /// Check if a block should be tail duplicated to increase fallthrough
77909467b48Spatrick /// opportunities.
78009467b48Spatrick /// \p BB Block to check.
shouldTailDuplicate(MachineBasicBlock * BB)78109467b48Spatrick bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
78209467b48Spatrick   // Blocks with single successors don't create additional fallthrough
78309467b48Spatrick   // opportunities. Don't duplicate them. TODO: When conditional exits are
78409467b48Spatrick   // analyzable, allow them to be duplicated.
78509467b48Spatrick   bool IsSimple = TailDup.isSimpleBB(BB);
78609467b48Spatrick 
78709467b48Spatrick   if (BB->succ_size() == 1)
78809467b48Spatrick     return false;
78909467b48Spatrick   return TailDup.shouldTailDuplicate(IsSimple, *BB);
79009467b48Spatrick }
79109467b48Spatrick 
79209467b48Spatrick /// Compare 2 BlockFrequency's with a small penalty for \p A.
79309467b48Spatrick /// In order to be conservative, we apply a X% penalty to account for
79409467b48Spatrick /// increased icache pressure and static heuristics. For small frequencies
79509467b48Spatrick /// we use only the numerators to improve accuracy. For simplicity, we assume the
79609467b48Spatrick /// penalty is less than 100%
79709467b48Spatrick /// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere.
greaterWithBias(BlockFrequency A,BlockFrequency B,uint64_t EntryFreq)79809467b48Spatrick static bool greaterWithBias(BlockFrequency A, BlockFrequency B,
79909467b48Spatrick                             uint64_t EntryFreq) {
80009467b48Spatrick   BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
80109467b48Spatrick   BlockFrequency Gain = A - B;
80209467b48Spatrick   return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
80309467b48Spatrick }
80409467b48Spatrick 
80509467b48Spatrick /// Check the edge frequencies to see if tail duplication will increase
80609467b48Spatrick /// fallthroughs. It only makes sense to call this function when
80709467b48Spatrick /// \p Succ would not be chosen otherwise. Tail duplication of \p Succ is
80809467b48Spatrick /// always locally profitable if we would have picked \p Succ without
80909467b48Spatrick /// considering duplication.
isProfitableToTailDup(const MachineBasicBlock * BB,const MachineBasicBlock * Succ,BranchProbability QProb,const BlockChain & Chain,const BlockFilterSet * BlockFilter)81009467b48Spatrick bool MachineBlockPlacement::isProfitableToTailDup(
81109467b48Spatrick     const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
81209467b48Spatrick     BranchProbability QProb,
81309467b48Spatrick     const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
81409467b48Spatrick   // We need to do a probability calculation to make sure this is profitable.
81509467b48Spatrick   // First: does succ have a successor that post-dominates? This affects the
81609467b48Spatrick   // calculation. The 2 relevant cases are:
81709467b48Spatrick   //    BB         BB
81809467b48Spatrick   //    | \Qout    | \Qout
81909467b48Spatrick   //   P|  C       |P C
82009467b48Spatrick   //    =   C'     =   C'
82109467b48Spatrick   //    |  /Qin    |  /Qin
82209467b48Spatrick   //    | /        | /
82309467b48Spatrick   //    Succ       Succ
82409467b48Spatrick   //    / \        | \  V
82509467b48Spatrick   //  U/   =V      |U \
82609467b48Spatrick   //  /     \      =   D
82709467b48Spatrick   //  D      E     |  /
82809467b48Spatrick   //               | /
82909467b48Spatrick   //               |/
83009467b48Spatrick   //               PDom
83109467b48Spatrick   //  '=' : Branch taken for that CFG edge
83209467b48Spatrick   // In the second case, Placing Succ while duplicating it into C prevents the
83309467b48Spatrick   // fallthrough of Succ into either D or PDom, because they now have C as an
83409467b48Spatrick   // unplaced predecessor
83509467b48Spatrick 
83609467b48Spatrick   // Start by figuring out which case we fall into
83709467b48Spatrick   MachineBasicBlock *PDom = nullptr;
83809467b48Spatrick   SmallVector<MachineBasicBlock *, 4> SuccSuccs;
83909467b48Spatrick   // Only scan the relevant successors
84009467b48Spatrick   auto AdjustedSuccSumProb =
84109467b48Spatrick       collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
84209467b48Spatrick   BranchProbability PProb = MBPI->getEdgeProbability(BB, Succ);
84309467b48Spatrick   auto BBFreq = MBFI->getBlockFreq(BB);
84409467b48Spatrick   auto SuccFreq = MBFI->getBlockFreq(Succ);
84509467b48Spatrick   BlockFrequency P = BBFreq * PProb;
84609467b48Spatrick   BlockFrequency Qout = BBFreq * QProb;
84709467b48Spatrick   uint64_t EntryFreq = MBFI->getEntryFreq();
84809467b48Spatrick   // If there are no more successors, it is profitable to copy, as it strictly
84909467b48Spatrick   // increases fallthrough.
85009467b48Spatrick   if (SuccSuccs.size() == 0)
85109467b48Spatrick     return greaterWithBias(P, Qout, EntryFreq);
85209467b48Spatrick 
85309467b48Spatrick   auto BestSuccSucc = BranchProbability::getZero();
85409467b48Spatrick   // Find the PDom or the best Succ if no PDom exists.
85509467b48Spatrick   for (MachineBasicBlock *SuccSucc : SuccSuccs) {
85609467b48Spatrick     auto Prob = MBPI->getEdgeProbability(Succ, SuccSucc);
85709467b48Spatrick     if (Prob > BestSuccSucc)
85809467b48Spatrick       BestSuccSucc = Prob;
85909467b48Spatrick     if (PDom == nullptr)
86009467b48Spatrick       if (MPDT->dominates(SuccSucc, Succ)) {
86109467b48Spatrick         PDom = SuccSucc;
86209467b48Spatrick         break;
86309467b48Spatrick       }
86409467b48Spatrick   }
86509467b48Spatrick   // For the comparisons, we need to know Succ's best incoming edge that isn't
86609467b48Spatrick   // from BB.
86709467b48Spatrick   auto SuccBestPred = BlockFrequency(0);
86809467b48Spatrick   for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
86909467b48Spatrick     if (SuccPred == Succ || SuccPred == BB
87009467b48Spatrick         || BlockToChain[SuccPred] == &Chain
87109467b48Spatrick         || (BlockFilter && !BlockFilter->count(SuccPred)))
87209467b48Spatrick       continue;
87309467b48Spatrick     auto Freq = MBFI->getBlockFreq(SuccPred)
87409467b48Spatrick         * MBPI->getEdgeProbability(SuccPred, Succ);
87509467b48Spatrick     if (Freq > SuccBestPred)
87609467b48Spatrick       SuccBestPred = Freq;
87709467b48Spatrick   }
87809467b48Spatrick   // Qin is Succ's best unplaced incoming edge that isn't BB
87909467b48Spatrick   BlockFrequency Qin = SuccBestPred;
88009467b48Spatrick   // If it doesn't have a post-dominating successor, here is the calculation:
88109467b48Spatrick   //    BB        BB
88209467b48Spatrick   //    | \Qout   |  \
88309467b48Spatrick   //   P|  C      |   =
88409467b48Spatrick   //    =   C'    |    C
88509467b48Spatrick   //    |  /Qin   |     |
88609467b48Spatrick   //    | /       |     C' (+Succ)
88709467b48Spatrick   //    Succ      Succ /|
88809467b48Spatrick   //    / \       |  \/ |
88909467b48Spatrick   //  U/   =V     |  == |
89009467b48Spatrick   //  /     \     | /  \|
89109467b48Spatrick   //  D      E    D     E
89209467b48Spatrick   //  '=' : Branch taken for that CFG edge
89309467b48Spatrick   //  Cost in the first case is: P + V
89409467b48Spatrick   //  For this calculation, we always assume P > Qout. If Qout > P
89509467b48Spatrick   //  The result of this function will be ignored at the caller.
89609467b48Spatrick   //  Let F = SuccFreq - Qin
89709467b48Spatrick   //  Cost in the second case is: Qout + min(Qin, F) * U + max(Qin, F) * V
89809467b48Spatrick 
89909467b48Spatrick   if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
90009467b48Spatrick     BranchProbability UProb = BestSuccSucc;
90109467b48Spatrick     BranchProbability VProb = AdjustedSuccSumProb - UProb;
90209467b48Spatrick     BlockFrequency F = SuccFreq - Qin;
90309467b48Spatrick     BlockFrequency V = SuccFreq * VProb;
90409467b48Spatrick     BlockFrequency QinU = std::min(Qin, F) * UProb;
90509467b48Spatrick     BlockFrequency BaseCost = P + V;
90609467b48Spatrick     BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;
90709467b48Spatrick     return greaterWithBias(BaseCost, DupCost, EntryFreq);
90809467b48Spatrick   }
90909467b48Spatrick   BranchProbability UProb = MBPI->getEdgeProbability(Succ, PDom);
91009467b48Spatrick   BranchProbability VProb = AdjustedSuccSumProb - UProb;
91109467b48Spatrick   BlockFrequency U = SuccFreq * UProb;
91209467b48Spatrick   BlockFrequency V = SuccFreq * VProb;
91309467b48Spatrick   BlockFrequency F = SuccFreq - Qin;
91409467b48Spatrick   // If there is a post-dominating successor, here is the calculation:
91509467b48Spatrick   // BB         BB                 BB          BB
91609467b48Spatrick   // | \Qout    |   \               | \Qout     |  \
91709467b48Spatrick   // |P C       |    =              |P C        |   =
91809467b48Spatrick   // =   C'     |P    C             =   C'      |P   C
91909467b48Spatrick   // |  /Qin    |      |            |  /Qin     |     |
92009467b48Spatrick   // | /        |      C' (+Succ)   | /         |     C' (+Succ)
92109467b48Spatrick   // Succ       Succ  /|            Succ        Succ /|
92209467b48Spatrick   // | \  V     |   \/ |            | \  V      |  \/ |
92309467b48Spatrick   // |U \       |U  /\ =?           |U =        |U /\ |
92409467b48Spatrick   // =   D      = =  =?|            |   D       | =  =|
92509467b48Spatrick   // |  /       |/     D            |  /        |/    D
92609467b48Spatrick   // | /        |     /             | =         |    /
92709467b48Spatrick   // |/         |    /              |/          |   =
92809467b48Spatrick   // Dom         Dom                Dom         Dom
92909467b48Spatrick   //  '=' : Branch taken for that CFG edge
93009467b48Spatrick   // The cost for taken branches in the first case is P + U
93109467b48Spatrick   // Let F = SuccFreq - Qin
93209467b48Spatrick   // The cost in the second case (assuming independence), given the layout:
93309467b48Spatrick   // BB, Succ, (C+Succ), D, Dom or the layout:
93409467b48Spatrick   // BB, Succ, D, Dom, (C+Succ)
93509467b48Spatrick   // is Qout + max(F, Qin) * U + min(F, Qin)
93609467b48Spatrick   // compare P + U vs Qout + P * U + Qin.
93709467b48Spatrick   //
93809467b48Spatrick   // The 3rd and 4th cases cover when Dom would be chosen to follow Succ.
93909467b48Spatrick   //
94009467b48Spatrick   // For the 3rd case, the cost is P + 2 * V
94109467b48Spatrick   // For the 4th case, the cost is Qout + min(Qin, F) * U + max(Qin, F) * V + V
94209467b48Spatrick   // We choose 4 over 3 when (P + V) > Qout + min(Qin, F) * U + max(Qin, F) * V
94309467b48Spatrick   if (UProb > AdjustedSuccSumProb / 2 &&
94409467b48Spatrick       !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
94509467b48Spatrick                                   Chain, BlockFilter))
94609467b48Spatrick     // Cases 3 & 4
94709467b48Spatrick     return greaterWithBias(
94809467b48Spatrick         (P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
94909467b48Spatrick         EntryFreq);
95009467b48Spatrick   // Cases 1 & 2
95109467b48Spatrick   return greaterWithBias((P + U),
95209467b48Spatrick                          (Qout + std::min(Qin, F) * AdjustedSuccSumProb +
95309467b48Spatrick                           std::max(Qin, F) * UProb),
95409467b48Spatrick                          EntryFreq);
95509467b48Spatrick }
95609467b48Spatrick 
95709467b48Spatrick /// Check for a trellis layout. \p BB is the upper part of a trellis if its
95809467b48Spatrick /// successors form the lower part of a trellis. A successor set S forms the
95909467b48Spatrick /// lower part of a trellis if all of the predecessors of S are either in S or
96009467b48Spatrick /// have all of S as successors. We ignore trellises where BB doesn't have 2
96109467b48Spatrick /// successors because for fewer than 2, it's trivial, and for 3 or greater they
96209467b48Spatrick /// are very uncommon and complex to compute optimally. Allowing edges within S
96309467b48Spatrick /// is not strictly a trellis, but the same algorithm works, so we allow it.
isTrellis(const MachineBasicBlock * BB,const SmallVectorImpl<MachineBasicBlock * > & ViableSuccs,const BlockChain & Chain,const BlockFilterSet * BlockFilter)96409467b48Spatrick bool MachineBlockPlacement::isTrellis(
96509467b48Spatrick     const MachineBasicBlock *BB,
96609467b48Spatrick     const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
96709467b48Spatrick     const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
96809467b48Spatrick   // Technically BB could form a trellis with branching factor higher than 2.
96909467b48Spatrick   // But that's extremely uncommon.
97009467b48Spatrick   if (BB->succ_size() != 2 || ViableSuccs.size() != 2)
97109467b48Spatrick     return false;
97209467b48Spatrick 
97309467b48Spatrick   SmallPtrSet<const MachineBasicBlock *, 2> Successors(BB->succ_begin(),
97409467b48Spatrick                                                        BB->succ_end());
97509467b48Spatrick   // To avoid reviewing the same predecessors twice.
97609467b48Spatrick   SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
97709467b48Spatrick 
97809467b48Spatrick   for (MachineBasicBlock *Succ : ViableSuccs) {
97909467b48Spatrick     int PredCount = 0;
980*d415bd75Srobert     for (auto *SuccPred : Succ->predecessors()) {
98109467b48Spatrick       // Allow triangle successors, but don't count them.
98209467b48Spatrick       if (Successors.count(SuccPred)) {
98309467b48Spatrick         // Make sure that it is actually a triangle.
98409467b48Spatrick         for (MachineBasicBlock *CheckSucc : SuccPred->successors())
98509467b48Spatrick           if (!Successors.count(CheckSucc))
98609467b48Spatrick             return false;
98709467b48Spatrick         continue;
98809467b48Spatrick       }
98909467b48Spatrick       const BlockChain *PredChain = BlockToChain[SuccPred];
99009467b48Spatrick       if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
99109467b48Spatrick           PredChain == &Chain || PredChain == BlockToChain[Succ])
99209467b48Spatrick         continue;
99309467b48Spatrick       ++PredCount;
99409467b48Spatrick       // Perform the successor check only once.
99509467b48Spatrick       if (!SeenPreds.insert(SuccPred).second)
99609467b48Spatrick         continue;
99709467b48Spatrick       if (!hasSameSuccessors(*SuccPred, Successors))
99809467b48Spatrick         return false;
99909467b48Spatrick     }
100009467b48Spatrick     // If one of the successors has only BB as a predecessor, it is not a
100109467b48Spatrick     // trellis.
100209467b48Spatrick     if (PredCount < 1)
100309467b48Spatrick       return false;
100409467b48Spatrick   }
100509467b48Spatrick   return true;
100609467b48Spatrick }
100709467b48Spatrick 
100809467b48Spatrick /// Pick the highest total weight pair of edges that can both be laid out.
100909467b48Spatrick /// The edges in \p Edges[0] are assumed to have a different destination than
101009467b48Spatrick /// the edges in \p Edges[1]. Simple counting shows that the best pair is either
101109467b48Spatrick /// the individual highest weight edges to the 2 different destinations, or in
101209467b48Spatrick /// case of a conflict, one of them should be replaced with a 2nd best edge.
101309467b48Spatrick std::pair<MachineBlockPlacement::WeightedEdge,
101409467b48Spatrick           MachineBlockPlacement::WeightedEdge>
getBestNonConflictingEdges(const MachineBasicBlock * BB,MutableArrayRef<SmallVector<MachineBlockPlacement::WeightedEdge,8>> Edges)101509467b48Spatrick MachineBlockPlacement::getBestNonConflictingEdges(
101609467b48Spatrick     const MachineBasicBlock *BB,
101709467b48Spatrick     MutableArrayRef<SmallVector<MachineBlockPlacement::WeightedEdge, 8>>
101809467b48Spatrick         Edges) {
101909467b48Spatrick   // Sort the edges, and then for each successor, find the best incoming
102009467b48Spatrick   // predecessor. If the best incoming predecessors aren't the same,
102109467b48Spatrick   // then that is clearly the best layout. If there is a conflict, one of the
102209467b48Spatrick   // successors will have to fallthrough from the second best predecessor. We
102309467b48Spatrick   // compare which combination is better overall.
102409467b48Spatrick 
102509467b48Spatrick   // Sort for highest frequency.
102609467b48Spatrick   auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
102709467b48Spatrick 
102809467b48Spatrick   llvm::stable_sort(Edges[0], Cmp);
102909467b48Spatrick   llvm::stable_sort(Edges[1], Cmp);
103009467b48Spatrick   auto BestA = Edges[0].begin();
103109467b48Spatrick   auto BestB = Edges[1].begin();
103209467b48Spatrick   // Arrange for the correct answer to be in BestA and BestB
103309467b48Spatrick   // If the 2 best edges don't conflict, the answer is already there.
103409467b48Spatrick   if (BestA->Src == BestB->Src) {
103509467b48Spatrick     // Compare the total fallthrough of (Best + Second Best) for both pairs
103609467b48Spatrick     auto SecondBestA = std::next(BestA);
103709467b48Spatrick     auto SecondBestB = std::next(BestB);
103809467b48Spatrick     BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
103909467b48Spatrick     BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
104009467b48Spatrick     if (BestAScore < BestBScore)
104109467b48Spatrick       BestA = SecondBestA;
104209467b48Spatrick     else
104309467b48Spatrick       BestB = SecondBestB;
104409467b48Spatrick   }
104509467b48Spatrick   // Arrange for the BB edge to be in BestA if it exists.
104609467b48Spatrick   if (BestB->Src == BB)
104709467b48Spatrick     std::swap(BestA, BestB);
104809467b48Spatrick   return std::make_pair(*BestA, *BestB);
104909467b48Spatrick }
105009467b48Spatrick 
105109467b48Spatrick /// Get the best successor from \p BB based on \p BB being part of a trellis.
105209467b48Spatrick /// We only handle trellises with 2 successors, so the algorithm is
105309467b48Spatrick /// straightforward: Find the best pair of edges that don't conflict. We find
105409467b48Spatrick /// the best incoming edge for each successor in the trellis. If those conflict,
105509467b48Spatrick /// we consider which of them should be replaced with the second best.
105609467b48Spatrick /// Upon return the two best edges will be in \p BestEdges. If one of the edges
105709467b48Spatrick /// comes from \p BB, it will be in \p BestEdges[0]
105809467b48Spatrick MachineBlockPlacement::BlockAndTailDupResult
getBestTrellisSuccessor(const MachineBasicBlock * BB,const SmallVectorImpl<MachineBasicBlock * > & ViableSuccs,BranchProbability AdjustedSumProb,const BlockChain & Chain,const BlockFilterSet * BlockFilter)105909467b48Spatrick MachineBlockPlacement::getBestTrellisSuccessor(
106009467b48Spatrick     const MachineBasicBlock *BB,
106109467b48Spatrick     const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
106209467b48Spatrick     BranchProbability AdjustedSumProb, const BlockChain &Chain,
106309467b48Spatrick     const BlockFilterSet *BlockFilter) {
106409467b48Spatrick 
106509467b48Spatrick   BlockAndTailDupResult Result = {nullptr, false};
106609467b48Spatrick   SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
106709467b48Spatrick                                                        BB->succ_end());
106809467b48Spatrick 
106909467b48Spatrick   // We assume size 2 because it's common. For general n, we would have to do
107009467b48Spatrick   // the Hungarian algorithm, but it's not worth the complexity because more
107109467b48Spatrick   // than 2 successors is fairly uncommon, and a trellis even more so.
107209467b48Spatrick   if (Successors.size() != 2 || ViableSuccs.size() != 2)
107309467b48Spatrick     return Result;
107409467b48Spatrick 
107509467b48Spatrick   // Collect the edge frequencies of all edges that form the trellis.
107609467b48Spatrick   SmallVector<WeightedEdge, 8> Edges[2];
107709467b48Spatrick   int SuccIndex = 0;
1078*d415bd75Srobert   for (auto *Succ : ViableSuccs) {
107909467b48Spatrick     for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
108009467b48Spatrick       // Skip any placed predecessors that are not BB
108109467b48Spatrick       if (SuccPred != BB)
108209467b48Spatrick         if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
108309467b48Spatrick             BlockToChain[SuccPred] == &Chain ||
108409467b48Spatrick             BlockToChain[SuccPred] == BlockToChain[Succ])
108509467b48Spatrick           continue;
108609467b48Spatrick       BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
108709467b48Spatrick                                 MBPI->getEdgeProbability(SuccPred, Succ);
108809467b48Spatrick       Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});
108909467b48Spatrick     }
109009467b48Spatrick     ++SuccIndex;
109109467b48Spatrick   }
109209467b48Spatrick 
109309467b48Spatrick   // Pick the best combination of 2 edges from all the edges in the trellis.
109409467b48Spatrick   WeightedEdge BestA, BestB;
109509467b48Spatrick   std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
109609467b48Spatrick 
109709467b48Spatrick   if (BestA.Src != BB) {
109809467b48Spatrick     // If we have a trellis, and BB doesn't have the best fallthrough edges,
109909467b48Spatrick     // we shouldn't choose any successor. We've already looked and there's a
110009467b48Spatrick     // better fallthrough edge for all the successors.
110109467b48Spatrick     LLVM_DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
110209467b48Spatrick     return Result;
110309467b48Spatrick   }
110409467b48Spatrick 
110509467b48Spatrick   // Did we pick the triangle edge? If tail-duplication is profitable, do
110609467b48Spatrick   // that instead. Otherwise merge the triangle edge now while we know it is
110709467b48Spatrick   // optimal.
110809467b48Spatrick   if (BestA.Dest == BestB.Src) {
110909467b48Spatrick     // The edges are BB->Succ1->Succ2, and we're looking to see if BB->Succ2
111009467b48Spatrick     // would be better.
111109467b48Spatrick     MachineBasicBlock *Succ1 = BestA.Dest;
111209467b48Spatrick     MachineBasicBlock *Succ2 = BestB.Dest;
111309467b48Spatrick     // Check to see if tail-duplication would be profitable.
111409467b48Spatrick     if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
111509467b48Spatrick         canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
111609467b48Spatrick         isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
111709467b48Spatrick                               Chain, BlockFilter)) {
111809467b48Spatrick       LLVM_DEBUG(BranchProbability Succ2Prob = getAdjustedProbability(
111909467b48Spatrick                      MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb);
112009467b48Spatrick                  dbgs() << "    Selected: " << getBlockName(Succ2)
112109467b48Spatrick                         << ", probability: " << Succ2Prob
112209467b48Spatrick                         << " (Tail Duplicate)\n");
112309467b48Spatrick       Result.BB = Succ2;
112409467b48Spatrick       Result.ShouldTailDup = true;
112509467b48Spatrick       return Result;
112609467b48Spatrick     }
112709467b48Spatrick   }
112809467b48Spatrick   // We have already computed the optimal edge for the other side of the
112909467b48Spatrick   // trellis.
113009467b48Spatrick   ComputedEdges[BestB.Src] = { BestB.Dest, false };
113109467b48Spatrick 
113209467b48Spatrick   auto TrellisSucc = BestA.Dest;
113309467b48Spatrick   LLVM_DEBUG(BranchProbability SuccProb = getAdjustedProbability(
113409467b48Spatrick                  MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb);
113509467b48Spatrick              dbgs() << "    Selected: " << getBlockName(TrellisSucc)
113609467b48Spatrick                     << ", probability: " << SuccProb << " (Trellis)\n");
113709467b48Spatrick   Result.BB = TrellisSucc;
113809467b48Spatrick   return Result;
113909467b48Spatrick }
114009467b48Spatrick 
114109467b48Spatrick /// When the option allowTailDupPlacement() is on, this method checks if the
114209467b48Spatrick /// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
114309467b48Spatrick /// into all of its unplaced, unfiltered predecessors, that are not BB.
canTailDuplicateUnplacedPreds(const MachineBasicBlock * BB,MachineBasicBlock * Succ,const BlockChain & Chain,const BlockFilterSet * BlockFilter)114409467b48Spatrick bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
114509467b48Spatrick     const MachineBasicBlock *BB, MachineBasicBlock *Succ,
114609467b48Spatrick     const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
114709467b48Spatrick   if (!shouldTailDuplicate(Succ))
114809467b48Spatrick     return false;
114909467b48Spatrick 
115009467b48Spatrick   // The result of canTailDuplicate.
115109467b48Spatrick   bool Duplicate = true;
115209467b48Spatrick   // Number of possible duplication.
115309467b48Spatrick   unsigned int NumDup = 0;
115409467b48Spatrick 
115509467b48Spatrick   // For CFG checking.
115609467b48Spatrick   SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
115709467b48Spatrick                                                        BB->succ_end());
115809467b48Spatrick   for (MachineBasicBlock *Pred : Succ->predecessors()) {
115909467b48Spatrick     // Make sure all unplaced and unfiltered predecessors can be
116009467b48Spatrick     // tail-duplicated into.
116109467b48Spatrick     // Skip any blocks that are already placed or not in this loop.
116209467b48Spatrick     if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
116309467b48Spatrick         || BlockToChain[Pred] == &Chain)
116409467b48Spatrick       continue;
116509467b48Spatrick     if (!TailDup.canTailDuplicate(Succ, Pred)) {
116609467b48Spatrick       if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors))
116709467b48Spatrick         // This will result in a trellis after tail duplication, so we don't
116809467b48Spatrick         // need to copy Succ into this predecessor. In the presence
116909467b48Spatrick         // of a trellis tail duplication can continue to be profitable.
117009467b48Spatrick         // For example:
117109467b48Spatrick         // A            A
117209467b48Spatrick         // |\           |\
117309467b48Spatrick         // | \          | \
117409467b48Spatrick         // |  C         |  C+BB
117509467b48Spatrick         // | /          |  |
117609467b48Spatrick         // |/           |  |
117709467b48Spatrick         // BB    =>     BB |
117809467b48Spatrick         // |\           |\/|
117909467b48Spatrick         // | \          |/\|
118009467b48Spatrick         // |  D         |  D
118109467b48Spatrick         // | /          | /
118209467b48Spatrick         // |/           |/
118309467b48Spatrick         // Succ         Succ
118409467b48Spatrick         //
118509467b48Spatrick         // After BB was duplicated into C, the layout looks like the one on the
118609467b48Spatrick         // right. BB and C now have the same successors. When considering
118709467b48Spatrick         // whether Succ can be duplicated into all its unplaced predecessors, we
118809467b48Spatrick         // ignore C.
118909467b48Spatrick         // We can do this because C already has a profitable fallthrough, namely
119009467b48Spatrick         // D. TODO(iteratee): ignore sufficiently cold predecessors for
119109467b48Spatrick         // duplication and for this test.
119209467b48Spatrick         //
119309467b48Spatrick         // This allows trellises to be laid out in 2 separate chains
119409467b48Spatrick         // (A,B,Succ,...) and later (C,D,...) This is a reasonable heuristic
119509467b48Spatrick         // because it allows the creation of 2 fallthrough paths with links
119609467b48Spatrick         // between them, and we correctly identify the best layout for these
119709467b48Spatrick         // CFGs. We want to extend trellises that the user created in addition
119809467b48Spatrick         // to trellises created by tail-duplication, so we just look for the
119909467b48Spatrick         // CFG.
120009467b48Spatrick         continue;
120109467b48Spatrick       Duplicate = false;
120209467b48Spatrick       continue;
120309467b48Spatrick     }
120409467b48Spatrick     NumDup++;
120509467b48Spatrick   }
120609467b48Spatrick 
120709467b48Spatrick   // No possible duplication in current filter set.
120809467b48Spatrick   if (NumDup == 0)
120909467b48Spatrick     return false;
121009467b48Spatrick 
1211097a140dSpatrick   // If profile information is available, findDuplicateCandidates can do more
1212097a140dSpatrick   // precise benefit analysis.
1213097a140dSpatrick   if (F->getFunction().hasProfileData())
1214097a140dSpatrick     return true;
1215097a140dSpatrick 
121609467b48Spatrick   // This is mainly for function exit BB.
121709467b48Spatrick   // The integrated tail duplication is really designed for increasing
121809467b48Spatrick   // fallthrough from predecessors from Succ to its successors. We may need
121909467b48Spatrick   // other machanism to handle different cases.
1220*d415bd75Srobert   if (Succ->succ_empty())
122109467b48Spatrick     return true;
122209467b48Spatrick 
122309467b48Spatrick   // Plus the already placed predecessor.
122409467b48Spatrick   NumDup++;
122509467b48Spatrick 
122609467b48Spatrick   // If the duplication candidate has more unplaced predecessors than
122709467b48Spatrick   // successors, the extra duplication can't bring more fallthrough.
122809467b48Spatrick   //
122909467b48Spatrick   //     Pred1 Pred2 Pred3
123009467b48Spatrick   //         \   |   /
123109467b48Spatrick   //          \  |  /
123209467b48Spatrick   //           \ | /
123309467b48Spatrick   //            Dup
123409467b48Spatrick   //            / \
123509467b48Spatrick   //           /   \
123609467b48Spatrick   //       Succ1  Succ2
123709467b48Spatrick   //
123809467b48Spatrick   // In this example Dup has 2 successors and 3 predecessors, duplication of Dup
123909467b48Spatrick   // can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2,
124009467b48Spatrick   // but the duplication into Pred3 can't increase fallthrough.
124109467b48Spatrick   //
124209467b48Spatrick   // A small number of extra duplication may not hurt too much. We need a better
124309467b48Spatrick   // heuristic to handle it.
124409467b48Spatrick   if ((NumDup > Succ->succ_size()) || !Duplicate)
124509467b48Spatrick     return false;
124609467b48Spatrick 
124709467b48Spatrick   return true;
124809467b48Spatrick }
124909467b48Spatrick 
125009467b48Spatrick /// Find chains of triangles where we believe it would be profitable to
125109467b48Spatrick /// tail-duplicate them all, but a local analysis would not find them.
125209467b48Spatrick /// There are 3 ways this can be profitable:
125309467b48Spatrick /// 1) The post-dominators marked 50% are actually taken 55% (This shrinks with
125409467b48Spatrick ///    longer chains)
125509467b48Spatrick /// 2) The chains are statically correlated. Branch probabilities have a very
125609467b48Spatrick ///    U-shaped distribution.
125709467b48Spatrick ///    [http://nrs.harvard.edu/urn-3:HUL.InstRepos:24015805]
125809467b48Spatrick ///    If the branches in a chain are likely to be from the same side of the
125909467b48Spatrick ///    distribution as their predecessor, but are independent at runtime, this
126009467b48Spatrick ///    transformation is profitable. (Because the cost of being wrong is a small
126109467b48Spatrick ///    fixed cost, unlike the standard triangle layout where the cost of being
126209467b48Spatrick ///    wrong scales with the # of triangles.)
126309467b48Spatrick /// 3) The chains are dynamically correlated. If the probability that a previous
126409467b48Spatrick ///    branch was taken positively influences whether the next branch will be
126509467b48Spatrick ///    taken
126609467b48Spatrick /// We believe that 2 and 3 are common enough to justify the small margin in 1.
precomputeTriangleChains()126709467b48Spatrick void MachineBlockPlacement::precomputeTriangleChains() {
126809467b48Spatrick   struct TriangleChain {
126909467b48Spatrick     std::vector<MachineBasicBlock *> Edges;
127009467b48Spatrick 
127109467b48Spatrick     TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
127209467b48Spatrick         : Edges({src, dst}) {}
127309467b48Spatrick 
127409467b48Spatrick     void append(MachineBasicBlock *dst) {
127509467b48Spatrick       assert(getKey()->isSuccessor(dst) &&
127609467b48Spatrick              "Attempting to append a block that is not a successor.");
127709467b48Spatrick       Edges.push_back(dst);
127809467b48Spatrick     }
127909467b48Spatrick 
128009467b48Spatrick     unsigned count() const { return Edges.size() - 1; }
128109467b48Spatrick 
128209467b48Spatrick     MachineBasicBlock *getKey() const {
128309467b48Spatrick       return Edges.back();
128409467b48Spatrick     }
128509467b48Spatrick   };
128609467b48Spatrick 
128709467b48Spatrick   if (TriangleChainCount == 0)
128809467b48Spatrick     return;
128909467b48Spatrick 
129009467b48Spatrick   LLVM_DEBUG(dbgs() << "Pre-computing triangle chains.\n");
129109467b48Spatrick   // Map from last block to the chain that contains it. This allows us to extend
129209467b48Spatrick   // chains as we find new triangles.
129309467b48Spatrick   DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
129409467b48Spatrick   for (MachineBasicBlock &BB : *F) {
129509467b48Spatrick     // If BB doesn't have 2 successors, it doesn't start a triangle.
129609467b48Spatrick     if (BB.succ_size() != 2)
129709467b48Spatrick       continue;
129809467b48Spatrick     MachineBasicBlock *PDom = nullptr;
129909467b48Spatrick     for (MachineBasicBlock *Succ : BB.successors()) {
130009467b48Spatrick       if (!MPDT->dominates(Succ, &BB))
130109467b48Spatrick         continue;
130209467b48Spatrick       PDom = Succ;
130309467b48Spatrick       break;
130409467b48Spatrick     }
130509467b48Spatrick     // If BB doesn't have a post-dominating successor, it doesn't form a
130609467b48Spatrick     // triangle.
130709467b48Spatrick     if (PDom == nullptr)
130809467b48Spatrick       continue;
130909467b48Spatrick     // If PDom has a hint that it is low probability, skip this triangle.
131009467b48Spatrick     if (MBPI->getEdgeProbability(&BB, PDom) < BranchProbability(50, 100))
131109467b48Spatrick       continue;
131209467b48Spatrick     // If PDom isn't eligible for duplication, this isn't the kind of triangle
131309467b48Spatrick     // we're looking for.
131409467b48Spatrick     if (!shouldTailDuplicate(PDom))
131509467b48Spatrick       continue;
131609467b48Spatrick     bool CanTailDuplicate = true;
131709467b48Spatrick     // If PDom can't tail-duplicate into it's non-BB predecessors, then this
131809467b48Spatrick     // isn't the kind of triangle we're looking for.
131909467b48Spatrick     for (MachineBasicBlock* Pred : PDom->predecessors()) {
132009467b48Spatrick       if (Pred == &BB)
132109467b48Spatrick         continue;
132209467b48Spatrick       if (!TailDup.canTailDuplicate(PDom, Pred)) {
132309467b48Spatrick         CanTailDuplicate = false;
132409467b48Spatrick         break;
132509467b48Spatrick       }
132609467b48Spatrick     }
132709467b48Spatrick     // If we can't tail-duplicate PDom to its predecessors, then skip this
132809467b48Spatrick     // triangle.
132909467b48Spatrick     if (!CanTailDuplicate)
133009467b48Spatrick       continue;
133109467b48Spatrick 
133209467b48Spatrick     // Now we have an interesting triangle. Insert it if it's not part of an
133309467b48Spatrick     // existing chain.
133409467b48Spatrick     // Note: This cannot be replaced with a call insert() or emplace() because
133509467b48Spatrick     // the find key is BB, but the insert/emplace key is PDom.
133609467b48Spatrick     auto Found = TriangleChainMap.find(&BB);
133709467b48Spatrick     // If it is, remove the chain from the map, grow it, and put it back in the
133809467b48Spatrick     // map with the end as the new key.
133909467b48Spatrick     if (Found != TriangleChainMap.end()) {
134009467b48Spatrick       TriangleChain Chain = std::move(Found->second);
134109467b48Spatrick       TriangleChainMap.erase(Found);
134209467b48Spatrick       Chain.append(PDom);
134309467b48Spatrick       TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));
134409467b48Spatrick     } else {
134509467b48Spatrick       auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);
134609467b48Spatrick       assert(InsertResult.second && "Block seen twice.");
134709467b48Spatrick       (void)InsertResult;
134809467b48Spatrick     }
134909467b48Spatrick   }
135009467b48Spatrick 
135109467b48Spatrick   // Iterating over a DenseMap is safe here, because the only thing in the body
135209467b48Spatrick   // of the loop is inserting into another DenseMap (ComputedEdges).
135309467b48Spatrick   // ComputedEdges is never iterated, so this doesn't lead to non-determinism.
135409467b48Spatrick   for (auto &ChainPair : TriangleChainMap) {
135509467b48Spatrick     TriangleChain &Chain = ChainPair.second;
135609467b48Spatrick     // Benchmarking has shown that due to branch correlation duplicating 2 or
135709467b48Spatrick     // more triangles is profitable, despite the calculations assuming
135809467b48Spatrick     // independence.
135909467b48Spatrick     if (Chain.count() < TriangleChainCount)
136009467b48Spatrick       continue;
136109467b48Spatrick     MachineBasicBlock *dst = Chain.Edges.back();
136209467b48Spatrick     Chain.Edges.pop_back();
136309467b48Spatrick     for (MachineBasicBlock *src : reverse(Chain.Edges)) {
136409467b48Spatrick       LLVM_DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->"
136509467b48Spatrick                         << getBlockName(dst)
136609467b48Spatrick                         << " as pre-computed based on triangles.\n");
136709467b48Spatrick 
136809467b48Spatrick       auto InsertResult = ComputedEdges.insert({src, {dst, true}});
136909467b48Spatrick       assert(InsertResult.second && "Block seen twice.");
137009467b48Spatrick       (void)InsertResult;
137109467b48Spatrick 
137209467b48Spatrick       dst = src;
137309467b48Spatrick     }
137409467b48Spatrick   }
137509467b48Spatrick }
137609467b48Spatrick 
137709467b48Spatrick // When profile is not present, return the StaticLikelyProb.
137809467b48Spatrick // When profile is available, we need to handle the triangle-shape CFG.
getLayoutSuccessorProbThreshold(const MachineBasicBlock * BB)137909467b48Spatrick static BranchProbability getLayoutSuccessorProbThreshold(
138009467b48Spatrick       const MachineBasicBlock *BB) {
138109467b48Spatrick   if (!BB->getParent()->getFunction().hasProfileData())
138209467b48Spatrick     return BranchProbability(StaticLikelyProb, 100);
138309467b48Spatrick   if (BB->succ_size() == 2) {
138409467b48Spatrick     const MachineBasicBlock *Succ1 = *BB->succ_begin();
138509467b48Spatrick     const MachineBasicBlock *Succ2 = *(BB->succ_begin() + 1);
138609467b48Spatrick     if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
138709467b48Spatrick       /* See case 1 below for the cost analysis. For BB->Succ to
138809467b48Spatrick        * be taken with smaller cost, the following needs to hold:
138909467b48Spatrick        *   Prob(BB->Succ) > 2 * Prob(BB->Pred)
139009467b48Spatrick        *   So the threshold T in the calculation below
139109467b48Spatrick        *   (1-T) * Prob(BB->Succ) > T * Prob(BB->Pred)
139209467b48Spatrick        *   So T / (1 - T) = 2, Yielding T = 2/3
139309467b48Spatrick        * Also adding user specified branch bias, we have
139409467b48Spatrick        *   T = (2/3)*(ProfileLikelyProb/50)
139509467b48Spatrick        *     = (2*ProfileLikelyProb)/150)
139609467b48Spatrick        */
139709467b48Spatrick       return BranchProbability(2 * ProfileLikelyProb, 150);
139809467b48Spatrick     }
139909467b48Spatrick   }
140009467b48Spatrick   return BranchProbability(ProfileLikelyProb, 100);
140109467b48Spatrick }
140209467b48Spatrick 
140309467b48Spatrick /// Checks to see if the layout candidate block \p Succ has a better layout
140409467b48Spatrick /// predecessor than \c BB. If yes, returns true.
140509467b48Spatrick /// \p SuccProb: The probability adjusted for only remaining blocks.
140609467b48Spatrick ///   Only used for logging
140709467b48Spatrick /// \p RealSuccProb: The un-adjusted probability.
140809467b48Spatrick /// \p Chain: The chain that BB belongs to and Succ is being considered for.
140909467b48Spatrick /// \p BlockFilter: if non-null, the set of blocks that make up the loop being
141009467b48Spatrick ///    considered
hasBetterLayoutPredecessor(const MachineBasicBlock * BB,const MachineBasicBlock * Succ,const BlockChain & SuccChain,BranchProbability SuccProb,BranchProbability RealSuccProb,const BlockChain & Chain,const BlockFilterSet * BlockFilter)141109467b48Spatrick bool MachineBlockPlacement::hasBetterLayoutPredecessor(
141209467b48Spatrick     const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
141309467b48Spatrick     const BlockChain &SuccChain, BranchProbability SuccProb,
141409467b48Spatrick     BranchProbability RealSuccProb, const BlockChain &Chain,
141509467b48Spatrick     const BlockFilterSet *BlockFilter) {
141609467b48Spatrick 
141709467b48Spatrick   // There isn't a better layout when there are no unscheduled predecessors.
141809467b48Spatrick   if (SuccChain.UnscheduledPredecessors == 0)
141909467b48Spatrick     return false;
142009467b48Spatrick 
142109467b48Spatrick   // There are two basic scenarios here:
142209467b48Spatrick   // -------------------------------------
142309467b48Spatrick   // Case 1: triangular shape CFG (if-then):
142409467b48Spatrick   //     BB
142509467b48Spatrick   //     | \
142609467b48Spatrick   //     |  \
142709467b48Spatrick   //     |   Pred
142809467b48Spatrick   //     |   /
142909467b48Spatrick   //     Succ
143009467b48Spatrick   // In this case, we are evaluating whether to select edge -> Succ, e.g.
143109467b48Spatrick   // set Succ as the layout successor of BB. Picking Succ as BB's
143209467b48Spatrick   // successor breaks the CFG constraints (FIXME: define these constraints).
143309467b48Spatrick   // With this layout, Pred BB
143409467b48Spatrick   // is forced to be outlined, so the overall cost will be cost of the
143509467b48Spatrick   // branch taken from BB to Pred, plus the cost of back taken branch
143609467b48Spatrick   // from Pred to Succ, as well as the additional cost associated
143709467b48Spatrick   // with the needed unconditional jump instruction from Pred To Succ.
143809467b48Spatrick 
143909467b48Spatrick   // The cost of the topological order layout is the taken branch cost
144009467b48Spatrick   // from BB to Succ, so to make BB->Succ a viable candidate, the following
144109467b48Spatrick   // must hold:
144209467b48Spatrick   //     2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
144309467b48Spatrick   //      < freq(BB->Succ) *  taken_branch_cost.
144409467b48Spatrick   // Ignoring unconditional jump cost, we get
144509467b48Spatrick   //    freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
144609467b48Spatrick   //    prob(BB->Succ) > 2 * prob(BB->Pred)
144709467b48Spatrick   //
144809467b48Spatrick   // When real profile data is available, we can precisely compute the
144909467b48Spatrick   // probability threshold that is needed for edge BB->Succ to be considered.
145009467b48Spatrick   // Without profile data, the heuristic requires the branch bias to be
145109467b48Spatrick   // a lot larger to make sure the signal is very strong (e.g. 80% default).
145209467b48Spatrick   // -----------------------------------------------------------------
145309467b48Spatrick   // Case 2: diamond like CFG (if-then-else):
145409467b48Spatrick   //     S
145509467b48Spatrick   //    / \
145609467b48Spatrick   //   |   \
145709467b48Spatrick   //  BB    Pred
145809467b48Spatrick   //   \    /
145909467b48Spatrick   //    Succ
146009467b48Spatrick   //    ..
146109467b48Spatrick   //
146209467b48Spatrick   // The current block is BB and edge BB->Succ is now being evaluated.
146309467b48Spatrick   // Note that edge S->BB was previously already selected because
146409467b48Spatrick   // prob(S->BB) > prob(S->Pred).
146509467b48Spatrick   // At this point, 2 blocks can be placed after BB: Pred or Succ. If we
146609467b48Spatrick   // choose Pred, we will have a topological ordering as shown on the left
146709467b48Spatrick   // in the picture below. If we choose Succ, we have the solution as shown
146809467b48Spatrick   // on the right:
146909467b48Spatrick   //
147009467b48Spatrick   //   topo-order:
147109467b48Spatrick   //
147209467b48Spatrick   //       S-----                             ---S
147309467b48Spatrick   //       |    |                             |  |
147409467b48Spatrick   //    ---BB   |                             |  BB
147509467b48Spatrick   //    |       |                             |  |
147609467b48Spatrick   //    |  Pred--                             |  Succ--
147709467b48Spatrick   //    |  |                                  |       |
147809467b48Spatrick   //    ---Succ                               ---Pred--
147909467b48Spatrick   //
148009467b48Spatrick   // cost = freq(S->Pred) + freq(BB->Succ)    cost = 2 * freq (S->Pred)
148109467b48Spatrick   //      = freq(S->Pred) + freq(S->BB)
148209467b48Spatrick   //
148309467b48Spatrick   // If we have profile data (i.e, branch probabilities can be trusted), the
148409467b48Spatrick   // cost (number of taken branches) with layout S->BB->Succ->Pred is 2 *
148509467b48Spatrick   // freq(S->Pred) while the cost of topo order is freq(S->Pred) + freq(S->BB).
148609467b48Spatrick   // We know Prob(S->BB) > Prob(S->Pred), so freq(S->BB) > freq(S->Pred), which
148709467b48Spatrick   // means the cost of topological order is greater.
148809467b48Spatrick   // When profile data is not available, however, we need to be more
148909467b48Spatrick   // conservative. If the branch prediction is wrong, breaking the topo-order
149009467b48Spatrick   // will actually yield a layout with large cost. For this reason, we need
149109467b48Spatrick   // strong biased branch at block S with Prob(S->BB) in order to select
149209467b48Spatrick   // BB->Succ. This is equivalent to looking the CFG backward with backward
149309467b48Spatrick   // edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without
149409467b48Spatrick   // profile data).
149509467b48Spatrick   // --------------------------------------------------------------------------
149609467b48Spatrick   // Case 3: forked diamond
149709467b48Spatrick   //       S
149809467b48Spatrick   //      / \
149909467b48Spatrick   //     /   \
150009467b48Spatrick   //   BB    Pred
150109467b48Spatrick   //   | \   / |
150209467b48Spatrick   //   |  \ /  |
150309467b48Spatrick   //   |   X   |
150409467b48Spatrick   //   |  / \  |
150509467b48Spatrick   //   | /   \ |
150609467b48Spatrick   //   S1     S2
150709467b48Spatrick   //
150809467b48Spatrick   // The current block is BB and edge BB->S1 is now being evaluated.
150909467b48Spatrick   // As above S->BB was already selected because
151009467b48Spatrick   // prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2).
151109467b48Spatrick   //
151209467b48Spatrick   // topo-order:
151309467b48Spatrick   //
151409467b48Spatrick   //     S-------|                     ---S
151509467b48Spatrick   //     |       |                     |  |
151609467b48Spatrick   //  ---BB      |                     |  BB
151709467b48Spatrick   //  |          |                     |  |
151809467b48Spatrick   //  |  Pred----|                     |  S1----
151909467b48Spatrick   //  |  |                             |       |
152009467b48Spatrick   //  --(S1 or S2)                     ---Pred--
152109467b48Spatrick   //                                        |
152209467b48Spatrick   //                                       S2
152309467b48Spatrick   //
152409467b48Spatrick   // topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
152509467b48Spatrick   //    + min(freq(Pred->S1), freq(Pred->S2))
152609467b48Spatrick   // Non-topo-order cost:
152709467b48Spatrick   // non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
152809467b48Spatrick   // To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
152909467b48Spatrick   // is 0. Then the non topo layout is better when
153009467b48Spatrick   // freq(S->Pred) < freq(BB->S1).
153109467b48Spatrick   // This is exactly what is checked below.
153209467b48Spatrick   // Note there are other shapes that apply (Pred may not be a single block,
153309467b48Spatrick   // but they all fit this general pattern.)
153409467b48Spatrick   BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
153509467b48Spatrick 
153609467b48Spatrick   // Make sure that a hot successor doesn't have a globally more
153709467b48Spatrick   // important predecessor.
153809467b48Spatrick   BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
153909467b48Spatrick   bool BadCFGConflict = false;
154009467b48Spatrick 
154109467b48Spatrick   for (MachineBasicBlock *Pred : Succ->predecessors()) {
154209467b48Spatrick     BlockChain *PredChain = BlockToChain[Pred];
154309467b48Spatrick     if (Pred == Succ || PredChain == &SuccChain ||
154409467b48Spatrick         (BlockFilter && !BlockFilter->count(Pred)) ||
154509467b48Spatrick         PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
154609467b48Spatrick         // This check is redundant except for look ahead. This function is
154709467b48Spatrick         // called for lookahead by isProfitableToTailDup when BB hasn't been
154809467b48Spatrick         // placed yet.
154909467b48Spatrick         (Pred == BB))
155009467b48Spatrick       continue;
155109467b48Spatrick     // Do backward checking.
155209467b48Spatrick     // For all cases above, we need a backward checking to filter out edges that
155309467b48Spatrick     // are not 'strongly' biased.
155409467b48Spatrick     // BB  Pred
155509467b48Spatrick     //  \ /
155609467b48Spatrick     //  Succ
155709467b48Spatrick     // We select edge BB->Succ if
155809467b48Spatrick     //      freq(BB->Succ) > freq(Succ) * HotProb
155909467b48Spatrick     //      i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
156009467b48Spatrick     //      HotProb
156109467b48Spatrick     //      i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
156209467b48Spatrick     // Case 1 is covered too, because the first equation reduces to:
156309467b48Spatrick     // prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle)
156409467b48Spatrick     BlockFrequency PredEdgeFreq =
156509467b48Spatrick         MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
156609467b48Spatrick     if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
156709467b48Spatrick       BadCFGConflict = true;
156809467b48Spatrick       break;
156909467b48Spatrick     }
157009467b48Spatrick   }
157109467b48Spatrick 
157209467b48Spatrick   if (BadCFGConflict) {
157309467b48Spatrick     LLVM_DEBUG(dbgs() << "    Not a candidate: " << getBlockName(Succ) << " -> "
157409467b48Spatrick                       << SuccProb << " (prob) (non-cold CFG conflict)\n");
157509467b48Spatrick     return true;
157609467b48Spatrick   }
157709467b48Spatrick 
157809467b48Spatrick   return false;
157909467b48Spatrick }
158009467b48Spatrick 
158109467b48Spatrick /// Select the best successor for a block.
158209467b48Spatrick ///
158309467b48Spatrick /// This looks across all successors of a particular block and attempts to
158409467b48Spatrick /// select the "best" one to be the layout successor. It only considers direct
158509467b48Spatrick /// successors which also pass the block filter. It will attempt to avoid
158609467b48Spatrick /// breaking CFG structure, but cave and break such structures in the case of
158709467b48Spatrick /// very hot successor edges.
158809467b48Spatrick ///
158909467b48Spatrick /// \returns The best successor block found, or null if none are viable, along
159009467b48Spatrick /// with a boolean indicating if tail duplication is necessary.
159109467b48Spatrick MachineBlockPlacement::BlockAndTailDupResult
selectBestSuccessor(const MachineBasicBlock * BB,const BlockChain & Chain,const BlockFilterSet * BlockFilter)159209467b48Spatrick MachineBlockPlacement::selectBestSuccessor(
159309467b48Spatrick     const MachineBasicBlock *BB, const BlockChain &Chain,
159409467b48Spatrick     const BlockFilterSet *BlockFilter) {
159509467b48Spatrick   const BranchProbability HotProb(StaticLikelyProb, 100);
159609467b48Spatrick 
159709467b48Spatrick   BlockAndTailDupResult BestSucc = { nullptr, false };
159809467b48Spatrick   auto BestProb = BranchProbability::getZero();
159909467b48Spatrick 
160009467b48Spatrick   SmallVector<MachineBasicBlock *, 4> Successors;
160109467b48Spatrick   auto AdjustedSumProb =
160209467b48Spatrick       collectViableSuccessors(BB, Chain, BlockFilter, Successors);
160309467b48Spatrick 
160409467b48Spatrick   LLVM_DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB)
160509467b48Spatrick                     << "\n");
160609467b48Spatrick 
160709467b48Spatrick   // if we already precomputed the best successor for BB, return that if still
160809467b48Spatrick   // applicable.
160909467b48Spatrick   auto FoundEdge = ComputedEdges.find(BB);
161009467b48Spatrick   if (FoundEdge != ComputedEdges.end()) {
161109467b48Spatrick     MachineBasicBlock *Succ = FoundEdge->second.BB;
161209467b48Spatrick     ComputedEdges.erase(FoundEdge);
161309467b48Spatrick     BlockChain *SuccChain = BlockToChain[Succ];
161409467b48Spatrick     if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
161509467b48Spatrick         SuccChain != &Chain && Succ == *SuccChain->begin())
161609467b48Spatrick       return FoundEdge->second;
161709467b48Spatrick   }
161809467b48Spatrick 
161909467b48Spatrick   // if BB is part of a trellis, Use the trellis to determine the optimal
162009467b48Spatrick   // fallthrough edges
162109467b48Spatrick   if (isTrellis(BB, Successors, Chain, BlockFilter))
162209467b48Spatrick     return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
162309467b48Spatrick                                    BlockFilter);
162409467b48Spatrick 
162509467b48Spatrick   // For blocks with CFG violations, we may be able to lay them out anyway with
162609467b48Spatrick   // tail-duplication. We keep this vector so we can perform the probability
162709467b48Spatrick   // calculations the minimum number of times.
1628097a140dSpatrick   SmallVector<std::pair<BranchProbability, MachineBasicBlock *>, 4>
162909467b48Spatrick       DupCandidates;
163009467b48Spatrick   for (MachineBasicBlock *Succ : Successors) {
163109467b48Spatrick     auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
163209467b48Spatrick     BranchProbability SuccProb =
163309467b48Spatrick         getAdjustedProbability(RealSuccProb, AdjustedSumProb);
163409467b48Spatrick 
163509467b48Spatrick     BlockChain &SuccChain = *BlockToChain[Succ];
163609467b48Spatrick     // Skip the edge \c BB->Succ if block \c Succ has a better layout
163709467b48Spatrick     // predecessor that yields lower global cost.
163809467b48Spatrick     if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
163909467b48Spatrick                                    Chain, BlockFilter)) {
164009467b48Spatrick       // If tail duplication would make Succ profitable, place it.
164109467b48Spatrick       if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1642097a140dSpatrick         DupCandidates.emplace_back(SuccProb, Succ);
164309467b48Spatrick       continue;
164409467b48Spatrick     }
164509467b48Spatrick 
164609467b48Spatrick     LLVM_DEBUG(
164709467b48Spatrick         dbgs() << "    Candidate: " << getBlockName(Succ)
164809467b48Spatrick                << ", probability: " << SuccProb
164909467b48Spatrick                << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
165009467b48Spatrick                << "\n");
165109467b48Spatrick 
165209467b48Spatrick     if (BestSucc.BB && BestProb >= SuccProb) {
165309467b48Spatrick       LLVM_DEBUG(dbgs() << "    Not the best candidate, continuing\n");
165409467b48Spatrick       continue;
165509467b48Spatrick     }
165609467b48Spatrick 
165709467b48Spatrick     LLVM_DEBUG(dbgs() << "    Setting it as best candidate\n");
165809467b48Spatrick     BestSucc.BB = Succ;
165909467b48Spatrick     BestProb = SuccProb;
166009467b48Spatrick   }
166109467b48Spatrick   // Handle the tail duplication candidates in order of decreasing probability.
166209467b48Spatrick   // Stop at the first one that is profitable. Also stop if they are less
166309467b48Spatrick   // profitable than BestSucc. Position is important because we preserve it and
166409467b48Spatrick   // prefer first best match. Here we aren't comparing in order, so we capture
166509467b48Spatrick   // the position instead.
166609467b48Spatrick   llvm::stable_sort(DupCandidates,
166709467b48Spatrick                     [](std::tuple<BranchProbability, MachineBasicBlock *> L,
166809467b48Spatrick                        std::tuple<BranchProbability, MachineBasicBlock *> R) {
166909467b48Spatrick                       return std::get<0>(L) > std::get<0>(R);
167009467b48Spatrick                     });
167109467b48Spatrick   for (auto &Tup : DupCandidates) {
167209467b48Spatrick     BranchProbability DupProb;
167309467b48Spatrick     MachineBasicBlock *Succ;
167409467b48Spatrick     std::tie(DupProb, Succ) = Tup;
167509467b48Spatrick     if (DupProb < BestProb)
167609467b48Spatrick       break;
167709467b48Spatrick     if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
167809467b48Spatrick         && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
167909467b48Spatrick       LLVM_DEBUG(dbgs() << "    Candidate: " << getBlockName(Succ)
168009467b48Spatrick                         << ", probability: " << DupProb
168109467b48Spatrick                         << " (Tail Duplicate)\n");
168209467b48Spatrick       BestSucc.BB = Succ;
168309467b48Spatrick       BestSucc.ShouldTailDup = true;
168409467b48Spatrick       break;
168509467b48Spatrick     }
168609467b48Spatrick   }
168709467b48Spatrick 
168809467b48Spatrick   if (BestSucc.BB)
168909467b48Spatrick     LLVM_DEBUG(dbgs() << "    Selected: " << getBlockName(BestSucc.BB) << "\n");
169009467b48Spatrick 
169109467b48Spatrick   return BestSucc;
169209467b48Spatrick }
169309467b48Spatrick 
169409467b48Spatrick /// Select the best block from a worklist.
169509467b48Spatrick ///
169609467b48Spatrick /// This looks through the provided worklist as a list of candidate basic
169709467b48Spatrick /// blocks and select the most profitable one to place. The definition of
169809467b48Spatrick /// profitable only really makes sense in the context of a loop. This returns
169909467b48Spatrick /// the most frequently visited block in the worklist, which in the case of
170009467b48Spatrick /// a loop, is the one most desirable to be physically close to the rest of the
170109467b48Spatrick /// loop body in order to improve i-cache behavior.
170209467b48Spatrick ///
170309467b48Spatrick /// \returns The best block found, or null if none are viable.
selectBestCandidateBlock(const BlockChain & Chain,SmallVectorImpl<MachineBasicBlock * > & WorkList)170409467b48Spatrick MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
170509467b48Spatrick     const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
170609467b48Spatrick   // Once we need to walk the worklist looking for a candidate, cleanup the
170709467b48Spatrick   // worklist of already placed entries.
170809467b48Spatrick   // FIXME: If this shows up on profiles, it could be folded (at the cost of
170909467b48Spatrick   // some code complexity) into the loop below.
171073471bf0Spatrick   llvm::erase_if(WorkList, [&](MachineBasicBlock *BB) {
171109467b48Spatrick     return BlockToChain.lookup(BB) == &Chain;
171273471bf0Spatrick   });
171309467b48Spatrick 
171409467b48Spatrick   if (WorkList.empty())
171509467b48Spatrick     return nullptr;
171609467b48Spatrick 
171709467b48Spatrick   bool IsEHPad = WorkList[0]->isEHPad();
171809467b48Spatrick 
171909467b48Spatrick   MachineBasicBlock *BestBlock = nullptr;
172009467b48Spatrick   BlockFrequency BestFreq;
172109467b48Spatrick   for (MachineBasicBlock *MBB : WorkList) {
172209467b48Spatrick     assert(MBB->isEHPad() == IsEHPad &&
172309467b48Spatrick            "EHPad mismatch between block and work list.");
172409467b48Spatrick 
172509467b48Spatrick     BlockChain &SuccChain = *BlockToChain[MBB];
172609467b48Spatrick     if (&SuccChain == &Chain)
172709467b48Spatrick       continue;
172809467b48Spatrick 
172909467b48Spatrick     assert(SuccChain.UnscheduledPredecessors == 0 &&
173009467b48Spatrick            "Found CFG-violating block");
173109467b48Spatrick 
173209467b48Spatrick     BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
173309467b48Spatrick     LLVM_DEBUG(dbgs() << "    " << getBlockName(MBB) << " -> ";
173409467b48Spatrick                MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
173509467b48Spatrick 
173609467b48Spatrick     // For ehpad, we layout the least probable first as to avoid jumping back
173709467b48Spatrick     // from least probable landingpads to more probable ones.
173809467b48Spatrick     //
173909467b48Spatrick     // FIXME: Using probability is probably (!) not the best way to achieve
174009467b48Spatrick     // this. We should probably have a more principled approach to layout
174109467b48Spatrick     // cleanup code.
174209467b48Spatrick     //
174309467b48Spatrick     // The goal is to get:
174409467b48Spatrick     //
174509467b48Spatrick     //                 +--------------------------+
174609467b48Spatrick     //                 |                          V
174709467b48Spatrick     // InnerLp -> InnerCleanup    OuterLp -> OuterCleanup -> Resume
174809467b48Spatrick     //
174909467b48Spatrick     // Rather than:
175009467b48Spatrick     //
175109467b48Spatrick     //                 +-------------------------------------+
175209467b48Spatrick     //                 V                                     |
175309467b48Spatrick     // OuterLp -> OuterCleanup -> Resume     InnerLp -> InnerCleanup
175409467b48Spatrick     if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
175509467b48Spatrick       continue;
175609467b48Spatrick 
175709467b48Spatrick     BestBlock = MBB;
175809467b48Spatrick     BestFreq = CandidateFreq;
175909467b48Spatrick   }
176009467b48Spatrick 
176109467b48Spatrick   return BestBlock;
176209467b48Spatrick }
176309467b48Spatrick 
176409467b48Spatrick /// Retrieve the first unplaced basic block.
176509467b48Spatrick ///
176609467b48Spatrick /// This routine is called when we are unable to use the CFG to walk through
176709467b48Spatrick /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
176809467b48Spatrick /// We walk through the function's blocks in order, starting from the
176909467b48Spatrick /// LastUnplacedBlockIt. We update this iterator on each call to avoid
177009467b48Spatrick /// re-scanning the entire sequence on repeated calls to this routine.
getFirstUnplacedBlock(const BlockChain & PlacedChain,MachineFunction::iterator & PrevUnplacedBlockIt,const BlockFilterSet * BlockFilter)177109467b48Spatrick MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
177209467b48Spatrick     const BlockChain &PlacedChain,
177309467b48Spatrick     MachineFunction::iterator &PrevUnplacedBlockIt,
177409467b48Spatrick     const BlockFilterSet *BlockFilter) {
177509467b48Spatrick   for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
177609467b48Spatrick        ++I) {
177709467b48Spatrick     if (BlockFilter && !BlockFilter->count(&*I))
177809467b48Spatrick       continue;
177909467b48Spatrick     if (BlockToChain[&*I] != &PlacedChain) {
178009467b48Spatrick       PrevUnplacedBlockIt = I;
178109467b48Spatrick       // Now select the head of the chain to which the unplaced block belongs
178209467b48Spatrick       // as the block to place. This will force the entire chain to be placed,
178309467b48Spatrick       // and satisfies the requirements of merging chains.
178409467b48Spatrick       return *BlockToChain[&*I]->begin();
178509467b48Spatrick     }
178609467b48Spatrick   }
178709467b48Spatrick   return nullptr;
178809467b48Spatrick }
178909467b48Spatrick 
fillWorkLists(const MachineBasicBlock * MBB,SmallPtrSetImpl<BlockChain * > & UpdatedPreds,const BlockFilterSet * BlockFilter=nullptr)179009467b48Spatrick void MachineBlockPlacement::fillWorkLists(
179109467b48Spatrick     const MachineBasicBlock *MBB,
179209467b48Spatrick     SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
179309467b48Spatrick     const BlockFilterSet *BlockFilter = nullptr) {
179409467b48Spatrick   BlockChain &Chain = *BlockToChain[MBB];
179509467b48Spatrick   if (!UpdatedPreds.insert(&Chain).second)
179609467b48Spatrick     return;
179709467b48Spatrick 
179809467b48Spatrick   assert(
179909467b48Spatrick       Chain.UnscheduledPredecessors == 0 &&
180009467b48Spatrick       "Attempting to place block with unscheduled predecessors in worklist.");
180109467b48Spatrick   for (MachineBasicBlock *ChainBB : Chain) {
180209467b48Spatrick     assert(BlockToChain[ChainBB] == &Chain &&
180309467b48Spatrick            "Block in chain doesn't match BlockToChain map.");
180409467b48Spatrick     for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
180509467b48Spatrick       if (BlockFilter && !BlockFilter->count(Pred))
180609467b48Spatrick         continue;
180709467b48Spatrick       if (BlockToChain[Pred] == &Chain)
180809467b48Spatrick         continue;
180909467b48Spatrick       ++Chain.UnscheduledPredecessors;
181009467b48Spatrick     }
181109467b48Spatrick   }
181209467b48Spatrick 
181309467b48Spatrick   if (Chain.UnscheduledPredecessors != 0)
181409467b48Spatrick     return;
181509467b48Spatrick 
181609467b48Spatrick   MachineBasicBlock *BB = *Chain.begin();
181709467b48Spatrick   if (BB->isEHPad())
181809467b48Spatrick     EHPadWorkList.push_back(BB);
181909467b48Spatrick   else
182009467b48Spatrick     BlockWorkList.push_back(BB);
182109467b48Spatrick }
182209467b48Spatrick 
buildChain(const MachineBasicBlock * HeadBB,BlockChain & Chain,BlockFilterSet * BlockFilter)182309467b48Spatrick void MachineBlockPlacement::buildChain(
182409467b48Spatrick     const MachineBasicBlock *HeadBB, BlockChain &Chain,
182509467b48Spatrick     BlockFilterSet *BlockFilter) {
182609467b48Spatrick   assert(HeadBB && "BB must not be null.\n");
182709467b48Spatrick   assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
182809467b48Spatrick   MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
182909467b48Spatrick 
183009467b48Spatrick   const MachineBasicBlock *LoopHeaderBB = HeadBB;
183109467b48Spatrick   markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
183209467b48Spatrick   MachineBasicBlock *BB = *std::prev(Chain.end());
183309467b48Spatrick   while (true) {
183409467b48Spatrick     assert(BB && "null block found at end of chain in loop.");
183509467b48Spatrick     assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
183609467b48Spatrick     assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
183709467b48Spatrick 
183809467b48Spatrick 
183909467b48Spatrick     // Look for the best viable successor if there is one to place immediately
184009467b48Spatrick     // after this block.
184109467b48Spatrick     auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
184209467b48Spatrick     MachineBasicBlock* BestSucc = Result.BB;
184309467b48Spatrick     bool ShouldTailDup = Result.ShouldTailDup;
184409467b48Spatrick     if (allowTailDupPlacement())
184509467b48Spatrick       ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
184609467b48Spatrick                                                                   Chain,
184709467b48Spatrick                                                                   BlockFilter));
184809467b48Spatrick 
184909467b48Spatrick     // If an immediate successor isn't available, look for the best viable
185009467b48Spatrick     // block among those we've identified as not violating the loop's CFG at
185109467b48Spatrick     // this point. This won't be a fallthrough, but it will increase locality.
185209467b48Spatrick     if (!BestSucc)
185309467b48Spatrick       BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
185409467b48Spatrick     if (!BestSucc)
185509467b48Spatrick       BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
185609467b48Spatrick 
185709467b48Spatrick     if (!BestSucc) {
185809467b48Spatrick       BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
185909467b48Spatrick       if (!BestSucc)
186009467b48Spatrick         break;
186109467b48Spatrick 
186209467b48Spatrick       LLVM_DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
186309467b48Spatrick                            "layout successor until the CFG reduces\n");
186409467b48Spatrick     }
186509467b48Spatrick 
186609467b48Spatrick     // Placement may have changed tail duplication opportunities.
186709467b48Spatrick     // Check for that now.
186809467b48Spatrick     if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1869097a140dSpatrick       repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1870097a140dSpatrick                                        BlockFilter, PrevUnplacedBlockIt);
1871097a140dSpatrick       // If the chosen successor was duplicated into BB, don't bother laying
1872097a140dSpatrick       // it out, just go round the loop again with BB as the chain end.
1873097a140dSpatrick       if (!BB->isSuccessor(BestSucc))
187409467b48Spatrick         continue;
187509467b48Spatrick     }
187609467b48Spatrick 
187709467b48Spatrick     // Place this block, updating the datastructures to reflect its placement.
187809467b48Spatrick     BlockChain &SuccChain = *BlockToChain[BestSucc];
187909467b48Spatrick     // Zero out UnscheduledPredecessors for the successor we're about to merge in case
188009467b48Spatrick     // we selected a successor that didn't fit naturally into the CFG.
188109467b48Spatrick     SuccChain.UnscheduledPredecessors = 0;
188209467b48Spatrick     LLVM_DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
188309467b48Spatrick                       << getBlockName(BestSucc) << "\n");
188409467b48Spatrick     markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
188509467b48Spatrick     Chain.merge(BestSucc, &SuccChain);
188609467b48Spatrick     BB = *std::prev(Chain.end());
188709467b48Spatrick   }
188809467b48Spatrick 
188909467b48Spatrick   LLVM_DEBUG(dbgs() << "Finished forming chain for header block "
189009467b48Spatrick                     << getBlockName(*Chain.begin()) << "\n");
189109467b48Spatrick }
189209467b48Spatrick 
189309467b48Spatrick // If bottom of block BB has only one successor OldTop, in most cases it is
189409467b48Spatrick // profitable to move it before OldTop, except the following case:
189509467b48Spatrick //
189609467b48Spatrick //     -->OldTop<-
189709467b48Spatrick //     |    .    |
189809467b48Spatrick //     |    .    |
189909467b48Spatrick //     |    .    |
190009467b48Spatrick //     ---Pred   |
190109467b48Spatrick //          |    |
190209467b48Spatrick //         BB-----
190309467b48Spatrick //
190409467b48Spatrick // If BB is moved before OldTop, Pred needs a taken branch to BB, and it can't
190509467b48Spatrick // layout the other successor below it, so it can't reduce taken branch.
190609467b48Spatrick // In this case we keep its original layout.
190709467b48Spatrick bool
canMoveBottomBlockToTop(const MachineBasicBlock * BottomBlock,const MachineBasicBlock * OldTop)190809467b48Spatrick MachineBlockPlacement::canMoveBottomBlockToTop(
190909467b48Spatrick     const MachineBasicBlock *BottomBlock,
191009467b48Spatrick     const MachineBasicBlock *OldTop) {
191109467b48Spatrick   if (BottomBlock->pred_size() != 1)
191209467b48Spatrick     return true;
191309467b48Spatrick   MachineBasicBlock *Pred = *BottomBlock->pred_begin();
191409467b48Spatrick   if (Pred->succ_size() != 2)
191509467b48Spatrick     return true;
191609467b48Spatrick 
191709467b48Spatrick   MachineBasicBlock *OtherBB = *Pred->succ_begin();
191809467b48Spatrick   if (OtherBB == BottomBlock)
191909467b48Spatrick     OtherBB = *Pred->succ_rbegin();
192009467b48Spatrick   if (OtherBB == OldTop)
192109467b48Spatrick     return false;
192209467b48Spatrick 
192309467b48Spatrick   return true;
192409467b48Spatrick }
192509467b48Spatrick 
192609467b48Spatrick // Find out the possible fall through frequence to the top of a loop.
192709467b48Spatrick BlockFrequency
TopFallThroughFreq(const MachineBasicBlock * Top,const BlockFilterSet & LoopBlockSet)192809467b48Spatrick MachineBlockPlacement::TopFallThroughFreq(
192909467b48Spatrick     const MachineBasicBlock *Top,
193009467b48Spatrick     const BlockFilterSet &LoopBlockSet) {
193109467b48Spatrick   BlockFrequency MaxFreq = 0;
193209467b48Spatrick   for (MachineBasicBlock *Pred : Top->predecessors()) {
193309467b48Spatrick     BlockChain *PredChain = BlockToChain[Pred];
193409467b48Spatrick     if (!LoopBlockSet.count(Pred) &&
193509467b48Spatrick         (!PredChain || Pred == *std::prev(PredChain->end()))) {
193609467b48Spatrick       // Found a Pred block can be placed before Top.
193709467b48Spatrick       // Check if Top is the best successor of Pred.
193809467b48Spatrick       auto TopProb = MBPI->getEdgeProbability(Pred, Top);
193909467b48Spatrick       bool TopOK = true;
194009467b48Spatrick       for (MachineBasicBlock *Succ : Pred->successors()) {
194109467b48Spatrick         auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
194209467b48Spatrick         BlockChain *SuccChain = BlockToChain[Succ];
194309467b48Spatrick         // Check if Succ can be placed after Pred.
194409467b48Spatrick         // Succ should not be in any chain, or it is the head of some chain.
194509467b48Spatrick         if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
194609467b48Spatrick             (!SuccChain || Succ == *SuccChain->begin())) {
194709467b48Spatrick           TopOK = false;
194809467b48Spatrick           break;
194909467b48Spatrick         }
195009467b48Spatrick       }
195109467b48Spatrick       if (TopOK) {
195209467b48Spatrick         BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
195309467b48Spatrick                                   MBPI->getEdgeProbability(Pred, Top);
195409467b48Spatrick         if (EdgeFreq > MaxFreq)
195509467b48Spatrick           MaxFreq = EdgeFreq;
195609467b48Spatrick       }
195709467b48Spatrick     }
195809467b48Spatrick   }
195909467b48Spatrick   return MaxFreq;
196009467b48Spatrick }
196109467b48Spatrick 
196209467b48Spatrick // Compute the fall through gains when move NewTop before OldTop.
196309467b48Spatrick //
196409467b48Spatrick // In following diagram, edges marked as "-" are reduced fallthrough, edges
196509467b48Spatrick // marked as "+" are increased fallthrough, this function computes
196609467b48Spatrick //
196709467b48Spatrick //      SUM(increased fallthrough) - SUM(decreased fallthrough)
196809467b48Spatrick //
196909467b48Spatrick //              |
197009467b48Spatrick //              | -
197109467b48Spatrick //              V
197209467b48Spatrick //        --->OldTop
197309467b48Spatrick //        |     .
197409467b48Spatrick //        |     .
197509467b48Spatrick //       +|     .    +
197609467b48Spatrick //        |   Pred --->
197709467b48Spatrick //        |     |-
197809467b48Spatrick //        |     V
197909467b48Spatrick //        --- NewTop <---
198009467b48Spatrick //              |-
198109467b48Spatrick //              V
198209467b48Spatrick //
198309467b48Spatrick BlockFrequency
FallThroughGains(const MachineBasicBlock * NewTop,const MachineBasicBlock * OldTop,const MachineBasicBlock * ExitBB,const BlockFilterSet & LoopBlockSet)198409467b48Spatrick MachineBlockPlacement::FallThroughGains(
198509467b48Spatrick     const MachineBasicBlock *NewTop,
198609467b48Spatrick     const MachineBasicBlock *OldTop,
198709467b48Spatrick     const MachineBasicBlock *ExitBB,
198809467b48Spatrick     const BlockFilterSet &LoopBlockSet) {
198909467b48Spatrick   BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
199009467b48Spatrick   BlockFrequency FallThrough2Exit = 0;
199109467b48Spatrick   if (ExitBB)
199209467b48Spatrick     FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
199309467b48Spatrick         MBPI->getEdgeProbability(NewTop, ExitBB);
199409467b48Spatrick   BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) *
199509467b48Spatrick       MBPI->getEdgeProbability(NewTop, OldTop);
199609467b48Spatrick 
199709467b48Spatrick   // Find the best Pred of NewTop.
199809467b48Spatrick    MachineBasicBlock *BestPred = nullptr;
199909467b48Spatrick    BlockFrequency FallThroughFromPred = 0;
200009467b48Spatrick    for (MachineBasicBlock *Pred : NewTop->predecessors()) {
200109467b48Spatrick      if (!LoopBlockSet.count(Pred))
200209467b48Spatrick        continue;
200309467b48Spatrick      BlockChain *PredChain = BlockToChain[Pred];
200409467b48Spatrick      if (!PredChain || Pred == *std::prev(PredChain->end())) {
200509467b48Spatrick        BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
200609467b48Spatrick            MBPI->getEdgeProbability(Pred, NewTop);
200709467b48Spatrick        if (EdgeFreq > FallThroughFromPred) {
200809467b48Spatrick          FallThroughFromPred = EdgeFreq;
200909467b48Spatrick          BestPred = Pred;
201009467b48Spatrick        }
201109467b48Spatrick      }
201209467b48Spatrick    }
201309467b48Spatrick 
201409467b48Spatrick    // If NewTop is not placed after Pred, another successor can be placed
201509467b48Spatrick    // after Pred.
201609467b48Spatrick    BlockFrequency NewFreq = 0;
201709467b48Spatrick    if (BestPred) {
201809467b48Spatrick      for (MachineBasicBlock *Succ : BestPred->successors()) {
201909467b48Spatrick        if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
202009467b48Spatrick          continue;
202109467b48Spatrick        if (ComputedEdges.find(Succ) != ComputedEdges.end())
202209467b48Spatrick          continue;
202309467b48Spatrick        BlockChain *SuccChain = BlockToChain[Succ];
202409467b48Spatrick        if ((SuccChain && (Succ != *SuccChain->begin())) ||
202509467b48Spatrick            (SuccChain == BlockToChain[BestPred]))
202609467b48Spatrick          continue;
202709467b48Spatrick        BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
202809467b48Spatrick            MBPI->getEdgeProbability(BestPred, Succ);
202909467b48Spatrick        if (EdgeFreq > NewFreq)
203009467b48Spatrick          NewFreq = EdgeFreq;
203109467b48Spatrick      }
203209467b48Spatrick      BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
203309467b48Spatrick          MBPI->getEdgeProbability(BestPred, NewTop);
203409467b48Spatrick      if (NewFreq > OrigEdgeFreq) {
203509467b48Spatrick        // If NewTop is not the best successor of Pred, then Pred doesn't
203609467b48Spatrick        // fallthrough to NewTop. So there is no FallThroughFromPred and
203709467b48Spatrick        // NewFreq.
203809467b48Spatrick        NewFreq = 0;
203909467b48Spatrick        FallThroughFromPred = 0;
204009467b48Spatrick      }
204109467b48Spatrick    }
204209467b48Spatrick 
204309467b48Spatrick    BlockFrequency Result = 0;
204409467b48Spatrick    BlockFrequency Gains = BackEdgeFreq + NewFreq;
204509467b48Spatrick    BlockFrequency Lost = FallThrough2Top + FallThrough2Exit +
204609467b48Spatrick        FallThroughFromPred;
204709467b48Spatrick    if (Gains > Lost)
204809467b48Spatrick      Result = Gains - Lost;
204909467b48Spatrick    return Result;
205009467b48Spatrick }
205109467b48Spatrick 
205209467b48Spatrick /// Helper function of findBestLoopTop. Find the best loop top block
205309467b48Spatrick /// from predecessors of old top.
205409467b48Spatrick ///
205509467b48Spatrick /// Look for a block which is strictly better than the old top for laying
205609467b48Spatrick /// out before the old top of the loop. This looks for only two patterns:
205709467b48Spatrick ///
205809467b48Spatrick ///     1. a block has only one successor, the old loop top
205909467b48Spatrick ///
206009467b48Spatrick ///        Because such a block will always result in an unconditional jump,
206109467b48Spatrick ///        rotating it in front of the old top is always profitable.
206209467b48Spatrick ///
206309467b48Spatrick ///     2. a block has two successors, one is old top, another is exit
206409467b48Spatrick ///        and it has more than one predecessors
206509467b48Spatrick ///
206609467b48Spatrick ///        If it is below one of its predecessors P, only P can fall through to
206709467b48Spatrick ///        it, all other predecessors need a jump to it, and another conditional
206809467b48Spatrick ///        jump to loop header. If it is moved before loop header, all its
206909467b48Spatrick ///        predecessors jump to it, then fall through to loop header. So all its
207009467b48Spatrick ///        predecessors except P can reduce one taken branch.
207109467b48Spatrick ///        At the same time, move it before old top increases the taken branch
207209467b48Spatrick ///        to loop exit block, so the reduced taken branch will be compared with
207309467b48Spatrick ///        the increased taken branch to the loop exit block.
207409467b48Spatrick MachineBasicBlock *
findBestLoopTopHelper(MachineBasicBlock * OldTop,const MachineLoop & L,const BlockFilterSet & LoopBlockSet)207509467b48Spatrick MachineBlockPlacement::findBestLoopTopHelper(
207609467b48Spatrick     MachineBasicBlock *OldTop,
207709467b48Spatrick     const MachineLoop &L,
207809467b48Spatrick     const BlockFilterSet &LoopBlockSet) {
207909467b48Spatrick   // Check that the header hasn't been fused with a preheader block due to
208009467b48Spatrick   // crazy branches. If it has, we need to start with the header at the top to
208109467b48Spatrick   // prevent pulling the preheader into the loop body.
208209467b48Spatrick   BlockChain &HeaderChain = *BlockToChain[OldTop];
208309467b48Spatrick   if (!LoopBlockSet.count(*HeaderChain.begin()))
208409467b48Spatrick     return OldTop;
2085*d415bd75Srobert   if (OldTop != *HeaderChain.begin())
2086*d415bd75Srobert     return OldTop;
208709467b48Spatrick 
208809467b48Spatrick   LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
208909467b48Spatrick                     << "\n");
209009467b48Spatrick 
209109467b48Spatrick   BlockFrequency BestGains = 0;
209209467b48Spatrick   MachineBasicBlock *BestPred = nullptr;
209309467b48Spatrick   for (MachineBasicBlock *Pred : OldTop->predecessors()) {
209409467b48Spatrick     if (!LoopBlockSet.count(Pred))
209509467b48Spatrick       continue;
209609467b48Spatrick     if (Pred == L.getHeader())
209709467b48Spatrick       continue;
209809467b48Spatrick     LLVM_DEBUG(dbgs() << "   old top pred: " << getBlockName(Pred) << ", has "
209909467b48Spatrick                       << Pred->succ_size() << " successors, ";
210009467b48Spatrick                MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
210109467b48Spatrick     if (Pred->succ_size() > 2)
210209467b48Spatrick       continue;
210309467b48Spatrick 
210409467b48Spatrick     MachineBasicBlock *OtherBB = nullptr;
210509467b48Spatrick     if (Pred->succ_size() == 2) {
210609467b48Spatrick       OtherBB = *Pred->succ_begin();
210709467b48Spatrick       if (OtherBB == OldTop)
210809467b48Spatrick         OtherBB = *Pred->succ_rbegin();
210909467b48Spatrick     }
211009467b48Spatrick 
211109467b48Spatrick     if (!canMoveBottomBlockToTop(Pred, OldTop))
211209467b48Spatrick       continue;
211309467b48Spatrick 
211409467b48Spatrick     BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
211509467b48Spatrick                                             LoopBlockSet);
211609467b48Spatrick     if ((Gains > 0) && (Gains > BestGains ||
211709467b48Spatrick         ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
211809467b48Spatrick       BestPred = Pred;
211909467b48Spatrick       BestGains = Gains;
212009467b48Spatrick     }
212109467b48Spatrick   }
212209467b48Spatrick 
212309467b48Spatrick   // If no direct predecessor is fine, just use the loop header.
212409467b48Spatrick   if (!BestPred) {
212509467b48Spatrick     LLVM_DEBUG(dbgs() << "    final top unchanged\n");
212609467b48Spatrick     return OldTop;
212709467b48Spatrick   }
212809467b48Spatrick 
212909467b48Spatrick   // Walk backwards through any straight line of predecessors.
213009467b48Spatrick   while (BestPred->pred_size() == 1 &&
213109467b48Spatrick          (*BestPred->pred_begin())->succ_size() == 1 &&
213209467b48Spatrick          *BestPred->pred_begin() != L.getHeader())
213309467b48Spatrick     BestPred = *BestPred->pred_begin();
213409467b48Spatrick 
213509467b48Spatrick   LLVM_DEBUG(dbgs() << "    final top: " << getBlockName(BestPred) << "\n");
213609467b48Spatrick   return BestPred;
213709467b48Spatrick }
213809467b48Spatrick 
213909467b48Spatrick /// Find the best loop top block for layout.
214009467b48Spatrick ///
214109467b48Spatrick /// This function iteratively calls findBestLoopTopHelper, until no new better
214209467b48Spatrick /// BB can be found.
214309467b48Spatrick MachineBasicBlock *
findBestLoopTop(const MachineLoop & L,const BlockFilterSet & LoopBlockSet)214409467b48Spatrick MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
214509467b48Spatrick                                        const BlockFilterSet &LoopBlockSet) {
214609467b48Spatrick   // Placing the latch block before the header may introduce an extra branch
214709467b48Spatrick   // that skips this block the first time the loop is executed, which we want
214809467b48Spatrick   // to avoid when optimising for size.
214909467b48Spatrick   // FIXME: in theory there is a case that does not introduce a new branch,
215009467b48Spatrick   // i.e. when the layout predecessor does not fallthrough to the loop header.
215109467b48Spatrick   // In practice this never happens though: there always seems to be a preheader
215209467b48Spatrick   // that can fallthrough and that is also placed before the header.
215309467b48Spatrick   bool OptForSize = F->getFunction().hasOptSize() ||
2154097a140dSpatrick                     llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get());
215509467b48Spatrick   if (OptForSize)
215609467b48Spatrick     return L.getHeader();
215709467b48Spatrick 
215809467b48Spatrick   MachineBasicBlock *OldTop = nullptr;
215909467b48Spatrick   MachineBasicBlock *NewTop = L.getHeader();
216009467b48Spatrick   while (NewTop != OldTop) {
216109467b48Spatrick     OldTop = NewTop;
216209467b48Spatrick     NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
216309467b48Spatrick     if (NewTop != OldTop)
216409467b48Spatrick       ComputedEdges[NewTop] = { OldTop, false };
216509467b48Spatrick   }
216609467b48Spatrick   return NewTop;
216709467b48Spatrick }
216809467b48Spatrick 
216909467b48Spatrick /// Find the best loop exiting block for layout.
217009467b48Spatrick ///
217109467b48Spatrick /// This routine implements the logic to analyze the loop looking for the best
217209467b48Spatrick /// block to layout at the top of the loop. Typically this is done to maximize
217309467b48Spatrick /// fallthrough opportunities.
217409467b48Spatrick MachineBasicBlock *
findBestLoopExit(const MachineLoop & L,const BlockFilterSet & LoopBlockSet,BlockFrequency & ExitFreq)217509467b48Spatrick MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
217609467b48Spatrick                                         const BlockFilterSet &LoopBlockSet,
217709467b48Spatrick                                         BlockFrequency &ExitFreq) {
217809467b48Spatrick   // We don't want to layout the loop linearly in all cases. If the loop header
217909467b48Spatrick   // is just a normal basic block in the loop, we want to look for what block
218009467b48Spatrick   // within the loop is the best one to layout at the top. However, if the loop
218109467b48Spatrick   // header has be pre-merged into a chain due to predecessors not having
218209467b48Spatrick   // analyzable branches, *and* the predecessor it is merged with is *not* part
218309467b48Spatrick   // of the loop, rotating the header into the middle of the loop will create
218409467b48Spatrick   // a non-contiguous range of blocks which is Very Bad. So start with the
218509467b48Spatrick   // header and only rotate if safe.
218609467b48Spatrick   BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
218709467b48Spatrick   if (!LoopBlockSet.count(*HeaderChain.begin()))
218809467b48Spatrick     return nullptr;
218909467b48Spatrick 
219009467b48Spatrick   BlockFrequency BestExitEdgeFreq;
219109467b48Spatrick   unsigned BestExitLoopDepth = 0;
219209467b48Spatrick   MachineBasicBlock *ExitingBB = nullptr;
219309467b48Spatrick   // If there are exits to outer loops, loop rotation can severely limit
219409467b48Spatrick   // fallthrough opportunities unless it selects such an exit. Keep a set of
219509467b48Spatrick   // blocks where rotating to exit with that block will reach an outer loop.
219609467b48Spatrick   SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
219709467b48Spatrick 
219809467b48Spatrick   LLVM_DEBUG(dbgs() << "Finding best loop exit for: "
219909467b48Spatrick                     << getBlockName(L.getHeader()) << "\n");
220009467b48Spatrick   for (MachineBasicBlock *MBB : L.getBlocks()) {
220109467b48Spatrick     BlockChain &Chain = *BlockToChain[MBB];
220209467b48Spatrick     // Ensure that this block is at the end of a chain; otherwise it could be
220309467b48Spatrick     // mid-way through an inner loop or a successor of an unanalyzable branch.
220409467b48Spatrick     if (MBB != *std::prev(Chain.end()))
220509467b48Spatrick       continue;
220609467b48Spatrick 
220709467b48Spatrick     // Now walk the successors. We need to establish whether this has a viable
220809467b48Spatrick     // exiting successor and whether it has a viable non-exiting successor.
220909467b48Spatrick     // We store the old exiting state and restore it if a viable looping
221009467b48Spatrick     // successor isn't found.
221109467b48Spatrick     MachineBasicBlock *OldExitingBB = ExitingBB;
221209467b48Spatrick     BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
221309467b48Spatrick     bool HasLoopingSucc = false;
221409467b48Spatrick     for (MachineBasicBlock *Succ : MBB->successors()) {
221509467b48Spatrick       if (Succ->isEHPad())
221609467b48Spatrick         continue;
221709467b48Spatrick       if (Succ == MBB)
221809467b48Spatrick         continue;
221909467b48Spatrick       BlockChain &SuccChain = *BlockToChain[Succ];
222009467b48Spatrick       // Don't split chains, either this chain or the successor's chain.
222109467b48Spatrick       if (&Chain == &SuccChain) {
222209467b48Spatrick         LLVM_DEBUG(dbgs() << "    exiting: " << getBlockName(MBB) << " -> "
222309467b48Spatrick                           << getBlockName(Succ) << " (chain conflict)\n");
222409467b48Spatrick         continue;
222509467b48Spatrick       }
222609467b48Spatrick 
222709467b48Spatrick       auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
222809467b48Spatrick       if (LoopBlockSet.count(Succ)) {
222909467b48Spatrick         LLVM_DEBUG(dbgs() << "    looping: " << getBlockName(MBB) << " -> "
223009467b48Spatrick                           << getBlockName(Succ) << " (" << SuccProb << ")\n");
223109467b48Spatrick         HasLoopingSucc = true;
223209467b48Spatrick         continue;
223309467b48Spatrick       }
223409467b48Spatrick 
223509467b48Spatrick       unsigned SuccLoopDepth = 0;
223609467b48Spatrick       if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
223709467b48Spatrick         SuccLoopDepth = ExitLoop->getLoopDepth();
223809467b48Spatrick         if (ExitLoop->contains(&L))
223909467b48Spatrick           BlocksExitingToOuterLoop.insert(MBB);
224009467b48Spatrick       }
224109467b48Spatrick 
224209467b48Spatrick       BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
224309467b48Spatrick       LLVM_DEBUG(dbgs() << "    exiting: " << getBlockName(MBB) << " -> "
224409467b48Spatrick                         << getBlockName(Succ) << " [L:" << SuccLoopDepth
224509467b48Spatrick                         << "] (";
224609467b48Spatrick                  MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
224709467b48Spatrick       // Note that we bias this toward an existing layout successor to retain
224809467b48Spatrick       // incoming order in the absence of better information. The exit must have
224909467b48Spatrick       // a frequency higher than the current exit before we consider breaking
225009467b48Spatrick       // the layout.
225109467b48Spatrick       BranchProbability Bias(100 - ExitBlockBias, 100);
225209467b48Spatrick       if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
225309467b48Spatrick           ExitEdgeFreq > BestExitEdgeFreq ||
225409467b48Spatrick           (MBB->isLayoutSuccessor(Succ) &&
225509467b48Spatrick            !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
225609467b48Spatrick         BestExitEdgeFreq = ExitEdgeFreq;
225709467b48Spatrick         ExitingBB = MBB;
225809467b48Spatrick       }
225909467b48Spatrick     }
226009467b48Spatrick 
226109467b48Spatrick     if (!HasLoopingSucc) {
226209467b48Spatrick       // Restore the old exiting state, no viable looping successor was found.
226309467b48Spatrick       ExitingBB = OldExitingBB;
226409467b48Spatrick       BestExitEdgeFreq = OldBestExitEdgeFreq;
226509467b48Spatrick     }
226609467b48Spatrick   }
226709467b48Spatrick   // Without a candidate exiting block or with only a single block in the
226809467b48Spatrick   // loop, just use the loop header to layout the loop.
226909467b48Spatrick   if (!ExitingBB) {
227009467b48Spatrick     LLVM_DEBUG(
227109467b48Spatrick         dbgs() << "    No other candidate exit blocks, using loop header\n");
227209467b48Spatrick     return nullptr;
227309467b48Spatrick   }
227409467b48Spatrick   if (L.getNumBlocks() == 1) {
227509467b48Spatrick     LLVM_DEBUG(dbgs() << "    Loop has 1 block, using loop header as exit\n");
227609467b48Spatrick     return nullptr;
227709467b48Spatrick   }
227809467b48Spatrick 
227909467b48Spatrick   // Also, if we have exit blocks which lead to outer loops but didn't select
228009467b48Spatrick   // one of them as the exiting block we are rotating toward, disable loop
228109467b48Spatrick   // rotation altogether.
228209467b48Spatrick   if (!BlocksExitingToOuterLoop.empty() &&
228309467b48Spatrick       !BlocksExitingToOuterLoop.count(ExitingBB))
228409467b48Spatrick     return nullptr;
228509467b48Spatrick 
228609467b48Spatrick   LLVM_DEBUG(dbgs() << "  Best exiting block: " << getBlockName(ExitingBB)
228709467b48Spatrick                     << "\n");
228809467b48Spatrick   ExitFreq = BestExitEdgeFreq;
228909467b48Spatrick   return ExitingBB;
229009467b48Spatrick }
229109467b48Spatrick 
229209467b48Spatrick /// Check if there is a fallthrough to loop header Top.
229309467b48Spatrick ///
229409467b48Spatrick ///   1. Look for a Pred that can be layout before Top.
229509467b48Spatrick ///   2. Check if Top is the most possible successor of Pred.
229609467b48Spatrick bool
hasViableTopFallthrough(const MachineBasicBlock * Top,const BlockFilterSet & LoopBlockSet)229709467b48Spatrick MachineBlockPlacement::hasViableTopFallthrough(
229809467b48Spatrick     const MachineBasicBlock *Top,
229909467b48Spatrick     const BlockFilterSet &LoopBlockSet) {
230009467b48Spatrick   for (MachineBasicBlock *Pred : Top->predecessors()) {
230109467b48Spatrick     BlockChain *PredChain = BlockToChain[Pred];
230209467b48Spatrick     if (!LoopBlockSet.count(Pred) &&
230309467b48Spatrick         (!PredChain || Pred == *std::prev(PredChain->end()))) {
230409467b48Spatrick       // Found a Pred block can be placed before Top.
230509467b48Spatrick       // Check if Top is the best successor of Pred.
230609467b48Spatrick       auto TopProb = MBPI->getEdgeProbability(Pred, Top);
230709467b48Spatrick       bool TopOK = true;
230809467b48Spatrick       for (MachineBasicBlock *Succ : Pred->successors()) {
230909467b48Spatrick         auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
231009467b48Spatrick         BlockChain *SuccChain = BlockToChain[Succ];
231109467b48Spatrick         // Check if Succ can be placed after Pred.
231209467b48Spatrick         // Succ should not be in any chain, or it is the head of some chain.
231309467b48Spatrick         if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
231409467b48Spatrick           TopOK = false;
231509467b48Spatrick           break;
231609467b48Spatrick         }
231709467b48Spatrick       }
231809467b48Spatrick       if (TopOK)
231909467b48Spatrick         return true;
232009467b48Spatrick     }
232109467b48Spatrick   }
232209467b48Spatrick   return false;
232309467b48Spatrick }
232409467b48Spatrick 
232509467b48Spatrick /// Attempt to rotate an exiting block to the bottom of the loop.
232609467b48Spatrick ///
232709467b48Spatrick /// Once we have built a chain, try to rotate it to line up the hot exit block
232809467b48Spatrick /// with fallthrough out of the loop if doing so doesn't introduce unnecessary
232909467b48Spatrick /// branches. For example, if the loop has fallthrough into its header and out
233009467b48Spatrick /// of its bottom already, don't rotate it.
rotateLoop(BlockChain & LoopChain,const MachineBasicBlock * ExitingBB,BlockFrequency ExitFreq,const BlockFilterSet & LoopBlockSet)233109467b48Spatrick void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
233209467b48Spatrick                                        const MachineBasicBlock *ExitingBB,
233309467b48Spatrick                                        BlockFrequency ExitFreq,
233409467b48Spatrick                                        const BlockFilterSet &LoopBlockSet) {
233509467b48Spatrick   if (!ExitingBB)
233609467b48Spatrick     return;
233709467b48Spatrick 
233809467b48Spatrick   MachineBasicBlock *Top = *LoopChain.begin();
233909467b48Spatrick   MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
234009467b48Spatrick 
234109467b48Spatrick   // If ExitingBB is already the last one in a chain then nothing to do.
234209467b48Spatrick   if (Bottom == ExitingBB)
234309467b48Spatrick     return;
234409467b48Spatrick 
234573471bf0Spatrick   // The entry block should always be the first BB in a function.
234673471bf0Spatrick   if (Top->isEntryBlock())
234773471bf0Spatrick     return;
234873471bf0Spatrick 
234909467b48Spatrick   bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
235009467b48Spatrick 
235109467b48Spatrick   // If the header has viable fallthrough, check whether the current loop
235209467b48Spatrick   // bottom is a viable exiting block. If so, bail out as rotating will
235309467b48Spatrick   // introduce an unnecessary branch.
235409467b48Spatrick   if (ViableTopFallthrough) {
235509467b48Spatrick     for (MachineBasicBlock *Succ : Bottom->successors()) {
235609467b48Spatrick       BlockChain *SuccChain = BlockToChain[Succ];
235709467b48Spatrick       if (!LoopBlockSet.count(Succ) &&
235809467b48Spatrick           (!SuccChain || Succ == *SuccChain->begin()))
235909467b48Spatrick         return;
236009467b48Spatrick     }
236109467b48Spatrick 
236209467b48Spatrick     // Rotate will destroy the top fallthrough, we need to ensure the new exit
236309467b48Spatrick     // frequency is larger than top fallthrough.
236409467b48Spatrick     BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
236509467b48Spatrick     if (FallThrough2Top >= ExitFreq)
236609467b48Spatrick       return;
236709467b48Spatrick   }
236809467b48Spatrick 
236909467b48Spatrick   BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
237009467b48Spatrick   if (ExitIt == LoopChain.end())
237109467b48Spatrick     return;
237209467b48Spatrick 
237309467b48Spatrick   // Rotating a loop exit to the bottom when there is a fallthrough to top
237409467b48Spatrick   // trades the entry fallthrough for an exit fallthrough.
237509467b48Spatrick   // If there is no bottom->top edge, but the chosen exit block does have
237609467b48Spatrick   // a fallthrough, we break that fallthrough for nothing in return.
237709467b48Spatrick 
237809467b48Spatrick   // Let's consider an example. We have a built chain of basic blocks
237909467b48Spatrick   // B1, B2, ..., Bn, where Bk is a ExitingBB - chosen exit block.
238009467b48Spatrick   // By doing a rotation we get
238109467b48Spatrick   // Bk+1, ..., Bn, B1, ..., Bk
238209467b48Spatrick   // Break of fallthrough to B1 is compensated by a fallthrough from Bk.
238309467b48Spatrick   // If we had a fallthrough Bk -> Bk+1 it is broken now.
238409467b48Spatrick   // It might be compensated by fallthrough Bn -> B1.
238509467b48Spatrick   // So we have a condition to avoid creation of extra branch by loop rotation.
238609467b48Spatrick   // All below must be true to avoid loop rotation:
238709467b48Spatrick   //   If there is a fallthrough to top (B1)
238809467b48Spatrick   //   There was fallthrough from chosen exit block (Bk) to next one (Bk+1)
238909467b48Spatrick   //   There is no fallthrough from bottom (Bn) to top (B1).
239009467b48Spatrick   // Please note that there is no exit fallthrough from Bn because we checked it
239109467b48Spatrick   // above.
239209467b48Spatrick   if (ViableTopFallthrough) {
239309467b48Spatrick     assert(std::next(ExitIt) != LoopChain.end() &&
239409467b48Spatrick            "Exit should not be last BB");
239509467b48Spatrick     MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
239609467b48Spatrick     if (ExitingBB->isSuccessor(NextBlockInChain))
239709467b48Spatrick       if (!Bottom->isSuccessor(Top))
239809467b48Spatrick         return;
239909467b48Spatrick   }
240009467b48Spatrick 
240109467b48Spatrick   LLVM_DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB)
240209467b48Spatrick                     << " at bottom\n");
240309467b48Spatrick   std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
240409467b48Spatrick }
240509467b48Spatrick 
240609467b48Spatrick /// Attempt to rotate a loop based on profile data to reduce branch cost.
240709467b48Spatrick ///
240809467b48Spatrick /// With profile data, we can determine the cost in terms of missed fall through
240909467b48Spatrick /// opportunities when rotating a loop chain and select the best rotation.
241009467b48Spatrick /// Basically, there are three kinds of cost to consider for each rotation:
241109467b48Spatrick ///    1. The possibly missed fall through edge (if it exists) from BB out of
241209467b48Spatrick ///    the loop to the loop header.
241309467b48Spatrick ///    2. The possibly missed fall through edges (if they exist) from the loop
241409467b48Spatrick ///    exits to BB out of the loop.
241509467b48Spatrick ///    3. The missed fall through edge (if it exists) from the last BB to the
241609467b48Spatrick ///    first BB in the loop chain.
241709467b48Spatrick ///  Therefore, the cost for a given rotation is the sum of costs listed above.
241809467b48Spatrick ///  We select the best rotation with the smallest cost.
rotateLoopWithProfile(BlockChain & LoopChain,const MachineLoop & L,const BlockFilterSet & LoopBlockSet)241909467b48Spatrick void MachineBlockPlacement::rotateLoopWithProfile(
242009467b48Spatrick     BlockChain &LoopChain, const MachineLoop &L,
242109467b48Spatrick     const BlockFilterSet &LoopBlockSet) {
242209467b48Spatrick   auto RotationPos = LoopChain.end();
242373471bf0Spatrick   MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
242473471bf0Spatrick 
242573471bf0Spatrick   // The entry block should always be the first BB in a function.
242673471bf0Spatrick   if (ChainHeaderBB->isEntryBlock())
242773471bf0Spatrick     return;
242809467b48Spatrick 
242909467b48Spatrick   BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
243009467b48Spatrick 
243109467b48Spatrick   // A utility lambda that scales up a block frequency by dividing it by a
243209467b48Spatrick   // branch probability which is the reciprocal of the scale.
243309467b48Spatrick   auto ScaleBlockFrequency = [](BlockFrequency Freq,
243409467b48Spatrick                                 unsigned Scale) -> BlockFrequency {
243509467b48Spatrick     if (Scale == 0)
243609467b48Spatrick       return 0;
243709467b48Spatrick     // Use operator / between BlockFrequency and BranchProbability to implement
243809467b48Spatrick     // saturating multiplication.
243909467b48Spatrick     return Freq / BranchProbability(1, Scale);
244009467b48Spatrick   };
244109467b48Spatrick 
244209467b48Spatrick   // Compute the cost of the missed fall-through edge to the loop header if the
244309467b48Spatrick   // chain head is not the loop header. As we only consider natural loops with
244409467b48Spatrick   // single header, this computation can be done only once.
244509467b48Spatrick   BlockFrequency HeaderFallThroughCost(0);
244609467b48Spatrick   for (auto *Pred : ChainHeaderBB->predecessors()) {
244709467b48Spatrick     BlockChain *PredChain = BlockToChain[Pred];
244809467b48Spatrick     if (!LoopBlockSet.count(Pred) &&
244909467b48Spatrick         (!PredChain || Pred == *std::prev(PredChain->end()))) {
245009467b48Spatrick       auto EdgeFreq = MBFI->getBlockFreq(Pred) *
245109467b48Spatrick           MBPI->getEdgeProbability(Pred, ChainHeaderBB);
245209467b48Spatrick       auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
245309467b48Spatrick       // If the predecessor has only an unconditional jump to the header, we
245409467b48Spatrick       // need to consider the cost of this jump.
245509467b48Spatrick       if (Pred->succ_size() == 1)
245609467b48Spatrick         FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);
245709467b48Spatrick       HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
245809467b48Spatrick     }
245909467b48Spatrick   }
246009467b48Spatrick 
246109467b48Spatrick   // Here we collect all exit blocks in the loop, and for each exit we find out
246209467b48Spatrick   // its hottest exit edge. For each loop rotation, we define the loop exit cost
246309467b48Spatrick   // as the sum of frequencies of exit edges we collect here, excluding the exit
246409467b48Spatrick   // edge from the tail of the loop chain.
246509467b48Spatrick   SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq;
2466*d415bd75Srobert   for (auto *BB : LoopChain) {
246709467b48Spatrick     auto LargestExitEdgeProb = BranchProbability::getZero();
246809467b48Spatrick     for (auto *Succ : BB->successors()) {
246909467b48Spatrick       BlockChain *SuccChain = BlockToChain[Succ];
247009467b48Spatrick       if (!LoopBlockSet.count(Succ) &&
247109467b48Spatrick           (!SuccChain || Succ == *SuccChain->begin())) {
247209467b48Spatrick         auto SuccProb = MBPI->getEdgeProbability(BB, Succ);
247309467b48Spatrick         LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
247409467b48Spatrick       }
247509467b48Spatrick     }
247609467b48Spatrick     if (LargestExitEdgeProb > BranchProbability::getZero()) {
247709467b48Spatrick       auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
247809467b48Spatrick       ExitsWithFreq.emplace_back(BB, ExitFreq);
247909467b48Spatrick     }
248009467b48Spatrick   }
248109467b48Spatrick 
248209467b48Spatrick   // In this loop we iterate every block in the loop chain and calculate the
248309467b48Spatrick   // cost assuming the block is the head of the loop chain. When the loop ends,
248409467b48Spatrick   // we should have found the best candidate as the loop chain's head.
248509467b48Spatrick   for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
248609467b48Spatrick             EndIter = LoopChain.end();
248709467b48Spatrick        Iter != EndIter; Iter++, TailIter++) {
248809467b48Spatrick     // TailIter is used to track the tail of the loop chain if the block we are
248909467b48Spatrick     // checking (pointed by Iter) is the head of the chain.
249009467b48Spatrick     if (TailIter == LoopChain.end())
249109467b48Spatrick       TailIter = LoopChain.begin();
249209467b48Spatrick 
249309467b48Spatrick     auto TailBB = *TailIter;
249409467b48Spatrick 
249509467b48Spatrick     // Calculate the cost by putting this BB to the top.
249609467b48Spatrick     BlockFrequency Cost = 0;
249709467b48Spatrick 
249809467b48Spatrick     // If the current BB is the loop header, we need to take into account the
249909467b48Spatrick     // cost of the missed fall through edge from outside of the loop to the
250009467b48Spatrick     // header.
250109467b48Spatrick     if (Iter != LoopChain.begin())
250209467b48Spatrick       Cost += HeaderFallThroughCost;
250309467b48Spatrick 
250409467b48Spatrick     // Collect the loop exit cost by summing up frequencies of all exit edges
250509467b48Spatrick     // except the one from the chain tail.
250609467b48Spatrick     for (auto &ExitWithFreq : ExitsWithFreq)
250709467b48Spatrick       if (TailBB != ExitWithFreq.first)
250809467b48Spatrick         Cost += ExitWithFreq.second;
250909467b48Spatrick 
251009467b48Spatrick     // The cost of breaking the once fall-through edge from the tail to the top
251109467b48Spatrick     // of the loop chain. Here we need to consider three cases:
251209467b48Spatrick     // 1. If the tail node has only one successor, then we will get an
251309467b48Spatrick     //    additional jmp instruction. So the cost here is (MisfetchCost +
251409467b48Spatrick     //    JumpInstCost) * tail node frequency.
251509467b48Spatrick     // 2. If the tail node has two successors, then we may still get an
251609467b48Spatrick     //    additional jmp instruction if the layout successor after the loop
251709467b48Spatrick     //    chain is not its CFG successor. Note that the more frequently executed
251809467b48Spatrick     //    jmp instruction will be put ahead of the other one. Assume the
251909467b48Spatrick     //    frequency of those two branches are x and y, where x is the frequency
252009467b48Spatrick     //    of the edge to the chain head, then the cost will be
252109467b48Spatrick     //    (x * MisfetechCost + min(x, y) * JumpInstCost) * tail node frequency.
252209467b48Spatrick     // 3. If the tail node has more than two successors (this rarely happens),
252309467b48Spatrick     //    we won't consider any additional cost.
252409467b48Spatrick     if (TailBB->isSuccessor(*Iter)) {
252509467b48Spatrick       auto TailBBFreq = MBFI->getBlockFreq(TailBB);
252609467b48Spatrick       if (TailBB->succ_size() == 1)
252709467b48Spatrick         Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
252809467b48Spatrick                                     MisfetchCost + JumpInstCost);
252909467b48Spatrick       else if (TailBB->succ_size() == 2) {
253009467b48Spatrick         auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter);
253109467b48Spatrick         auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
253209467b48Spatrick         auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
253309467b48Spatrick                                   ? TailBBFreq * TailToHeadProb.getCompl()
253409467b48Spatrick                                   : TailToHeadFreq;
253509467b48Spatrick         Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) +
253609467b48Spatrick                 ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);
253709467b48Spatrick       }
253809467b48Spatrick     }
253909467b48Spatrick 
254009467b48Spatrick     LLVM_DEBUG(dbgs() << "The cost of loop rotation by making "
254109467b48Spatrick                       << getBlockName(*Iter)
254209467b48Spatrick                       << " to the top: " << Cost.getFrequency() << "\n");
254309467b48Spatrick 
254409467b48Spatrick     if (Cost < SmallestRotationCost) {
254509467b48Spatrick       SmallestRotationCost = Cost;
254609467b48Spatrick       RotationPos = Iter;
254709467b48Spatrick     }
254809467b48Spatrick   }
254909467b48Spatrick 
255009467b48Spatrick   if (RotationPos != LoopChain.end()) {
255109467b48Spatrick     LLVM_DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos)
255209467b48Spatrick                       << " to the top\n");
255309467b48Spatrick     std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
255409467b48Spatrick   }
255509467b48Spatrick }
255609467b48Spatrick 
255709467b48Spatrick /// Collect blocks in the given loop that are to be placed.
255809467b48Spatrick ///
255909467b48Spatrick /// When profile data is available, exclude cold blocks from the returned set;
256009467b48Spatrick /// otherwise, collect all blocks in the loop.
256109467b48Spatrick MachineBlockPlacement::BlockFilterSet
collectLoopBlockSet(const MachineLoop & L)256209467b48Spatrick MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
256309467b48Spatrick   BlockFilterSet LoopBlockSet;
256409467b48Spatrick 
256509467b48Spatrick   // Filter cold blocks off from LoopBlockSet when profile data is available.
256609467b48Spatrick   // Collect the sum of frequencies of incoming edges to the loop header from
256709467b48Spatrick   // outside. If we treat the loop as a super block, this is the frequency of
256809467b48Spatrick   // the loop. Then for each block in the loop, we calculate the ratio between
256909467b48Spatrick   // its frequency and the frequency of the loop block. When it is too small,
257009467b48Spatrick   // don't add it to the loop chain. If there are outer loops, then this block
257109467b48Spatrick   // will be merged into the first outer loop chain for which this block is not
257209467b48Spatrick   // cold anymore. This needs precise profile data and we only do this when
257309467b48Spatrick   // profile data is available.
257409467b48Spatrick   if (F->getFunction().hasProfileData() || ForceLoopColdBlock) {
257509467b48Spatrick     BlockFrequency LoopFreq(0);
2576*d415bd75Srobert     for (auto *LoopPred : L.getHeader()->predecessors())
257709467b48Spatrick       if (!L.contains(LoopPred))
257809467b48Spatrick         LoopFreq += MBFI->getBlockFreq(LoopPred) *
257909467b48Spatrick                     MBPI->getEdgeProbability(LoopPred, L.getHeader());
258009467b48Spatrick 
258109467b48Spatrick     for (MachineBasicBlock *LoopBB : L.getBlocks()) {
258273471bf0Spatrick       if (LoopBlockSet.count(LoopBB))
258373471bf0Spatrick         continue;
258409467b48Spatrick       auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
258509467b48Spatrick       if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
258609467b48Spatrick         continue;
258773471bf0Spatrick       BlockChain *Chain = BlockToChain[LoopBB];
258873471bf0Spatrick       for (MachineBasicBlock *ChainBB : *Chain)
258973471bf0Spatrick         LoopBlockSet.insert(ChainBB);
259009467b48Spatrick     }
259109467b48Spatrick   } else
259209467b48Spatrick     LoopBlockSet.insert(L.block_begin(), L.block_end());
259309467b48Spatrick 
259409467b48Spatrick   return LoopBlockSet;
259509467b48Spatrick }
259609467b48Spatrick 
259709467b48Spatrick /// Forms basic block chains from the natural loop structures.
259809467b48Spatrick ///
259909467b48Spatrick /// These chains are designed to preserve the existing *structure* of the code
260009467b48Spatrick /// as much as possible. We can then stitch the chains together in a way which
260109467b48Spatrick /// both preserves the topological structure and minimizes taken conditional
260209467b48Spatrick /// branches.
buildLoopChains(const MachineLoop & L)260309467b48Spatrick void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
260409467b48Spatrick   // First recurse through any nested loops, building chains for those inner
260509467b48Spatrick   // loops.
260609467b48Spatrick   for (const MachineLoop *InnerLoop : L)
260709467b48Spatrick     buildLoopChains(*InnerLoop);
260809467b48Spatrick 
260909467b48Spatrick   assert(BlockWorkList.empty() &&
261009467b48Spatrick          "BlockWorkList not empty when starting to build loop chains.");
261109467b48Spatrick   assert(EHPadWorkList.empty() &&
261209467b48Spatrick          "EHPadWorkList not empty when starting to build loop chains.");
261309467b48Spatrick   BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
261409467b48Spatrick 
261509467b48Spatrick   // Check if we have profile data for this function. If yes, we will rotate
261609467b48Spatrick   // this loop by modeling costs more precisely which requires the profile data
261709467b48Spatrick   // for better layout.
261809467b48Spatrick   bool RotateLoopWithProfile =
261909467b48Spatrick       ForcePreciseRotationCost ||
262009467b48Spatrick       (PreciseRotationCost && F->getFunction().hasProfileData());
262109467b48Spatrick 
262209467b48Spatrick   // First check to see if there is an obviously preferable top block for the
262309467b48Spatrick   // loop. This will default to the header, but may end up as one of the
262409467b48Spatrick   // predecessors to the header if there is one which will result in strictly
262509467b48Spatrick   // fewer branches in the loop body.
262609467b48Spatrick   MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
262709467b48Spatrick 
262809467b48Spatrick   // If we selected just the header for the loop top, look for a potentially
262909467b48Spatrick   // profitable exit block in the event that rotating the loop can eliminate
263009467b48Spatrick   // branches by placing an exit edge at the bottom.
263109467b48Spatrick   //
263209467b48Spatrick   // Loops are processed innermost to uttermost, make sure we clear
263309467b48Spatrick   // PreferredLoopExit before processing a new loop.
263409467b48Spatrick   PreferredLoopExit = nullptr;
263509467b48Spatrick   BlockFrequency ExitFreq;
263609467b48Spatrick   if (!RotateLoopWithProfile && LoopTop == L.getHeader())
263709467b48Spatrick     PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
263809467b48Spatrick 
263909467b48Spatrick   BlockChain &LoopChain = *BlockToChain[LoopTop];
264009467b48Spatrick 
264109467b48Spatrick   // FIXME: This is a really lame way of walking the chains in the loop: we
264209467b48Spatrick   // walk the blocks, and use a set to prevent visiting a particular chain
264309467b48Spatrick   // twice.
264409467b48Spatrick   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
264509467b48Spatrick   assert(LoopChain.UnscheduledPredecessors == 0 &&
264609467b48Spatrick          "LoopChain should not have unscheduled predecessors.");
264709467b48Spatrick   UpdatedPreds.insert(&LoopChain);
264809467b48Spatrick 
264909467b48Spatrick   for (const MachineBasicBlock *LoopBB : LoopBlockSet)
265009467b48Spatrick     fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
265109467b48Spatrick 
265209467b48Spatrick   buildChain(LoopTop, LoopChain, &LoopBlockSet);
265309467b48Spatrick 
265409467b48Spatrick   if (RotateLoopWithProfile)
265509467b48Spatrick     rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
265609467b48Spatrick   else
265709467b48Spatrick     rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
265809467b48Spatrick 
265909467b48Spatrick   LLVM_DEBUG({
266009467b48Spatrick     // Crash at the end so we get all of the debugging output first.
266109467b48Spatrick     bool BadLoop = false;
266209467b48Spatrick     if (LoopChain.UnscheduledPredecessors) {
266309467b48Spatrick       BadLoop = true;
266409467b48Spatrick       dbgs() << "Loop chain contains a block without its preds placed!\n"
266509467b48Spatrick              << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
266609467b48Spatrick              << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
266709467b48Spatrick     }
266809467b48Spatrick     for (MachineBasicBlock *ChainBB : LoopChain) {
266909467b48Spatrick       dbgs() << "          ... " << getBlockName(ChainBB) << "\n";
267009467b48Spatrick       if (!LoopBlockSet.remove(ChainBB)) {
267109467b48Spatrick         // We don't mark the loop as bad here because there are real situations
267209467b48Spatrick         // where this can occur. For example, with an unanalyzable fallthrough
267309467b48Spatrick         // from a loop block to a non-loop block or vice versa.
267409467b48Spatrick         dbgs() << "Loop chain contains a block not contained by the loop!\n"
267509467b48Spatrick                << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
267609467b48Spatrick                << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
267709467b48Spatrick                << "  Bad block:    " << getBlockName(ChainBB) << "\n";
267809467b48Spatrick       }
267909467b48Spatrick     }
268009467b48Spatrick 
268109467b48Spatrick     if (!LoopBlockSet.empty()) {
268209467b48Spatrick       BadLoop = true;
268309467b48Spatrick       for (const MachineBasicBlock *LoopBB : LoopBlockSet)
268409467b48Spatrick         dbgs() << "Loop contains blocks never placed into a chain!\n"
268509467b48Spatrick                << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
268609467b48Spatrick                << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
268709467b48Spatrick                << "  Bad block:    " << getBlockName(LoopBB) << "\n";
268809467b48Spatrick     }
268909467b48Spatrick     assert(!BadLoop && "Detected problems with the placement of this loop.");
269009467b48Spatrick   });
269109467b48Spatrick 
269209467b48Spatrick   BlockWorkList.clear();
269309467b48Spatrick   EHPadWorkList.clear();
269409467b48Spatrick }
269509467b48Spatrick 
buildCFGChains()269609467b48Spatrick void MachineBlockPlacement::buildCFGChains() {
269709467b48Spatrick   // Ensure that every BB in the function has an associated chain to simplify
269809467b48Spatrick   // the assumptions of the remaining algorithm.
2699097a140dSpatrick   SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
270009467b48Spatrick   for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
270109467b48Spatrick        ++FI) {
270209467b48Spatrick     MachineBasicBlock *BB = &*FI;
270309467b48Spatrick     BlockChain *Chain =
270409467b48Spatrick         new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
270509467b48Spatrick     // Also, merge any blocks which we cannot reason about and must preserve
270609467b48Spatrick     // the exact fallthrough behavior for.
270709467b48Spatrick     while (true) {
270809467b48Spatrick       Cond.clear();
2709097a140dSpatrick       MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
271009467b48Spatrick       if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
271109467b48Spatrick         break;
271209467b48Spatrick 
271309467b48Spatrick       MachineFunction::iterator NextFI = std::next(FI);
271409467b48Spatrick       MachineBasicBlock *NextBB = &*NextFI;
271509467b48Spatrick       // Ensure that the layout successor is a viable block, as we know that
271609467b48Spatrick       // fallthrough is a possibility.
271709467b48Spatrick       assert(NextFI != FE && "Can't fallthrough past the last block.");
271809467b48Spatrick       LLVM_DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
271909467b48Spatrick                         << getBlockName(BB) << " -> " << getBlockName(NextBB)
272009467b48Spatrick                         << "\n");
272109467b48Spatrick       Chain->merge(NextBB, nullptr);
272209467b48Spatrick #ifndef NDEBUG
272309467b48Spatrick       BlocksWithUnanalyzableExits.insert(&*BB);
272409467b48Spatrick #endif
272509467b48Spatrick       FI = NextFI;
272609467b48Spatrick       BB = NextBB;
272709467b48Spatrick     }
272809467b48Spatrick   }
272909467b48Spatrick 
273009467b48Spatrick   // Build any loop-based chains.
273109467b48Spatrick   PreferredLoopExit = nullptr;
273209467b48Spatrick   for (MachineLoop *L : *MLI)
273309467b48Spatrick     buildLoopChains(*L);
273409467b48Spatrick 
273509467b48Spatrick   assert(BlockWorkList.empty() &&
273609467b48Spatrick          "BlockWorkList should be empty before building final chain.");
273709467b48Spatrick   assert(EHPadWorkList.empty() &&
273809467b48Spatrick          "EHPadWorkList should be empty before building final chain.");
273909467b48Spatrick 
274009467b48Spatrick   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
274109467b48Spatrick   for (MachineBasicBlock &MBB : *F)
274209467b48Spatrick     fillWorkLists(&MBB, UpdatedPreds);
274309467b48Spatrick 
274409467b48Spatrick   BlockChain &FunctionChain = *BlockToChain[&F->front()];
274509467b48Spatrick   buildChain(&F->front(), FunctionChain);
274609467b48Spatrick 
274709467b48Spatrick #ifndef NDEBUG
274809467b48Spatrick   using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
274909467b48Spatrick #endif
275009467b48Spatrick   LLVM_DEBUG({
275109467b48Spatrick     // Crash at the end so we get all of the debugging output first.
275209467b48Spatrick     bool BadFunc = false;
275309467b48Spatrick     FunctionBlockSetType FunctionBlockSet;
275409467b48Spatrick     for (MachineBasicBlock &MBB : *F)
275509467b48Spatrick       FunctionBlockSet.insert(&MBB);
275609467b48Spatrick 
275709467b48Spatrick     for (MachineBasicBlock *ChainBB : FunctionChain)
275809467b48Spatrick       if (!FunctionBlockSet.erase(ChainBB)) {
275909467b48Spatrick         BadFunc = true;
276009467b48Spatrick         dbgs() << "Function chain contains a block not in the function!\n"
276109467b48Spatrick                << "  Bad block:    " << getBlockName(ChainBB) << "\n";
276209467b48Spatrick       }
276309467b48Spatrick 
276409467b48Spatrick     if (!FunctionBlockSet.empty()) {
276509467b48Spatrick       BadFunc = true;
276609467b48Spatrick       for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
276709467b48Spatrick         dbgs() << "Function contains blocks never placed into a chain!\n"
276809467b48Spatrick                << "  Bad block:    " << getBlockName(RemainingBB) << "\n";
276909467b48Spatrick     }
277009467b48Spatrick     assert(!BadFunc && "Detected problems with the block placement.");
277109467b48Spatrick   });
277209467b48Spatrick 
2773097a140dSpatrick   // Remember original layout ordering, so we can update terminators after
2774097a140dSpatrick   // reordering to point to the original layout successor.
2775097a140dSpatrick   SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(
2776097a140dSpatrick       F->getNumBlockIDs());
2777097a140dSpatrick   {
2778097a140dSpatrick     MachineBasicBlock *LastMBB = nullptr;
2779097a140dSpatrick     for (auto &MBB : *F) {
2780097a140dSpatrick       if (LastMBB != nullptr)
2781097a140dSpatrick         OriginalLayoutSuccessors[LastMBB->getNumber()] = &MBB;
2782097a140dSpatrick       LastMBB = &MBB;
2783097a140dSpatrick     }
2784097a140dSpatrick     OriginalLayoutSuccessors[F->back().getNumber()] = nullptr;
2785097a140dSpatrick   }
2786097a140dSpatrick 
278709467b48Spatrick   // Splice the blocks into place.
278809467b48Spatrick   MachineFunction::iterator InsertPos = F->begin();
278909467b48Spatrick   LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n");
279009467b48Spatrick   for (MachineBasicBlock *ChainBB : FunctionChain) {
279109467b48Spatrick     LLVM_DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
279209467b48Spatrick                                                             : "          ... ")
279309467b48Spatrick                       << getBlockName(ChainBB) << "\n");
279409467b48Spatrick     if (InsertPos != MachineFunction::iterator(ChainBB))
279509467b48Spatrick       F->splice(InsertPos, ChainBB);
279609467b48Spatrick     else
279709467b48Spatrick       ++InsertPos;
279809467b48Spatrick 
279909467b48Spatrick     // Update the terminator of the previous block.
280009467b48Spatrick     if (ChainBB == *FunctionChain.begin())
280109467b48Spatrick       continue;
280209467b48Spatrick     MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB));
280309467b48Spatrick 
280409467b48Spatrick     // FIXME: It would be awesome of updateTerminator would just return rather
280509467b48Spatrick     // than assert when the branch cannot be analyzed in order to remove this
280609467b48Spatrick     // boiler plate.
280709467b48Spatrick     Cond.clear();
2808097a140dSpatrick     MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
280909467b48Spatrick 
281009467b48Spatrick #ifndef NDEBUG
281109467b48Spatrick     if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
281209467b48Spatrick       // Given the exact block placement we chose, we may actually not _need_ to
281309467b48Spatrick       // be able to edit PrevBB's terminator sequence, but not being _able_ to
281409467b48Spatrick       // do that at this point is a bug.
281509467b48Spatrick       assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) ||
281609467b48Spatrick               !PrevBB->canFallThrough()) &&
281709467b48Spatrick              "Unexpected block with un-analyzable fallthrough!");
281809467b48Spatrick       Cond.clear();
281909467b48Spatrick       TBB = FBB = nullptr;
282009467b48Spatrick     }
282109467b48Spatrick #endif
282209467b48Spatrick 
282309467b48Spatrick     // The "PrevBB" is not yet updated to reflect current code layout, so,
282409467b48Spatrick     //   o. it may fall-through to a block without explicit "goto" instruction
282509467b48Spatrick     //      before layout, and no longer fall-through it after layout; or
282609467b48Spatrick     //   o. just opposite.
282709467b48Spatrick     //
282809467b48Spatrick     // analyzeBranch() may return erroneous value for FBB when these two
282909467b48Spatrick     // situations take place. For the first scenario FBB is mistakenly set NULL;
283009467b48Spatrick     // for the 2nd scenario, the FBB, which is expected to be NULL, is
283109467b48Spatrick     // mistakenly pointing to "*BI".
283209467b48Spatrick     // Thus, if the future change needs to use FBB before the layout is set, it
283309467b48Spatrick     // has to correct FBB first by using the code similar to the following:
283409467b48Spatrick     //
283509467b48Spatrick     // if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
283609467b48Spatrick     //   PrevBB->updateTerminator();
283709467b48Spatrick     //   Cond.clear();
283809467b48Spatrick     //   TBB = FBB = nullptr;
283909467b48Spatrick     //   if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
284009467b48Spatrick     //     // FIXME: This should never take place.
284109467b48Spatrick     //     TBB = FBB = nullptr;
284209467b48Spatrick     //   }
284309467b48Spatrick     // }
2844097a140dSpatrick     if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
2845097a140dSpatrick       PrevBB->updateTerminator(OriginalLayoutSuccessors[PrevBB->getNumber()]);
2846097a140dSpatrick     }
284709467b48Spatrick   }
284809467b48Spatrick 
284909467b48Spatrick   // Fixup the last block.
285009467b48Spatrick   Cond.clear();
2851097a140dSpatrick   MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
2852097a140dSpatrick   if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond)) {
2853097a140dSpatrick     MachineBasicBlock *PrevBB = &F->back();
2854097a140dSpatrick     PrevBB->updateTerminator(OriginalLayoutSuccessors[PrevBB->getNumber()]);
2855097a140dSpatrick   }
285609467b48Spatrick 
285709467b48Spatrick   BlockWorkList.clear();
285809467b48Spatrick   EHPadWorkList.clear();
285909467b48Spatrick }
286009467b48Spatrick 
optimizeBranches()286109467b48Spatrick void MachineBlockPlacement::optimizeBranches() {
286209467b48Spatrick   BlockChain &FunctionChain = *BlockToChain[&F->front()];
2863097a140dSpatrick   SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
286409467b48Spatrick 
286509467b48Spatrick   // Now that all the basic blocks in the chain have the proper layout,
2866097a140dSpatrick   // make a final call to analyzeBranch with AllowModify set.
286709467b48Spatrick   // Indeed, the target may be able to optimize the branches in a way we
286809467b48Spatrick   // cannot because all branches may not be analyzable.
286909467b48Spatrick   // E.g., the target may be able to remove an unconditional branch to
287009467b48Spatrick   // a fallthrough when it occurs after predicated terminators.
287109467b48Spatrick   for (MachineBasicBlock *ChainBB : FunctionChain) {
287209467b48Spatrick     Cond.clear();
2873097a140dSpatrick     MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
287409467b48Spatrick     if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) {
287509467b48Spatrick       // If PrevBB has a two-way branch, try to re-order the branches
287609467b48Spatrick       // such that we branch to the successor with higher probability first.
287709467b48Spatrick       if (TBB && !Cond.empty() && FBB &&
287809467b48Spatrick           MBPI->getEdgeProbability(ChainBB, FBB) >
287909467b48Spatrick               MBPI->getEdgeProbability(ChainBB, TBB) &&
288009467b48Spatrick           !TII->reverseBranchCondition(Cond)) {
288109467b48Spatrick         LLVM_DEBUG(dbgs() << "Reverse order of the two branches: "
288209467b48Spatrick                           << getBlockName(ChainBB) << "\n");
288309467b48Spatrick         LLVM_DEBUG(dbgs() << "    Edge probability: "
288409467b48Spatrick                           << MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
288509467b48Spatrick                           << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
288609467b48Spatrick         DebugLoc dl; // FIXME: this is nowhere
288709467b48Spatrick         TII->removeBranch(*ChainBB);
288809467b48Spatrick         TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl);
288909467b48Spatrick       }
289009467b48Spatrick     }
289109467b48Spatrick   }
289209467b48Spatrick }
289309467b48Spatrick 
alignBlocks()289409467b48Spatrick void MachineBlockPlacement::alignBlocks() {
289509467b48Spatrick   // Walk through the backedges of the function now that we have fully laid out
289609467b48Spatrick   // the basic blocks and align the destination of each backedge. We don't rely
289709467b48Spatrick   // exclusively on the loop info here so that we can align backedges in
289809467b48Spatrick   // unnatural CFGs and backedges that were introduced purely because of the
289909467b48Spatrick   // loop rotations done during this layout pass.
290009467b48Spatrick   if (F->getFunction().hasMinSize() ||
290109467b48Spatrick       (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize()))
290209467b48Spatrick     return;
290309467b48Spatrick   BlockChain &FunctionChain = *BlockToChain[&F->front()];
290409467b48Spatrick   if (FunctionChain.begin() == FunctionChain.end())
290509467b48Spatrick     return; // Empty chain.
290609467b48Spatrick 
290709467b48Spatrick   const BranchProbability ColdProb(1, 5); // 20%
290809467b48Spatrick   BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());
290909467b48Spatrick   BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
291009467b48Spatrick   for (MachineBasicBlock *ChainBB : FunctionChain) {
291109467b48Spatrick     if (ChainBB == *FunctionChain.begin())
291209467b48Spatrick       continue;
291309467b48Spatrick 
291409467b48Spatrick     // Don't align non-looping basic blocks. These are unlikely to execute
291509467b48Spatrick     // enough times to matter in practice. Note that we'll still handle
291609467b48Spatrick     // unnatural CFGs inside of a natural outer loop (the common case) and
291709467b48Spatrick     // rotated loops.
291809467b48Spatrick     MachineLoop *L = MLI->getLoopFor(ChainBB);
291909467b48Spatrick     if (!L)
292009467b48Spatrick       continue;
292109467b48Spatrick 
292209467b48Spatrick     const Align Align = TLI->getPrefLoopAlignment(L);
292309467b48Spatrick     if (Align == 1)
292409467b48Spatrick       continue; // Don't care about loop alignment.
292509467b48Spatrick 
292609467b48Spatrick     // If the block is cold relative to the function entry don't waste space
292709467b48Spatrick     // aligning it.
292809467b48Spatrick     BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
292909467b48Spatrick     if (Freq < WeightedEntryFreq)
293009467b48Spatrick       continue;
293109467b48Spatrick 
293209467b48Spatrick     // If the block is cold relative to its loop header, don't align it
293309467b48Spatrick     // regardless of what edges into the block exist.
293409467b48Spatrick     MachineBasicBlock *LoopHeader = L->getHeader();
293509467b48Spatrick     BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
293609467b48Spatrick     if (Freq < (LoopHeaderFreq * ColdProb))
293709467b48Spatrick       continue;
293809467b48Spatrick 
293909467b48Spatrick     // If the global profiles indicates so, don't align it.
2940097a140dSpatrick     if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) &&
294109467b48Spatrick         !TLI->alignLoopsWithOptSize())
294209467b48Spatrick       continue;
294309467b48Spatrick 
294409467b48Spatrick     // Check for the existence of a non-layout predecessor which would benefit
294509467b48Spatrick     // from aligning this block.
294609467b48Spatrick     MachineBasicBlock *LayoutPred =
294709467b48Spatrick         &*std::prev(MachineFunction::iterator(ChainBB));
294809467b48Spatrick 
2949*d415bd75Srobert     auto DetermineMaxAlignmentPadding = [&]() {
2950*d415bd75Srobert       // Set the maximum bytes allowed to be emitted for alignment.
2951*d415bd75Srobert       unsigned MaxBytes;
2952*d415bd75Srobert       if (MaxBytesForAlignmentOverride.getNumOccurrences() > 0)
2953*d415bd75Srobert         MaxBytes = MaxBytesForAlignmentOverride;
2954*d415bd75Srobert       else
2955*d415bd75Srobert         MaxBytes = TLI->getMaxPermittedBytesForAlignment(ChainBB);
2956*d415bd75Srobert       ChainBB->setMaxBytesForAlignment(MaxBytes);
2957*d415bd75Srobert     };
2958*d415bd75Srobert 
295909467b48Spatrick     // Force alignment if all the predecessors are jumps. We already checked
296009467b48Spatrick     // that the block isn't cold above.
296109467b48Spatrick     if (!LayoutPred->isSuccessor(ChainBB)) {
296209467b48Spatrick       ChainBB->setAlignment(Align);
2963*d415bd75Srobert       DetermineMaxAlignmentPadding();
296409467b48Spatrick       continue;
296509467b48Spatrick     }
296609467b48Spatrick 
296709467b48Spatrick     // Align this block if the layout predecessor's edge into this block is
296809467b48Spatrick     // cold relative to the block. When this is true, other predecessors make up
296909467b48Spatrick     // all of the hot entries into the block and thus alignment is likely to be
297009467b48Spatrick     // important.
297109467b48Spatrick     BranchProbability LayoutProb =
297209467b48Spatrick         MBPI->getEdgeProbability(LayoutPred, ChainBB);
297309467b48Spatrick     BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
2974*d415bd75Srobert     if (LayoutEdgeFreq <= (Freq * ColdProb)) {
297509467b48Spatrick       ChainBB->setAlignment(Align);
2976*d415bd75Srobert       DetermineMaxAlignmentPadding();
2977*d415bd75Srobert     }
297809467b48Spatrick   }
297909467b48Spatrick }
298009467b48Spatrick 
298109467b48Spatrick /// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
298209467b48Spatrick /// it was duplicated into its chain predecessor and removed.
298309467b48Spatrick /// \p BB    - Basic block that may be duplicated.
298409467b48Spatrick ///
298509467b48Spatrick /// \p LPred - Chosen layout predecessor of \p BB.
298609467b48Spatrick ///            Updated to be the chain end if LPred is removed.
298709467b48Spatrick /// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
298809467b48Spatrick /// \p BlockFilter - Set of blocks that belong to the loop being laid out.
298909467b48Spatrick ///                  Used to identify which blocks to update predecessor
299009467b48Spatrick ///                  counts.
299109467b48Spatrick /// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
299209467b48Spatrick ///                          chosen in the given order due to unnatural CFG
299309467b48Spatrick ///                          only needed if \p BB is removed and
299409467b48Spatrick ///                          \p PrevUnplacedBlockIt pointed to \p BB.
299509467b48Spatrick /// @return true if \p BB was removed.
repeatedlyTailDuplicateBlock(MachineBasicBlock * BB,MachineBasicBlock * & LPred,const MachineBasicBlock * LoopHeaderBB,BlockChain & Chain,BlockFilterSet * BlockFilter,MachineFunction::iterator & PrevUnplacedBlockIt)299609467b48Spatrick bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
299709467b48Spatrick     MachineBasicBlock *BB, MachineBasicBlock *&LPred,
299809467b48Spatrick     const MachineBasicBlock *LoopHeaderBB,
299909467b48Spatrick     BlockChain &Chain, BlockFilterSet *BlockFilter,
300009467b48Spatrick     MachineFunction::iterator &PrevUnplacedBlockIt) {
300109467b48Spatrick   bool Removed, DuplicatedToLPred;
300209467b48Spatrick   bool DuplicatedToOriginalLPred;
300309467b48Spatrick   Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
300409467b48Spatrick                                     PrevUnplacedBlockIt,
300509467b48Spatrick                                     DuplicatedToLPred);
300609467b48Spatrick   if (!Removed)
300709467b48Spatrick     return false;
300809467b48Spatrick   DuplicatedToOriginalLPred = DuplicatedToLPred;
300909467b48Spatrick   // Iteratively try to duplicate again. It can happen that a block that is
301009467b48Spatrick   // duplicated into is still small enough to be duplicated again.
301109467b48Spatrick   // No need to call markBlockSuccessors in this case, as the blocks being
301209467b48Spatrick   // duplicated from here on are already scheduled.
3013097a140dSpatrick   while (DuplicatedToLPred && Removed) {
301409467b48Spatrick     MachineBasicBlock *DupBB, *DupPred;
301509467b48Spatrick     // The removal callback causes Chain.end() to be updated when a block is
301609467b48Spatrick     // removed. On the first pass through the loop, the chain end should be the
301709467b48Spatrick     // same as it was on function entry. On subsequent passes, because we are
301809467b48Spatrick     // duplicating the block at the end of the chain, if it is removed the
301909467b48Spatrick     // chain will have shrunk by one block.
302009467b48Spatrick     BlockChain::iterator ChainEnd = Chain.end();
302109467b48Spatrick     DupBB = *(--ChainEnd);
302209467b48Spatrick     // Now try to duplicate again.
302309467b48Spatrick     if (ChainEnd == Chain.begin())
302409467b48Spatrick       break;
302509467b48Spatrick     DupPred = *std::prev(ChainEnd);
302609467b48Spatrick     Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
302709467b48Spatrick                                       PrevUnplacedBlockIt,
302809467b48Spatrick                                       DuplicatedToLPred);
302909467b48Spatrick   }
303009467b48Spatrick   // If BB was duplicated into LPred, it is now scheduled. But because it was
303109467b48Spatrick   // removed, markChainSuccessors won't be called for its chain. Instead we
303209467b48Spatrick   // call markBlockSuccessors for LPred to achieve the same effect. This must go
303309467b48Spatrick   // at the end because repeating the tail duplication can increase the number
303409467b48Spatrick   // of unscheduled predecessors.
303509467b48Spatrick   LPred = *std::prev(Chain.end());
303609467b48Spatrick   if (DuplicatedToOriginalLPred)
303709467b48Spatrick     markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
303809467b48Spatrick   return true;
303909467b48Spatrick }
304009467b48Spatrick 
304109467b48Spatrick /// Tail duplicate \p BB into (some) predecessors if profitable.
304209467b48Spatrick /// \p BB    - Basic block that may be duplicated
304309467b48Spatrick /// \p LPred - Chosen layout predecessor of \p BB
304409467b48Spatrick /// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
304509467b48Spatrick /// \p BlockFilter - Set of blocks that belong to the loop being laid out.
304609467b48Spatrick ///                  Used to identify which blocks to update predecessor
304709467b48Spatrick ///                  counts.
304809467b48Spatrick /// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
304909467b48Spatrick ///                          chosen in the given order due to unnatural CFG
305009467b48Spatrick ///                          only needed if \p BB is removed and
305109467b48Spatrick ///                          \p PrevUnplacedBlockIt pointed to \p BB.
3052097a140dSpatrick /// \p DuplicatedToLPred - True if the block was duplicated into LPred.
305309467b48Spatrick /// \return  - True if the block was duplicated into all preds and removed.
maybeTailDuplicateBlock(MachineBasicBlock * BB,MachineBasicBlock * LPred,BlockChain & Chain,BlockFilterSet * BlockFilter,MachineFunction::iterator & PrevUnplacedBlockIt,bool & DuplicatedToLPred)305409467b48Spatrick bool MachineBlockPlacement::maybeTailDuplicateBlock(
305509467b48Spatrick     MachineBasicBlock *BB, MachineBasicBlock *LPred,
305609467b48Spatrick     BlockChain &Chain, BlockFilterSet *BlockFilter,
305709467b48Spatrick     MachineFunction::iterator &PrevUnplacedBlockIt,
305809467b48Spatrick     bool &DuplicatedToLPred) {
305909467b48Spatrick   DuplicatedToLPred = false;
306009467b48Spatrick   if (!shouldTailDuplicate(BB))
306109467b48Spatrick     return false;
306209467b48Spatrick 
306309467b48Spatrick   LLVM_DEBUG(dbgs() << "Redoing tail duplication for Succ#" << BB->getNumber()
306409467b48Spatrick                     << "\n");
306509467b48Spatrick 
306609467b48Spatrick   // This has to be a callback because none of it can be done after
306709467b48Spatrick   // BB is deleted.
306809467b48Spatrick   bool Removed = false;
306909467b48Spatrick   auto RemovalCallback =
307009467b48Spatrick       [&](MachineBasicBlock *RemBB) {
307109467b48Spatrick         // Signal to outer function
307209467b48Spatrick         Removed = true;
307309467b48Spatrick 
307409467b48Spatrick         // Conservative default.
307509467b48Spatrick         bool InWorkList = true;
307609467b48Spatrick         // Remove from the Chain and Chain Map
307709467b48Spatrick         if (BlockToChain.count(RemBB)) {
307809467b48Spatrick           BlockChain *Chain = BlockToChain[RemBB];
307909467b48Spatrick           InWorkList = Chain->UnscheduledPredecessors == 0;
308009467b48Spatrick           Chain->remove(RemBB);
308109467b48Spatrick           BlockToChain.erase(RemBB);
308209467b48Spatrick         }
308309467b48Spatrick 
308409467b48Spatrick         // Handle the unplaced block iterator
308509467b48Spatrick         if (&(*PrevUnplacedBlockIt) == RemBB) {
308609467b48Spatrick           PrevUnplacedBlockIt++;
308709467b48Spatrick         }
308809467b48Spatrick 
308909467b48Spatrick         // Handle the Work Lists
309009467b48Spatrick         if (InWorkList) {
309109467b48Spatrick           SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
309209467b48Spatrick           if (RemBB->isEHPad())
309309467b48Spatrick             RemoveList = EHPadWorkList;
309473471bf0Spatrick           llvm::erase_value(RemoveList, RemBB);
309509467b48Spatrick         }
309609467b48Spatrick 
309709467b48Spatrick         // Handle the filter set
309809467b48Spatrick         if (BlockFilter) {
309909467b48Spatrick           BlockFilter->remove(RemBB);
310009467b48Spatrick         }
310109467b48Spatrick 
310209467b48Spatrick         // Remove the block from loop info.
310309467b48Spatrick         MLI->removeBlock(RemBB);
310409467b48Spatrick         if (RemBB == PreferredLoopExit)
310509467b48Spatrick           PreferredLoopExit = nullptr;
310609467b48Spatrick 
310709467b48Spatrick         LLVM_DEBUG(dbgs() << "TailDuplicator deleted block: "
310809467b48Spatrick                           << getBlockName(RemBB) << "\n");
310909467b48Spatrick       };
311009467b48Spatrick   auto RemovalCallbackRef =
311109467b48Spatrick       function_ref<void(MachineBasicBlock*)>(RemovalCallback);
311209467b48Spatrick 
311309467b48Spatrick   SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
311409467b48Spatrick   bool IsSimple = TailDup.isSimpleBB(BB);
3115097a140dSpatrick   SmallVector<MachineBasicBlock *, 8> CandidatePreds;
3116097a140dSpatrick   SmallVectorImpl<MachineBasicBlock *> *CandidatePtr = nullptr;
3117097a140dSpatrick   if (F->getFunction().hasProfileData()) {
3118097a140dSpatrick     // We can do partial duplication with precise profile information.
3119097a140dSpatrick     findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3120097a140dSpatrick     if (CandidatePreds.size() == 0)
3121097a140dSpatrick       return false;
3122097a140dSpatrick     if (CandidatePreds.size() < BB->pred_size())
3123097a140dSpatrick       CandidatePtr = &CandidatePreds;
3124097a140dSpatrick   }
3125097a140dSpatrick   TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred, &DuplicatedPreds,
3126097a140dSpatrick                                  &RemovalCallbackRef, CandidatePtr);
312709467b48Spatrick 
312809467b48Spatrick   // Update UnscheduledPredecessors to reflect tail-duplication.
312909467b48Spatrick   DuplicatedToLPred = false;
313009467b48Spatrick   for (MachineBasicBlock *Pred : DuplicatedPreds) {
313109467b48Spatrick     // We're only looking for unscheduled predecessors that match the filter.
313209467b48Spatrick     BlockChain* PredChain = BlockToChain[Pred];
313309467b48Spatrick     if (Pred == LPred)
313409467b48Spatrick       DuplicatedToLPred = true;
313509467b48Spatrick     if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
313609467b48Spatrick         || PredChain == &Chain)
313709467b48Spatrick       continue;
313809467b48Spatrick     for (MachineBasicBlock *NewSucc : Pred->successors()) {
313909467b48Spatrick       if (BlockFilter && !BlockFilter->count(NewSucc))
314009467b48Spatrick         continue;
314109467b48Spatrick       BlockChain *NewChain = BlockToChain[NewSucc];
314209467b48Spatrick       if (NewChain != &Chain && NewChain != PredChain)
314309467b48Spatrick         NewChain->UnscheduledPredecessors++;
314409467b48Spatrick     }
314509467b48Spatrick   }
314609467b48Spatrick   return Removed;
314709467b48Spatrick }
314809467b48Spatrick 
3149097a140dSpatrick // Count the number of actual machine instructions.
countMBBInstruction(MachineBasicBlock * MBB)3150097a140dSpatrick static uint64_t countMBBInstruction(MachineBasicBlock *MBB) {
3151097a140dSpatrick   uint64_t InstrCount = 0;
3152097a140dSpatrick   for (MachineInstr &MI : *MBB) {
3153097a140dSpatrick     if (!MI.isPHI() && !MI.isMetaInstruction())
3154097a140dSpatrick       InstrCount += 1;
3155097a140dSpatrick   }
3156097a140dSpatrick   return InstrCount;
3157097a140dSpatrick }
3158097a140dSpatrick 
3159097a140dSpatrick // The size cost of duplication is the instruction size of the duplicated block.
3160097a140dSpatrick // So we should scale the threshold accordingly. But the instruction size is not
3161097a140dSpatrick // available on all targets, so we use the number of instructions instead.
scaleThreshold(MachineBasicBlock * BB)3162097a140dSpatrick BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
3163097a140dSpatrick   return DupThreshold.getFrequency() * countMBBInstruction(BB);
3164097a140dSpatrick }
3165097a140dSpatrick 
3166097a140dSpatrick // Returns true if BB is Pred's best successor.
isBestSuccessor(MachineBasicBlock * BB,MachineBasicBlock * Pred,BlockFilterSet * BlockFilter)3167097a140dSpatrick bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
3168097a140dSpatrick                                             MachineBasicBlock *Pred,
3169097a140dSpatrick                                             BlockFilterSet *BlockFilter) {
3170097a140dSpatrick   if (BB == Pred)
3171097a140dSpatrick     return false;
3172097a140dSpatrick   if (BlockFilter && !BlockFilter->count(Pred))
3173097a140dSpatrick     return false;
3174097a140dSpatrick   BlockChain *PredChain = BlockToChain[Pred];
3175097a140dSpatrick   if (PredChain && (Pred != *std::prev(PredChain->end())))
3176097a140dSpatrick     return false;
3177097a140dSpatrick 
3178097a140dSpatrick   // Find the successor with largest probability excluding BB.
3179097a140dSpatrick   BranchProbability BestProb = BranchProbability::getZero();
3180097a140dSpatrick   for (MachineBasicBlock *Succ : Pred->successors())
3181097a140dSpatrick     if (Succ != BB) {
3182097a140dSpatrick       if (BlockFilter && !BlockFilter->count(Succ))
3183097a140dSpatrick         continue;
3184097a140dSpatrick       BlockChain *SuccChain = BlockToChain[Succ];
3185097a140dSpatrick       if (SuccChain && (Succ != *SuccChain->begin()))
3186097a140dSpatrick         continue;
3187097a140dSpatrick       BranchProbability SuccProb = MBPI->getEdgeProbability(Pred, Succ);
3188097a140dSpatrick       if (SuccProb > BestProb)
3189097a140dSpatrick         BestProb = SuccProb;
3190097a140dSpatrick     }
3191097a140dSpatrick 
3192097a140dSpatrick   BranchProbability BBProb = MBPI->getEdgeProbability(Pred, BB);
3193097a140dSpatrick   if (BBProb <= BestProb)
3194097a140dSpatrick     return false;
3195097a140dSpatrick 
3196097a140dSpatrick   // Compute the number of reduced taken branches if Pred falls through to BB
3197097a140dSpatrick   // instead of another successor. Then compare it with threshold.
319873471bf0Spatrick   BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3199097a140dSpatrick   BlockFrequency Gain = PredFreq * (BBProb - BestProb);
3200097a140dSpatrick   return Gain > scaleThreshold(BB);
3201097a140dSpatrick }
3202097a140dSpatrick 
3203097a140dSpatrick // Find out the predecessors of BB and BB can be beneficially duplicated into
3204097a140dSpatrick // them.
findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock * > & Candidates,MachineBasicBlock * BB,BlockFilterSet * BlockFilter)3205097a140dSpatrick void MachineBlockPlacement::findDuplicateCandidates(
3206097a140dSpatrick     SmallVectorImpl<MachineBasicBlock *> &Candidates,
3207097a140dSpatrick     MachineBasicBlock *BB,
3208097a140dSpatrick     BlockFilterSet *BlockFilter) {
3209097a140dSpatrick   MachineBasicBlock *Fallthrough = nullptr;
3210097a140dSpatrick   BranchProbability DefaultBranchProb = BranchProbability::getZero();
3211097a140dSpatrick   BlockFrequency BBDupThreshold(scaleThreshold(BB));
321273471bf0Spatrick   SmallVector<MachineBasicBlock *, 8> Preds(BB->predecessors());
321373471bf0Spatrick   SmallVector<MachineBasicBlock *, 8> Succs(BB->successors());
3214097a140dSpatrick 
3215097a140dSpatrick   // Sort for highest frequency.
3216097a140dSpatrick   auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
3217097a140dSpatrick     return MBPI->getEdgeProbability(BB, A) > MBPI->getEdgeProbability(BB, B);
3218097a140dSpatrick   };
3219097a140dSpatrick   auto CmpPred = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
3220097a140dSpatrick     return MBFI->getBlockFreq(A) > MBFI->getBlockFreq(B);
3221097a140dSpatrick   };
3222097a140dSpatrick   llvm::stable_sort(Succs, CmpSucc);
3223097a140dSpatrick   llvm::stable_sort(Preds, CmpPred);
3224097a140dSpatrick 
3225097a140dSpatrick   auto SuccIt = Succs.begin();
3226097a140dSpatrick   if (SuccIt != Succs.end()) {
3227097a140dSpatrick     DefaultBranchProb = MBPI->getEdgeProbability(BB, *SuccIt).getCompl();
3228097a140dSpatrick   }
3229097a140dSpatrick 
3230097a140dSpatrick   // For each predecessors of BB, compute the benefit of duplicating BB,
3231097a140dSpatrick   // if it is larger than the threshold, add it into Candidates.
3232097a140dSpatrick   //
3233097a140dSpatrick   // If we have following control flow.
3234097a140dSpatrick   //
3235097a140dSpatrick   //     PB1 PB2 PB3 PB4
3236097a140dSpatrick   //      \   |  /    /\
3237097a140dSpatrick   //       \  | /    /  \
3238097a140dSpatrick   //        \ |/    /    \
3239097a140dSpatrick   //         BB----/     OB
3240097a140dSpatrick   //         /\
3241097a140dSpatrick   //        /  \
3242097a140dSpatrick   //      SB1 SB2
3243097a140dSpatrick   //
3244097a140dSpatrick   // And it can be partially duplicated as
3245097a140dSpatrick   //
3246097a140dSpatrick   //   PB2+BB
3247097a140dSpatrick   //      |  PB1 PB3 PB4
3248097a140dSpatrick   //      |   |  /    /\
3249097a140dSpatrick   //      |   | /    /  \
3250097a140dSpatrick   //      |   |/    /    \
3251097a140dSpatrick   //      |  BB----/     OB
3252097a140dSpatrick   //      |\ /|
3253097a140dSpatrick   //      | X |
3254097a140dSpatrick   //      |/ \|
3255097a140dSpatrick   //     SB2 SB1
3256097a140dSpatrick   //
3257097a140dSpatrick   // The benefit of duplicating into a predecessor is defined as
3258097a140dSpatrick   //         Orig_taken_branch - Duplicated_taken_branch
3259097a140dSpatrick   //
3260097a140dSpatrick   // The Orig_taken_branch is computed with the assumption that predecessor
3261097a140dSpatrick   // jumps to BB and the most possible successor is laid out after BB.
3262097a140dSpatrick   //
3263097a140dSpatrick   // The Duplicated_taken_branch is computed with the assumption that BB is
3264097a140dSpatrick   // duplicated into PB, and one successor is layout after it (SB1 for PB1 and
3265097a140dSpatrick   // SB2 for PB2 in our case). If there is no available successor, the combined
3266097a140dSpatrick   // block jumps to all BB's successor, like PB3 in this example.
3267097a140dSpatrick   //
3268097a140dSpatrick   // If a predecessor has multiple successors, so BB can't be duplicated into
3269097a140dSpatrick   // it. But it can beneficially fall through to BB, and duplicate BB into other
3270097a140dSpatrick   // predecessors.
3271097a140dSpatrick   for (MachineBasicBlock *Pred : Preds) {
327273471bf0Spatrick     BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3273097a140dSpatrick 
3274097a140dSpatrick     if (!TailDup.canTailDuplicate(BB, Pred)) {
3275097a140dSpatrick       // BB can't be duplicated into Pred, but it is possible to be layout
3276097a140dSpatrick       // below Pred.
3277097a140dSpatrick       if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3278097a140dSpatrick         Fallthrough = Pred;
3279097a140dSpatrick         if (SuccIt != Succs.end())
3280097a140dSpatrick           SuccIt++;
3281097a140dSpatrick       }
3282097a140dSpatrick       continue;
3283097a140dSpatrick     }
3284097a140dSpatrick 
3285097a140dSpatrick     BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3286097a140dSpatrick     BlockFrequency DupCost;
3287097a140dSpatrick     if (SuccIt == Succs.end()) {
3288097a140dSpatrick       // Jump to all successors;
3289097a140dSpatrick       if (Succs.size() > 0)
3290097a140dSpatrick         DupCost += PredFreq;
3291097a140dSpatrick     } else {
3292097a140dSpatrick       // Fallthrough to *SuccIt, jump to all other successors;
3293097a140dSpatrick       DupCost += PredFreq;
3294097a140dSpatrick       DupCost -= PredFreq * MBPI->getEdgeProbability(BB, *SuccIt);
3295097a140dSpatrick     }
3296097a140dSpatrick 
3297097a140dSpatrick     assert(OrigCost >= DupCost);
3298097a140dSpatrick     OrigCost -= DupCost;
3299097a140dSpatrick     if (OrigCost > BBDupThreshold) {
3300097a140dSpatrick       Candidates.push_back(Pred);
3301097a140dSpatrick       if (SuccIt != Succs.end())
3302097a140dSpatrick         SuccIt++;
3303097a140dSpatrick     }
3304097a140dSpatrick   }
3305097a140dSpatrick 
3306097a140dSpatrick   // No predecessors can optimally fallthrough to BB.
3307097a140dSpatrick   // So we can change one duplication into fallthrough.
3308097a140dSpatrick   if (!Fallthrough) {
3309097a140dSpatrick     if ((Candidates.size() < Preds.size()) && (Candidates.size() > 0)) {
3310097a140dSpatrick       Candidates[0] = Candidates.back();
3311097a140dSpatrick       Candidates.pop_back();
3312097a140dSpatrick     }
3313097a140dSpatrick   }
3314097a140dSpatrick }
3315097a140dSpatrick 
initDupThreshold()3316097a140dSpatrick void MachineBlockPlacement::initDupThreshold() {
3317097a140dSpatrick   DupThreshold = 0;
3318097a140dSpatrick   if (!F->getFunction().hasProfileData())
3319097a140dSpatrick     return;
3320097a140dSpatrick 
332173471bf0Spatrick   // We prefer to use prifile count.
332273471bf0Spatrick   uint64_t HotThreshold = PSI->getOrCompHotCountThreshold();
332373471bf0Spatrick   if (HotThreshold != UINT64_MAX) {
332473471bf0Spatrick     UseProfileCount = true;
332573471bf0Spatrick     DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100;
332673471bf0Spatrick     return;
332773471bf0Spatrick   }
332873471bf0Spatrick 
332973471bf0Spatrick   // Profile count is not available, we can use block frequency instead.
3330097a140dSpatrick   BlockFrequency MaxFreq = 0;
3331097a140dSpatrick   for (MachineBasicBlock &MBB : *F) {
3332097a140dSpatrick     BlockFrequency Freq = MBFI->getBlockFreq(&MBB);
3333097a140dSpatrick     if (Freq > MaxFreq)
3334097a140dSpatrick       MaxFreq = Freq;
3335097a140dSpatrick   }
3336097a140dSpatrick 
3337097a140dSpatrick   BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
3338097a140dSpatrick   DupThreshold = MaxFreq * ThresholdProb;
333973471bf0Spatrick   UseProfileCount = false;
3340097a140dSpatrick }
3341097a140dSpatrick 
runOnMachineFunction(MachineFunction & MF)334209467b48Spatrick bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
334309467b48Spatrick   if (skipFunction(MF.getFunction()))
334409467b48Spatrick     return false;
334509467b48Spatrick 
334609467b48Spatrick   // Check for single-block functions and skip them.
334709467b48Spatrick   if (std::next(MF.begin()) == MF.end())
334809467b48Spatrick     return false;
334909467b48Spatrick 
335009467b48Spatrick   F = &MF;
335109467b48Spatrick   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
3352097a140dSpatrick   MBFI = std::make_unique<MBFIWrapper>(
335309467b48Spatrick       getAnalysis<MachineBlockFrequencyInfo>());
335409467b48Spatrick   MLI = &getAnalysis<MachineLoopInfo>();
335509467b48Spatrick   TII = MF.getSubtarget().getInstrInfo();
335609467b48Spatrick   TLI = MF.getSubtarget().getTargetLowering();
335709467b48Spatrick   MPDT = nullptr;
335809467b48Spatrick   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
335909467b48Spatrick 
3360097a140dSpatrick   initDupThreshold();
3361097a140dSpatrick 
336209467b48Spatrick   // Initialize PreferredLoopExit to nullptr here since it may never be set if
336309467b48Spatrick   // there are no MachineLoops.
336409467b48Spatrick   PreferredLoopExit = nullptr;
336509467b48Spatrick 
336609467b48Spatrick   assert(BlockToChain.empty() &&
336709467b48Spatrick          "BlockToChain map should be empty before starting placement.");
336809467b48Spatrick   assert(ComputedEdges.empty() &&
336909467b48Spatrick          "Computed Edge map should be empty before starting placement.");
337009467b48Spatrick 
337109467b48Spatrick   unsigned TailDupSize = TailDupPlacementThreshold;
337209467b48Spatrick   // If only the aggressive threshold is explicitly set, use it.
337309467b48Spatrick   if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 &&
337409467b48Spatrick       TailDupPlacementThreshold.getNumOccurrences() == 0)
337509467b48Spatrick     TailDupSize = TailDupPlacementAggressiveThreshold;
337609467b48Spatrick 
337709467b48Spatrick   TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
337809467b48Spatrick   // For aggressive optimization, we can adjust some thresholds to be less
337909467b48Spatrick   // conservative.
338009467b48Spatrick   if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
338109467b48Spatrick     // At O3 we should be more willing to copy blocks for tail duplication. This
338209467b48Spatrick     // increases size pressure, so we only do it at O3
338309467b48Spatrick     // Do this unless only the regular threshold is explicitly set.
338409467b48Spatrick     if (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
338509467b48Spatrick         TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0)
338609467b48Spatrick       TailDupSize = TailDupPlacementAggressiveThreshold;
338709467b48Spatrick   }
338809467b48Spatrick 
338973471bf0Spatrick   // If there's no threshold provided through options, query the target
339073471bf0Spatrick   // information for a threshold instead.
339173471bf0Spatrick   if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
339273471bf0Spatrick       (PassConfig->getOptLevel() < CodeGenOpt::Aggressive ||
339373471bf0Spatrick        TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
339473471bf0Spatrick     TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
339573471bf0Spatrick 
339609467b48Spatrick   if (allowTailDupPlacement()) {
339709467b48Spatrick     MPDT = &getAnalysis<MachinePostDominatorTree>();
339809467b48Spatrick     bool OptForSize = MF.getFunction().hasOptSize() ||
339909467b48Spatrick                       llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
340009467b48Spatrick     if (OptForSize)
340109467b48Spatrick       TailDupSize = 1;
340209467b48Spatrick     bool PreRegAlloc = false;
3403097a140dSpatrick     TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
340409467b48Spatrick                    /* LayoutMode */ true, TailDupSize);
340509467b48Spatrick     precomputeTriangleChains();
340609467b48Spatrick   }
340709467b48Spatrick 
340809467b48Spatrick   buildCFGChains();
340909467b48Spatrick 
341009467b48Spatrick   // Changing the layout can create new tail merging opportunities.
341109467b48Spatrick   // TailMerge can create jump into if branches that make CFG irreducible for
341209467b48Spatrick   // HW that requires structured CFG.
341309467b48Spatrick   bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
341409467b48Spatrick                          PassConfig->getEnableTailMerge() &&
341509467b48Spatrick                          BranchFoldPlacement;
341609467b48Spatrick   // No tail merging opportunities if the block number is less than four.
341709467b48Spatrick   if (MF.size() > 3 && EnableTailMerge) {
341809467b48Spatrick     unsigned TailMergeSize = TailDupSize + 1;
341973471bf0Spatrick     BranchFolder BF(/*DefaultEnableTailMerge=*/true, /*CommonHoist=*/false,
342073471bf0Spatrick                     *MBFI, *MBPI, PSI, TailMergeSize);
342109467b48Spatrick 
3422097a140dSpatrick     if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI,
342309467b48Spatrick                             /*AfterPlacement=*/true)) {
342409467b48Spatrick       // Redo the layout if tail merging creates/removes/moves blocks.
342509467b48Spatrick       BlockToChain.clear();
342609467b48Spatrick       ComputedEdges.clear();
342709467b48Spatrick       // Must redo the post-dominator tree if blocks were changed.
342809467b48Spatrick       if (MPDT)
342909467b48Spatrick         MPDT->runOnMachineFunction(MF);
343009467b48Spatrick       ChainAllocator.DestroyAll();
343109467b48Spatrick       buildCFGChains();
343209467b48Spatrick     }
343309467b48Spatrick   }
343409467b48Spatrick 
3435*d415bd75Srobert   // Apply a post-processing optimizing block placement.
3436*d415bd75Srobert   if (MF.size() >= 3 && EnableExtTspBlockPlacement &&
3437*d415bd75Srobert       (ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) {
3438*d415bd75Srobert     // Find a new placement and modify the layout of the blocks in the function.
3439*d415bd75Srobert     applyExtTsp();
3440*d415bd75Srobert 
3441*d415bd75Srobert     // Re-create CFG chain so that we can optimizeBranches and alignBlocks.
3442*d415bd75Srobert     createCFGChainExtTsp();
3443*d415bd75Srobert   }
3444*d415bd75Srobert 
344509467b48Spatrick   optimizeBranches();
344609467b48Spatrick   alignBlocks();
344709467b48Spatrick 
344809467b48Spatrick   BlockToChain.clear();
344909467b48Spatrick   ComputedEdges.clear();
345009467b48Spatrick   ChainAllocator.DestroyAll();
345109467b48Spatrick 
3452*d415bd75Srobert   bool HasMaxBytesOverride =
3453*d415bd75Srobert       MaxBytesForAlignmentOverride.getNumOccurrences() > 0;
3454*d415bd75Srobert 
345509467b48Spatrick   if (AlignAllBlock)
345609467b48Spatrick     // Align all of the blocks in the function to a specific alignment.
3457*d415bd75Srobert     for (MachineBasicBlock &MBB : MF) {
3458*d415bd75Srobert       if (HasMaxBytesOverride)
3459*d415bd75Srobert         MBB.setAlignment(Align(1ULL << AlignAllBlock),
3460*d415bd75Srobert                          MaxBytesForAlignmentOverride);
3461*d415bd75Srobert       else
346209467b48Spatrick         MBB.setAlignment(Align(1ULL << AlignAllBlock));
3463*d415bd75Srobert     }
346409467b48Spatrick   else if (AlignAllNonFallThruBlocks) {
346509467b48Spatrick     // Align all of the blocks that have no fall-through predecessors to a
346609467b48Spatrick     // specific alignment.
346709467b48Spatrick     for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
346809467b48Spatrick       auto LayoutPred = std::prev(MBI);
3469*d415bd75Srobert       if (!LayoutPred->isSuccessor(&*MBI)) {
3470*d415bd75Srobert         if (HasMaxBytesOverride)
3471*d415bd75Srobert           MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks),
3472*d415bd75Srobert                             MaxBytesForAlignmentOverride);
3473*d415bd75Srobert         else
347409467b48Spatrick           MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks));
347509467b48Spatrick       }
347609467b48Spatrick     }
3477*d415bd75Srobert   }
347809467b48Spatrick   if (ViewBlockLayoutWithBFI != GVDT_None &&
347909467b48Spatrick       (ViewBlockFreqFuncName.empty() ||
348009467b48Spatrick        F->getFunction().getName().equals(ViewBlockFreqFuncName))) {
3481*d415bd75Srobert     if (RenumberBlocksBeforeView)
3482*d415bd75Srobert       MF.RenumberBlocks();
348309467b48Spatrick     MBFI->view("MBP." + MF.getName(), false);
348409467b48Spatrick   }
348509467b48Spatrick 
348609467b48Spatrick   // We always return true as we have no way to track whether the final order
348709467b48Spatrick   // differs from the original order.
348809467b48Spatrick   return true;
348909467b48Spatrick }
349009467b48Spatrick 
applyExtTsp()3491*d415bd75Srobert void MachineBlockPlacement::applyExtTsp() {
3492*d415bd75Srobert   // Prepare data; blocks are indexed by their index in the current ordering.
3493*d415bd75Srobert   DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
3494*d415bd75Srobert   BlockIndex.reserve(F->size());
3495*d415bd75Srobert   std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3496*d415bd75Srobert   CurrentBlockOrder.reserve(F->size());
3497*d415bd75Srobert   size_t NumBlocks = 0;
3498*d415bd75Srobert   for (const MachineBasicBlock &MBB : *F) {
3499*d415bd75Srobert     BlockIndex[&MBB] = NumBlocks++;
3500*d415bd75Srobert     CurrentBlockOrder.push_back(&MBB);
3501*d415bd75Srobert   }
3502*d415bd75Srobert 
3503*d415bd75Srobert   auto BlockSizes = std::vector<uint64_t>(F->size());
3504*d415bd75Srobert   auto BlockCounts = std::vector<uint64_t>(F->size());
3505*d415bd75Srobert   std::vector<EdgeCountT> JumpCounts;
3506*d415bd75Srobert   for (MachineBasicBlock &MBB : *F) {
3507*d415bd75Srobert     // Getting the block frequency.
3508*d415bd75Srobert     BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
3509*d415bd75Srobert     BlockCounts[BlockIndex[&MBB]] = BlockFreq.getFrequency();
3510*d415bd75Srobert     // Getting the block size:
3511*d415bd75Srobert     // - approximate the size of an instruction by 4 bytes, and
3512*d415bd75Srobert     // - ignore debug instructions.
3513*d415bd75Srobert     // Note: getting the exact size of each block is target-dependent and can be
3514*d415bd75Srobert     // done by extending the interface of MCCodeEmitter. Experimentally we do
3515*d415bd75Srobert     // not see a perf improvement with the exact block sizes.
3516*d415bd75Srobert     auto NonDbgInsts =
3517*d415bd75Srobert         instructionsWithoutDebug(MBB.instr_begin(), MBB.instr_end());
3518*d415bd75Srobert     int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3519*d415bd75Srobert     BlockSizes[BlockIndex[&MBB]] = 4 * NumInsts;
3520*d415bd75Srobert     // Getting jump frequencies.
3521*d415bd75Srobert     for (MachineBasicBlock *Succ : MBB.successors()) {
3522*d415bd75Srobert       auto EP = MBPI->getEdgeProbability(&MBB, Succ);
3523*d415bd75Srobert       BlockFrequency JumpFreq = BlockFreq * EP;
3524*d415bd75Srobert       auto Jump = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]);
3525*d415bd75Srobert       JumpCounts.push_back(std::make_pair(Jump, JumpFreq.getFrequency()));
3526*d415bd75Srobert     }
3527*d415bd75Srobert   }
3528*d415bd75Srobert 
3529*d415bd75Srobert   LLVM_DEBUG(dbgs() << "Applying ext-tsp layout for |V| = " << F->size()
3530*d415bd75Srobert                     << " with profile = " << F->getFunction().hasProfileData()
3531*d415bd75Srobert                     << " (" << F->getName().str() << ")"
3532*d415bd75Srobert                     << "\n");
3533*d415bd75Srobert   LLVM_DEBUG(
3534*d415bd75Srobert       dbgs() << format("  original  layout score: %0.2f\n",
3535*d415bd75Srobert                        calcExtTspScore(BlockSizes, BlockCounts, JumpCounts)));
3536*d415bd75Srobert 
3537*d415bd75Srobert   // Run the layout algorithm.
3538*d415bd75Srobert   auto NewOrder = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
3539*d415bd75Srobert   std::vector<const MachineBasicBlock *> NewBlockOrder;
3540*d415bd75Srobert   NewBlockOrder.reserve(F->size());
3541*d415bd75Srobert   for (uint64_t Node : NewOrder) {
3542*d415bd75Srobert     NewBlockOrder.push_back(CurrentBlockOrder[Node]);
3543*d415bd75Srobert   }
3544*d415bd75Srobert   LLVM_DEBUG(dbgs() << format("  optimized layout score: %0.2f\n",
3545*d415bd75Srobert                               calcExtTspScore(NewOrder, BlockSizes, BlockCounts,
3546*d415bd75Srobert                                               JumpCounts)));
3547*d415bd75Srobert 
3548*d415bd75Srobert   // Assign new block order.
3549*d415bd75Srobert   assignBlockOrder(NewBlockOrder);
3550*d415bd75Srobert }
3551*d415bd75Srobert 
assignBlockOrder(const std::vector<const MachineBasicBlock * > & NewBlockOrder)3552*d415bd75Srobert void MachineBlockPlacement::assignBlockOrder(
3553*d415bd75Srobert     const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3554*d415bd75Srobert   assert(F->size() == NewBlockOrder.size() && "Incorrect size of block order");
3555*d415bd75Srobert   F->RenumberBlocks();
3556*d415bd75Srobert 
3557*d415bd75Srobert   bool HasChanges = false;
3558*d415bd75Srobert   for (size_t I = 0; I < NewBlockOrder.size(); I++) {
3559*d415bd75Srobert     if (NewBlockOrder[I] != F->getBlockNumbered(I)) {
3560*d415bd75Srobert       HasChanges = true;
3561*d415bd75Srobert       break;
3562*d415bd75Srobert     }
3563*d415bd75Srobert   }
3564*d415bd75Srobert   // Stop early if the new block order is identical to the existing one.
3565*d415bd75Srobert   if (!HasChanges)
3566*d415bd75Srobert     return;
3567*d415bd75Srobert 
3568*d415bd75Srobert   SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(F->getNumBlockIDs());
3569*d415bd75Srobert   for (auto &MBB : *F) {
3570*d415bd75Srobert     PrevFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
3571*d415bd75Srobert   }
3572*d415bd75Srobert 
3573*d415bd75Srobert   // Sort basic blocks in the function according to the computed order.
3574*d415bd75Srobert   DenseMap<const MachineBasicBlock *, size_t> NewIndex;
3575*d415bd75Srobert   for (const MachineBasicBlock *MBB : NewBlockOrder) {
3576*d415bd75Srobert     NewIndex[MBB] = NewIndex.size();
3577*d415bd75Srobert   }
3578*d415bd75Srobert   F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
3579*d415bd75Srobert     return NewIndex[&L] < NewIndex[&R];
3580*d415bd75Srobert   });
3581*d415bd75Srobert 
3582*d415bd75Srobert   // Update basic block branches by inserting explicit fallthrough branches
3583*d415bd75Srobert   // when required and re-optimize branches when possible.
3584*d415bd75Srobert   const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo();
3585*d415bd75Srobert   SmallVector<MachineOperand, 4> Cond;
3586*d415bd75Srobert   for (auto &MBB : *F) {
3587*d415bd75Srobert     MachineFunction::iterator NextMBB = std::next(MBB.getIterator());
3588*d415bd75Srobert     MachineFunction::iterator EndIt = MBB.getParent()->end();
3589*d415bd75Srobert     auto *FTMBB = PrevFallThroughs[MBB.getNumber()];
3590*d415bd75Srobert     // If this block had a fallthrough before we need an explicit unconditional
3591*d415bd75Srobert     // branch to that block if the fallthrough block is not adjacent to the
3592*d415bd75Srobert     // block in the new order.
3593*d415bd75Srobert     if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3594*d415bd75Srobert       TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc());
3595*d415bd75Srobert     }
3596*d415bd75Srobert 
3597*d415bd75Srobert     // It might be possible to optimize branches by flipping the condition.
3598*d415bd75Srobert     Cond.clear();
3599*d415bd75Srobert     MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
3600*d415bd75Srobert     if (TII->analyzeBranch(MBB, TBB, FBB, Cond))
3601*d415bd75Srobert       continue;
3602*d415bd75Srobert     MBB.updateTerminator(FTMBB);
3603*d415bd75Srobert   }
3604*d415bd75Srobert 
3605*d415bd75Srobert #ifndef NDEBUG
3606*d415bd75Srobert   // Make sure we correctly constructed all branches.
3607*d415bd75Srobert   F->verify(this, "After optimized block reordering");
3608*d415bd75Srobert #endif
3609*d415bd75Srobert }
3610*d415bd75Srobert 
createCFGChainExtTsp()3611*d415bd75Srobert void MachineBlockPlacement::createCFGChainExtTsp() {
3612*d415bd75Srobert   BlockToChain.clear();
3613*d415bd75Srobert   ComputedEdges.clear();
3614*d415bd75Srobert   ChainAllocator.DestroyAll();
3615*d415bd75Srobert 
3616*d415bd75Srobert   MachineBasicBlock *HeadBB = &F->front();
3617*d415bd75Srobert   BlockChain *FunctionChain =
3618*d415bd75Srobert       new (ChainAllocator.Allocate()) BlockChain(BlockToChain, HeadBB);
3619*d415bd75Srobert 
3620*d415bd75Srobert   for (MachineBasicBlock &MBB : *F) {
3621*d415bd75Srobert     if (HeadBB == &MBB)
3622*d415bd75Srobert       continue; // Ignore head of the chain
3623*d415bd75Srobert     FunctionChain->merge(&MBB, nullptr);
3624*d415bd75Srobert   }
3625*d415bd75Srobert }
3626*d415bd75Srobert 
362709467b48Spatrick namespace {
362809467b48Spatrick 
362909467b48Spatrick /// A pass to compute block placement statistics.
363009467b48Spatrick ///
363109467b48Spatrick /// A separate pass to compute interesting statistics for evaluating block
363209467b48Spatrick /// placement. This is separate from the actual placement pass so that they can
363309467b48Spatrick /// be computed in the absence of any placement transformations or when using
363409467b48Spatrick /// alternative placement strategies.
363509467b48Spatrick class MachineBlockPlacementStats : public MachineFunctionPass {
363609467b48Spatrick   /// A handle to the branch probability pass.
363709467b48Spatrick   const MachineBranchProbabilityInfo *MBPI;
363809467b48Spatrick 
363909467b48Spatrick   /// A handle to the function-wide block frequency pass.
364009467b48Spatrick   const MachineBlockFrequencyInfo *MBFI;
364109467b48Spatrick 
364209467b48Spatrick public:
364309467b48Spatrick   static char ID; // Pass identification, replacement for typeid
364409467b48Spatrick 
MachineBlockPlacementStats()364509467b48Spatrick   MachineBlockPlacementStats() : MachineFunctionPass(ID) {
364609467b48Spatrick     initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
364709467b48Spatrick   }
364809467b48Spatrick 
364909467b48Spatrick   bool runOnMachineFunction(MachineFunction &F) override;
365009467b48Spatrick 
getAnalysisUsage(AnalysisUsage & AU) const365109467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
365209467b48Spatrick     AU.addRequired<MachineBranchProbabilityInfo>();
365309467b48Spatrick     AU.addRequired<MachineBlockFrequencyInfo>();
365409467b48Spatrick     AU.setPreservesAll();
365509467b48Spatrick     MachineFunctionPass::getAnalysisUsage(AU);
365609467b48Spatrick   }
365709467b48Spatrick };
365809467b48Spatrick 
365909467b48Spatrick } // end anonymous namespace
366009467b48Spatrick 
366109467b48Spatrick char MachineBlockPlacementStats::ID = 0;
366209467b48Spatrick 
366309467b48Spatrick char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
366409467b48Spatrick 
366509467b48Spatrick INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
366609467b48Spatrick                       "Basic Block Placement Stats", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)366709467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
366809467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
366909467b48Spatrick INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
367009467b48Spatrick                     "Basic Block Placement Stats", false, false)
367109467b48Spatrick 
367209467b48Spatrick bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
367309467b48Spatrick   // Check for single-block functions and skip them.
367409467b48Spatrick   if (std::next(F.begin()) == F.end())
367509467b48Spatrick     return false;
367609467b48Spatrick 
3677*d415bd75Srobert   if (!isFunctionInPrintList(F.getName()))
3678*d415bd75Srobert     return false;
3679*d415bd75Srobert 
368009467b48Spatrick   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
368109467b48Spatrick   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
368209467b48Spatrick 
368309467b48Spatrick   for (MachineBasicBlock &MBB : F) {
368409467b48Spatrick     BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
368509467b48Spatrick     Statistic &NumBranches =
368609467b48Spatrick         (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
368709467b48Spatrick     Statistic &BranchTakenFreq =
368809467b48Spatrick         (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
368909467b48Spatrick     for (MachineBasicBlock *Succ : MBB.successors()) {
369009467b48Spatrick       // Skip if this successor is a fallthrough.
369109467b48Spatrick       if (MBB.isLayoutSuccessor(Succ))
369209467b48Spatrick         continue;
369309467b48Spatrick 
369409467b48Spatrick       BlockFrequency EdgeFreq =
369509467b48Spatrick           BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
369609467b48Spatrick       ++NumBranches;
369709467b48Spatrick       BranchTakenFreq += EdgeFreq.getFrequency();
369809467b48Spatrick     }
369909467b48Spatrick   }
370009467b48Spatrick 
370109467b48Spatrick   return false;
370209467b48Spatrick }
3703