xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeLayout.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10eae32dcSDimitry Andric //===- CodeLayout.cpp - Implementation of code layout algorithms ----------===//
20eae32dcSDimitry Andric //
30eae32dcSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40eae32dcSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50eae32dcSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60eae32dcSDimitry Andric //
70eae32dcSDimitry Andric //===----------------------------------------------------------------------===//
80eae32dcSDimitry Andric //
906c3fb27SDimitry Andric // The file implements "cache-aware" layout algorithms of basic blocks and
1006c3fb27SDimitry Andric // functions in a binary.
110eae32dcSDimitry Andric //
120eae32dcSDimitry Andric // The algorithm tries to find a layout of nodes (basic blocks) of a given CFG
130eae32dcSDimitry Andric // optimizing jump locality and thus processor I-cache utilization. This is
140eae32dcSDimitry Andric // achieved via increasing the number of fall-through jumps and co-locating
150eae32dcSDimitry Andric // frequently executed nodes together. The name follows the underlying
160eae32dcSDimitry Andric // optimization problem, Extended-TSP, which is a generalization of classical
170eae32dcSDimitry Andric // (maximum) Traveling Salesmen Problem.
180eae32dcSDimitry Andric //
190eae32dcSDimitry Andric // The algorithm is a greedy heuristic that works with chains (ordered lists)
200eae32dcSDimitry Andric // of basic blocks. Initially all chains are isolated basic blocks. On every
210eae32dcSDimitry Andric // iteration, we pick a pair of chains whose merging yields the biggest increase
220eae32dcSDimitry Andric // in the ExtTSP score, which models how i-cache "friendly" a specific chain is.
230eae32dcSDimitry Andric // A pair of chains giving the maximum gain is merged into a new chain. The
240eae32dcSDimitry Andric // procedure stops when there is only one chain left, or when merging does not
250eae32dcSDimitry Andric // increase ExtTSP. In the latter case, the remaining chains are sorted by
260eae32dcSDimitry Andric // density in the decreasing order.
270eae32dcSDimitry Andric //
280eae32dcSDimitry Andric // An important aspect is the way two chains are merged. Unlike earlier
290eae32dcSDimitry Andric // algorithms (e.g., based on the approach of Pettis-Hansen), two
300eae32dcSDimitry Andric // chains, X and Y, are first split into three, X1, X2, and Y. Then we
310eae32dcSDimitry Andric // consider all possible ways of gluing the three chains (e.g., X1YX2, X1X2Y,
320eae32dcSDimitry Andric // X2X1Y, X2YX1, YX1X2, YX2X1) and choose the one producing the largest score.
330eae32dcSDimitry Andric // This improves the quality of the final result (the search space is larger)
340eae32dcSDimitry Andric // while keeping the implementation sufficiently fast.
350eae32dcSDimitry Andric //
360eae32dcSDimitry Andric // Reference:
370eae32dcSDimitry Andric //   * A. Newell and S. Pupyrev, Improved Basic Block Reordering,
380eae32dcSDimitry Andric //     IEEE Transactions on Computers, 2020
39bdd1243dSDimitry Andric //     https://arxiv.org/abs/1809.04676
400eae32dcSDimitry Andric //
410eae32dcSDimitry Andric //===----------------------------------------------------------------------===//
420eae32dcSDimitry Andric 
430eae32dcSDimitry Andric #include "llvm/Transforms/Utils/CodeLayout.h"
440eae32dcSDimitry Andric #include "llvm/Support/CommandLine.h"
4506c3fb27SDimitry Andric #include "llvm/Support/Debug.h"
460eae32dcSDimitry Andric 
47bdd1243dSDimitry Andric #include <cmath>
48*5f757f3fSDimitry Andric #include <set>
49bdd1243dSDimitry Andric 
500eae32dcSDimitry Andric using namespace llvm;
51*5f757f3fSDimitry Andric using namespace llvm::codelayout;
52*5f757f3fSDimitry Andric 
530eae32dcSDimitry Andric #define DEBUG_TYPE "code-layout"
540eae32dcSDimitry Andric 
5506c3fb27SDimitry Andric namespace llvm {
5681ad6265SDimitry Andric cl::opt<bool> EnableExtTspBlockPlacement(
5781ad6265SDimitry Andric     "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
5881ad6265SDimitry Andric     cl::desc("Enable machine block placement based on the ext-tsp model, "
5981ad6265SDimitry Andric              "optimizing I-cache utilization."));
6081ad6265SDimitry Andric 
6181ad6265SDimitry Andric cl::opt<bool> ApplyExtTspWithoutProfile(
6281ad6265SDimitry Andric     "ext-tsp-apply-without-profile",
6381ad6265SDimitry Andric     cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
6481ad6265SDimitry Andric     cl::init(true), cl::Hidden);
6506c3fb27SDimitry Andric } // namespace llvm
6681ad6265SDimitry Andric 
67*5f757f3fSDimitry Andric // Algorithm-specific params for Ext-TSP. The values are tuned for the best
68*5f757f3fSDimitry Andric // performance of large-scale front-end bound binaries.
69bdd1243dSDimitry Andric static cl::opt<double> ForwardWeightCond(
70bdd1243dSDimitry Andric     "ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1),
71bdd1243dSDimitry Andric     cl::desc("The weight of conditional forward jumps for ExtTSP value"));
720eae32dcSDimitry Andric 
73bdd1243dSDimitry Andric static cl::opt<double> ForwardWeightUncond(
74bdd1243dSDimitry Andric     "ext-tsp-forward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
75bdd1243dSDimitry Andric     cl::desc("The weight of unconditional forward jumps for ExtTSP value"));
76bdd1243dSDimitry Andric 
77bdd1243dSDimitry Andric static cl::opt<double> BackwardWeightCond(
78bdd1243dSDimitry Andric     "ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1),
7906c3fb27SDimitry Andric     cl::desc("The weight of conditional backward jumps for ExtTSP value"));
80bdd1243dSDimitry Andric 
81bdd1243dSDimitry Andric static cl::opt<double> BackwardWeightUncond(
82bdd1243dSDimitry Andric     "ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
8306c3fb27SDimitry Andric     cl::desc("The weight of unconditional backward jumps for ExtTSP value"));
84bdd1243dSDimitry Andric 
85bdd1243dSDimitry Andric static cl::opt<double> FallthroughWeightCond(
86bdd1243dSDimitry Andric     "ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0),
87bdd1243dSDimitry Andric     cl::desc("The weight of conditional fallthrough jumps for ExtTSP value"));
88bdd1243dSDimitry Andric 
89bdd1243dSDimitry Andric static cl::opt<double> FallthroughWeightUncond(
90bdd1243dSDimitry Andric     "ext-tsp-fallthrough-weight-uncond", cl::ReallyHidden, cl::init(1.05),
91bdd1243dSDimitry Andric     cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value"));
920eae32dcSDimitry Andric 
930eae32dcSDimitry Andric static cl::opt<unsigned> ForwardDistance(
94bdd1243dSDimitry Andric     "ext-tsp-forward-distance", cl::ReallyHidden, cl::init(1024),
950eae32dcSDimitry Andric     cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"));
960eae32dcSDimitry Andric 
970eae32dcSDimitry Andric static cl::opt<unsigned> BackwardDistance(
98bdd1243dSDimitry Andric     "ext-tsp-backward-distance", cl::ReallyHidden, cl::init(640),
990eae32dcSDimitry Andric     cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
1000eae32dcSDimitry Andric 
10181ad6265SDimitry Andric // The maximum size of a chain created by the algorithm. The size is bounded
102*5f757f3fSDimitry Andric // so that the algorithm can efficiently process extremely large instances.
10381ad6265SDimitry Andric static cl::opt<unsigned>
104*5f757f3fSDimitry Andric     MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512),
105*5f757f3fSDimitry Andric                  cl::desc("The maximum size of a chain to create"));
10681ad6265SDimitry Andric 
1070eae32dcSDimitry Andric // The maximum size of a chain for splitting. Larger values of the threshold
1080eae32dcSDimitry Andric // may yield better quality at the cost of worsen run-time.
1090eae32dcSDimitry Andric static cl::opt<unsigned> ChainSplitThreshold(
110bdd1243dSDimitry Andric     "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
1110eae32dcSDimitry Andric     cl::desc("The maximum size of a chain to apply splitting"));
1120eae32dcSDimitry Andric 
113*5f757f3fSDimitry Andric // The maximum ratio between densities of two chains for merging.
114*5f757f3fSDimitry Andric static cl::opt<double> MaxMergeDensityRatio(
115*5f757f3fSDimitry Andric     "ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(100),
116*5f757f3fSDimitry Andric     cl::desc("The maximum ratio between densities of two chains for merging"));
117*5f757f3fSDimitry Andric 
118*5f757f3fSDimitry Andric // Algorithm-specific options for CDSort.
119*5f757f3fSDimitry Andric static cl::opt<unsigned> CacheEntries("cdsort-cache-entries", cl::ReallyHidden,
120*5f757f3fSDimitry Andric                                       cl::desc("The size of the cache"));
121*5f757f3fSDimitry Andric 
122*5f757f3fSDimitry Andric static cl::opt<unsigned> CacheSize("cdsort-cache-size", cl::ReallyHidden,
123*5f757f3fSDimitry Andric                                    cl::desc("The size of a line in the cache"));
124*5f757f3fSDimitry Andric 
125*5f757f3fSDimitry Andric static cl::opt<unsigned>
126*5f757f3fSDimitry Andric     CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden,
127*5f757f3fSDimitry Andric                    cl::desc("The maximum size of a chain to create"));
128*5f757f3fSDimitry Andric 
129*5f757f3fSDimitry Andric static cl::opt<double> DistancePower(
130*5f757f3fSDimitry Andric     "cdsort-distance-power", cl::ReallyHidden,
131*5f757f3fSDimitry Andric     cl::desc("The power exponent for the distance-based locality"));
132*5f757f3fSDimitry Andric 
133*5f757f3fSDimitry Andric static cl::opt<double> FrequencyScale(
134*5f757f3fSDimitry Andric     "cdsort-frequency-scale", cl::ReallyHidden,
135*5f757f3fSDimitry Andric     cl::desc("The scale factor for the frequency-based locality"));
1360eae32dcSDimitry Andric 
1370eae32dcSDimitry Andric namespace {
1380eae32dcSDimitry Andric 
1390eae32dcSDimitry Andric // Epsilon for comparison of doubles.
1400eae32dcSDimitry Andric constexpr double EPS = 1e-8;
1410eae32dcSDimitry Andric 
142bdd1243dSDimitry Andric // Compute the Ext-TSP score for a given jump.
jumpExtTSPScore(uint64_t JumpDist,uint64_t JumpMaxDist,uint64_t Count,double Weight)143bdd1243dSDimitry Andric double jumpExtTSPScore(uint64_t JumpDist, uint64_t JumpMaxDist, uint64_t Count,
144bdd1243dSDimitry Andric                        double Weight) {
145bdd1243dSDimitry Andric   if (JumpDist > JumpMaxDist)
146bdd1243dSDimitry Andric     return 0;
147bdd1243dSDimitry Andric   double Prob = 1.0 - static_cast<double>(JumpDist) / JumpMaxDist;
148bdd1243dSDimitry Andric   return Weight * Prob * Count;
149bdd1243dSDimitry Andric }
150bdd1243dSDimitry Andric 
1510eae32dcSDimitry Andric // Compute the Ext-TSP score for a jump between a given pair of blocks,
1520eae32dcSDimitry Andric // using their sizes, (estimated) addresses and the jump execution count.
extTSPScore(uint64_t SrcAddr,uint64_t SrcSize,uint64_t DstAddr,uint64_t Count,bool IsConditional)1530eae32dcSDimitry Andric double extTSPScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr,
154bdd1243dSDimitry Andric                    uint64_t Count, bool IsConditional) {
1550eae32dcSDimitry Andric   // Fallthrough
1560eae32dcSDimitry Andric   if (SrcAddr + SrcSize == DstAddr) {
157bdd1243dSDimitry Andric     return jumpExtTSPScore(0, 1, Count,
158bdd1243dSDimitry Andric                            IsConditional ? FallthroughWeightCond
159bdd1243dSDimitry Andric                                          : FallthroughWeightUncond);
1600eae32dcSDimitry Andric   }
1610eae32dcSDimitry Andric   // Forward
1620eae32dcSDimitry Andric   if (SrcAddr + SrcSize < DstAddr) {
163bdd1243dSDimitry Andric     const uint64_t Dist = DstAddr - (SrcAddr + SrcSize);
164bdd1243dSDimitry Andric     return jumpExtTSPScore(Dist, ForwardDistance, Count,
165bdd1243dSDimitry Andric                            IsConditional ? ForwardWeightCond
166bdd1243dSDimitry Andric                                          : ForwardWeightUncond);
1670eae32dcSDimitry Andric   }
1680eae32dcSDimitry Andric   // Backward
169bdd1243dSDimitry Andric   const uint64_t Dist = SrcAddr + SrcSize - DstAddr;
170bdd1243dSDimitry Andric   return jumpExtTSPScore(Dist, BackwardDistance, Count,
171bdd1243dSDimitry Andric                          IsConditional ? BackwardWeightCond
172bdd1243dSDimitry Andric                                        : BackwardWeightUncond);
1730eae32dcSDimitry Andric }
1740eae32dcSDimitry Andric 
1750eae32dcSDimitry Andric /// A type of merging two chains, X and Y. The former chain is split into
1760eae32dcSDimitry Andric /// X1 and X2 and then concatenated with Y in the order specified by the type.
17706c3fb27SDimitry Andric enum class MergeTypeT : int { X_Y, Y_X, X1_Y_X2, Y_X2_X1, X2_X1_Y };
1780eae32dcSDimitry Andric 
1790eae32dcSDimitry Andric /// The gain of merging two chains, that is, the Ext-TSP score of the merge
18006c3fb27SDimitry Andric /// together with the corresponding merge 'type' and 'offset'.
18106c3fb27SDimitry Andric struct MergeGainT {
18206c3fb27SDimitry Andric   explicit MergeGainT() = default;
MergeGainT__anonf633aad90111::MergeGainT18306c3fb27SDimitry Andric   explicit MergeGainT(double Score, size_t MergeOffset, MergeTypeT MergeType)
1840eae32dcSDimitry Andric       : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}
1850eae32dcSDimitry Andric 
score__anonf633aad90111::MergeGainT1860eae32dcSDimitry Andric   double score() const { return Score; }
1870eae32dcSDimitry Andric 
mergeOffset__anonf633aad90111::MergeGainT1880eae32dcSDimitry Andric   size_t mergeOffset() const { return MergeOffset; }
1890eae32dcSDimitry Andric 
mergeType__anonf633aad90111::MergeGainT19006c3fb27SDimitry Andric   MergeTypeT mergeType() const { return MergeType; }
19106c3fb27SDimitry Andric 
setMergeType__anonf633aad90111::MergeGainT19206c3fb27SDimitry Andric   void setMergeType(MergeTypeT Ty) { MergeType = Ty; }
1930eae32dcSDimitry Andric 
1940eae32dcSDimitry Andric   // Returns 'true' iff Other is preferred over this.
operator <__anonf633aad90111::MergeGainT19506c3fb27SDimitry Andric   bool operator<(const MergeGainT &Other) const {
1960eae32dcSDimitry Andric     return (Other.Score > EPS && Other.Score > Score + EPS);
1970eae32dcSDimitry Andric   }
1980eae32dcSDimitry Andric 
1990eae32dcSDimitry Andric   // Update the current gain if Other is preferred over this.
updateIfLessThan__anonf633aad90111::MergeGainT20006c3fb27SDimitry Andric   void updateIfLessThan(const MergeGainT &Other) {
2010eae32dcSDimitry Andric     if (*this < Other)
2020eae32dcSDimitry Andric       *this = Other;
2030eae32dcSDimitry Andric   }
2040eae32dcSDimitry Andric 
2050eae32dcSDimitry Andric private:
2060eae32dcSDimitry Andric   double Score{-1.0};
2070eae32dcSDimitry Andric   size_t MergeOffset{0};
20806c3fb27SDimitry Andric   MergeTypeT MergeType{MergeTypeT::X_Y};
2090eae32dcSDimitry Andric };
2100eae32dcSDimitry Andric 
21106c3fb27SDimitry Andric struct JumpT;
21206c3fb27SDimitry Andric struct ChainT;
21306c3fb27SDimitry Andric struct ChainEdge;
2140eae32dcSDimitry Andric 
21506c3fb27SDimitry Andric /// A node in the graph, typically corresponding to a basic block in the CFG or
21606c3fb27SDimitry Andric /// a function in the call graph.
21706c3fb27SDimitry Andric struct NodeT {
21806c3fb27SDimitry Andric   NodeT(const NodeT &) = delete;
21906c3fb27SDimitry Andric   NodeT(NodeT &&) = default;
22006c3fb27SDimitry Andric   NodeT &operator=(const NodeT &) = delete;
22106c3fb27SDimitry Andric   NodeT &operator=(NodeT &&) = default;
2220eae32dcSDimitry Andric 
NodeT__anonf633aad90111::NodeT223*5f757f3fSDimitry Andric   explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)
224*5f757f3fSDimitry Andric       : Index(Index), Size(Size), ExecutionCount(Count) {}
22506c3fb27SDimitry Andric 
isEntry__anonf633aad90111::NodeT2260eae32dcSDimitry Andric   bool isEntry() const { return Index == 0; }
22706c3fb27SDimitry Andric 
228*5f757f3fSDimitry Andric   // Check if Other is a successor of the node.
229*5f757f3fSDimitry Andric   bool isSuccessor(const NodeT *Other) const;
230*5f757f3fSDimitry Andric 
23106c3fb27SDimitry Andric   // The total execution count of outgoing jumps.
23206c3fb27SDimitry Andric   uint64_t outCount() const;
23306c3fb27SDimitry Andric 
23406c3fb27SDimitry Andric   // The total execution count of incoming jumps.
23506c3fb27SDimitry Andric   uint64_t inCount() const;
23606c3fb27SDimitry Andric 
23706c3fb27SDimitry Andric   // The original index of the node in graph.
23806c3fb27SDimitry Andric   size_t Index{0};
23906c3fb27SDimitry Andric   // The index of the node in the current chain.
24006c3fb27SDimitry Andric   size_t CurIndex{0};
24106c3fb27SDimitry Andric   // The size of the node in the binary.
24206c3fb27SDimitry Andric   uint64_t Size{0};
24306c3fb27SDimitry Andric   // The execution count of the node in the profile data.
24406c3fb27SDimitry Andric   uint64_t ExecutionCount{0};
24506c3fb27SDimitry Andric   // The current chain of the node.
24606c3fb27SDimitry Andric   ChainT *CurChain{nullptr};
24706c3fb27SDimitry Andric   // The offset of the node in the current chain.
24806c3fb27SDimitry Andric   mutable uint64_t EstimatedAddr{0};
24906c3fb27SDimitry Andric   // Forced successor of the node in the graph.
25006c3fb27SDimitry Andric   NodeT *ForcedSucc{nullptr};
25106c3fb27SDimitry Andric   // Forced predecessor of the node in the graph.
25206c3fb27SDimitry Andric   NodeT *ForcedPred{nullptr};
25306c3fb27SDimitry Andric   // Outgoing jumps from the node.
25406c3fb27SDimitry Andric   std::vector<JumpT *> OutJumps;
25506c3fb27SDimitry Andric   // Incoming jumps to the node.
25606c3fb27SDimitry Andric   std::vector<JumpT *> InJumps;
2570eae32dcSDimitry Andric };
2580eae32dcSDimitry Andric 
25906c3fb27SDimitry Andric /// An arc in the graph, typically corresponding to a jump between two nodes.
26006c3fb27SDimitry Andric struct JumpT {
26106c3fb27SDimitry Andric   JumpT(const JumpT &) = delete;
26206c3fb27SDimitry Andric   JumpT(JumpT &&) = default;
26306c3fb27SDimitry Andric   JumpT &operator=(const JumpT &) = delete;
26406c3fb27SDimitry Andric   JumpT &operator=(JumpT &&) = default;
2650eae32dcSDimitry Andric 
JumpT__anonf633aad90111::JumpT26606c3fb27SDimitry Andric   explicit JumpT(NodeT *Source, NodeT *Target, uint64_t ExecutionCount)
26706c3fb27SDimitry Andric       : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {}
26806c3fb27SDimitry Andric 
26906c3fb27SDimitry Andric   // Source node of the jump.
27006c3fb27SDimitry Andric   NodeT *Source;
27106c3fb27SDimitry Andric   // Target node of the jump.
27206c3fb27SDimitry Andric   NodeT *Target;
2730eae32dcSDimitry Andric   // Execution count of the arc in the profile data.
2740eae32dcSDimitry Andric   uint64_t ExecutionCount{0};
275bdd1243dSDimitry Andric   // Whether the jump corresponds to a conditional branch.
276bdd1243dSDimitry Andric   bool IsConditional{false};
27706c3fb27SDimitry Andric   // The offset of the jump from the source node.
27806c3fb27SDimitry Andric   uint64_t Offset{0};
2790eae32dcSDimitry Andric };
2800eae32dcSDimitry Andric 
28106c3fb27SDimitry Andric /// A chain (ordered sequence) of nodes in the graph.
28206c3fb27SDimitry Andric struct ChainT {
28306c3fb27SDimitry Andric   ChainT(const ChainT &) = delete;
28406c3fb27SDimitry Andric   ChainT(ChainT &&) = default;
28506c3fb27SDimitry Andric   ChainT &operator=(const ChainT &) = delete;
28606c3fb27SDimitry Andric   ChainT &operator=(ChainT &&) = default;
2870eae32dcSDimitry Andric 
ChainT__anonf633aad90111::ChainT28806c3fb27SDimitry Andric   explicit ChainT(uint64_t Id, NodeT *Node)
28906c3fb27SDimitry Andric       : Id(Id), ExecutionCount(Node->ExecutionCount), Size(Node->Size),
29006c3fb27SDimitry Andric         Nodes(1, Node) {}
2910eae32dcSDimitry Andric 
numBlocks__anonf633aad90111::ChainT29206c3fb27SDimitry Andric   size_t numBlocks() const { return Nodes.size(); }
2930eae32dcSDimitry Andric 
density__anonf633aad90111::ChainT294*5f757f3fSDimitry Andric   double density() const { return ExecutionCount / Size; }
29506c3fb27SDimitry Andric 
isEntry__anonf633aad90111::ChainT29606c3fb27SDimitry Andric   bool isEntry() const { return Nodes[0]->Index == 0; }
2970eae32dcSDimitry Andric 
isCold__anonf633aad90111::ChainT298bdd1243dSDimitry Andric   bool isCold() const {
29906c3fb27SDimitry Andric     for (NodeT *Node : Nodes) {
30006c3fb27SDimitry Andric       if (Node->ExecutionCount > 0)
301bdd1243dSDimitry Andric         return false;
302bdd1243dSDimitry Andric     }
303bdd1243dSDimitry Andric     return true;
304bdd1243dSDimitry Andric   }
305bdd1243dSDimitry Andric 
getEdge__anonf633aad90111::ChainT30606c3fb27SDimitry Andric   ChainEdge *getEdge(ChainT *Other) const {
307*5f757f3fSDimitry Andric     for (const auto &[Chain, ChainEdge] : Edges) {
308*5f757f3fSDimitry Andric       if (Chain == Other)
309*5f757f3fSDimitry Andric         return ChainEdge;
3100eae32dcSDimitry Andric     }
3110eae32dcSDimitry Andric     return nullptr;
3120eae32dcSDimitry Andric   }
3130eae32dcSDimitry Andric 
removeEdge__anonf633aad90111::ChainT31406c3fb27SDimitry Andric   void removeEdge(ChainT *Other) {
3150eae32dcSDimitry Andric     auto It = Edges.begin();
3160eae32dcSDimitry Andric     while (It != Edges.end()) {
3170eae32dcSDimitry Andric       if (It->first == Other) {
3180eae32dcSDimitry Andric         Edges.erase(It);
3190eae32dcSDimitry Andric         return;
3200eae32dcSDimitry Andric       }
3210eae32dcSDimitry Andric       It++;
3220eae32dcSDimitry Andric     }
3230eae32dcSDimitry Andric   }
3240eae32dcSDimitry Andric 
addEdge__anonf633aad90111::ChainT32506c3fb27SDimitry Andric   void addEdge(ChainT *Other, ChainEdge *Edge) {
3260eae32dcSDimitry Andric     Edges.push_back(std::make_pair(Other, Edge));
3270eae32dcSDimitry Andric   }
3280eae32dcSDimitry Andric 
merge__anonf633aad90111::ChainT329*5f757f3fSDimitry Andric   void merge(ChainT *Other, std::vector<NodeT *> MergedBlocks) {
330*5f757f3fSDimitry Andric     Nodes = std::move(MergedBlocks);
331*5f757f3fSDimitry Andric     // Update the chain's data.
33206c3fb27SDimitry Andric     ExecutionCount += Other->ExecutionCount;
33306c3fb27SDimitry Andric     Size += Other->Size;
33406c3fb27SDimitry Andric     Id = Nodes[0]->Index;
335*5f757f3fSDimitry Andric     // Update the node's data.
33606c3fb27SDimitry Andric     for (size_t Idx = 0; Idx < Nodes.size(); Idx++) {
33706c3fb27SDimitry Andric       Nodes[Idx]->CurChain = this;
33806c3fb27SDimitry Andric       Nodes[Idx]->CurIndex = Idx;
3390eae32dcSDimitry Andric     }
3400eae32dcSDimitry Andric   }
3410eae32dcSDimitry Andric 
34206c3fb27SDimitry Andric   void mergeEdges(ChainT *Other);
3430eae32dcSDimitry Andric 
clear__anonf633aad90111::ChainT3440eae32dcSDimitry Andric   void clear() {
34506c3fb27SDimitry Andric     Nodes.clear();
34606c3fb27SDimitry Andric     Nodes.shrink_to_fit();
3470eae32dcSDimitry Andric     Edges.clear();
3480eae32dcSDimitry Andric     Edges.shrink_to_fit();
3490eae32dcSDimitry Andric   }
3500eae32dcSDimitry Andric 
3510eae32dcSDimitry Andric   // Unique chain identifier.
3520eae32dcSDimitry Andric   uint64_t Id;
3530eae32dcSDimitry Andric   // Cached ext-tsp score for the chain.
35406c3fb27SDimitry Andric   double Score{0};
355*5f757f3fSDimitry Andric   // The total execution count of the chain. Since the execution count of
356*5f757f3fSDimitry Andric   // a basic block is uint64_t, using doubles here to avoid overflow.
357*5f757f3fSDimitry Andric   double ExecutionCount{0};
35806c3fb27SDimitry Andric   // The total size of the chain.
35906c3fb27SDimitry Andric   uint64_t Size{0};
36006c3fb27SDimitry Andric   // Nodes of the chain.
36106c3fb27SDimitry Andric   std::vector<NodeT *> Nodes;
3620eae32dcSDimitry Andric   // Adjacent chains and corresponding edges (lists of jumps).
36306c3fb27SDimitry Andric   std::vector<std::pair<ChainT *, ChainEdge *>> Edges;
3640eae32dcSDimitry Andric };
3650eae32dcSDimitry Andric 
36606c3fb27SDimitry Andric /// An edge in the graph representing jumps between two chains.
36706c3fb27SDimitry Andric /// When nodes are merged into chains, the edges are combined too so that
368*5f757f3fSDimitry Andric /// there is always at most one edge between a pair of chains.
36906c3fb27SDimitry Andric struct ChainEdge {
3700eae32dcSDimitry Andric   ChainEdge(const ChainEdge &) = delete;
3710eae32dcSDimitry Andric   ChainEdge(ChainEdge &&) = default;
3720eae32dcSDimitry Andric   ChainEdge &operator=(const ChainEdge &) = delete;
37306c3fb27SDimitry Andric   ChainEdge &operator=(ChainEdge &&) = delete;
3740eae32dcSDimitry Andric 
ChainEdge__anonf633aad90111::ChainEdge37506c3fb27SDimitry Andric   explicit ChainEdge(JumpT *Jump)
3760eae32dcSDimitry Andric       : SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain),
3770eae32dcSDimitry Andric         Jumps(1, Jump) {}
3780eae32dcSDimitry Andric 
srcChain__anonf633aad90111::ChainEdge37906c3fb27SDimitry Andric   ChainT *srcChain() const { return SrcChain; }
3800eae32dcSDimitry Andric 
dstChain__anonf633aad90111::ChainEdge38106c3fb27SDimitry Andric   ChainT *dstChain() const { return DstChain; }
3820eae32dcSDimitry Andric 
isSelfEdge__anonf633aad90111::ChainEdge38306c3fb27SDimitry Andric   bool isSelfEdge() const { return SrcChain == DstChain; }
38406c3fb27SDimitry Andric 
jumps__anonf633aad90111::ChainEdge38506c3fb27SDimitry Andric   const std::vector<JumpT *> &jumps() const { return Jumps; }
38606c3fb27SDimitry Andric 
appendJump__anonf633aad90111::ChainEdge38706c3fb27SDimitry Andric   void appendJump(JumpT *Jump) { Jumps.push_back(Jump); }
3880eae32dcSDimitry Andric 
moveJumps__anonf633aad90111::ChainEdge3890eae32dcSDimitry Andric   void moveJumps(ChainEdge *Other) {
3900eae32dcSDimitry Andric     Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end());
3910eae32dcSDimitry Andric     Other->Jumps.clear();
3920eae32dcSDimitry Andric     Other->Jumps.shrink_to_fit();
3930eae32dcSDimitry Andric   }
3940eae32dcSDimitry Andric 
changeEndpoint__anonf633aad90111::ChainEdge39506c3fb27SDimitry Andric   void changeEndpoint(ChainT *From, ChainT *To) {
39606c3fb27SDimitry Andric     if (From == SrcChain)
39706c3fb27SDimitry Andric       SrcChain = To;
39806c3fb27SDimitry Andric     if (From == DstChain)
39906c3fb27SDimitry Andric       DstChain = To;
40006c3fb27SDimitry Andric   }
40106c3fb27SDimitry Andric 
hasCachedMergeGain__anonf633aad90111::ChainEdge40206c3fb27SDimitry Andric   bool hasCachedMergeGain(ChainT *Src, ChainT *Dst) const {
4030eae32dcSDimitry Andric     return Src == SrcChain ? CacheValidForward : CacheValidBackward;
4040eae32dcSDimitry Andric   }
4050eae32dcSDimitry Andric 
getCachedMergeGain__anonf633aad90111::ChainEdge40606c3fb27SDimitry Andric   MergeGainT getCachedMergeGain(ChainT *Src, ChainT *Dst) const {
4070eae32dcSDimitry Andric     return Src == SrcChain ? CachedGainForward : CachedGainBackward;
4080eae32dcSDimitry Andric   }
4090eae32dcSDimitry Andric 
setCachedMergeGain__anonf633aad90111::ChainEdge41006c3fb27SDimitry Andric   void setCachedMergeGain(ChainT *Src, ChainT *Dst, MergeGainT MergeGain) {
4110eae32dcSDimitry Andric     if (Src == SrcChain) {
4120eae32dcSDimitry Andric       CachedGainForward = MergeGain;
4130eae32dcSDimitry Andric       CacheValidForward = true;
4140eae32dcSDimitry Andric     } else {
4150eae32dcSDimitry Andric       CachedGainBackward = MergeGain;
4160eae32dcSDimitry Andric       CacheValidBackward = true;
4170eae32dcSDimitry Andric     }
4180eae32dcSDimitry Andric   }
4190eae32dcSDimitry Andric 
invalidateCache__anonf633aad90111::ChainEdge4200eae32dcSDimitry Andric   void invalidateCache() {
4210eae32dcSDimitry Andric     CacheValidForward = false;
4220eae32dcSDimitry Andric     CacheValidBackward = false;
4230eae32dcSDimitry Andric   }
4240eae32dcSDimitry Andric 
setMergeGain__anonf633aad90111::ChainEdge42506c3fb27SDimitry Andric   void setMergeGain(MergeGainT Gain) { CachedGain = Gain; }
42606c3fb27SDimitry Andric 
getMergeGain__anonf633aad90111::ChainEdge42706c3fb27SDimitry Andric   MergeGainT getMergeGain() const { return CachedGain; }
42806c3fb27SDimitry Andric 
gain__anonf633aad90111::ChainEdge42906c3fb27SDimitry Andric   double gain() const { return CachedGain.score(); }
43006c3fb27SDimitry Andric 
4310eae32dcSDimitry Andric private:
4320eae32dcSDimitry Andric   // Source chain.
43306c3fb27SDimitry Andric   ChainT *SrcChain{nullptr};
4340eae32dcSDimitry Andric   // Destination chain.
43506c3fb27SDimitry Andric   ChainT *DstChain{nullptr};
43606c3fb27SDimitry Andric   // Original jumps in the binary with corresponding execution counts.
43706c3fb27SDimitry Andric   std::vector<JumpT *> Jumps;
43806c3fb27SDimitry Andric   // Cached gain value for merging the pair of chains.
43906c3fb27SDimitry Andric   MergeGainT CachedGain;
44006c3fb27SDimitry Andric 
44106c3fb27SDimitry Andric   // Cached gain values for merging the pair of chains. Since the gain of
44206c3fb27SDimitry Andric   // merging (Src, Dst) and (Dst, Src) might be different, we store both values
44306c3fb27SDimitry Andric   // here and a flag indicating which of the options results in a higher gain.
44406c3fb27SDimitry Andric   // Cached gain values.
44506c3fb27SDimitry Andric   MergeGainT CachedGainForward;
44606c3fb27SDimitry Andric   MergeGainT CachedGainBackward;
4470eae32dcSDimitry Andric   // Whether the cached value must be recomputed.
4480eae32dcSDimitry Andric   bool CacheValidForward{false};
4490eae32dcSDimitry Andric   bool CacheValidBackward{false};
4500eae32dcSDimitry Andric };
4510eae32dcSDimitry Andric 
isSuccessor(const NodeT * Other) const452*5f757f3fSDimitry Andric bool NodeT::isSuccessor(const NodeT *Other) const {
453*5f757f3fSDimitry Andric   for (JumpT *Jump : OutJumps)
454*5f757f3fSDimitry Andric     if (Jump->Target == Other)
455*5f757f3fSDimitry Andric       return true;
456*5f757f3fSDimitry Andric   return false;
457*5f757f3fSDimitry Andric }
458*5f757f3fSDimitry Andric 
outCount() const45906c3fb27SDimitry Andric uint64_t NodeT::outCount() const {
46006c3fb27SDimitry Andric   uint64_t Count = 0;
461*5f757f3fSDimitry Andric   for (JumpT *Jump : OutJumps)
46206c3fb27SDimitry Andric     Count += Jump->ExecutionCount;
46306c3fb27SDimitry Andric   return Count;
46406c3fb27SDimitry Andric }
4650eae32dcSDimitry Andric 
inCount() const46606c3fb27SDimitry Andric uint64_t NodeT::inCount() const {
46706c3fb27SDimitry Andric   uint64_t Count = 0;
468*5f757f3fSDimitry Andric   for (JumpT *Jump : InJumps)
46906c3fb27SDimitry Andric     Count += Jump->ExecutionCount;
47006c3fb27SDimitry Andric   return Count;
47106c3fb27SDimitry Andric }
47206c3fb27SDimitry Andric 
mergeEdges(ChainT * Other)47306c3fb27SDimitry Andric void ChainT::mergeEdges(ChainT *Other) {
474*5f757f3fSDimitry Andric   // Update edges adjacent to chain Other.
475*5f757f3fSDimitry Andric   for (const auto &[DstChain, DstEdge] : Other->Edges) {
47606c3fb27SDimitry Andric     ChainT *TargetChain = DstChain == Other ? this : DstChain;
477bdd1243dSDimitry Andric     ChainEdge *CurEdge = getEdge(TargetChain);
4780eae32dcSDimitry Andric     if (CurEdge == nullptr) {
4790eae32dcSDimitry Andric       DstEdge->changeEndpoint(Other, this);
4800eae32dcSDimitry Andric       this->addEdge(TargetChain, DstEdge);
481*5f757f3fSDimitry Andric       if (DstChain != this && DstChain != Other)
4820eae32dcSDimitry Andric         DstChain->addEdge(this, DstEdge);
4830eae32dcSDimitry Andric     } else {
4840eae32dcSDimitry Andric       CurEdge->moveJumps(DstEdge);
4850eae32dcSDimitry Andric     }
486*5f757f3fSDimitry Andric     // Cleanup leftover edge.
487*5f757f3fSDimitry Andric     if (DstChain != Other)
4880eae32dcSDimitry Andric       DstChain->removeEdge(Other);
4890eae32dcSDimitry Andric   }
4900eae32dcSDimitry Andric }
4910eae32dcSDimitry Andric 
49206c3fb27SDimitry Andric using NodeIter = std::vector<NodeT *>::const_iterator;
493*5f757f3fSDimitry Andric static std::vector<NodeT *> EmptyList;
4940eae32dcSDimitry Andric 
495*5f757f3fSDimitry Andric /// A wrapper around three concatenated vectors (chains) of nodes; it is used
496*5f757f3fSDimitry Andric /// to avoid extra instantiation of the vectors.
497*5f757f3fSDimitry Andric struct MergedNodesT {
MergedNodesT__anonf633aad90111::MergedNodesT498*5f757f3fSDimitry Andric   MergedNodesT(NodeIter Begin1, NodeIter End1,
499*5f757f3fSDimitry Andric                NodeIter Begin2 = EmptyList.begin(),
500*5f757f3fSDimitry Andric                NodeIter End2 = EmptyList.end(),
501*5f757f3fSDimitry Andric                NodeIter Begin3 = EmptyList.begin(),
502*5f757f3fSDimitry Andric                NodeIter End3 = EmptyList.end())
5030eae32dcSDimitry Andric       : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
5040eae32dcSDimitry Andric         End3(End3) {}
5050eae32dcSDimitry Andric 
forEach__anonf633aad90111::MergedNodesT5060eae32dcSDimitry Andric   template <typename F> void forEach(const F &Func) const {
5070eae32dcSDimitry Andric     for (auto It = Begin1; It != End1; It++)
5080eae32dcSDimitry Andric       Func(*It);
5090eae32dcSDimitry Andric     for (auto It = Begin2; It != End2; It++)
5100eae32dcSDimitry Andric       Func(*It);
5110eae32dcSDimitry Andric     for (auto It = Begin3; It != End3; It++)
5120eae32dcSDimitry Andric       Func(*It);
5130eae32dcSDimitry Andric   }
5140eae32dcSDimitry Andric 
getNodes__anonf633aad90111::MergedNodesT51506c3fb27SDimitry Andric   std::vector<NodeT *> getNodes() const {
51606c3fb27SDimitry Andric     std::vector<NodeT *> Result;
5170eae32dcSDimitry Andric     Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) +
5180eae32dcSDimitry Andric                    std::distance(Begin3, End3));
5190eae32dcSDimitry Andric     Result.insert(Result.end(), Begin1, End1);
5200eae32dcSDimitry Andric     Result.insert(Result.end(), Begin2, End2);
5210eae32dcSDimitry Andric     Result.insert(Result.end(), Begin3, End3);
5220eae32dcSDimitry Andric     return Result;
5230eae32dcSDimitry Andric   }
5240eae32dcSDimitry Andric 
getFirstNode__anonf633aad90111::MergedNodesT52506c3fb27SDimitry Andric   const NodeT *getFirstNode() const { return *Begin1; }
5260eae32dcSDimitry Andric 
5270eae32dcSDimitry Andric private:
52806c3fb27SDimitry Andric   NodeIter Begin1;
52906c3fb27SDimitry Andric   NodeIter End1;
53006c3fb27SDimitry Andric   NodeIter Begin2;
53106c3fb27SDimitry Andric   NodeIter End2;
53206c3fb27SDimitry Andric   NodeIter Begin3;
53306c3fb27SDimitry Andric   NodeIter End3;
5340eae32dcSDimitry Andric };
5350eae32dcSDimitry Andric 
536*5f757f3fSDimitry Andric /// A wrapper around two concatenated vectors (chains) of jumps.
537*5f757f3fSDimitry Andric struct MergedJumpsT {
MergedJumpsT__anonf633aad90111::MergedJumpsT538*5f757f3fSDimitry Andric   MergedJumpsT(const std::vector<JumpT *> *Jumps1,
539*5f757f3fSDimitry Andric                const std::vector<JumpT *> *Jumps2 = nullptr) {
540*5f757f3fSDimitry Andric     assert(!Jumps1->empty() && "cannot merge empty jump list");
541*5f757f3fSDimitry Andric     JumpArray[0] = Jumps1;
542*5f757f3fSDimitry Andric     JumpArray[1] = Jumps2;
543*5f757f3fSDimitry Andric   }
544*5f757f3fSDimitry Andric 
forEach__anonf633aad90111::MergedJumpsT545*5f757f3fSDimitry Andric   template <typename F> void forEach(const F &Func) const {
546*5f757f3fSDimitry Andric     for (auto Jumps : JumpArray)
547*5f757f3fSDimitry Andric       if (Jumps != nullptr)
548*5f757f3fSDimitry Andric         for (JumpT *Jump : *Jumps)
549*5f757f3fSDimitry Andric           Func(Jump);
550*5f757f3fSDimitry Andric   }
551*5f757f3fSDimitry Andric 
552*5f757f3fSDimitry Andric private:
553*5f757f3fSDimitry Andric   std::array<const std::vector<JumpT *> *, 2> JumpArray{nullptr, nullptr};
554*5f757f3fSDimitry Andric };
555*5f757f3fSDimitry Andric 
55606c3fb27SDimitry Andric /// Merge two chains of nodes respecting a given 'type' and 'offset'.
55706c3fb27SDimitry Andric ///
55806c3fb27SDimitry Andric /// If MergeType == 0, then the result is a concatenation of two chains.
55906c3fb27SDimitry Andric /// Otherwise, the first chain is cut into two sub-chains at the offset,
56006c3fb27SDimitry Andric /// and merged using all possible ways of concatenating three chains.
mergeNodes(const std::vector<NodeT * > & X,const std::vector<NodeT * > & Y,size_t MergeOffset,MergeTypeT MergeType)561*5f757f3fSDimitry Andric MergedNodesT mergeNodes(const std::vector<NodeT *> &X,
56206c3fb27SDimitry Andric                         const std::vector<NodeT *> &Y, size_t MergeOffset,
56306c3fb27SDimitry Andric                         MergeTypeT MergeType) {
564*5f757f3fSDimitry Andric   // Split the first chain, X, into X1 and X2.
56506c3fb27SDimitry Andric   NodeIter BeginX1 = X.begin();
56606c3fb27SDimitry Andric   NodeIter EndX1 = X.begin() + MergeOffset;
56706c3fb27SDimitry Andric   NodeIter BeginX2 = X.begin() + MergeOffset;
56806c3fb27SDimitry Andric   NodeIter EndX2 = X.end();
56906c3fb27SDimitry Andric   NodeIter BeginY = Y.begin();
57006c3fb27SDimitry Andric   NodeIter EndY = Y.end();
57106c3fb27SDimitry Andric 
572*5f757f3fSDimitry Andric   // Construct a new chain from the three existing ones.
57306c3fb27SDimitry Andric   switch (MergeType) {
57406c3fb27SDimitry Andric   case MergeTypeT::X_Y:
575*5f757f3fSDimitry Andric     return MergedNodesT(BeginX1, EndX2, BeginY, EndY);
57606c3fb27SDimitry Andric   case MergeTypeT::Y_X:
577*5f757f3fSDimitry Andric     return MergedNodesT(BeginY, EndY, BeginX1, EndX2);
57806c3fb27SDimitry Andric   case MergeTypeT::X1_Y_X2:
579*5f757f3fSDimitry Andric     return MergedNodesT(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
58006c3fb27SDimitry Andric   case MergeTypeT::Y_X2_X1:
581*5f757f3fSDimitry Andric     return MergedNodesT(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
58206c3fb27SDimitry Andric   case MergeTypeT::X2_X1_Y:
583*5f757f3fSDimitry Andric     return MergedNodesT(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
58406c3fb27SDimitry Andric   }
58506c3fb27SDimitry Andric   llvm_unreachable("unexpected chain merge type");
58606c3fb27SDimitry Andric }
58706c3fb27SDimitry Andric 
5880eae32dcSDimitry Andric /// The implementation of the ExtTSP algorithm.
5890eae32dcSDimitry Andric class ExtTSPImpl {
5900eae32dcSDimitry Andric public:
ExtTSPImpl(ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)591*5f757f3fSDimitry Andric   ExtTSPImpl(ArrayRef<uint64_t> NodeSizes, ArrayRef<uint64_t> NodeCounts,
592*5f757f3fSDimitry Andric              ArrayRef<EdgeCount> EdgeCounts)
59306c3fb27SDimitry Andric       : NumNodes(NodeSizes.size()) {
5940eae32dcSDimitry Andric     initialize(NodeSizes, NodeCounts, EdgeCounts);
5950eae32dcSDimitry Andric   }
5960eae32dcSDimitry Andric 
59706c3fb27SDimitry Andric   /// Run the algorithm and return an optimized ordering of nodes.
run()598*5f757f3fSDimitry Andric   std::vector<uint64_t> run() {
59906c3fb27SDimitry Andric     // Pass 1: Merge nodes with their mutually forced successors
6000eae32dcSDimitry Andric     mergeForcedPairs();
6010eae32dcSDimitry Andric 
6020eae32dcSDimitry Andric     // Pass 2: Merge pairs of chains while improving the ExtTSP objective
6030eae32dcSDimitry Andric     mergeChainPairs();
6040eae32dcSDimitry Andric 
60506c3fb27SDimitry Andric     // Pass 3: Merge cold nodes to reduce code size
6060eae32dcSDimitry Andric     mergeColdChains();
6070eae32dcSDimitry Andric 
60806c3fb27SDimitry Andric     // Collect nodes from all chains
609*5f757f3fSDimitry Andric     return concatChains();
6100eae32dcSDimitry Andric   }
6110eae32dcSDimitry Andric 
6120eae32dcSDimitry Andric private:
6130eae32dcSDimitry Andric   /// Initialize the algorithm's data structures.
initialize(const ArrayRef<uint64_t> & NodeSizes,const ArrayRef<uint64_t> & NodeCounts,const ArrayRef<EdgeCount> & EdgeCounts)614*5f757f3fSDimitry Andric   void initialize(const ArrayRef<uint64_t> &NodeSizes,
615*5f757f3fSDimitry Andric                   const ArrayRef<uint64_t> &NodeCounts,
616*5f757f3fSDimitry Andric                   const ArrayRef<EdgeCount> &EdgeCounts) {
617*5f757f3fSDimitry Andric     // Initialize nodes.
61806c3fb27SDimitry Andric     AllNodes.reserve(NumNodes);
61906c3fb27SDimitry Andric     for (uint64_t Idx = 0; Idx < NumNodes; Idx++) {
62006c3fb27SDimitry Andric       uint64_t Size = std::max<uint64_t>(NodeSizes[Idx], 1ULL);
62106c3fb27SDimitry Andric       uint64_t ExecutionCount = NodeCounts[Idx];
622*5f757f3fSDimitry Andric       // The execution count of the entry node is set to at least one.
62306c3fb27SDimitry Andric       if (Idx == 0 && ExecutionCount == 0)
6240eae32dcSDimitry Andric         ExecutionCount = 1;
62506c3fb27SDimitry Andric       AllNodes.emplace_back(Idx, Size, ExecutionCount);
6260eae32dcSDimitry Andric     }
6270eae32dcSDimitry Andric 
628*5f757f3fSDimitry Andric     // Initialize jumps between the nodes.
629bdd1243dSDimitry Andric     SuccNodes.resize(NumNodes);
630bdd1243dSDimitry Andric     PredNodes.resize(NumNodes);
631bdd1243dSDimitry Andric     std::vector<uint64_t> OutDegree(NumNodes, 0);
6320eae32dcSDimitry Andric     AllJumps.reserve(EdgeCounts.size());
633*5f757f3fSDimitry Andric     for (auto Edge : EdgeCounts) {
634*5f757f3fSDimitry Andric       ++OutDegree[Edge.src];
635*5f757f3fSDimitry Andric       // Ignore self-edges.
636*5f757f3fSDimitry Andric       if (Edge.src == Edge.dst)
6370eae32dcSDimitry Andric         continue;
6380eae32dcSDimitry Andric 
639*5f757f3fSDimitry Andric       SuccNodes[Edge.src].push_back(Edge.dst);
640*5f757f3fSDimitry Andric       PredNodes[Edge.dst].push_back(Edge.src);
641*5f757f3fSDimitry Andric       if (Edge.count > 0) {
642*5f757f3fSDimitry Andric         NodeT &PredNode = AllNodes[Edge.src];
643*5f757f3fSDimitry Andric         NodeT &SuccNode = AllNodes[Edge.dst];
644*5f757f3fSDimitry Andric         AllJumps.emplace_back(&PredNode, &SuccNode, Edge.count);
64506c3fb27SDimitry Andric         SuccNode.InJumps.push_back(&AllJumps.back());
64606c3fb27SDimitry Andric         PredNode.OutJumps.push_back(&AllJumps.back());
647*5f757f3fSDimitry Andric         // Adjust execution counts.
648*5f757f3fSDimitry Andric         PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Edge.count);
649*5f757f3fSDimitry Andric         SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Edge.count);
6500eae32dcSDimitry Andric       }
6510eae32dcSDimitry Andric     }
65206c3fb27SDimitry Andric     for (JumpT &Jump : AllJumps) {
653*5f757f3fSDimitry Andric       assert(OutDegree[Jump.Source->Index] > 0 &&
654*5f757f3fSDimitry Andric              "incorrectly computed out-degree of the block");
655bdd1243dSDimitry Andric       Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;
656bdd1243dSDimitry Andric     }
6570eae32dcSDimitry Andric 
658*5f757f3fSDimitry Andric     // Initialize chains.
6590eae32dcSDimitry Andric     AllChains.reserve(NumNodes);
6600eae32dcSDimitry Andric     HotChains.reserve(NumNodes);
66106c3fb27SDimitry Andric     for (NodeT &Node : AllNodes) {
662*5f757f3fSDimitry Andric       // Create a chain.
66306c3fb27SDimitry Andric       AllChains.emplace_back(Node.Index, &Node);
66406c3fb27SDimitry Andric       Node.CurChain = &AllChains.back();
665*5f757f3fSDimitry Andric       if (Node.ExecutionCount > 0)
6660eae32dcSDimitry Andric         HotChains.push_back(&AllChains.back());
6670eae32dcSDimitry Andric     }
6680eae32dcSDimitry Andric 
669*5f757f3fSDimitry Andric     // Initialize chain edges.
6700eae32dcSDimitry Andric     AllEdges.reserve(AllJumps.size());
67106c3fb27SDimitry Andric     for (NodeT &PredNode : AllNodes) {
67206c3fb27SDimitry Andric       for (JumpT *Jump : PredNode.OutJumps) {
673*5f757f3fSDimitry Andric         assert(Jump->ExecutionCount > 0 && "incorrectly initialized jump");
67406c3fb27SDimitry Andric         NodeT *SuccNode = Jump->Target;
67506c3fb27SDimitry Andric         ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
676*5f757f3fSDimitry Andric         // This edge is already present in the graph.
6770eae32dcSDimitry Andric         if (CurEdge != nullptr) {
67806c3fb27SDimitry Andric           assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
6790eae32dcSDimitry Andric           CurEdge->appendJump(Jump);
6800eae32dcSDimitry Andric           continue;
6810eae32dcSDimitry Andric         }
682*5f757f3fSDimitry Andric         // This is a new edge.
6830eae32dcSDimitry Andric         AllEdges.emplace_back(Jump);
68406c3fb27SDimitry Andric         PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
68506c3fb27SDimitry Andric         SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
6860eae32dcSDimitry Andric       }
6870eae32dcSDimitry Andric     }
6880eae32dcSDimitry Andric   }
6890eae32dcSDimitry Andric 
69006c3fb27SDimitry Andric   /// For a pair of nodes, A and B, node B is the forced successor of A,
6910eae32dcSDimitry Andric   /// if (i) all jumps (based on profile) from A goes to B and (ii) all jumps
69206c3fb27SDimitry Andric   /// to B are from A. Such nodes should be adjacent in the optimal ordering;
69306c3fb27SDimitry Andric   /// the method finds and merges such pairs of nodes.
mergeForcedPairs()6940eae32dcSDimitry Andric   void mergeForcedPairs() {
695*5f757f3fSDimitry Andric     // Find forced pairs of blocks.
69606c3fb27SDimitry Andric     for (NodeT &Node : AllNodes) {
69706c3fb27SDimitry Andric       if (SuccNodes[Node.Index].size() == 1 &&
69806c3fb27SDimitry Andric           PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
69906c3fb27SDimitry Andric           SuccNodes[Node.Index][0] != 0) {
70006c3fb27SDimitry Andric         size_t SuccIndex = SuccNodes[Node.Index][0];
70106c3fb27SDimitry Andric         Node.ForcedSucc = &AllNodes[SuccIndex];
70206c3fb27SDimitry Andric         AllNodes[SuccIndex].ForcedPred = &Node;
7030eae32dcSDimitry Andric       }
7040eae32dcSDimitry Andric     }
7050eae32dcSDimitry Andric 
7060eae32dcSDimitry Andric     // There might be 'cycles' in the forced dependencies, since profile
7070eae32dcSDimitry Andric     // data isn't 100% accurate. Typically this is observed in loops, when the
7080eae32dcSDimitry Andric     // loop edges are the hottest successors for the basic blocks of the loop.
70906c3fb27SDimitry Andric     // Break the cycles by choosing the node with the smallest index as the
7100eae32dcSDimitry Andric     // head. This helps to keep the original order of the loops, which likely
7110eae32dcSDimitry Andric     // have already been rotated in the optimized manner.
71206c3fb27SDimitry Andric     for (NodeT &Node : AllNodes) {
71306c3fb27SDimitry Andric       if (Node.ForcedSucc == nullptr || Node.ForcedPred == nullptr)
7140eae32dcSDimitry Andric         continue;
7150eae32dcSDimitry Andric 
71606c3fb27SDimitry Andric       NodeT *SuccNode = Node.ForcedSucc;
71706c3fb27SDimitry Andric       while (SuccNode != nullptr && SuccNode != &Node) {
71806c3fb27SDimitry Andric         SuccNode = SuccNode->ForcedSucc;
7190eae32dcSDimitry Andric       }
72006c3fb27SDimitry Andric       if (SuccNode == nullptr)
7210eae32dcSDimitry Andric         continue;
722*5f757f3fSDimitry Andric       // Break the cycle.
72306c3fb27SDimitry Andric       AllNodes[Node.ForcedPred->Index].ForcedSucc = nullptr;
72406c3fb27SDimitry Andric       Node.ForcedPred = nullptr;
7250eae32dcSDimitry Andric     }
7260eae32dcSDimitry Andric 
727*5f757f3fSDimitry Andric     // Merge nodes with their fallthrough successors.
72806c3fb27SDimitry Andric     for (NodeT &Node : AllNodes) {
72906c3fb27SDimitry Andric       if (Node.ForcedPred == nullptr && Node.ForcedSucc != nullptr) {
73006c3fb27SDimitry Andric         const NodeT *CurBlock = &Node;
7310eae32dcSDimitry Andric         while (CurBlock->ForcedSucc != nullptr) {
73206c3fb27SDimitry Andric           const NodeT *NextBlock = CurBlock->ForcedSucc;
73306c3fb27SDimitry Andric           mergeChains(Node.CurChain, NextBlock->CurChain, 0, MergeTypeT::X_Y);
7340eae32dcSDimitry Andric           CurBlock = NextBlock;
7350eae32dcSDimitry Andric         }
7360eae32dcSDimitry Andric       }
7370eae32dcSDimitry Andric     }
7380eae32dcSDimitry Andric   }
7390eae32dcSDimitry Andric 
7400eae32dcSDimitry Andric   /// Merge pairs of chains while improving the ExtTSP objective.
mergeChainPairs()7410eae32dcSDimitry Andric   void mergeChainPairs() {
742*5f757f3fSDimitry Andric     /// Deterministically compare pairs of chains.
74306c3fb27SDimitry Andric     auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
74406c3fb27SDimitry Andric                                 const ChainT *A2, const ChainT *B2) {
745*5f757f3fSDimitry Andric       return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);
7460eae32dcSDimitry Andric     };
7470eae32dcSDimitry Andric 
7480eae32dcSDimitry Andric     while (HotChains.size() > 1) {
74906c3fb27SDimitry Andric       ChainT *BestChainPred = nullptr;
75006c3fb27SDimitry Andric       ChainT *BestChainSucc = nullptr;
75106c3fb27SDimitry Andric       MergeGainT BestGain;
752*5f757f3fSDimitry Andric       // Iterate over all pairs of chains.
75306c3fb27SDimitry Andric       for (ChainT *ChainPred : HotChains) {
754*5f757f3fSDimitry Andric         // Get candidates for merging with the current chain.
755*5f757f3fSDimitry Andric         for (const auto &[ChainSucc, Edge] : ChainPred->Edges) {
756*5f757f3fSDimitry Andric           // Ignore loop edges.
757*5f757f3fSDimitry Andric           if (Edge->isSelfEdge())
7580eae32dcSDimitry Andric             continue;
759*5f757f3fSDimitry Andric           // Skip the merge if the combined chain violates the maximum specified
760*5f757f3fSDimitry Andric           // size.
76181ad6265SDimitry Andric           if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
76281ad6265SDimitry Andric             continue;
763*5f757f3fSDimitry Andric           // Don't merge the chains if they have vastly different densities.
764*5f757f3fSDimitry Andric           // Skip the merge if the ratio between the densities exceeds
765*5f757f3fSDimitry Andric           // MaxMergeDensityRatio. Smaller values of the option result in fewer
766*5f757f3fSDimitry Andric           // merges, and hence, more chains.
767*5f757f3fSDimitry Andric           const double ChainPredDensity = ChainPred->density();
768*5f757f3fSDimitry Andric           const double ChainSuccDensity = ChainSucc->density();
769*5f757f3fSDimitry Andric           assert(ChainPredDensity > 0.0 && ChainSuccDensity > 0.0 &&
770*5f757f3fSDimitry Andric                  "incorrectly computed chain densities");
771*5f757f3fSDimitry Andric           auto [MinDensity, MaxDensity] =
772*5f757f3fSDimitry Andric               std::minmax(ChainPredDensity, ChainSuccDensity);
773*5f757f3fSDimitry Andric           const double Ratio = MaxDensity / MinDensity;
774*5f757f3fSDimitry Andric           if (Ratio > MaxMergeDensityRatio)
775*5f757f3fSDimitry Andric             continue;
77681ad6265SDimitry Andric 
777*5f757f3fSDimitry Andric           // Compute the gain of merging the two chains.
77806c3fb27SDimitry Andric           MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge);
7790eae32dcSDimitry Andric           if (CurGain.score() <= EPS)
7800eae32dcSDimitry Andric             continue;
7810eae32dcSDimitry Andric 
7820eae32dcSDimitry Andric           if (BestGain < CurGain ||
7830eae32dcSDimitry Andric               (std::abs(CurGain.score() - BestGain.score()) < EPS &&
7840eae32dcSDimitry Andric                compareChainPairs(ChainPred, ChainSucc, BestChainPred,
7850eae32dcSDimitry Andric                                  BestChainSucc))) {
7860eae32dcSDimitry Andric             BestGain = CurGain;
7870eae32dcSDimitry Andric             BestChainPred = ChainPred;
7880eae32dcSDimitry Andric             BestChainSucc = ChainSucc;
7890eae32dcSDimitry Andric           }
7900eae32dcSDimitry Andric         }
7910eae32dcSDimitry Andric       }
7920eae32dcSDimitry Andric 
793*5f757f3fSDimitry Andric       // Stop merging when there is no improvement.
7940eae32dcSDimitry Andric       if (BestGain.score() <= EPS)
7950eae32dcSDimitry Andric         break;
7960eae32dcSDimitry Andric 
797*5f757f3fSDimitry Andric       // Merge the best pair of chains.
7980eae32dcSDimitry Andric       mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
7990eae32dcSDimitry Andric                   BestGain.mergeType());
8000eae32dcSDimitry Andric     }
8010eae32dcSDimitry Andric   }
8020eae32dcSDimitry Andric 
80306c3fb27SDimitry Andric   /// Merge remaining nodes into chains w/o taking jump counts into
80406c3fb27SDimitry Andric   /// consideration. This allows to maintain the original node order in the
805*5f757f3fSDimitry Andric   /// absence of profile data.
mergeColdChains()8060eae32dcSDimitry Andric   void mergeColdChains() {
8070eae32dcSDimitry Andric     for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
808bdd1243dSDimitry Andric       // Iterating in reverse order to make sure original fallthrough jumps are
809bdd1243dSDimitry Andric       // merged first; this might be beneficial for code size.
8100eae32dcSDimitry Andric       size_t NumSuccs = SuccNodes[SrcBB].size();
8110eae32dcSDimitry Andric       for (size_t Idx = 0; Idx < NumSuccs; Idx++) {
81206c3fb27SDimitry Andric         size_t DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1];
81306c3fb27SDimitry Andric         ChainT *SrcChain = AllNodes[SrcBB].CurChain;
81406c3fb27SDimitry Andric         ChainT *DstChain = AllNodes[DstBB].CurChain;
8150eae32dcSDimitry Andric         if (SrcChain != DstChain && !DstChain->isEntry() &&
81606c3fb27SDimitry Andric             SrcChain->Nodes.back()->Index == SrcBB &&
81706c3fb27SDimitry Andric             DstChain->Nodes.front()->Index == DstBB &&
818bdd1243dSDimitry Andric             SrcChain->isCold() == DstChain->isCold()) {
81906c3fb27SDimitry Andric           mergeChains(SrcChain, DstChain, 0, MergeTypeT::X_Y);
8200eae32dcSDimitry Andric         }
8210eae32dcSDimitry Andric       }
8220eae32dcSDimitry Andric     }
8230eae32dcSDimitry Andric   }
8240eae32dcSDimitry Andric 
82506c3fb27SDimitry Andric   /// Compute the Ext-TSP score for a given node order and a list of jumps.
extTSPScore(const MergedNodesT & Nodes,const MergedJumpsT & Jumps) const826*5f757f3fSDimitry Andric   double extTSPScore(const MergedNodesT &Nodes,
827*5f757f3fSDimitry Andric                      const MergedJumpsT &Jumps) const {
8280eae32dcSDimitry Andric     uint64_t CurAddr = 0;
829*5f757f3fSDimitry Andric     Nodes.forEach([&](const NodeT *Node) {
83006c3fb27SDimitry Andric       Node->EstimatedAddr = CurAddr;
83106c3fb27SDimitry Andric       CurAddr += Node->Size;
8320eae32dcSDimitry Andric     });
8330eae32dcSDimitry Andric 
8340eae32dcSDimitry Andric     double Score = 0;
835*5f757f3fSDimitry Andric     Jumps.forEach([&](const JumpT *Jump) {
83606c3fb27SDimitry Andric       const NodeT *SrcBlock = Jump->Source;
83706c3fb27SDimitry Andric       const NodeT *DstBlock = Jump->Target;
8380eae32dcSDimitry Andric       Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
839bdd1243dSDimitry Andric                              DstBlock->EstimatedAddr, Jump->ExecutionCount,
840bdd1243dSDimitry Andric                              Jump->IsConditional);
841*5f757f3fSDimitry Andric     });
8420eae32dcSDimitry Andric     return Score;
8430eae32dcSDimitry Andric   }
8440eae32dcSDimitry Andric 
8450eae32dcSDimitry Andric   /// Compute the gain of merging two chains.
8460eae32dcSDimitry Andric   ///
8470eae32dcSDimitry Andric   /// The function considers all possible ways of merging two chains and
8480eae32dcSDimitry Andric   /// computes the one having the largest increase in ExtTSP objective. The
8490eae32dcSDimitry Andric   /// result is a pair with the first element being the gain and the second
8500eae32dcSDimitry Andric   /// element being the corresponding merging type.
getBestMergeGain(ChainT * ChainPred,ChainT * ChainSucc,ChainEdge * Edge) const85106c3fb27SDimitry Andric   MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
8520eae32dcSDimitry Andric                               ChainEdge *Edge) const {
853*5f757f3fSDimitry Andric     if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))
8540eae32dcSDimitry Andric       return Edge->getCachedMergeGain(ChainPred, ChainSucc);
8550eae32dcSDimitry Andric 
856*5f757f3fSDimitry Andric     assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
857*5f757f3fSDimitry Andric     // Precompute jumps between ChainPred and ChainSucc.
858bdd1243dSDimitry Andric     ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
859*5f757f3fSDimitry Andric     MergedJumpsT Jumps(&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr);
8600eae32dcSDimitry Andric 
861*5f757f3fSDimitry Andric     // This object holds the best chosen gain of merging two chains.
86206c3fb27SDimitry Andric     MergeGainT Gain = MergeGainT();
8630eae32dcSDimitry Andric 
8640eae32dcSDimitry Andric     /// Given a merge offset and a list of merge types, try to merge two chains
865*5f757f3fSDimitry Andric     /// and update Gain with a better alternative.
8660eae32dcSDimitry Andric     auto tryChainMerging = [&](size_t Offset,
86706c3fb27SDimitry Andric                                const std::vector<MergeTypeT> &MergeTypes) {
868*5f757f3fSDimitry Andric       // Skip merging corresponding to concatenation w/o splitting.
86906c3fb27SDimitry Andric       if (Offset == 0 || Offset == ChainPred->Nodes.size())
8700eae32dcSDimitry Andric         return;
871*5f757f3fSDimitry Andric       // Skip merging if it breaks Forced successors.
87206c3fb27SDimitry Andric       NodeT *Node = ChainPred->Nodes[Offset - 1];
87306c3fb27SDimitry Andric       if (Node->ForcedSucc != nullptr)
8740eae32dcSDimitry Andric         return;
8750eae32dcSDimitry Andric       // Apply the merge, compute the corresponding gain, and update the best
876*5f757f3fSDimitry Andric       // value, if the merge is beneficial.
87706c3fb27SDimitry Andric       for (const MergeTypeT &MergeType : MergeTypes) {
8780eae32dcSDimitry Andric         Gain.updateIfLessThan(
8790eae32dcSDimitry Andric             computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));
8800eae32dcSDimitry Andric       }
8810eae32dcSDimitry Andric     };
8820eae32dcSDimitry Andric 
883*5f757f3fSDimitry Andric     // Try to concatenate two chains w/o splitting.
8840eae32dcSDimitry Andric     Gain.updateIfLessThan(
88506c3fb27SDimitry Andric         computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y));
8860eae32dcSDimitry Andric 
887*5f757f3fSDimitry Andric     // Attach (a part of) ChainPred before the first node of ChainSucc.
88806c3fb27SDimitry Andric     for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
88906c3fb27SDimitry Andric       const NodeT *SrcBlock = Jump->Source;
8900eae32dcSDimitry Andric       if (SrcBlock->CurChain != ChainPred)
8910eae32dcSDimitry Andric         continue;
8920eae32dcSDimitry Andric       size_t Offset = SrcBlock->CurIndex + 1;
89306c3fb27SDimitry Andric       tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
8940eae32dcSDimitry Andric     }
8950eae32dcSDimitry Andric 
896*5f757f3fSDimitry Andric     // Attach (a part of) ChainPred after the last node of ChainSucc.
89706c3fb27SDimitry Andric     for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
898*5f757f3fSDimitry Andric       const NodeT *DstBlock = Jump->Target;
8990eae32dcSDimitry Andric       if (DstBlock->CurChain != ChainPred)
9000eae32dcSDimitry Andric         continue;
9010eae32dcSDimitry Andric       size_t Offset = DstBlock->CurIndex;
90206c3fb27SDimitry Andric       tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
9030eae32dcSDimitry Andric     }
9040eae32dcSDimitry Andric 
905*5f757f3fSDimitry Andric     // Try to break ChainPred in various ways and concatenate with ChainSucc.
90606c3fb27SDimitry Andric     if (ChainPred->Nodes.size() <= ChainSplitThreshold) {
90706c3fb27SDimitry Andric       for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {
908*5f757f3fSDimitry Andric         // Do not split the chain along a fall-through jump. One of the two
909*5f757f3fSDimitry Andric         // loops above may still "break" such a jump whenever it results in a
910*5f757f3fSDimitry Andric         // new fall-through.
911*5f757f3fSDimitry Andric         const NodeT *BB = ChainPred->Nodes[Offset - 1];
912*5f757f3fSDimitry Andric         const NodeT *BB2 = ChainPred->Nodes[Offset];
913*5f757f3fSDimitry Andric         if (BB->isSuccessor(BB2))
914*5f757f3fSDimitry Andric           continue;
915*5f757f3fSDimitry Andric 
916*5f757f3fSDimitry Andric         // In practice, applying X2_Y_X1 merging almost never provides benefits;
917*5f757f3fSDimitry Andric         // thus, we exclude it from consideration to reduce the search space.
91806c3fb27SDimitry Andric         tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,
91906c3fb27SDimitry Andric                                  MergeTypeT::X2_X1_Y});
9200eae32dcSDimitry Andric       }
9210eae32dcSDimitry Andric     }
922*5f757f3fSDimitry Andric 
9230eae32dcSDimitry Andric     Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
9240eae32dcSDimitry Andric     return Gain;
9250eae32dcSDimitry Andric   }
9260eae32dcSDimitry Andric 
9270eae32dcSDimitry Andric   /// Compute the score gain of merging two chains, respecting a given
9280eae32dcSDimitry Andric   /// merge 'type' and 'offset'.
9290eae32dcSDimitry Andric   ///
9300eae32dcSDimitry Andric   /// The two chains are not modified in the method.
computeMergeGain(const ChainT * ChainPred,const ChainT * ChainSucc,const MergedJumpsT & Jumps,size_t MergeOffset,MergeTypeT MergeType) const93106c3fb27SDimitry Andric   MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
932*5f757f3fSDimitry Andric                               const MergedJumpsT &Jumps, size_t MergeOffset,
933*5f757f3fSDimitry Andric                               MergeTypeT MergeType) const {
934*5f757f3fSDimitry Andric     MergedNodesT MergedNodes =
93506c3fb27SDimitry Andric         mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
9360eae32dcSDimitry Andric 
937*5f757f3fSDimitry Andric     // Do not allow a merge that does not preserve the original entry point.
9380eae32dcSDimitry Andric     if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
939*5f757f3fSDimitry Andric         !MergedNodes.getFirstNode()->isEntry())
94006c3fb27SDimitry Andric       return MergeGainT();
9410eae32dcSDimitry Andric 
942*5f757f3fSDimitry Andric     // The gain for the new chain.
943*5f757f3fSDimitry Andric     double NewScore = extTSPScore(MergedNodes, Jumps);
944*5f757f3fSDimitry Andric     double CurScore = ChainPred->Score;
945*5f757f3fSDimitry Andric     return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);
9460eae32dcSDimitry Andric   }
9470eae32dcSDimitry Andric 
9480eae32dcSDimitry Andric   /// Merge chain From into chain Into, update the list of active chains,
9490eae32dcSDimitry Andric   /// adjacency information, and the corresponding cached values.
mergeChains(ChainT * Into,ChainT * From,size_t MergeOffset,MergeTypeT MergeType)95006c3fb27SDimitry Andric   void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,
95106c3fb27SDimitry Andric                    MergeTypeT MergeType) {
9520eae32dcSDimitry Andric     assert(Into != From && "a chain cannot be merged with itself");
9530eae32dcSDimitry Andric 
954*5f757f3fSDimitry Andric     // Merge the nodes.
955*5f757f3fSDimitry Andric     MergedNodesT MergedNodes =
95606c3fb27SDimitry Andric         mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
95706c3fb27SDimitry Andric     Into->merge(From, MergedNodes.getNodes());
95806c3fb27SDimitry Andric 
959*5f757f3fSDimitry Andric     // Merge the edges.
9600eae32dcSDimitry Andric     Into->mergeEdges(From);
9610eae32dcSDimitry Andric     From->clear();
9620eae32dcSDimitry Andric 
963*5f757f3fSDimitry Andric     // Update cached ext-tsp score for the new chain.
964bdd1243dSDimitry Andric     ChainEdge *SelfEdge = Into->getEdge(Into);
9650eae32dcSDimitry Andric     if (SelfEdge != nullptr) {
966*5f757f3fSDimitry Andric       MergedNodes = MergedNodesT(Into->Nodes.begin(), Into->Nodes.end());
967*5f757f3fSDimitry Andric       MergedJumpsT MergedJumps(&SelfEdge->jumps());
968*5f757f3fSDimitry Andric       Into->Score = extTSPScore(MergedNodes, MergedJumps);
9690eae32dcSDimitry Andric     }
9700eae32dcSDimitry Andric 
971*5f757f3fSDimitry Andric     // Remove the chain from the list of active chains.
972*5f757f3fSDimitry Andric     llvm::erase(HotChains, From);
9730eae32dcSDimitry Andric 
974*5f757f3fSDimitry Andric     // Invalidate caches.
97506c3fb27SDimitry Andric     for (auto EdgeIt : Into->Edges)
97606c3fb27SDimitry Andric       EdgeIt.second->invalidateCache();
9770eae32dcSDimitry Andric   }
9780eae32dcSDimitry Andric 
97906c3fb27SDimitry Andric   /// Concatenate all chains into the final order.
concatChains()980*5f757f3fSDimitry Andric   std::vector<uint64_t> concatChains() {
981*5f757f3fSDimitry Andric     // Collect non-empty chains.
98206c3fb27SDimitry Andric     std::vector<const ChainT *> SortedChains;
98306c3fb27SDimitry Andric     for (ChainT &Chain : AllChains) {
984*5f757f3fSDimitry Andric       if (!Chain.Nodes.empty())
9850eae32dcSDimitry Andric         SortedChains.push_back(&Chain);
9860eae32dcSDimitry Andric     }
9870eae32dcSDimitry Andric 
988*5f757f3fSDimitry Andric     // Sorting chains by density in the decreasing order.
989*5f757f3fSDimitry Andric     std::sort(SortedChains.begin(), SortedChains.end(),
99006c3fb27SDimitry Andric               [&](const ChainT *L, const ChainT *R) {
991*5f757f3fSDimitry Andric                 // Place the entry point at the beginning of the order.
99206c3fb27SDimitry Andric                 if (L->isEntry() != R->isEntry())
99306c3fb27SDimitry Andric                   return L->isEntry();
9940eae32dcSDimitry Andric 
995*5f757f3fSDimitry Andric                 // Compare by density and break ties by chain identifiers.
996*5f757f3fSDimitry Andric                 return std::make_tuple(-L->density(), L->Id) <
997*5f757f3fSDimitry Andric                        std::make_tuple(-R->density(), R->Id);
9980eae32dcSDimitry Andric               });
9990eae32dcSDimitry Andric 
1000*5f757f3fSDimitry Andric     // Collect the nodes in the order specified by their chains.
1001*5f757f3fSDimitry Andric     std::vector<uint64_t> Order;
10020eae32dcSDimitry Andric     Order.reserve(NumNodes);
1003*5f757f3fSDimitry Andric     for (const ChainT *Chain : SortedChains)
1004*5f757f3fSDimitry Andric       for (NodeT *Node : Chain->Nodes)
100506c3fb27SDimitry Andric         Order.push_back(Node->Index);
1006*5f757f3fSDimitry Andric     return Order;
10070eae32dcSDimitry Andric   }
10080eae32dcSDimitry Andric 
10090eae32dcSDimitry Andric private:
10100eae32dcSDimitry Andric   /// The number of nodes in the graph.
10110eae32dcSDimitry Andric   const size_t NumNodes;
10120eae32dcSDimitry Andric 
10130eae32dcSDimitry Andric   /// Successors of each node.
10140eae32dcSDimitry Andric   std::vector<std::vector<uint64_t>> SuccNodes;
10150eae32dcSDimitry Andric 
10160eae32dcSDimitry Andric   /// Predecessors of each node.
10170eae32dcSDimitry Andric   std::vector<std::vector<uint64_t>> PredNodes;
10180eae32dcSDimitry Andric 
101906c3fb27SDimitry Andric   /// All nodes (basic blocks) in the graph.
102006c3fb27SDimitry Andric   std::vector<NodeT> AllNodes;
10210eae32dcSDimitry Andric 
102206c3fb27SDimitry Andric   /// All jumps between the nodes.
102306c3fb27SDimitry Andric   std::vector<JumpT> AllJumps;
10240eae32dcSDimitry Andric 
102506c3fb27SDimitry Andric   /// All chains of nodes.
102606c3fb27SDimitry Andric   std::vector<ChainT> AllChains;
10270eae32dcSDimitry Andric 
102806c3fb27SDimitry Andric   /// All edges between the chains.
10290eae32dcSDimitry Andric   std::vector<ChainEdge> AllEdges;
10300eae32dcSDimitry Andric 
10310eae32dcSDimitry Andric   /// Active chains. The vector gets updated at runtime when chains are merged.
103206c3fb27SDimitry Andric   std::vector<ChainT *> HotChains;
10330eae32dcSDimitry Andric };
10340eae32dcSDimitry Andric 
1035*5f757f3fSDimitry Andric /// The implementation of the Cache-Directed Sort (CDSort) algorithm for
1036*5f757f3fSDimitry Andric /// ordering functions represented by a call graph.
1037*5f757f3fSDimitry Andric class CDSortImpl {
1038*5f757f3fSDimitry Andric public:
CDSortImpl(const CDSortConfig & Config,ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts,ArrayRef<uint64_t> EdgeOffsets)1039*5f757f3fSDimitry Andric   CDSortImpl(const CDSortConfig &Config, ArrayRef<uint64_t> NodeSizes,
1040*5f757f3fSDimitry Andric              ArrayRef<uint64_t> NodeCounts, ArrayRef<EdgeCount> EdgeCounts,
1041*5f757f3fSDimitry Andric              ArrayRef<uint64_t> EdgeOffsets)
1042*5f757f3fSDimitry Andric       : Config(Config), NumNodes(NodeSizes.size()) {
1043*5f757f3fSDimitry Andric     initialize(NodeSizes, NodeCounts, EdgeCounts, EdgeOffsets);
1044*5f757f3fSDimitry Andric   }
1045*5f757f3fSDimitry Andric 
1046*5f757f3fSDimitry Andric   /// Run the algorithm and return an ordered set of function clusters.
run()1047*5f757f3fSDimitry Andric   std::vector<uint64_t> run() {
1048*5f757f3fSDimitry Andric     // Merge pairs of chains while improving the objective.
1049*5f757f3fSDimitry Andric     mergeChainPairs();
1050*5f757f3fSDimitry Andric 
1051*5f757f3fSDimitry Andric     // Collect nodes from all the chains.
1052*5f757f3fSDimitry Andric     return concatChains();
1053*5f757f3fSDimitry Andric   }
1054*5f757f3fSDimitry Andric 
1055*5f757f3fSDimitry Andric private:
1056*5f757f3fSDimitry Andric   /// Initialize the algorithm's data structures.
initialize(const ArrayRef<uint64_t> & NodeSizes,const ArrayRef<uint64_t> & NodeCounts,const ArrayRef<EdgeCount> & EdgeCounts,const ArrayRef<uint64_t> & EdgeOffsets)1057*5f757f3fSDimitry Andric   void initialize(const ArrayRef<uint64_t> &NodeSizes,
1058*5f757f3fSDimitry Andric                   const ArrayRef<uint64_t> &NodeCounts,
1059*5f757f3fSDimitry Andric                   const ArrayRef<EdgeCount> &EdgeCounts,
1060*5f757f3fSDimitry Andric                   const ArrayRef<uint64_t> &EdgeOffsets) {
1061*5f757f3fSDimitry Andric     // Initialize nodes.
1062*5f757f3fSDimitry Andric     AllNodes.reserve(NumNodes);
1063*5f757f3fSDimitry Andric     for (uint64_t Node = 0; Node < NumNodes; Node++) {
1064*5f757f3fSDimitry Andric       uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);
1065*5f757f3fSDimitry Andric       uint64_t ExecutionCount = NodeCounts[Node];
1066*5f757f3fSDimitry Andric       AllNodes.emplace_back(Node, Size, ExecutionCount);
1067*5f757f3fSDimitry Andric       TotalSamples += ExecutionCount;
1068*5f757f3fSDimitry Andric       if (ExecutionCount > 0)
1069*5f757f3fSDimitry Andric         TotalSize += Size;
1070*5f757f3fSDimitry Andric     }
1071*5f757f3fSDimitry Andric 
1072*5f757f3fSDimitry Andric     // Initialize jumps between the nodes.
1073*5f757f3fSDimitry Andric     SuccNodes.resize(NumNodes);
1074*5f757f3fSDimitry Andric     PredNodes.resize(NumNodes);
1075*5f757f3fSDimitry Andric     AllJumps.reserve(EdgeCounts.size());
1076*5f757f3fSDimitry Andric     for (size_t I = 0; I < EdgeCounts.size(); I++) {
1077*5f757f3fSDimitry Andric       auto [Pred, Succ, Count] = EdgeCounts[I];
1078*5f757f3fSDimitry Andric       // Ignore recursive calls.
1079*5f757f3fSDimitry Andric       if (Pred == Succ)
1080*5f757f3fSDimitry Andric         continue;
1081*5f757f3fSDimitry Andric 
1082*5f757f3fSDimitry Andric       SuccNodes[Pred].push_back(Succ);
1083*5f757f3fSDimitry Andric       PredNodes[Succ].push_back(Pred);
1084*5f757f3fSDimitry Andric       if (Count > 0) {
1085*5f757f3fSDimitry Andric         NodeT &PredNode = AllNodes[Pred];
1086*5f757f3fSDimitry Andric         NodeT &SuccNode = AllNodes[Succ];
1087*5f757f3fSDimitry Andric         AllJumps.emplace_back(&PredNode, &SuccNode, Count);
1088*5f757f3fSDimitry Andric         AllJumps.back().Offset = EdgeOffsets[I];
1089*5f757f3fSDimitry Andric         SuccNode.InJumps.push_back(&AllJumps.back());
1090*5f757f3fSDimitry Andric         PredNode.OutJumps.push_back(&AllJumps.back());
1091*5f757f3fSDimitry Andric         // Adjust execution counts.
1092*5f757f3fSDimitry Andric         PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Count);
1093*5f757f3fSDimitry Andric         SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Count);
1094*5f757f3fSDimitry Andric       }
1095*5f757f3fSDimitry Andric     }
1096*5f757f3fSDimitry Andric 
1097*5f757f3fSDimitry Andric     // Initialize chains.
1098*5f757f3fSDimitry Andric     AllChains.reserve(NumNodes);
1099*5f757f3fSDimitry Andric     for (NodeT &Node : AllNodes) {
1100*5f757f3fSDimitry Andric       // Adjust execution counts.
1101*5f757f3fSDimitry Andric       Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount());
1102*5f757f3fSDimitry Andric       Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount());
1103*5f757f3fSDimitry Andric       // Create chain.
1104*5f757f3fSDimitry Andric       AllChains.emplace_back(Node.Index, &Node);
1105*5f757f3fSDimitry Andric       Node.CurChain = &AllChains.back();
1106*5f757f3fSDimitry Andric     }
1107*5f757f3fSDimitry Andric 
1108*5f757f3fSDimitry Andric     // Initialize chain edges.
1109*5f757f3fSDimitry Andric     AllEdges.reserve(AllJumps.size());
1110*5f757f3fSDimitry Andric     for (NodeT &PredNode : AllNodes) {
1111*5f757f3fSDimitry Andric       for (JumpT *Jump : PredNode.OutJumps) {
1112*5f757f3fSDimitry Andric         NodeT *SuccNode = Jump->Target;
1113*5f757f3fSDimitry Andric         ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
1114*5f757f3fSDimitry Andric         // This edge is already present in the graph.
1115*5f757f3fSDimitry Andric         if (CurEdge != nullptr) {
1116*5f757f3fSDimitry Andric           assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
1117*5f757f3fSDimitry Andric           CurEdge->appendJump(Jump);
1118*5f757f3fSDimitry Andric           continue;
1119*5f757f3fSDimitry Andric         }
1120*5f757f3fSDimitry Andric         // This is a new edge.
1121*5f757f3fSDimitry Andric         AllEdges.emplace_back(Jump);
1122*5f757f3fSDimitry Andric         PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
1123*5f757f3fSDimitry Andric         SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
1124*5f757f3fSDimitry Andric       }
1125*5f757f3fSDimitry Andric     }
1126*5f757f3fSDimitry Andric   }
1127*5f757f3fSDimitry Andric 
1128*5f757f3fSDimitry Andric   /// Merge pairs of chains while there is an improvement in the objective.
mergeChainPairs()1129*5f757f3fSDimitry Andric   void mergeChainPairs() {
1130*5f757f3fSDimitry Andric     // Create a priority queue containing all edges ordered by the merge gain.
1131*5f757f3fSDimitry Andric     auto GainComparator = [](ChainEdge *L, ChainEdge *R) {
1132*5f757f3fSDimitry Andric       return std::make_tuple(-L->gain(), L->srcChain()->Id, L->dstChain()->Id) <
1133*5f757f3fSDimitry Andric              std::make_tuple(-R->gain(), R->srcChain()->Id, R->dstChain()->Id);
1134*5f757f3fSDimitry Andric     };
1135*5f757f3fSDimitry Andric     std::set<ChainEdge *, decltype(GainComparator)> Queue(GainComparator);
1136*5f757f3fSDimitry Andric 
1137*5f757f3fSDimitry Andric     // Insert the edges into the queue.
1138*5f757f3fSDimitry Andric     [[maybe_unused]] size_t NumActiveChains = 0;
1139*5f757f3fSDimitry Andric     for (NodeT &Node : AllNodes) {
1140*5f757f3fSDimitry Andric       if (Node.ExecutionCount == 0)
1141*5f757f3fSDimitry Andric         continue;
1142*5f757f3fSDimitry Andric       ++NumActiveChains;
1143*5f757f3fSDimitry Andric       for (const auto &[_, Edge] : Node.CurChain->Edges) {
1144*5f757f3fSDimitry Andric         // Ignore self-edges.
1145*5f757f3fSDimitry Andric         if (Edge->isSelfEdge())
1146*5f757f3fSDimitry Andric           continue;
1147*5f757f3fSDimitry Andric         // Ignore already processed edges.
1148*5f757f3fSDimitry Andric         if (Edge->gain() != -1.0)
1149*5f757f3fSDimitry Andric           continue;
1150*5f757f3fSDimitry Andric 
1151*5f757f3fSDimitry Andric         // Compute the gain of merging the two chains.
1152*5f757f3fSDimitry Andric         MergeGainT Gain = getBestMergeGain(Edge);
1153*5f757f3fSDimitry Andric         Edge->setMergeGain(Gain);
1154*5f757f3fSDimitry Andric 
1155*5f757f3fSDimitry Andric         if (Edge->gain() > EPS)
1156*5f757f3fSDimitry Andric           Queue.insert(Edge);
1157*5f757f3fSDimitry Andric       }
1158*5f757f3fSDimitry Andric     }
1159*5f757f3fSDimitry Andric 
1160*5f757f3fSDimitry Andric     // Merge the chains while the gain of merging is positive.
1161*5f757f3fSDimitry Andric     while (!Queue.empty()) {
1162*5f757f3fSDimitry Andric       // Extract the best (top) edge for merging.
1163*5f757f3fSDimitry Andric       ChainEdge *BestEdge = *Queue.begin();
1164*5f757f3fSDimitry Andric       Queue.erase(Queue.begin());
1165*5f757f3fSDimitry Andric       ChainT *BestSrcChain = BestEdge->srcChain();
1166*5f757f3fSDimitry Andric       ChainT *BestDstChain = BestEdge->dstChain();
1167*5f757f3fSDimitry Andric 
1168*5f757f3fSDimitry Andric       // Remove outdated edges from the queue.
1169*5f757f3fSDimitry Andric       for (const auto &[_, ChainEdge] : BestSrcChain->Edges)
1170*5f757f3fSDimitry Andric         Queue.erase(ChainEdge);
1171*5f757f3fSDimitry Andric       for (const auto &[_, ChainEdge] : BestDstChain->Edges)
1172*5f757f3fSDimitry Andric         Queue.erase(ChainEdge);
1173*5f757f3fSDimitry Andric 
1174*5f757f3fSDimitry Andric       // Merge the best pair of chains.
1175*5f757f3fSDimitry Andric       MergeGainT BestGain = BestEdge->getMergeGain();
1176*5f757f3fSDimitry Andric       mergeChains(BestSrcChain, BestDstChain, BestGain.mergeOffset(),
1177*5f757f3fSDimitry Andric                   BestGain.mergeType());
1178*5f757f3fSDimitry Andric       --NumActiveChains;
1179*5f757f3fSDimitry Andric 
1180*5f757f3fSDimitry Andric       // Insert newly created edges into the queue.
1181*5f757f3fSDimitry Andric       for (const auto &[_, Edge] : BestSrcChain->Edges) {
1182*5f757f3fSDimitry Andric         // Ignore loop edges.
1183*5f757f3fSDimitry Andric         if (Edge->isSelfEdge())
1184*5f757f3fSDimitry Andric           continue;
1185*5f757f3fSDimitry Andric         if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
1186*5f757f3fSDimitry Andric             Config.MaxChainSize)
1187*5f757f3fSDimitry Andric           continue;
1188*5f757f3fSDimitry Andric 
1189*5f757f3fSDimitry Andric         // Compute the gain of merging the two chains.
1190*5f757f3fSDimitry Andric         MergeGainT Gain = getBestMergeGain(Edge);
1191*5f757f3fSDimitry Andric         Edge->setMergeGain(Gain);
1192*5f757f3fSDimitry Andric 
1193*5f757f3fSDimitry Andric         if (Edge->gain() > EPS)
1194*5f757f3fSDimitry Andric           Queue.insert(Edge);
1195*5f757f3fSDimitry Andric       }
1196*5f757f3fSDimitry Andric     }
1197*5f757f3fSDimitry Andric 
1198*5f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << "Cache-directed function sorting reduced the number"
1199*5f757f3fSDimitry Andric                       << " of chains from " << NumNodes << " to "
1200*5f757f3fSDimitry Andric                       << NumActiveChains << "\n");
1201*5f757f3fSDimitry Andric   }
1202*5f757f3fSDimitry Andric 
1203*5f757f3fSDimitry Andric   /// Compute the gain of merging two chains.
1204*5f757f3fSDimitry Andric   ///
1205*5f757f3fSDimitry Andric   /// The function considers all possible ways of merging two chains and
1206*5f757f3fSDimitry Andric   /// computes the one having the largest increase in ExtTSP objective. The
1207*5f757f3fSDimitry Andric   /// result is a pair with the first element being the gain and the second
1208*5f757f3fSDimitry Andric   /// element being the corresponding merging type.
getBestMergeGain(ChainEdge * Edge) const1209*5f757f3fSDimitry Andric   MergeGainT getBestMergeGain(ChainEdge *Edge) const {
1210*5f757f3fSDimitry Andric     assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
1211*5f757f3fSDimitry Andric     // Precompute jumps between ChainPred and ChainSucc.
1212*5f757f3fSDimitry Andric     MergedJumpsT Jumps(&Edge->jumps());
1213*5f757f3fSDimitry Andric     ChainT *SrcChain = Edge->srcChain();
1214*5f757f3fSDimitry Andric     ChainT *DstChain = Edge->dstChain();
1215*5f757f3fSDimitry Andric 
1216*5f757f3fSDimitry Andric     // This object holds the best currently chosen gain of merging two chains.
1217*5f757f3fSDimitry Andric     MergeGainT Gain = MergeGainT();
1218*5f757f3fSDimitry Andric 
1219*5f757f3fSDimitry Andric     /// Given a list of merge types, try to merge two chains and update Gain
1220*5f757f3fSDimitry Andric     /// with a better alternative.
1221*5f757f3fSDimitry Andric     auto tryChainMerging = [&](const std::vector<MergeTypeT> &MergeTypes) {
1222*5f757f3fSDimitry Andric       // Apply the merge, compute the corresponding gain, and update the best
1223*5f757f3fSDimitry Andric       // value, if the merge is beneficial.
1224*5f757f3fSDimitry Andric       for (const MergeTypeT &MergeType : MergeTypes) {
1225*5f757f3fSDimitry Andric         MergeGainT NewGain =
1226*5f757f3fSDimitry Andric             computeMergeGain(SrcChain, DstChain, Jumps, MergeType);
1227*5f757f3fSDimitry Andric 
1228*5f757f3fSDimitry Andric         // When forward and backward gains are the same, prioritize merging that
1229*5f757f3fSDimitry Andric         // preserves the original order of the functions in the binary.
1230*5f757f3fSDimitry Andric         if (std::abs(Gain.score() - NewGain.score()) < EPS) {
1231*5f757f3fSDimitry Andric           if ((MergeType == MergeTypeT::X_Y && SrcChain->Id < DstChain->Id) ||
1232*5f757f3fSDimitry Andric               (MergeType == MergeTypeT::Y_X && SrcChain->Id > DstChain->Id)) {
1233*5f757f3fSDimitry Andric             Gain = NewGain;
1234*5f757f3fSDimitry Andric           }
1235*5f757f3fSDimitry Andric         } else if (NewGain.score() > Gain.score() + EPS) {
1236*5f757f3fSDimitry Andric           Gain = NewGain;
1237*5f757f3fSDimitry Andric         }
1238*5f757f3fSDimitry Andric       }
1239*5f757f3fSDimitry Andric     };
1240*5f757f3fSDimitry Andric 
1241*5f757f3fSDimitry Andric     // Try to concatenate two chains w/o splitting.
1242*5f757f3fSDimitry Andric     tryChainMerging({MergeTypeT::X_Y, MergeTypeT::Y_X});
1243*5f757f3fSDimitry Andric 
1244*5f757f3fSDimitry Andric     return Gain;
1245*5f757f3fSDimitry Andric   }
1246*5f757f3fSDimitry Andric 
1247*5f757f3fSDimitry Andric   /// Compute the score gain of merging two chains, respecting a given type.
1248*5f757f3fSDimitry Andric   ///
1249*5f757f3fSDimitry Andric   /// The two chains are not modified in the method.
computeMergeGain(ChainT * ChainPred,ChainT * ChainSucc,const MergedJumpsT & Jumps,MergeTypeT MergeType) const1250*5f757f3fSDimitry Andric   MergeGainT computeMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
1251*5f757f3fSDimitry Andric                               const MergedJumpsT &Jumps,
1252*5f757f3fSDimitry Andric                               MergeTypeT MergeType) const {
1253*5f757f3fSDimitry Andric     // This doesn't depend on the ordering of the nodes
1254*5f757f3fSDimitry Andric     double FreqGain = freqBasedLocalityGain(ChainPred, ChainSucc);
1255*5f757f3fSDimitry Andric 
1256*5f757f3fSDimitry Andric     // Merge offset is always 0, as the chains are not split.
1257*5f757f3fSDimitry Andric     size_t MergeOffset = 0;
1258*5f757f3fSDimitry Andric     auto MergedBlocks =
1259*5f757f3fSDimitry Andric         mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
1260*5f757f3fSDimitry Andric     double DistGain = distBasedLocalityGain(MergedBlocks, Jumps);
1261*5f757f3fSDimitry Andric 
1262*5f757f3fSDimitry Andric     double GainScore = DistGain + Config.FrequencyScale * FreqGain;
1263*5f757f3fSDimitry Andric     // Scale the result to increase the importance of merging short chains.
1264*5f757f3fSDimitry Andric     if (GainScore >= 0.0)
1265*5f757f3fSDimitry Andric       GainScore /= std::min(ChainPred->Size, ChainSucc->Size);
1266*5f757f3fSDimitry Andric 
1267*5f757f3fSDimitry Andric     return MergeGainT(GainScore, MergeOffset, MergeType);
1268*5f757f3fSDimitry Andric   }
1269*5f757f3fSDimitry Andric 
1270*5f757f3fSDimitry Andric   /// Compute the change of the frequency locality after merging the chains.
freqBasedLocalityGain(ChainT * ChainPred,ChainT * ChainSucc) const1271*5f757f3fSDimitry Andric   double freqBasedLocalityGain(ChainT *ChainPred, ChainT *ChainSucc) const {
1272*5f757f3fSDimitry Andric     auto missProbability = [&](double ChainDensity) {
1273*5f757f3fSDimitry Andric       double PageSamples = ChainDensity * Config.CacheSize;
1274*5f757f3fSDimitry Andric       if (PageSamples >= TotalSamples)
1275*5f757f3fSDimitry Andric         return 0.0;
1276*5f757f3fSDimitry Andric       double P = PageSamples / TotalSamples;
1277*5f757f3fSDimitry Andric       return pow(1.0 - P, static_cast<double>(Config.CacheEntries));
1278*5f757f3fSDimitry Andric     };
1279*5f757f3fSDimitry Andric 
1280*5f757f3fSDimitry Andric     // Cache misses on the chains before merging.
1281*5f757f3fSDimitry Andric     double CurScore =
1282*5f757f3fSDimitry Andric         ChainPred->ExecutionCount * missProbability(ChainPred->density()) +
1283*5f757f3fSDimitry Andric         ChainSucc->ExecutionCount * missProbability(ChainSucc->density());
1284*5f757f3fSDimitry Andric 
1285*5f757f3fSDimitry Andric     // Cache misses on the merged chain
1286*5f757f3fSDimitry Andric     double MergedCounts = ChainPred->ExecutionCount + ChainSucc->ExecutionCount;
1287*5f757f3fSDimitry Andric     double MergedSize = ChainPred->Size + ChainSucc->Size;
1288*5f757f3fSDimitry Andric     double MergedDensity = static_cast<double>(MergedCounts) / MergedSize;
1289*5f757f3fSDimitry Andric     double NewScore = MergedCounts * missProbability(MergedDensity);
1290*5f757f3fSDimitry Andric 
1291*5f757f3fSDimitry Andric     return CurScore - NewScore;
1292*5f757f3fSDimitry Andric   }
1293*5f757f3fSDimitry Andric 
1294*5f757f3fSDimitry Andric   /// Compute the distance locality for a jump / call.
distScore(uint64_t SrcAddr,uint64_t DstAddr,uint64_t Count) const1295*5f757f3fSDimitry Andric   double distScore(uint64_t SrcAddr, uint64_t DstAddr, uint64_t Count) const {
1296*5f757f3fSDimitry Andric     uint64_t Dist = SrcAddr <= DstAddr ? DstAddr - SrcAddr : SrcAddr - DstAddr;
1297*5f757f3fSDimitry Andric     double D = Dist == 0 ? 0.1 : static_cast<double>(Dist);
1298*5f757f3fSDimitry Andric     return static_cast<double>(Count) * std::pow(D, -Config.DistancePower);
1299*5f757f3fSDimitry Andric   }
1300*5f757f3fSDimitry Andric 
1301*5f757f3fSDimitry Andric   /// Compute the change of the distance locality after merging the chains.
distBasedLocalityGain(const MergedNodesT & Nodes,const MergedJumpsT & Jumps) const1302*5f757f3fSDimitry Andric   double distBasedLocalityGain(const MergedNodesT &Nodes,
1303*5f757f3fSDimitry Andric                                const MergedJumpsT &Jumps) const {
1304*5f757f3fSDimitry Andric     uint64_t CurAddr = 0;
1305*5f757f3fSDimitry Andric     Nodes.forEach([&](const NodeT *Node) {
1306*5f757f3fSDimitry Andric       Node->EstimatedAddr = CurAddr;
1307*5f757f3fSDimitry Andric       CurAddr += Node->Size;
1308*5f757f3fSDimitry Andric     });
1309*5f757f3fSDimitry Andric 
1310*5f757f3fSDimitry Andric     double CurScore = 0;
1311*5f757f3fSDimitry Andric     double NewScore = 0;
1312*5f757f3fSDimitry Andric     Jumps.forEach([&](const JumpT *Jump) {
1313*5f757f3fSDimitry Andric       uint64_t SrcAddr = Jump->Source->EstimatedAddr + Jump->Offset;
1314*5f757f3fSDimitry Andric       uint64_t DstAddr = Jump->Target->EstimatedAddr;
1315*5f757f3fSDimitry Andric       NewScore += distScore(SrcAddr, DstAddr, Jump->ExecutionCount);
1316*5f757f3fSDimitry Andric       CurScore += distScore(0, TotalSize, Jump->ExecutionCount);
1317*5f757f3fSDimitry Andric     });
1318*5f757f3fSDimitry Andric     return NewScore - CurScore;
1319*5f757f3fSDimitry Andric   }
1320*5f757f3fSDimitry Andric 
1321*5f757f3fSDimitry Andric   /// Merge chain From into chain Into, update the list of active chains,
1322*5f757f3fSDimitry Andric   /// adjacency information, and the corresponding cached values.
mergeChains(ChainT * Into,ChainT * From,size_t MergeOffset,MergeTypeT MergeType)1323*5f757f3fSDimitry Andric   void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,
1324*5f757f3fSDimitry Andric                    MergeTypeT MergeType) {
1325*5f757f3fSDimitry Andric     assert(Into != From && "a chain cannot be merged with itself");
1326*5f757f3fSDimitry Andric 
1327*5f757f3fSDimitry Andric     // Merge the nodes.
1328*5f757f3fSDimitry Andric     MergedNodesT MergedNodes =
1329*5f757f3fSDimitry Andric         mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
1330*5f757f3fSDimitry Andric     Into->merge(From, MergedNodes.getNodes());
1331*5f757f3fSDimitry Andric 
1332*5f757f3fSDimitry Andric     // Merge the edges.
1333*5f757f3fSDimitry Andric     Into->mergeEdges(From);
1334*5f757f3fSDimitry Andric     From->clear();
1335*5f757f3fSDimitry Andric   }
1336*5f757f3fSDimitry Andric 
1337*5f757f3fSDimitry Andric   /// Concatenate all chains into the final order.
concatChains()1338*5f757f3fSDimitry Andric   std::vector<uint64_t> concatChains() {
1339*5f757f3fSDimitry Andric     // Collect chains and calculate density stats for their sorting.
1340*5f757f3fSDimitry Andric     std::vector<const ChainT *> SortedChains;
1341*5f757f3fSDimitry Andric     DenseMap<const ChainT *, double> ChainDensity;
1342*5f757f3fSDimitry Andric     for (ChainT &Chain : AllChains) {
1343*5f757f3fSDimitry Andric       if (!Chain.Nodes.empty()) {
1344*5f757f3fSDimitry Andric         SortedChains.push_back(&Chain);
1345*5f757f3fSDimitry Andric         // Using doubles to avoid overflow of ExecutionCounts.
1346*5f757f3fSDimitry Andric         double Size = 0;
1347*5f757f3fSDimitry Andric         double ExecutionCount = 0;
1348*5f757f3fSDimitry Andric         for (NodeT *Node : Chain.Nodes) {
1349*5f757f3fSDimitry Andric           Size += static_cast<double>(Node->Size);
1350*5f757f3fSDimitry Andric           ExecutionCount += static_cast<double>(Node->ExecutionCount);
1351*5f757f3fSDimitry Andric         }
1352*5f757f3fSDimitry Andric         assert(Size > 0 && "a chain of zero size");
1353*5f757f3fSDimitry Andric         ChainDensity[&Chain] = ExecutionCount / Size;
1354*5f757f3fSDimitry Andric       }
1355*5f757f3fSDimitry Andric     }
1356*5f757f3fSDimitry Andric 
1357*5f757f3fSDimitry Andric     // Sort chains by density in the decreasing order.
1358*5f757f3fSDimitry Andric     std::sort(SortedChains.begin(), SortedChains.end(),
1359*5f757f3fSDimitry Andric               [&](const ChainT *L, const ChainT *R) {
1360*5f757f3fSDimitry Andric                 const double DL = ChainDensity[L];
1361*5f757f3fSDimitry Andric                 const double DR = ChainDensity[R];
1362*5f757f3fSDimitry Andric                 // Compare by density and break ties by chain identifiers.
1363*5f757f3fSDimitry Andric                 return std::make_tuple(-DL, L->Id) <
1364*5f757f3fSDimitry Andric                        std::make_tuple(-DR, R->Id);
1365*5f757f3fSDimitry Andric               });
1366*5f757f3fSDimitry Andric 
1367*5f757f3fSDimitry Andric     // Collect the nodes in the order specified by their chains.
1368*5f757f3fSDimitry Andric     std::vector<uint64_t> Order;
1369*5f757f3fSDimitry Andric     Order.reserve(NumNodes);
1370*5f757f3fSDimitry Andric     for (const ChainT *Chain : SortedChains)
1371*5f757f3fSDimitry Andric       for (NodeT *Node : Chain->Nodes)
1372*5f757f3fSDimitry Andric         Order.push_back(Node->Index);
1373*5f757f3fSDimitry Andric     return Order;
1374*5f757f3fSDimitry Andric   }
1375*5f757f3fSDimitry Andric 
1376*5f757f3fSDimitry Andric private:
1377*5f757f3fSDimitry Andric   /// Config for the algorithm.
1378*5f757f3fSDimitry Andric   const CDSortConfig Config;
1379*5f757f3fSDimitry Andric 
1380*5f757f3fSDimitry Andric   /// The number of nodes in the graph.
1381*5f757f3fSDimitry Andric   const size_t NumNodes;
1382*5f757f3fSDimitry Andric 
1383*5f757f3fSDimitry Andric   /// Successors of each node.
1384*5f757f3fSDimitry Andric   std::vector<std::vector<uint64_t>> SuccNodes;
1385*5f757f3fSDimitry Andric 
1386*5f757f3fSDimitry Andric   /// Predecessors of each node.
1387*5f757f3fSDimitry Andric   std::vector<std::vector<uint64_t>> PredNodes;
1388*5f757f3fSDimitry Andric 
1389*5f757f3fSDimitry Andric   /// All nodes (functions) in the graph.
1390*5f757f3fSDimitry Andric   std::vector<NodeT> AllNodes;
1391*5f757f3fSDimitry Andric 
1392*5f757f3fSDimitry Andric   /// All jumps (function calls) between the nodes.
1393*5f757f3fSDimitry Andric   std::vector<JumpT> AllJumps;
1394*5f757f3fSDimitry Andric 
1395*5f757f3fSDimitry Andric   /// All chains of nodes.
1396*5f757f3fSDimitry Andric   std::vector<ChainT> AllChains;
1397*5f757f3fSDimitry Andric 
1398*5f757f3fSDimitry Andric   /// All edges between the chains.
1399*5f757f3fSDimitry Andric   std::vector<ChainEdge> AllEdges;
1400*5f757f3fSDimitry Andric 
1401*5f757f3fSDimitry Andric   /// The total number of samples in the graph.
1402*5f757f3fSDimitry Andric   uint64_t TotalSamples{0};
1403*5f757f3fSDimitry Andric 
1404*5f757f3fSDimitry Andric   /// The total size of the nodes in the graph.
1405*5f757f3fSDimitry Andric   uint64_t TotalSize{0};
1406*5f757f3fSDimitry Andric };
1407*5f757f3fSDimitry Andric 
14080eae32dcSDimitry Andric } // end of anonymous namespace
14090eae32dcSDimitry Andric 
141006c3fb27SDimitry Andric std::vector<uint64_t>
computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)1411*5f757f3fSDimitry Andric codelayout::computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,
1412*5f757f3fSDimitry Andric                                 ArrayRef<uint64_t> NodeCounts,
1413*5f757f3fSDimitry Andric                                 ArrayRef<EdgeCount> EdgeCounts) {
1414*5f757f3fSDimitry Andric   // Verify correctness of the input data.
14150eae32dcSDimitry Andric   assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");
141606c3fb27SDimitry Andric   assert(NodeSizes.size() > 2 && "Incorrect input");
14170eae32dcSDimitry Andric 
1418*5f757f3fSDimitry Andric   // Apply the reordering algorithm.
141906c3fb27SDimitry Andric   ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts);
1420*5f757f3fSDimitry Andric   std::vector<uint64_t> Result = Alg.run();
14210eae32dcSDimitry Andric 
1422*5f757f3fSDimitry Andric   // Verify correctness of the output.
14230eae32dcSDimitry Andric   assert(Result.front() == 0 && "Original entry point is not preserved");
142406c3fb27SDimitry Andric   assert(Result.size() == NodeSizes.size() && "Incorrect size of layout");
14250eae32dcSDimitry Andric   return Result;
14260eae32dcSDimitry Andric }
14270eae32dcSDimitry Andric 
calcExtTspScore(ArrayRef<uint64_t> Order,ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)1428*5f757f3fSDimitry Andric double codelayout::calcExtTspScore(ArrayRef<uint64_t> Order,
1429*5f757f3fSDimitry Andric                                    ArrayRef<uint64_t> NodeSizes,
1430*5f757f3fSDimitry Andric                                    ArrayRef<uint64_t> NodeCounts,
1431*5f757f3fSDimitry Andric                                    ArrayRef<EdgeCount> EdgeCounts) {
1432*5f757f3fSDimitry Andric   // Estimate addresses of the blocks in memory.
1433bdd1243dSDimitry Andric   std::vector<uint64_t> Addr(NodeSizes.size(), 0);
14340eae32dcSDimitry Andric   for (size_t Idx = 1; Idx < Order.size(); Idx++) {
14350eae32dcSDimitry Andric     Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]];
14360eae32dcSDimitry Andric   }
1437bdd1243dSDimitry Andric   std::vector<uint64_t> OutDegree(NodeSizes.size(), 0);
1438*5f757f3fSDimitry Andric   for (auto Edge : EdgeCounts)
1439*5f757f3fSDimitry Andric     ++OutDegree[Edge.src];
14400eae32dcSDimitry Andric 
1441*5f757f3fSDimitry Andric   // Increase the score for each jump.
14420eae32dcSDimitry Andric   double Score = 0;
1443*5f757f3fSDimitry Andric   for (auto Edge : EdgeCounts) {
1444*5f757f3fSDimitry Andric     bool IsConditional = OutDegree[Edge.src] > 1;
1445*5f757f3fSDimitry Andric     Score += ::extTSPScore(Addr[Edge.src], NodeSizes[Edge.src], Addr[Edge.dst],
1446*5f757f3fSDimitry Andric                            Edge.count, IsConditional);
14470eae32dcSDimitry Andric   }
14480eae32dcSDimitry Andric   return Score;
14490eae32dcSDimitry Andric }
14500eae32dcSDimitry Andric 
calcExtTspScore(ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)1451*5f757f3fSDimitry Andric double codelayout::calcExtTspScore(ArrayRef<uint64_t> NodeSizes,
1452*5f757f3fSDimitry Andric                                    ArrayRef<uint64_t> NodeCounts,
1453*5f757f3fSDimitry Andric                                    ArrayRef<EdgeCount> EdgeCounts) {
1454bdd1243dSDimitry Andric   std::vector<uint64_t> Order(NodeSizes.size());
14550eae32dcSDimitry Andric   for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) {
14560eae32dcSDimitry Andric     Order[Idx] = Idx;
14570eae32dcSDimitry Andric   }
14580eae32dcSDimitry Andric   return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts);
14590eae32dcSDimitry Andric }
1460*5f757f3fSDimitry Andric 
computeCacheDirectedLayout(const CDSortConfig & Config,ArrayRef<uint64_t> FuncSizes,ArrayRef<uint64_t> FuncCounts,ArrayRef<EdgeCount> CallCounts,ArrayRef<uint64_t> CallOffsets)1461*5f757f3fSDimitry Andric std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
1462*5f757f3fSDimitry Andric     const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes,
1463*5f757f3fSDimitry Andric     ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts,
1464*5f757f3fSDimitry Andric     ArrayRef<uint64_t> CallOffsets) {
1465*5f757f3fSDimitry Andric   // Verify correctness of the input data.
1466*5f757f3fSDimitry Andric   assert(FuncCounts.size() == FuncSizes.size() && "Incorrect input");
1467*5f757f3fSDimitry Andric 
1468*5f757f3fSDimitry Andric   // Apply the reordering algorithm.
1469*5f757f3fSDimitry Andric   CDSortImpl Alg(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets);
1470*5f757f3fSDimitry Andric   std::vector<uint64_t> Result = Alg.run();
1471*5f757f3fSDimitry Andric   assert(Result.size() == FuncSizes.size() && "Incorrect size of layout");
1472*5f757f3fSDimitry Andric   return Result;
1473*5f757f3fSDimitry Andric }
1474*5f757f3fSDimitry Andric 
computeCacheDirectedLayout(ArrayRef<uint64_t> FuncSizes,ArrayRef<uint64_t> FuncCounts,ArrayRef<EdgeCount> CallCounts,ArrayRef<uint64_t> CallOffsets)1475*5f757f3fSDimitry Andric std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
1476*5f757f3fSDimitry Andric     ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts,
1477*5f757f3fSDimitry Andric     ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets) {
1478*5f757f3fSDimitry Andric   CDSortConfig Config;
1479*5f757f3fSDimitry Andric   // Populate the config from the command-line options.
1480*5f757f3fSDimitry Andric   if (CacheEntries.getNumOccurrences() > 0)
1481*5f757f3fSDimitry Andric     Config.CacheEntries = CacheEntries;
1482*5f757f3fSDimitry Andric   if (CacheSize.getNumOccurrences() > 0)
1483*5f757f3fSDimitry Andric     Config.CacheSize = CacheSize;
1484*5f757f3fSDimitry Andric   if (CDMaxChainSize.getNumOccurrences() > 0)
1485*5f757f3fSDimitry Andric     Config.MaxChainSize = CDMaxChainSize;
1486*5f757f3fSDimitry Andric   if (DistancePower.getNumOccurrences() > 0)
1487*5f757f3fSDimitry Andric     Config.DistancePower = DistancePower;
1488*5f757f3fSDimitry Andric   if (FrequencyScale.getNumOccurrences() > 0)
1489*5f757f3fSDimitry Andric     Config.FrequencyScale = FrequencyScale;
1490*5f757f3fSDimitry Andric   return computeCacheDirectedLayout(Config, FuncSizes, FuncCounts, CallCounts,
1491*5f757f3fSDimitry Andric                                     CallOffsets);
1492*5f757f3fSDimitry Andric }
1493