10eae32dcSDimitry Andric //===- CodeLayout.cpp - Implementation of code layout algorithms ----------===//
20eae32dcSDimitry Andric //
30eae32dcSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40eae32dcSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50eae32dcSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60eae32dcSDimitry Andric //
70eae32dcSDimitry Andric //===----------------------------------------------------------------------===//
80eae32dcSDimitry Andric //
906c3fb27SDimitry Andric // The file implements "cache-aware" layout algorithms of basic blocks and
1006c3fb27SDimitry Andric // functions in a binary.
110eae32dcSDimitry Andric //
120eae32dcSDimitry Andric // The algorithm tries to find a layout of nodes (basic blocks) of a given CFG
130eae32dcSDimitry Andric // optimizing jump locality and thus processor I-cache utilization. This is
140eae32dcSDimitry Andric // achieved via increasing the number of fall-through jumps and co-locating
150eae32dcSDimitry Andric // frequently executed nodes together. The name follows the underlying
160eae32dcSDimitry Andric // optimization problem, Extended-TSP, which is a generalization of classical
170eae32dcSDimitry Andric // (maximum) Traveling Salesmen Problem.
180eae32dcSDimitry Andric //
190eae32dcSDimitry Andric // The algorithm is a greedy heuristic that works with chains (ordered lists)
200eae32dcSDimitry Andric // of basic blocks. Initially all chains are isolated basic blocks. On every
210eae32dcSDimitry Andric // iteration, we pick a pair of chains whose merging yields the biggest increase
220eae32dcSDimitry Andric // in the ExtTSP score, which models how i-cache "friendly" a specific chain is.
230eae32dcSDimitry Andric // A pair of chains giving the maximum gain is merged into a new chain. The
240eae32dcSDimitry Andric // procedure stops when there is only one chain left, or when merging does not
250eae32dcSDimitry Andric // increase ExtTSP. In the latter case, the remaining chains are sorted by
260eae32dcSDimitry Andric // density in the decreasing order.
270eae32dcSDimitry Andric //
280eae32dcSDimitry Andric // An important aspect is the way two chains are merged. Unlike earlier
290eae32dcSDimitry Andric // algorithms (e.g., based on the approach of Pettis-Hansen), two
300eae32dcSDimitry Andric // chains, X and Y, are first split into three, X1, X2, and Y. Then we
310eae32dcSDimitry Andric // consider all possible ways of gluing the three chains (e.g., X1YX2, X1X2Y,
320eae32dcSDimitry Andric // X2X1Y, X2YX1, YX1X2, YX2X1) and choose the one producing the largest score.
330eae32dcSDimitry Andric // This improves the quality of the final result (the search space is larger)
340eae32dcSDimitry Andric // while keeping the implementation sufficiently fast.
350eae32dcSDimitry Andric //
360eae32dcSDimitry Andric // Reference:
370eae32dcSDimitry Andric // * A. Newell and S. Pupyrev, Improved Basic Block Reordering,
380eae32dcSDimitry Andric // IEEE Transactions on Computers, 2020
39bdd1243dSDimitry Andric // https://arxiv.org/abs/1809.04676
400eae32dcSDimitry Andric //
410eae32dcSDimitry Andric //===----------------------------------------------------------------------===//
420eae32dcSDimitry Andric
430eae32dcSDimitry Andric #include "llvm/Transforms/Utils/CodeLayout.h"
440eae32dcSDimitry Andric #include "llvm/Support/CommandLine.h"
4506c3fb27SDimitry Andric #include "llvm/Support/Debug.h"
460eae32dcSDimitry Andric
47bdd1243dSDimitry Andric #include <cmath>
48*5f757f3fSDimitry Andric #include <set>
49bdd1243dSDimitry Andric
500eae32dcSDimitry Andric using namespace llvm;
51*5f757f3fSDimitry Andric using namespace llvm::codelayout;
52*5f757f3fSDimitry Andric
530eae32dcSDimitry Andric #define DEBUG_TYPE "code-layout"
540eae32dcSDimitry Andric
5506c3fb27SDimitry Andric namespace llvm {
5681ad6265SDimitry Andric cl::opt<bool> EnableExtTspBlockPlacement(
5781ad6265SDimitry Andric "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
5881ad6265SDimitry Andric cl::desc("Enable machine block placement based on the ext-tsp model, "
5981ad6265SDimitry Andric "optimizing I-cache utilization."));
6081ad6265SDimitry Andric
6181ad6265SDimitry Andric cl::opt<bool> ApplyExtTspWithoutProfile(
6281ad6265SDimitry Andric "ext-tsp-apply-without-profile",
6381ad6265SDimitry Andric cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
6481ad6265SDimitry Andric cl::init(true), cl::Hidden);
6506c3fb27SDimitry Andric } // namespace llvm
6681ad6265SDimitry Andric
67*5f757f3fSDimitry Andric // Algorithm-specific params for Ext-TSP. The values are tuned for the best
68*5f757f3fSDimitry Andric // performance of large-scale front-end bound binaries.
69bdd1243dSDimitry Andric static cl::opt<double> ForwardWeightCond(
70bdd1243dSDimitry Andric "ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1),
71bdd1243dSDimitry Andric cl::desc("The weight of conditional forward jumps for ExtTSP value"));
720eae32dcSDimitry Andric
73bdd1243dSDimitry Andric static cl::opt<double> ForwardWeightUncond(
74bdd1243dSDimitry Andric "ext-tsp-forward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
75bdd1243dSDimitry Andric cl::desc("The weight of unconditional forward jumps for ExtTSP value"));
76bdd1243dSDimitry Andric
77bdd1243dSDimitry Andric static cl::opt<double> BackwardWeightCond(
78bdd1243dSDimitry Andric "ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1),
7906c3fb27SDimitry Andric cl::desc("The weight of conditional backward jumps for ExtTSP value"));
80bdd1243dSDimitry Andric
81bdd1243dSDimitry Andric static cl::opt<double> BackwardWeightUncond(
82bdd1243dSDimitry Andric "ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
8306c3fb27SDimitry Andric cl::desc("The weight of unconditional backward jumps for ExtTSP value"));
84bdd1243dSDimitry Andric
85bdd1243dSDimitry Andric static cl::opt<double> FallthroughWeightCond(
86bdd1243dSDimitry Andric "ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0),
87bdd1243dSDimitry Andric cl::desc("The weight of conditional fallthrough jumps for ExtTSP value"));
88bdd1243dSDimitry Andric
89bdd1243dSDimitry Andric static cl::opt<double> FallthroughWeightUncond(
90bdd1243dSDimitry Andric "ext-tsp-fallthrough-weight-uncond", cl::ReallyHidden, cl::init(1.05),
91bdd1243dSDimitry Andric cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value"));
920eae32dcSDimitry Andric
930eae32dcSDimitry Andric static cl::opt<unsigned> ForwardDistance(
94bdd1243dSDimitry Andric "ext-tsp-forward-distance", cl::ReallyHidden, cl::init(1024),
950eae32dcSDimitry Andric cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"));
960eae32dcSDimitry Andric
970eae32dcSDimitry Andric static cl::opt<unsigned> BackwardDistance(
98bdd1243dSDimitry Andric "ext-tsp-backward-distance", cl::ReallyHidden, cl::init(640),
990eae32dcSDimitry Andric cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
1000eae32dcSDimitry Andric
10181ad6265SDimitry Andric // The maximum size of a chain created by the algorithm. The size is bounded
102*5f757f3fSDimitry Andric // so that the algorithm can efficiently process extremely large instances.
10381ad6265SDimitry Andric static cl::opt<unsigned>
104*5f757f3fSDimitry Andric MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512),
105*5f757f3fSDimitry Andric cl::desc("The maximum size of a chain to create"));
10681ad6265SDimitry Andric
1070eae32dcSDimitry Andric // The maximum size of a chain for splitting. Larger values of the threshold
1080eae32dcSDimitry Andric // may yield better quality at the cost of worsen run-time.
1090eae32dcSDimitry Andric static cl::opt<unsigned> ChainSplitThreshold(
110bdd1243dSDimitry Andric "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
1110eae32dcSDimitry Andric cl::desc("The maximum size of a chain to apply splitting"));
1120eae32dcSDimitry Andric
113*5f757f3fSDimitry Andric // The maximum ratio between densities of two chains for merging.
114*5f757f3fSDimitry Andric static cl::opt<double> MaxMergeDensityRatio(
115*5f757f3fSDimitry Andric "ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(100),
116*5f757f3fSDimitry Andric cl::desc("The maximum ratio between densities of two chains for merging"));
117*5f757f3fSDimitry Andric
118*5f757f3fSDimitry Andric // Algorithm-specific options for CDSort.
119*5f757f3fSDimitry Andric static cl::opt<unsigned> CacheEntries("cdsort-cache-entries", cl::ReallyHidden,
120*5f757f3fSDimitry Andric cl::desc("The size of the cache"));
121*5f757f3fSDimitry Andric
122*5f757f3fSDimitry Andric static cl::opt<unsigned> CacheSize("cdsort-cache-size", cl::ReallyHidden,
123*5f757f3fSDimitry Andric cl::desc("The size of a line in the cache"));
124*5f757f3fSDimitry Andric
125*5f757f3fSDimitry Andric static cl::opt<unsigned>
126*5f757f3fSDimitry Andric CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden,
127*5f757f3fSDimitry Andric cl::desc("The maximum size of a chain to create"));
128*5f757f3fSDimitry Andric
129*5f757f3fSDimitry Andric static cl::opt<double> DistancePower(
130*5f757f3fSDimitry Andric "cdsort-distance-power", cl::ReallyHidden,
131*5f757f3fSDimitry Andric cl::desc("The power exponent for the distance-based locality"));
132*5f757f3fSDimitry Andric
133*5f757f3fSDimitry Andric static cl::opt<double> FrequencyScale(
134*5f757f3fSDimitry Andric "cdsort-frequency-scale", cl::ReallyHidden,
135*5f757f3fSDimitry Andric cl::desc("The scale factor for the frequency-based locality"));
1360eae32dcSDimitry Andric
1370eae32dcSDimitry Andric namespace {
1380eae32dcSDimitry Andric
1390eae32dcSDimitry Andric // Epsilon for comparison of doubles.
1400eae32dcSDimitry Andric constexpr double EPS = 1e-8;
1410eae32dcSDimitry Andric
142bdd1243dSDimitry Andric // Compute the Ext-TSP score for a given jump.
jumpExtTSPScore(uint64_t JumpDist,uint64_t JumpMaxDist,uint64_t Count,double Weight)143bdd1243dSDimitry Andric double jumpExtTSPScore(uint64_t JumpDist, uint64_t JumpMaxDist, uint64_t Count,
144bdd1243dSDimitry Andric double Weight) {
145bdd1243dSDimitry Andric if (JumpDist > JumpMaxDist)
146bdd1243dSDimitry Andric return 0;
147bdd1243dSDimitry Andric double Prob = 1.0 - static_cast<double>(JumpDist) / JumpMaxDist;
148bdd1243dSDimitry Andric return Weight * Prob * Count;
149bdd1243dSDimitry Andric }
150bdd1243dSDimitry Andric
1510eae32dcSDimitry Andric // Compute the Ext-TSP score for a jump between a given pair of blocks,
1520eae32dcSDimitry Andric // using their sizes, (estimated) addresses and the jump execution count.
extTSPScore(uint64_t SrcAddr,uint64_t SrcSize,uint64_t DstAddr,uint64_t Count,bool IsConditional)1530eae32dcSDimitry Andric double extTSPScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr,
154bdd1243dSDimitry Andric uint64_t Count, bool IsConditional) {
1550eae32dcSDimitry Andric // Fallthrough
1560eae32dcSDimitry Andric if (SrcAddr + SrcSize == DstAddr) {
157bdd1243dSDimitry Andric return jumpExtTSPScore(0, 1, Count,
158bdd1243dSDimitry Andric IsConditional ? FallthroughWeightCond
159bdd1243dSDimitry Andric : FallthroughWeightUncond);
1600eae32dcSDimitry Andric }
1610eae32dcSDimitry Andric // Forward
1620eae32dcSDimitry Andric if (SrcAddr + SrcSize < DstAddr) {
163bdd1243dSDimitry Andric const uint64_t Dist = DstAddr - (SrcAddr + SrcSize);
164bdd1243dSDimitry Andric return jumpExtTSPScore(Dist, ForwardDistance, Count,
165bdd1243dSDimitry Andric IsConditional ? ForwardWeightCond
166bdd1243dSDimitry Andric : ForwardWeightUncond);
1670eae32dcSDimitry Andric }
1680eae32dcSDimitry Andric // Backward
169bdd1243dSDimitry Andric const uint64_t Dist = SrcAddr + SrcSize - DstAddr;
170bdd1243dSDimitry Andric return jumpExtTSPScore(Dist, BackwardDistance, Count,
171bdd1243dSDimitry Andric IsConditional ? BackwardWeightCond
172bdd1243dSDimitry Andric : BackwardWeightUncond);
1730eae32dcSDimitry Andric }
1740eae32dcSDimitry Andric
1750eae32dcSDimitry Andric /// A type of merging two chains, X and Y. The former chain is split into
1760eae32dcSDimitry Andric /// X1 and X2 and then concatenated with Y in the order specified by the type.
17706c3fb27SDimitry Andric enum class MergeTypeT : int { X_Y, Y_X, X1_Y_X2, Y_X2_X1, X2_X1_Y };
1780eae32dcSDimitry Andric
1790eae32dcSDimitry Andric /// The gain of merging two chains, that is, the Ext-TSP score of the merge
18006c3fb27SDimitry Andric /// together with the corresponding merge 'type' and 'offset'.
18106c3fb27SDimitry Andric struct MergeGainT {
18206c3fb27SDimitry Andric explicit MergeGainT() = default;
MergeGainT__anonf633aad90111::MergeGainT18306c3fb27SDimitry Andric explicit MergeGainT(double Score, size_t MergeOffset, MergeTypeT MergeType)
1840eae32dcSDimitry Andric : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}
1850eae32dcSDimitry Andric
score__anonf633aad90111::MergeGainT1860eae32dcSDimitry Andric double score() const { return Score; }
1870eae32dcSDimitry Andric
mergeOffset__anonf633aad90111::MergeGainT1880eae32dcSDimitry Andric size_t mergeOffset() const { return MergeOffset; }
1890eae32dcSDimitry Andric
mergeType__anonf633aad90111::MergeGainT19006c3fb27SDimitry Andric MergeTypeT mergeType() const { return MergeType; }
19106c3fb27SDimitry Andric
setMergeType__anonf633aad90111::MergeGainT19206c3fb27SDimitry Andric void setMergeType(MergeTypeT Ty) { MergeType = Ty; }
1930eae32dcSDimitry Andric
1940eae32dcSDimitry Andric // Returns 'true' iff Other is preferred over this.
operator <__anonf633aad90111::MergeGainT19506c3fb27SDimitry Andric bool operator<(const MergeGainT &Other) const {
1960eae32dcSDimitry Andric return (Other.Score > EPS && Other.Score > Score + EPS);
1970eae32dcSDimitry Andric }
1980eae32dcSDimitry Andric
1990eae32dcSDimitry Andric // Update the current gain if Other is preferred over this.
updateIfLessThan__anonf633aad90111::MergeGainT20006c3fb27SDimitry Andric void updateIfLessThan(const MergeGainT &Other) {
2010eae32dcSDimitry Andric if (*this < Other)
2020eae32dcSDimitry Andric *this = Other;
2030eae32dcSDimitry Andric }
2040eae32dcSDimitry Andric
2050eae32dcSDimitry Andric private:
2060eae32dcSDimitry Andric double Score{-1.0};
2070eae32dcSDimitry Andric size_t MergeOffset{0};
20806c3fb27SDimitry Andric MergeTypeT MergeType{MergeTypeT::X_Y};
2090eae32dcSDimitry Andric };
2100eae32dcSDimitry Andric
21106c3fb27SDimitry Andric struct JumpT;
21206c3fb27SDimitry Andric struct ChainT;
21306c3fb27SDimitry Andric struct ChainEdge;
2140eae32dcSDimitry Andric
21506c3fb27SDimitry Andric /// A node in the graph, typically corresponding to a basic block in the CFG or
21606c3fb27SDimitry Andric /// a function in the call graph.
21706c3fb27SDimitry Andric struct NodeT {
21806c3fb27SDimitry Andric NodeT(const NodeT &) = delete;
21906c3fb27SDimitry Andric NodeT(NodeT &&) = default;
22006c3fb27SDimitry Andric NodeT &operator=(const NodeT &) = delete;
22106c3fb27SDimitry Andric NodeT &operator=(NodeT &&) = default;
2220eae32dcSDimitry Andric
NodeT__anonf633aad90111::NodeT223*5f757f3fSDimitry Andric explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)
224*5f757f3fSDimitry Andric : Index(Index), Size(Size), ExecutionCount(Count) {}
22506c3fb27SDimitry Andric
isEntry__anonf633aad90111::NodeT2260eae32dcSDimitry Andric bool isEntry() const { return Index == 0; }
22706c3fb27SDimitry Andric
228*5f757f3fSDimitry Andric // Check if Other is a successor of the node.
229*5f757f3fSDimitry Andric bool isSuccessor(const NodeT *Other) const;
230*5f757f3fSDimitry Andric
23106c3fb27SDimitry Andric // The total execution count of outgoing jumps.
23206c3fb27SDimitry Andric uint64_t outCount() const;
23306c3fb27SDimitry Andric
23406c3fb27SDimitry Andric // The total execution count of incoming jumps.
23506c3fb27SDimitry Andric uint64_t inCount() const;
23606c3fb27SDimitry Andric
23706c3fb27SDimitry Andric // The original index of the node in graph.
23806c3fb27SDimitry Andric size_t Index{0};
23906c3fb27SDimitry Andric // The index of the node in the current chain.
24006c3fb27SDimitry Andric size_t CurIndex{0};
24106c3fb27SDimitry Andric // The size of the node in the binary.
24206c3fb27SDimitry Andric uint64_t Size{0};
24306c3fb27SDimitry Andric // The execution count of the node in the profile data.
24406c3fb27SDimitry Andric uint64_t ExecutionCount{0};
24506c3fb27SDimitry Andric // The current chain of the node.
24606c3fb27SDimitry Andric ChainT *CurChain{nullptr};
24706c3fb27SDimitry Andric // The offset of the node in the current chain.
24806c3fb27SDimitry Andric mutable uint64_t EstimatedAddr{0};
24906c3fb27SDimitry Andric // Forced successor of the node in the graph.
25006c3fb27SDimitry Andric NodeT *ForcedSucc{nullptr};
25106c3fb27SDimitry Andric // Forced predecessor of the node in the graph.
25206c3fb27SDimitry Andric NodeT *ForcedPred{nullptr};
25306c3fb27SDimitry Andric // Outgoing jumps from the node.
25406c3fb27SDimitry Andric std::vector<JumpT *> OutJumps;
25506c3fb27SDimitry Andric // Incoming jumps to the node.
25606c3fb27SDimitry Andric std::vector<JumpT *> InJumps;
2570eae32dcSDimitry Andric };
2580eae32dcSDimitry Andric
25906c3fb27SDimitry Andric /// An arc in the graph, typically corresponding to a jump between two nodes.
26006c3fb27SDimitry Andric struct JumpT {
26106c3fb27SDimitry Andric JumpT(const JumpT &) = delete;
26206c3fb27SDimitry Andric JumpT(JumpT &&) = default;
26306c3fb27SDimitry Andric JumpT &operator=(const JumpT &) = delete;
26406c3fb27SDimitry Andric JumpT &operator=(JumpT &&) = default;
2650eae32dcSDimitry Andric
JumpT__anonf633aad90111::JumpT26606c3fb27SDimitry Andric explicit JumpT(NodeT *Source, NodeT *Target, uint64_t ExecutionCount)
26706c3fb27SDimitry Andric : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {}
26806c3fb27SDimitry Andric
26906c3fb27SDimitry Andric // Source node of the jump.
27006c3fb27SDimitry Andric NodeT *Source;
27106c3fb27SDimitry Andric // Target node of the jump.
27206c3fb27SDimitry Andric NodeT *Target;
2730eae32dcSDimitry Andric // Execution count of the arc in the profile data.
2740eae32dcSDimitry Andric uint64_t ExecutionCount{0};
275bdd1243dSDimitry Andric // Whether the jump corresponds to a conditional branch.
276bdd1243dSDimitry Andric bool IsConditional{false};
27706c3fb27SDimitry Andric // The offset of the jump from the source node.
27806c3fb27SDimitry Andric uint64_t Offset{0};
2790eae32dcSDimitry Andric };
2800eae32dcSDimitry Andric
28106c3fb27SDimitry Andric /// A chain (ordered sequence) of nodes in the graph.
28206c3fb27SDimitry Andric struct ChainT {
28306c3fb27SDimitry Andric ChainT(const ChainT &) = delete;
28406c3fb27SDimitry Andric ChainT(ChainT &&) = default;
28506c3fb27SDimitry Andric ChainT &operator=(const ChainT &) = delete;
28606c3fb27SDimitry Andric ChainT &operator=(ChainT &&) = default;
2870eae32dcSDimitry Andric
ChainT__anonf633aad90111::ChainT28806c3fb27SDimitry Andric explicit ChainT(uint64_t Id, NodeT *Node)
28906c3fb27SDimitry Andric : Id(Id), ExecutionCount(Node->ExecutionCount), Size(Node->Size),
29006c3fb27SDimitry Andric Nodes(1, Node) {}
2910eae32dcSDimitry Andric
numBlocks__anonf633aad90111::ChainT29206c3fb27SDimitry Andric size_t numBlocks() const { return Nodes.size(); }
2930eae32dcSDimitry Andric
density__anonf633aad90111::ChainT294*5f757f3fSDimitry Andric double density() const { return ExecutionCount / Size; }
29506c3fb27SDimitry Andric
isEntry__anonf633aad90111::ChainT29606c3fb27SDimitry Andric bool isEntry() const { return Nodes[0]->Index == 0; }
2970eae32dcSDimitry Andric
isCold__anonf633aad90111::ChainT298bdd1243dSDimitry Andric bool isCold() const {
29906c3fb27SDimitry Andric for (NodeT *Node : Nodes) {
30006c3fb27SDimitry Andric if (Node->ExecutionCount > 0)
301bdd1243dSDimitry Andric return false;
302bdd1243dSDimitry Andric }
303bdd1243dSDimitry Andric return true;
304bdd1243dSDimitry Andric }
305bdd1243dSDimitry Andric
getEdge__anonf633aad90111::ChainT30606c3fb27SDimitry Andric ChainEdge *getEdge(ChainT *Other) const {
307*5f757f3fSDimitry Andric for (const auto &[Chain, ChainEdge] : Edges) {
308*5f757f3fSDimitry Andric if (Chain == Other)
309*5f757f3fSDimitry Andric return ChainEdge;
3100eae32dcSDimitry Andric }
3110eae32dcSDimitry Andric return nullptr;
3120eae32dcSDimitry Andric }
3130eae32dcSDimitry Andric
removeEdge__anonf633aad90111::ChainT31406c3fb27SDimitry Andric void removeEdge(ChainT *Other) {
3150eae32dcSDimitry Andric auto It = Edges.begin();
3160eae32dcSDimitry Andric while (It != Edges.end()) {
3170eae32dcSDimitry Andric if (It->first == Other) {
3180eae32dcSDimitry Andric Edges.erase(It);
3190eae32dcSDimitry Andric return;
3200eae32dcSDimitry Andric }
3210eae32dcSDimitry Andric It++;
3220eae32dcSDimitry Andric }
3230eae32dcSDimitry Andric }
3240eae32dcSDimitry Andric
addEdge__anonf633aad90111::ChainT32506c3fb27SDimitry Andric void addEdge(ChainT *Other, ChainEdge *Edge) {
3260eae32dcSDimitry Andric Edges.push_back(std::make_pair(Other, Edge));
3270eae32dcSDimitry Andric }
3280eae32dcSDimitry Andric
merge__anonf633aad90111::ChainT329*5f757f3fSDimitry Andric void merge(ChainT *Other, std::vector<NodeT *> MergedBlocks) {
330*5f757f3fSDimitry Andric Nodes = std::move(MergedBlocks);
331*5f757f3fSDimitry Andric // Update the chain's data.
33206c3fb27SDimitry Andric ExecutionCount += Other->ExecutionCount;
33306c3fb27SDimitry Andric Size += Other->Size;
33406c3fb27SDimitry Andric Id = Nodes[0]->Index;
335*5f757f3fSDimitry Andric // Update the node's data.
33606c3fb27SDimitry Andric for (size_t Idx = 0; Idx < Nodes.size(); Idx++) {
33706c3fb27SDimitry Andric Nodes[Idx]->CurChain = this;
33806c3fb27SDimitry Andric Nodes[Idx]->CurIndex = Idx;
3390eae32dcSDimitry Andric }
3400eae32dcSDimitry Andric }
3410eae32dcSDimitry Andric
34206c3fb27SDimitry Andric void mergeEdges(ChainT *Other);
3430eae32dcSDimitry Andric
clear__anonf633aad90111::ChainT3440eae32dcSDimitry Andric void clear() {
34506c3fb27SDimitry Andric Nodes.clear();
34606c3fb27SDimitry Andric Nodes.shrink_to_fit();
3470eae32dcSDimitry Andric Edges.clear();
3480eae32dcSDimitry Andric Edges.shrink_to_fit();
3490eae32dcSDimitry Andric }
3500eae32dcSDimitry Andric
3510eae32dcSDimitry Andric // Unique chain identifier.
3520eae32dcSDimitry Andric uint64_t Id;
3530eae32dcSDimitry Andric // Cached ext-tsp score for the chain.
35406c3fb27SDimitry Andric double Score{0};
355*5f757f3fSDimitry Andric // The total execution count of the chain. Since the execution count of
356*5f757f3fSDimitry Andric // a basic block is uint64_t, using doubles here to avoid overflow.
357*5f757f3fSDimitry Andric double ExecutionCount{0};
35806c3fb27SDimitry Andric // The total size of the chain.
35906c3fb27SDimitry Andric uint64_t Size{0};
36006c3fb27SDimitry Andric // Nodes of the chain.
36106c3fb27SDimitry Andric std::vector<NodeT *> Nodes;
3620eae32dcSDimitry Andric // Adjacent chains and corresponding edges (lists of jumps).
36306c3fb27SDimitry Andric std::vector<std::pair<ChainT *, ChainEdge *>> Edges;
3640eae32dcSDimitry Andric };
3650eae32dcSDimitry Andric
36606c3fb27SDimitry Andric /// An edge in the graph representing jumps between two chains.
36706c3fb27SDimitry Andric /// When nodes are merged into chains, the edges are combined too so that
368*5f757f3fSDimitry Andric /// there is always at most one edge between a pair of chains.
36906c3fb27SDimitry Andric struct ChainEdge {
3700eae32dcSDimitry Andric ChainEdge(const ChainEdge &) = delete;
3710eae32dcSDimitry Andric ChainEdge(ChainEdge &&) = default;
3720eae32dcSDimitry Andric ChainEdge &operator=(const ChainEdge &) = delete;
37306c3fb27SDimitry Andric ChainEdge &operator=(ChainEdge &&) = delete;
3740eae32dcSDimitry Andric
ChainEdge__anonf633aad90111::ChainEdge37506c3fb27SDimitry Andric explicit ChainEdge(JumpT *Jump)
3760eae32dcSDimitry Andric : SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain),
3770eae32dcSDimitry Andric Jumps(1, Jump) {}
3780eae32dcSDimitry Andric
srcChain__anonf633aad90111::ChainEdge37906c3fb27SDimitry Andric ChainT *srcChain() const { return SrcChain; }
3800eae32dcSDimitry Andric
dstChain__anonf633aad90111::ChainEdge38106c3fb27SDimitry Andric ChainT *dstChain() const { return DstChain; }
3820eae32dcSDimitry Andric
isSelfEdge__anonf633aad90111::ChainEdge38306c3fb27SDimitry Andric bool isSelfEdge() const { return SrcChain == DstChain; }
38406c3fb27SDimitry Andric
jumps__anonf633aad90111::ChainEdge38506c3fb27SDimitry Andric const std::vector<JumpT *> &jumps() const { return Jumps; }
38606c3fb27SDimitry Andric
appendJump__anonf633aad90111::ChainEdge38706c3fb27SDimitry Andric void appendJump(JumpT *Jump) { Jumps.push_back(Jump); }
3880eae32dcSDimitry Andric
moveJumps__anonf633aad90111::ChainEdge3890eae32dcSDimitry Andric void moveJumps(ChainEdge *Other) {
3900eae32dcSDimitry Andric Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end());
3910eae32dcSDimitry Andric Other->Jumps.clear();
3920eae32dcSDimitry Andric Other->Jumps.shrink_to_fit();
3930eae32dcSDimitry Andric }
3940eae32dcSDimitry Andric
changeEndpoint__anonf633aad90111::ChainEdge39506c3fb27SDimitry Andric void changeEndpoint(ChainT *From, ChainT *To) {
39606c3fb27SDimitry Andric if (From == SrcChain)
39706c3fb27SDimitry Andric SrcChain = To;
39806c3fb27SDimitry Andric if (From == DstChain)
39906c3fb27SDimitry Andric DstChain = To;
40006c3fb27SDimitry Andric }
40106c3fb27SDimitry Andric
hasCachedMergeGain__anonf633aad90111::ChainEdge40206c3fb27SDimitry Andric bool hasCachedMergeGain(ChainT *Src, ChainT *Dst) const {
4030eae32dcSDimitry Andric return Src == SrcChain ? CacheValidForward : CacheValidBackward;
4040eae32dcSDimitry Andric }
4050eae32dcSDimitry Andric
getCachedMergeGain__anonf633aad90111::ChainEdge40606c3fb27SDimitry Andric MergeGainT getCachedMergeGain(ChainT *Src, ChainT *Dst) const {
4070eae32dcSDimitry Andric return Src == SrcChain ? CachedGainForward : CachedGainBackward;
4080eae32dcSDimitry Andric }
4090eae32dcSDimitry Andric
setCachedMergeGain__anonf633aad90111::ChainEdge41006c3fb27SDimitry Andric void setCachedMergeGain(ChainT *Src, ChainT *Dst, MergeGainT MergeGain) {
4110eae32dcSDimitry Andric if (Src == SrcChain) {
4120eae32dcSDimitry Andric CachedGainForward = MergeGain;
4130eae32dcSDimitry Andric CacheValidForward = true;
4140eae32dcSDimitry Andric } else {
4150eae32dcSDimitry Andric CachedGainBackward = MergeGain;
4160eae32dcSDimitry Andric CacheValidBackward = true;
4170eae32dcSDimitry Andric }
4180eae32dcSDimitry Andric }
4190eae32dcSDimitry Andric
invalidateCache__anonf633aad90111::ChainEdge4200eae32dcSDimitry Andric void invalidateCache() {
4210eae32dcSDimitry Andric CacheValidForward = false;
4220eae32dcSDimitry Andric CacheValidBackward = false;
4230eae32dcSDimitry Andric }
4240eae32dcSDimitry Andric
setMergeGain__anonf633aad90111::ChainEdge42506c3fb27SDimitry Andric void setMergeGain(MergeGainT Gain) { CachedGain = Gain; }
42606c3fb27SDimitry Andric
getMergeGain__anonf633aad90111::ChainEdge42706c3fb27SDimitry Andric MergeGainT getMergeGain() const { return CachedGain; }
42806c3fb27SDimitry Andric
gain__anonf633aad90111::ChainEdge42906c3fb27SDimitry Andric double gain() const { return CachedGain.score(); }
43006c3fb27SDimitry Andric
4310eae32dcSDimitry Andric private:
4320eae32dcSDimitry Andric // Source chain.
43306c3fb27SDimitry Andric ChainT *SrcChain{nullptr};
4340eae32dcSDimitry Andric // Destination chain.
43506c3fb27SDimitry Andric ChainT *DstChain{nullptr};
43606c3fb27SDimitry Andric // Original jumps in the binary with corresponding execution counts.
43706c3fb27SDimitry Andric std::vector<JumpT *> Jumps;
43806c3fb27SDimitry Andric // Cached gain value for merging the pair of chains.
43906c3fb27SDimitry Andric MergeGainT CachedGain;
44006c3fb27SDimitry Andric
44106c3fb27SDimitry Andric // Cached gain values for merging the pair of chains. Since the gain of
44206c3fb27SDimitry Andric // merging (Src, Dst) and (Dst, Src) might be different, we store both values
44306c3fb27SDimitry Andric // here and a flag indicating which of the options results in a higher gain.
44406c3fb27SDimitry Andric // Cached gain values.
44506c3fb27SDimitry Andric MergeGainT CachedGainForward;
44606c3fb27SDimitry Andric MergeGainT CachedGainBackward;
4470eae32dcSDimitry Andric // Whether the cached value must be recomputed.
4480eae32dcSDimitry Andric bool CacheValidForward{false};
4490eae32dcSDimitry Andric bool CacheValidBackward{false};
4500eae32dcSDimitry Andric };
4510eae32dcSDimitry Andric
isSuccessor(const NodeT * Other) const452*5f757f3fSDimitry Andric bool NodeT::isSuccessor(const NodeT *Other) const {
453*5f757f3fSDimitry Andric for (JumpT *Jump : OutJumps)
454*5f757f3fSDimitry Andric if (Jump->Target == Other)
455*5f757f3fSDimitry Andric return true;
456*5f757f3fSDimitry Andric return false;
457*5f757f3fSDimitry Andric }
458*5f757f3fSDimitry Andric
outCount() const45906c3fb27SDimitry Andric uint64_t NodeT::outCount() const {
46006c3fb27SDimitry Andric uint64_t Count = 0;
461*5f757f3fSDimitry Andric for (JumpT *Jump : OutJumps)
46206c3fb27SDimitry Andric Count += Jump->ExecutionCount;
46306c3fb27SDimitry Andric return Count;
46406c3fb27SDimitry Andric }
4650eae32dcSDimitry Andric
inCount() const46606c3fb27SDimitry Andric uint64_t NodeT::inCount() const {
46706c3fb27SDimitry Andric uint64_t Count = 0;
468*5f757f3fSDimitry Andric for (JumpT *Jump : InJumps)
46906c3fb27SDimitry Andric Count += Jump->ExecutionCount;
47006c3fb27SDimitry Andric return Count;
47106c3fb27SDimitry Andric }
47206c3fb27SDimitry Andric
mergeEdges(ChainT * Other)47306c3fb27SDimitry Andric void ChainT::mergeEdges(ChainT *Other) {
474*5f757f3fSDimitry Andric // Update edges adjacent to chain Other.
475*5f757f3fSDimitry Andric for (const auto &[DstChain, DstEdge] : Other->Edges) {
47606c3fb27SDimitry Andric ChainT *TargetChain = DstChain == Other ? this : DstChain;
477bdd1243dSDimitry Andric ChainEdge *CurEdge = getEdge(TargetChain);
4780eae32dcSDimitry Andric if (CurEdge == nullptr) {
4790eae32dcSDimitry Andric DstEdge->changeEndpoint(Other, this);
4800eae32dcSDimitry Andric this->addEdge(TargetChain, DstEdge);
481*5f757f3fSDimitry Andric if (DstChain != this && DstChain != Other)
4820eae32dcSDimitry Andric DstChain->addEdge(this, DstEdge);
4830eae32dcSDimitry Andric } else {
4840eae32dcSDimitry Andric CurEdge->moveJumps(DstEdge);
4850eae32dcSDimitry Andric }
486*5f757f3fSDimitry Andric // Cleanup leftover edge.
487*5f757f3fSDimitry Andric if (DstChain != Other)
4880eae32dcSDimitry Andric DstChain->removeEdge(Other);
4890eae32dcSDimitry Andric }
4900eae32dcSDimitry Andric }
4910eae32dcSDimitry Andric
49206c3fb27SDimitry Andric using NodeIter = std::vector<NodeT *>::const_iterator;
493*5f757f3fSDimitry Andric static std::vector<NodeT *> EmptyList;
4940eae32dcSDimitry Andric
495*5f757f3fSDimitry Andric /// A wrapper around three concatenated vectors (chains) of nodes; it is used
496*5f757f3fSDimitry Andric /// to avoid extra instantiation of the vectors.
497*5f757f3fSDimitry Andric struct MergedNodesT {
MergedNodesT__anonf633aad90111::MergedNodesT498*5f757f3fSDimitry Andric MergedNodesT(NodeIter Begin1, NodeIter End1,
499*5f757f3fSDimitry Andric NodeIter Begin2 = EmptyList.begin(),
500*5f757f3fSDimitry Andric NodeIter End2 = EmptyList.end(),
501*5f757f3fSDimitry Andric NodeIter Begin3 = EmptyList.begin(),
502*5f757f3fSDimitry Andric NodeIter End3 = EmptyList.end())
5030eae32dcSDimitry Andric : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
5040eae32dcSDimitry Andric End3(End3) {}
5050eae32dcSDimitry Andric
forEach__anonf633aad90111::MergedNodesT5060eae32dcSDimitry Andric template <typename F> void forEach(const F &Func) const {
5070eae32dcSDimitry Andric for (auto It = Begin1; It != End1; It++)
5080eae32dcSDimitry Andric Func(*It);
5090eae32dcSDimitry Andric for (auto It = Begin2; It != End2; It++)
5100eae32dcSDimitry Andric Func(*It);
5110eae32dcSDimitry Andric for (auto It = Begin3; It != End3; It++)
5120eae32dcSDimitry Andric Func(*It);
5130eae32dcSDimitry Andric }
5140eae32dcSDimitry Andric
getNodes__anonf633aad90111::MergedNodesT51506c3fb27SDimitry Andric std::vector<NodeT *> getNodes() const {
51606c3fb27SDimitry Andric std::vector<NodeT *> Result;
5170eae32dcSDimitry Andric Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) +
5180eae32dcSDimitry Andric std::distance(Begin3, End3));
5190eae32dcSDimitry Andric Result.insert(Result.end(), Begin1, End1);
5200eae32dcSDimitry Andric Result.insert(Result.end(), Begin2, End2);
5210eae32dcSDimitry Andric Result.insert(Result.end(), Begin3, End3);
5220eae32dcSDimitry Andric return Result;
5230eae32dcSDimitry Andric }
5240eae32dcSDimitry Andric
getFirstNode__anonf633aad90111::MergedNodesT52506c3fb27SDimitry Andric const NodeT *getFirstNode() const { return *Begin1; }
5260eae32dcSDimitry Andric
5270eae32dcSDimitry Andric private:
52806c3fb27SDimitry Andric NodeIter Begin1;
52906c3fb27SDimitry Andric NodeIter End1;
53006c3fb27SDimitry Andric NodeIter Begin2;
53106c3fb27SDimitry Andric NodeIter End2;
53206c3fb27SDimitry Andric NodeIter Begin3;
53306c3fb27SDimitry Andric NodeIter End3;
5340eae32dcSDimitry Andric };
5350eae32dcSDimitry Andric
536*5f757f3fSDimitry Andric /// A wrapper around two concatenated vectors (chains) of jumps.
537*5f757f3fSDimitry Andric struct MergedJumpsT {
MergedJumpsT__anonf633aad90111::MergedJumpsT538*5f757f3fSDimitry Andric MergedJumpsT(const std::vector<JumpT *> *Jumps1,
539*5f757f3fSDimitry Andric const std::vector<JumpT *> *Jumps2 = nullptr) {
540*5f757f3fSDimitry Andric assert(!Jumps1->empty() && "cannot merge empty jump list");
541*5f757f3fSDimitry Andric JumpArray[0] = Jumps1;
542*5f757f3fSDimitry Andric JumpArray[1] = Jumps2;
543*5f757f3fSDimitry Andric }
544*5f757f3fSDimitry Andric
forEach__anonf633aad90111::MergedJumpsT545*5f757f3fSDimitry Andric template <typename F> void forEach(const F &Func) const {
546*5f757f3fSDimitry Andric for (auto Jumps : JumpArray)
547*5f757f3fSDimitry Andric if (Jumps != nullptr)
548*5f757f3fSDimitry Andric for (JumpT *Jump : *Jumps)
549*5f757f3fSDimitry Andric Func(Jump);
550*5f757f3fSDimitry Andric }
551*5f757f3fSDimitry Andric
552*5f757f3fSDimitry Andric private:
553*5f757f3fSDimitry Andric std::array<const std::vector<JumpT *> *, 2> JumpArray{nullptr, nullptr};
554*5f757f3fSDimitry Andric };
555*5f757f3fSDimitry Andric
55606c3fb27SDimitry Andric /// Merge two chains of nodes respecting a given 'type' and 'offset'.
55706c3fb27SDimitry Andric ///
55806c3fb27SDimitry Andric /// If MergeType == 0, then the result is a concatenation of two chains.
55906c3fb27SDimitry Andric /// Otherwise, the first chain is cut into two sub-chains at the offset,
56006c3fb27SDimitry Andric /// and merged using all possible ways of concatenating three chains.
mergeNodes(const std::vector<NodeT * > & X,const std::vector<NodeT * > & Y,size_t MergeOffset,MergeTypeT MergeType)561*5f757f3fSDimitry Andric MergedNodesT mergeNodes(const std::vector<NodeT *> &X,
56206c3fb27SDimitry Andric const std::vector<NodeT *> &Y, size_t MergeOffset,
56306c3fb27SDimitry Andric MergeTypeT MergeType) {
564*5f757f3fSDimitry Andric // Split the first chain, X, into X1 and X2.
56506c3fb27SDimitry Andric NodeIter BeginX1 = X.begin();
56606c3fb27SDimitry Andric NodeIter EndX1 = X.begin() + MergeOffset;
56706c3fb27SDimitry Andric NodeIter BeginX2 = X.begin() + MergeOffset;
56806c3fb27SDimitry Andric NodeIter EndX2 = X.end();
56906c3fb27SDimitry Andric NodeIter BeginY = Y.begin();
57006c3fb27SDimitry Andric NodeIter EndY = Y.end();
57106c3fb27SDimitry Andric
572*5f757f3fSDimitry Andric // Construct a new chain from the three existing ones.
57306c3fb27SDimitry Andric switch (MergeType) {
57406c3fb27SDimitry Andric case MergeTypeT::X_Y:
575*5f757f3fSDimitry Andric return MergedNodesT(BeginX1, EndX2, BeginY, EndY);
57606c3fb27SDimitry Andric case MergeTypeT::Y_X:
577*5f757f3fSDimitry Andric return MergedNodesT(BeginY, EndY, BeginX1, EndX2);
57806c3fb27SDimitry Andric case MergeTypeT::X1_Y_X2:
579*5f757f3fSDimitry Andric return MergedNodesT(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
58006c3fb27SDimitry Andric case MergeTypeT::Y_X2_X1:
581*5f757f3fSDimitry Andric return MergedNodesT(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
58206c3fb27SDimitry Andric case MergeTypeT::X2_X1_Y:
583*5f757f3fSDimitry Andric return MergedNodesT(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
58406c3fb27SDimitry Andric }
58506c3fb27SDimitry Andric llvm_unreachable("unexpected chain merge type");
58606c3fb27SDimitry Andric }
58706c3fb27SDimitry Andric
5880eae32dcSDimitry Andric /// The implementation of the ExtTSP algorithm.
5890eae32dcSDimitry Andric class ExtTSPImpl {
5900eae32dcSDimitry Andric public:
ExtTSPImpl(ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)591*5f757f3fSDimitry Andric ExtTSPImpl(ArrayRef<uint64_t> NodeSizes, ArrayRef<uint64_t> NodeCounts,
592*5f757f3fSDimitry Andric ArrayRef<EdgeCount> EdgeCounts)
59306c3fb27SDimitry Andric : NumNodes(NodeSizes.size()) {
5940eae32dcSDimitry Andric initialize(NodeSizes, NodeCounts, EdgeCounts);
5950eae32dcSDimitry Andric }
5960eae32dcSDimitry Andric
59706c3fb27SDimitry Andric /// Run the algorithm and return an optimized ordering of nodes.
run()598*5f757f3fSDimitry Andric std::vector<uint64_t> run() {
59906c3fb27SDimitry Andric // Pass 1: Merge nodes with their mutually forced successors
6000eae32dcSDimitry Andric mergeForcedPairs();
6010eae32dcSDimitry Andric
6020eae32dcSDimitry Andric // Pass 2: Merge pairs of chains while improving the ExtTSP objective
6030eae32dcSDimitry Andric mergeChainPairs();
6040eae32dcSDimitry Andric
60506c3fb27SDimitry Andric // Pass 3: Merge cold nodes to reduce code size
6060eae32dcSDimitry Andric mergeColdChains();
6070eae32dcSDimitry Andric
60806c3fb27SDimitry Andric // Collect nodes from all chains
609*5f757f3fSDimitry Andric return concatChains();
6100eae32dcSDimitry Andric }
6110eae32dcSDimitry Andric
6120eae32dcSDimitry Andric private:
6130eae32dcSDimitry Andric /// Initialize the algorithm's data structures.
initialize(const ArrayRef<uint64_t> & NodeSizes,const ArrayRef<uint64_t> & NodeCounts,const ArrayRef<EdgeCount> & EdgeCounts)614*5f757f3fSDimitry Andric void initialize(const ArrayRef<uint64_t> &NodeSizes,
615*5f757f3fSDimitry Andric const ArrayRef<uint64_t> &NodeCounts,
616*5f757f3fSDimitry Andric const ArrayRef<EdgeCount> &EdgeCounts) {
617*5f757f3fSDimitry Andric // Initialize nodes.
61806c3fb27SDimitry Andric AllNodes.reserve(NumNodes);
61906c3fb27SDimitry Andric for (uint64_t Idx = 0; Idx < NumNodes; Idx++) {
62006c3fb27SDimitry Andric uint64_t Size = std::max<uint64_t>(NodeSizes[Idx], 1ULL);
62106c3fb27SDimitry Andric uint64_t ExecutionCount = NodeCounts[Idx];
622*5f757f3fSDimitry Andric // The execution count of the entry node is set to at least one.
62306c3fb27SDimitry Andric if (Idx == 0 && ExecutionCount == 0)
6240eae32dcSDimitry Andric ExecutionCount = 1;
62506c3fb27SDimitry Andric AllNodes.emplace_back(Idx, Size, ExecutionCount);
6260eae32dcSDimitry Andric }
6270eae32dcSDimitry Andric
628*5f757f3fSDimitry Andric // Initialize jumps between the nodes.
629bdd1243dSDimitry Andric SuccNodes.resize(NumNodes);
630bdd1243dSDimitry Andric PredNodes.resize(NumNodes);
631bdd1243dSDimitry Andric std::vector<uint64_t> OutDegree(NumNodes, 0);
6320eae32dcSDimitry Andric AllJumps.reserve(EdgeCounts.size());
633*5f757f3fSDimitry Andric for (auto Edge : EdgeCounts) {
634*5f757f3fSDimitry Andric ++OutDegree[Edge.src];
635*5f757f3fSDimitry Andric // Ignore self-edges.
636*5f757f3fSDimitry Andric if (Edge.src == Edge.dst)
6370eae32dcSDimitry Andric continue;
6380eae32dcSDimitry Andric
639*5f757f3fSDimitry Andric SuccNodes[Edge.src].push_back(Edge.dst);
640*5f757f3fSDimitry Andric PredNodes[Edge.dst].push_back(Edge.src);
641*5f757f3fSDimitry Andric if (Edge.count > 0) {
642*5f757f3fSDimitry Andric NodeT &PredNode = AllNodes[Edge.src];
643*5f757f3fSDimitry Andric NodeT &SuccNode = AllNodes[Edge.dst];
644*5f757f3fSDimitry Andric AllJumps.emplace_back(&PredNode, &SuccNode, Edge.count);
64506c3fb27SDimitry Andric SuccNode.InJumps.push_back(&AllJumps.back());
64606c3fb27SDimitry Andric PredNode.OutJumps.push_back(&AllJumps.back());
647*5f757f3fSDimitry Andric // Adjust execution counts.
648*5f757f3fSDimitry Andric PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Edge.count);
649*5f757f3fSDimitry Andric SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Edge.count);
6500eae32dcSDimitry Andric }
6510eae32dcSDimitry Andric }
65206c3fb27SDimitry Andric for (JumpT &Jump : AllJumps) {
653*5f757f3fSDimitry Andric assert(OutDegree[Jump.Source->Index] > 0 &&
654*5f757f3fSDimitry Andric "incorrectly computed out-degree of the block");
655bdd1243dSDimitry Andric Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;
656bdd1243dSDimitry Andric }
6570eae32dcSDimitry Andric
658*5f757f3fSDimitry Andric // Initialize chains.
6590eae32dcSDimitry Andric AllChains.reserve(NumNodes);
6600eae32dcSDimitry Andric HotChains.reserve(NumNodes);
66106c3fb27SDimitry Andric for (NodeT &Node : AllNodes) {
662*5f757f3fSDimitry Andric // Create a chain.
66306c3fb27SDimitry Andric AllChains.emplace_back(Node.Index, &Node);
66406c3fb27SDimitry Andric Node.CurChain = &AllChains.back();
665*5f757f3fSDimitry Andric if (Node.ExecutionCount > 0)
6660eae32dcSDimitry Andric HotChains.push_back(&AllChains.back());
6670eae32dcSDimitry Andric }
6680eae32dcSDimitry Andric
669*5f757f3fSDimitry Andric // Initialize chain edges.
6700eae32dcSDimitry Andric AllEdges.reserve(AllJumps.size());
67106c3fb27SDimitry Andric for (NodeT &PredNode : AllNodes) {
67206c3fb27SDimitry Andric for (JumpT *Jump : PredNode.OutJumps) {
673*5f757f3fSDimitry Andric assert(Jump->ExecutionCount > 0 && "incorrectly initialized jump");
67406c3fb27SDimitry Andric NodeT *SuccNode = Jump->Target;
67506c3fb27SDimitry Andric ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
676*5f757f3fSDimitry Andric // This edge is already present in the graph.
6770eae32dcSDimitry Andric if (CurEdge != nullptr) {
67806c3fb27SDimitry Andric assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
6790eae32dcSDimitry Andric CurEdge->appendJump(Jump);
6800eae32dcSDimitry Andric continue;
6810eae32dcSDimitry Andric }
682*5f757f3fSDimitry Andric // This is a new edge.
6830eae32dcSDimitry Andric AllEdges.emplace_back(Jump);
68406c3fb27SDimitry Andric PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
68506c3fb27SDimitry Andric SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
6860eae32dcSDimitry Andric }
6870eae32dcSDimitry Andric }
6880eae32dcSDimitry Andric }
6890eae32dcSDimitry Andric
69006c3fb27SDimitry Andric /// For a pair of nodes, A and B, node B is the forced successor of A,
6910eae32dcSDimitry Andric /// if (i) all jumps (based on profile) from A goes to B and (ii) all jumps
69206c3fb27SDimitry Andric /// to B are from A. Such nodes should be adjacent in the optimal ordering;
69306c3fb27SDimitry Andric /// the method finds and merges such pairs of nodes.
mergeForcedPairs()6940eae32dcSDimitry Andric void mergeForcedPairs() {
695*5f757f3fSDimitry Andric // Find forced pairs of blocks.
69606c3fb27SDimitry Andric for (NodeT &Node : AllNodes) {
69706c3fb27SDimitry Andric if (SuccNodes[Node.Index].size() == 1 &&
69806c3fb27SDimitry Andric PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
69906c3fb27SDimitry Andric SuccNodes[Node.Index][0] != 0) {
70006c3fb27SDimitry Andric size_t SuccIndex = SuccNodes[Node.Index][0];
70106c3fb27SDimitry Andric Node.ForcedSucc = &AllNodes[SuccIndex];
70206c3fb27SDimitry Andric AllNodes[SuccIndex].ForcedPred = &Node;
7030eae32dcSDimitry Andric }
7040eae32dcSDimitry Andric }
7050eae32dcSDimitry Andric
7060eae32dcSDimitry Andric // There might be 'cycles' in the forced dependencies, since profile
7070eae32dcSDimitry Andric // data isn't 100% accurate. Typically this is observed in loops, when the
7080eae32dcSDimitry Andric // loop edges are the hottest successors for the basic blocks of the loop.
70906c3fb27SDimitry Andric // Break the cycles by choosing the node with the smallest index as the
7100eae32dcSDimitry Andric // head. This helps to keep the original order of the loops, which likely
7110eae32dcSDimitry Andric // have already been rotated in the optimized manner.
71206c3fb27SDimitry Andric for (NodeT &Node : AllNodes) {
71306c3fb27SDimitry Andric if (Node.ForcedSucc == nullptr || Node.ForcedPred == nullptr)
7140eae32dcSDimitry Andric continue;
7150eae32dcSDimitry Andric
71606c3fb27SDimitry Andric NodeT *SuccNode = Node.ForcedSucc;
71706c3fb27SDimitry Andric while (SuccNode != nullptr && SuccNode != &Node) {
71806c3fb27SDimitry Andric SuccNode = SuccNode->ForcedSucc;
7190eae32dcSDimitry Andric }
72006c3fb27SDimitry Andric if (SuccNode == nullptr)
7210eae32dcSDimitry Andric continue;
722*5f757f3fSDimitry Andric // Break the cycle.
72306c3fb27SDimitry Andric AllNodes[Node.ForcedPred->Index].ForcedSucc = nullptr;
72406c3fb27SDimitry Andric Node.ForcedPred = nullptr;
7250eae32dcSDimitry Andric }
7260eae32dcSDimitry Andric
727*5f757f3fSDimitry Andric // Merge nodes with their fallthrough successors.
72806c3fb27SDimitry Andric for (NodeT &Node : AllNodes) {
72906c3fb27SDimitry Andric if (Node.ForcedPred == nullptr && Node.ForcedSucc != nullptr) {
73006c3fb27SDimitry Andric const NodeT *CurBlock = &Node;
7310eae32dcSDimitry Andric while (CurBlock->ForcedSucc != nullptr) {
73206c3fb27SDimitry Andric const NodeT *NextBlock = CurBlock->ForcedSucc;
73306c3fb27SDimitry Andric mergeChains(Node.CurChain, NextBlock->CurChain, 0, MergeTypeT::X_Y);
7340eae32dcSDimitry Andric CurBlock = NextBlock;
7350eae32dcSDimitry Andric }
7360eae32dcSDimitry Andric }
7370eae32dcSDimitry Andric }
7380eae32dcSDimitry Andric }
7390eae32dcSDimitry Andric
7400eae32dcSDimitry Andric /// Merge pairs of chains while improving the ExtTSP objective.
mergeChainPairs()7410eae32dcSDimitry Andric void mergeChainPairs() {
742*5f757f3fSDimitry Andric /// Deterministically compare pairs of chains.
74306c3fb27SDimitry Andric auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
74406c3fb27SDimitry Andric const ChainT *A2, const ChainT *B2) {
745*5f757f3fSDimitry Andric return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);
7460eae32dcSDimitry Andric };
7470eae32dcSDimitry Andric
7480eae32dcSDimitry Andric while (HotChains.size() > 1) {
74906c3fb27SDimitry Andric ChainT *BestChainPred = nullptr;
75006c3fb27SDimitry Andric ChainT *BestChainSucc = nullptr;
75106c3fb27SDimitry Andric MergeGainT BestGain;
752*5f757f3fSDimitry Andric // Iterate over all pairs of chains.
75306c3fb27SDimitry Andric for (ChainT *ChainPred : HotChains) {
754*5f757f3fSDimitry Andric // Get candidates for merging with the current chain.
755*5f757f3fSDimitry Andric for (const auto &[ChainSucc, Edge] : ChainPred->Edges) {
756*5f757f3fSDimitry Andric // Ignore loop edges.
757*5f757f3fSDimitry Andric if (Edge->isSelfEdge())
7580eae32dcSDimitry Andric continue;
759*5f757f3fSDimitry Andric // Skip the merge if the combined chain violates the maximum specified
760*5f757f3fSDimitry Andric // size.
76181ad6265SDimitry Andric if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
76281ad6265SDimitry Andric continue;
763*5f757f3fSDimitry Andric // Don't merge the chains if they have vastly different densities.
764*5f757f3fSDimitry Andric // Skip the merge if the ratio between the densities exceeds
765*5f757f3fSDimitry Andric // MaxMergeDensityRatio. Smaller values of the option result in fewer
766*5f757f3fSDimitry Andric // merges, and hence, more chains.
767*5f757f3fSDimitry Andric const double ChainPredDensity = ChainPred->density();
768*5f757f3fSDimitry Andric const double ChainSuccDensity = ChainSucc->density();
769*5f757f3fSDimitry Andric assert(ChainPredDensity > 0.0 && ChainSuccDensity > 0.0 &&
770*5f757f3fSDimitry Andric "incorrectly computed chain densities");
771*5f757f3fSDimitry Andric auto [MinDensity, MaxDensity] =
772*5f757f3fSDimitry Andric std::minmax(ChainPredDensity, ChainSuccDensity);
773*5f757f3fSDimitry Andric const double Ratio = MaxDensity / MinDensity;
774*5f757f3fSDimitry Andric if (Ratio > MaxMergeDensityRatio)
775*5f757f3fSDimitry Andric continue;
77681ad6265SDimitry Andric
777*5f757f3fSDimitry Andric // Compute the gain of merging the two chains.
77806c3fb27SDimitry Andric MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge);
7790eae32dcSDimitry Andric if (CurGain.score() <= EPS)
7800eae32dcSDimitry Andric continue;
7810eae32dcSDimitry Andric
7820eae32dcSDimitry Andric if (BestGain < CurGain ||
7830eae32dcSDimitry Andric (std::abs(CurGain.score() - BestGain.score()) < EPS &&
7840eae32dcSDimitry Andric compareChainPairs(ChainPred, ChainSucc, BestChainPred,
7850eae32dcSDimitry Andric BestChainSucc))) {
7860eae32dcSDimitry Andric BestGain = CurGain;
7870eae32dcSDimitry Andric BestChainPred = ChainPred;
7880eae32dcSDimitry Andric BestChainSucc = ChainSucc;
7890eae32dcSDimitry Andric }
7900eae32dcSDimitry Andric }
7910eae32dcSDimitry Andric }
7920eae32dcSDimitry Andric
793*5f757f3fSDimitry Andric // Stop merging when there is no improvement.
7940eae32dcSDimitry Andric if (BestGain.score() <= EPS)
7950eae32dcSDimitry Andric break;
7960eae32dcSDimitry Andric
797*5f757f3fSDimitry Andric // Merge the best pair of chains.
7980eae32dcSDimitry Andric mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
7990eae32dcSDimitry Andric BestGain.mergeType());
8000eae32dcSDimitry Andric }
8010eae32dcSDimitry Andric }
8020eae32dcSDimitry Andric
80306c3fb27SDimitry Andric /// Merge remaining nodes into chains w/o taking jump counts into
80406c3fb27SDimitry Andric /// consideration. This allows to maintain the original node order in the
805*5f757f3fSDimitry Andric /// absence of profile data.
mergeColdChains()8060eae32dcSDimitry Andric void mergeColdChains() {
8070eae32dcSDimitry Andric for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
808bdd1243dSDimitry Andric // Iterating in reverse order to make sure original fallthrough jumps are
809bdd1243dSDimitry Andric // merged first; this might be beneficial for code size.
8100eae32dcSDimitry Andric size_t NumSuccs = SuccNodes[SrcBB].size();
8110eae32dcSDimitry Andric for (size_t Idx = 0; Idx < NumSuccs; Idx++) {
81206c3fb27SDimitry Andric size_t DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1];
81306c3fb27SDimitry Andric ChainT *SrcChain = AllNodes[SrcBB].CurChain;
81406c3fb27SDimitry Andric ChainT *DstChain = AllNodes[DstBB].CurChain;
8150eae32dcSDimitry Andric if (SrcChain != DstChain && !DstChain->isEntry() &&
81606c3fb27SDimitry Andric SrcChain->Nodes.back()->Index == SrcBB &&
81706c3fb27SDimitry Andric DstChain->Nodes.front()->Index == DstBB &&
818bdd1243dSDimitry Andric SrcChain->isCold() == DstChain->isCold()) {
81906c3fb27SDimitry Andric mergeChains(SrcChain, DstChain, 0, MergeTypeT::X_Y);
8200eae32dcSDimitry Andric }
8210eae32dcSDimitry Andric }
8220eae32dcSDimitry Andric }
8230eae32dcSDimitry Andric }
8240eae32dcSDimitry Andric
82506c3fb27SDimitry Andric /// Compute the Ext-TSP score for a given node order and a list of jumps.
extTSPScore(const MergedNodesT & Nodes,const MergedJumpsT & Jumps) const826*5f757f3fSDimitry Andric double extTSPScore(const MergedNodesT &Nodes,
827*5f757f3fSDimitry Andric const MergedJumpsT &Jumps) const {
8280eae32dcSDimitry Andric uint64_t CurAddr = 0;
829*5f757f3fSDimitry Andric Nodes.forEach([&](const NodeT *Node) {
83006c3fb27SDimitry Andric Node->EstimatedAddr = CurAddr;
83106c3fb27SDimitry Andric CurAddr += Node->Size;
8320eae32dcSDimitry Andric });
8330eae32dcSDimitry Andric
8340eae32dcSDimitry Andric double Score = 0;
835*5f757f3fSDimitry Andric Jumps.forEach([&](const JumpT *Jump) {
83606c3fb27SDimitry Andric const NodeT *SrcBlock = Jump->Source;
83706c3fb27SDimitry Andric const NodeT *DstBlock = Jump->Target;
8380eae32dcSDimitry Andric Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
839bdd1243dSDimitry Andric DstBlock->EstimatedAddr, Jump->ExecutionCount,
840bdd1243dSDimitry Andric Jump->IsConditional);
841*5f757f3fSDimitry Andric });
8420eae32dcSDimitry Andric return Score;
8430eae32dcSDimitry Andric }
8440eae32dcSDimitry Andric
8450eae32dcSDimitry Andric /// Compute the gain of merging two chains.
8460eae32dcSDimitry Andric ///
8470eae32dcSDimitry Andric /// The function considers all possible ways of merging two chains and
8480eae32dcSDimitry Andric /// computes the one having the largest increase in ExtTSP objective. The
8490eae32dcSDimitry Andric /// result is a pair with the first element being the gain and the second
8500eae32dcSDimitry Andric /// element being the corresponding merging type.
getBestMergeGain(ChainT * ChainPred,ChainT * ChainSucc,ChainEdge * Edge) const85106c3fb27SDimitry Andric MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
8520eae32dcSDimitry Andric ChainEdge *Edge) const {
853*5f757f3fSDimitry Andric if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))
8540eae32dcSDimitry Andric return Edge->getCachedMergeGain(ChainPred, ChainSucc);
8550eae32dcSDimitry Andric
856*5f757f3fSDimitry Andric assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
857*5f757f3fSDimitry Andric // Precompute jumps between ChainPred and ChainSucc.
858bdd1243dSDimitry Andric ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
859*5f757f3fSDimitry Andric MergedJumpsT Jumps(&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr);
8600eae32dcSDimitry Andric
861*5f757f3fSDimitry Andric // This object holds the best chosen gain of merging two chains.
86206c3fb27SDimitry Andric MergeGainT Gain = MergeGainT();
8630eae32dcSDimitry Andric
8640eae32dcSDimitry Andric /// Given a merge offset and a list of merge types, try to merge two chains
865*5f757f3fSDimitry Andric /// and update Gain with a better alternative.
8660eae32dcSDimitry Andric auto tryChainMerging = [&](size_t Offset,
86706c3fb27SDimitry Andric const std::vector<MergeTypeT> &MergeTypes) {
868*5f757f3fSDimitry Andric // Skip merging corresponding to concatenation w/o splitting.
86906c3fb27SDimitry Andric if (Offset == 0 || Offset == ChainPred->Nodes.size())
8700eae32dcSDimitry Andric return;
871*5f757f3fSDimitry Andric // Skip merging if it breaks Forced successors.
87206c3fb27SDimitry Andric NodeT *Node = ChainPred->Nodes[Offset - 1];
87306c3fb27SDimitry Andric if (Node->ForcedSucc != nullptr)
8740eae32dcSDimitry Andric return;
8750eae32dcSDimitry Andric // Apply the merge, compute the corresponding gain, and update the best
876*5f757f3fSDimitry Andric // value, if the merge is beneficial.
87706c3fb27SDimitry Andric for (const MergeTypeT &MergeType : MergeTypes) {
8780eae32dcSDimitry Andric Gain.updateIfLessThan(
8790eae32dcSDimitry Andric computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));
8800eae32dcSDimitry Andric }
8810eae32dcSDimitry Andric };
8820eae32dcSDimitry Andric
883*5f757f3fSDimitry Andric // Try to concatenate two chains w/o splitting.
8840eae32dcSDimitry Andric Gain.updateIfLessThan(
88506c3fb27SDimitry Andric computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y));
8860eae32dcSDimitry Andric
887*5f757f3fSDimitry Andric // Attach (a part of) ChainPred before the first node of ChainSucc.
88806c3fb27SDimitry Andric for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
88906c3fb27SDimitry Andric const NodeT *SrcBlock = Jump->Source;
8900eae32dcSDimitry Andric if (SrcBlock->CurChain != ChainPred)
8910eae32dcSDimitry Andric continue;
8920eae32dcSDimitry Andric size_t Offset = SrcBlock->CurIndex + 1;
89306c3fb27SDimitry Andric tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
8940eae32dcSDimitry Andric }
8950eae32dcSDimitry Andric
896*5f757f3fSDimitry Andric // Attach (a part of) ChainPred after the last node of ChainSucc.
89706c3fb27SDimitry Andric for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
898*5f757f3fSDimitry Andric const NodeT *DstBlock = Jump->Target;
8990eae32dcSDimitry Andric if (DstBlock->CurChain != ChainPred)
9000eae32dcSDimitry Andric continue;
9010eae32dcSDimitry Andric size_t Offset = DstBlock->CurIndex;
90206c3fb27SDimitry Andric tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
9030eae32dcSDimitry Andric }
9040eae32dcSDimitry Andric
905*5f757f3fSDimitry Andric // Try to break ChainPred in various ways and concatenate with ChainSucc.
90606c3fb27SDimitry Andric if (ChainPred->Nodes.size() <= ChainSplitThreshold) {
90706c3fb27SDimitry Andric for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {
908*5f757f3fSDimitry Andric // Do not split the chain along a fall-through jump. One of the two
909*5f757f3fSDimitry Andric // loops above may still "break" such a jump whenever it results in a
910*5f757f3fSDimitry Andric // new fall-through.
911*5f757f3fSDimitry Andric const NodeT *BB = ChainPred->Nodes[Offset - 1];
912*5f757f3fSDimitry Andric const NodeT *BB2 = ChainPred->Nodes[Offset];
913*5f757f3fSDimitry Andric if (BB->isSuccessor(BB2))
914*5f757f3fSDimitry Andric continue;
915*5f757f3fSDimitry Andric
916*5f757f3fSDimitry Andric // In practice, applying X2_Y_X1 merging almost never provides benefits;
917*5f757f3fSDimitry Andric // thus, we exclude it from consideration to reduce the search space.
91806c3fb27SDimitry Andric tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,
91906c3fb27SDimitry Andric MergeTypeT::X2_X1_Y});
9200eae32dcSDimitry Andric }
9210eae32dcSDimitry Andric }
922*5f757f3fSDimitry Andric
9230eae32dcSDimitry Andric Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
9240eae32dcSDimitry Andric return Gain;
9250eae32dcSDimitry Andric }
9260eae32dcSDimitry Andric
9270eae32dcSDimitry Andric /// Compute the score gain of merging two chains, respecting a given
9280eae32dcSDimitry Andric /// merge 'type' and 'offset'.
9290eae32dcSDimitry Andric ///
9300eae32dcSDimitry Andric /// The two chains are not modified in the method.
computeMergeGain(const ChainT * ChainPred,const ChainT * ChainSucc,const MergedJumpsT & Jumps,size_t MergeOffset,MergeTypeT MergeType) const93106c3fb27SDimitry Andric MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
932*5f757f3fSDimitry Andric const MergedJumpsT &Jumps, size_t MergeOffset,
933*5f757f3fSDimitry Andric MergeTypeT MergeType) const {
934*5f757f3fSDimitry Andric MergedNodesT MergedNodes =
93506c3fb27SDimitry Andric mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
9360eae32dcSDimitry Andric
937*5f757f3fSDimitry Andric // Do not allow a merge that does not preserve the original entry point.
9380eae32dcSDimitry Andric if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
939*5f757f3fSDimitry Andric !MergedNodes.getFirstNode()->isEntry())
94006c3fb27SDimitry Andric return MergeGainT();
9410eae32dcSDimitry Andric
942*5f757f3fSDimitry Andric // The gain for the new chain.
943*5f757f3fSDimitry Andric double NewScore = extTSPScore(MergedNodes, Jumps);
944*5f757f3fSDimitry Andric double CurScore = ChainPred->Score;
945*5f757f3fSDimitry Andric return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);
9460eae32dcSDimitry Andric }
9470eae32dcSDimitry Andric
9480eae32dcSDimitry Andric /// Merge chain From into chain Into, update the list of active chains,
9490eae32dcSDimitry Andric /// adjacency information, and the corresponding cached values.
mergeChains(ChainT * Into,ChainT * From,size_t MergeOffset,MergeTypeT MergeType)95006c3fb27SDimitry Andric void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,
95106c3fb27SDimitry Andric MergeTypeT MergeType) {
9520eae32dcSDimitry Andric assert(Into != From && "a chain cannot be merged with itself");
9530eae32dcSDimitry Andric
954*5f757f3fSDimitry Andric // Merge the nodes.
955*5f757f3fSDimitry Andric MergedNodesT MergedNodes =
95606c3fb27SDimitry Andric mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
95706c3fb27SDimitry Andric Into->merge(From, MergedNodes.getNodes());
95806c3fb27SDimitry Andric
959*5f757f3fSDimitry Andric // Merge the edges.
9600eae32dcSDimitry Andric Into->mergeEdges(From);
9610eae32dcSDimitry Andric From->clear();
9620eae32dcSDimitry Andric
963*5f757f3fSDimitry Andric // Update cached ext-tsp score for the new chain.
964bdd1243dSDimitry Andric ChainEdge *SelfEdge = Into->getEdge(Into);
9650eae32dcSDimitry Andric if (SelfEdge != nullptr) {
966*5f757f3fSDimitry Andric MergedNodes = MergedNodesT(Into->Nodes.begin(), Into->Nodes.end());
967*5f757f3fSDimitry Andric MergedJumpsT MergedJumps(&SelfEdge->jumps());
968*5f757f3fSDimitry Andric Into->Score = extTSPScore(MergedNodes, MergedJumps);
9690eae32dcSDimitry Andric }
9700eae32dcSDimitry Andric
971*5f757f3fSDimitry Andric // Remove the chain from the list of active chains.
972*5f757f3fSDimitry Andric llvm::erase(HotChains, From);
9730eae32dcSDimitry Andric
974*5f757f3fSDimitry Andric // Invalidate caches.
97506c3fb27SDimitry Andric for (auto EdgeIt : Into->Edges)
97606c3fb27SDimitry Andric EdgeIt.second->invalidateCache();
9770eae32dcSDimitry Andric }
9780eae32dcSDimitry Andric
97906c3fb27SDimitry Andric /// Concatenate all chains into the final order.
concatChains()980*5f757f3fSDimitry Andric std::vector<uint64_t> concatChains() {
981*5f757f3fSDimitry Andric // Collect non-empty chains.
98206c3fb27SDimitry Andric std::vector<const ChainT *> SortedChains;
98306c3fb27SDimitry Andric for (ChainT &Chain : AllChains) {
984*5f757f3fSDimitry Andric if (!Chain.Nodes.empty())
9850eae32dcSDimitry Andric SortedChains.push_back(&Chain);
9860eae32dcSDimitry Andric }
9870eae32dcSDimitry Andric
988*5f757f3fSDimitry Andric // Sorting chains by density in the decreasing order.
989*5f757f3fSDimitry Andric std::sort(SortedChains.begin(), SortedChains.end(),
99006c3fb27SDimitry Andric [&](const ChainT *L, const ChainT *R) {
991*5f757f3fSDimitry Andric // Place the entry point at the beginning of the order.
99206c3fb27SDimitry Andric if (L->isEntry() != R->isEntry())
99306c3fb27SDimitry Andric return L->isEntry();
9940eae32dcSDimitry Andric
995*5f757f3fSDimitry Andric // Compare by density and break ties by chain identifiers.
996*5f757f3fSDimitry Andric return std::make_tuple(-L->density(), L->Id) <
997*5f757f3fSDimitry Andric std::make_tuple(-R->density(), R->Id);
9980eae32dcSDimitry Andric });
9990eae32dcSDimitry Andric
1000*5f757f3fSDimitry Andric // Collect the nodes in the order specified by their chains.
1001*5f757f3fSDimitry Andric std::vector<uint64_t> Order;
10020eae32dcSDimitry Andric Order.reserve(NumNodes);
1003*5f757f3fSDimitry Andric for (const ChainT *Chain : SortedChains)
1004*5f757f3fSDimitry Andric for (NodeT *Node : Chain->Nodes)
100506c3fb27SDimitry Andric Order.push_back(Node->Index);
1006*5f757f3fSDimitry Andric return Order;
10070eae32dcSDimitry Andric }
10080eae32dcSDimitry Andric
10090eae32dcSDimitry Andric private:
10100eae32dcSDimitry Andric /// The number of nodes in the graph.
10110eae32dcSDimitry Andric const size_t NumNodes;
10120eae32dcSDimitry Andric
10130eae32dcSDimitry Andric /// Successors of each node.
10140eae32dcSDimitry Andric std::vector<std::vector<uint64_t>> SuccNodes;
10150eae32dcSDimitry Andric
10160eae32dcSDimitry Andric /// Predecessors of each node.
10170eae32dcSDimitry Andric std::vector<std::vector<uint64_t>> PredNodes;
10180eae32dcSDimitry Andric
101906c3fb27SDimitry Andric /// All nodes (basic blocks) in the graph.
102006c3fb27SDimitry Andric std::vector<NodeT> AllNodes;
10210eae32dcSDimitry Andric
102206c3fb27SDimitry Andric /// All jumps between the nodes.
102306c3fb27SDimitry Andric std::vector<JumpT> AllJumps;
10240eae32dcSDimitry Andric
102506c3fb27SDimitry Andric /// All chains of nodes.
102606c3fb27SDimitry Andric std::vector<ChainT> AllChains;
10270eae32dcSDimitry Andric
102806c3fb27SDimitry Andric /// All edges between the chains.
10290eae32dcSDimitry Andric std::vector<ChainEdge> AllEdges;
10300eae32dcSDimitry Andric
10310eae32dcSDimitry Andric /// Active chains. The vector gets updated at runtime when chains are merged.
103206c3fb27SDimitry Andric std::vector<ChainT *> HotChains;
10330eae32dcSDimitry Andric };
10340eae32dcSDimitry Andric
1035*5f757f3fSDimitry Andric /// The implementation of the Cache-Directed Sort (CDSort) algorithm for
1036*5f757f3fSDimitry Andric /// ordering functions represented by a call graph.
1037*5f757f3fSDimitry Andric class CDSortImpl {
1038*5f757f3fSDimitry Andric public:
CDSortImpl(const CDSortConfig & Config,ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts,ArrayRef<uint64_t> EdgeOffsets)1039*5f757f3fSDimitry Andric CDSortImpl(const CDSortConfig &Config, ArrayRef<uint64_t> NodeSizes,
1040*5f757f3fSDimitry Andric ArrayRef<uint64_t> NodeCounts, ArrayRef<EdgeCount> EdgeCounts,
1041*5f757f3fSDimitry Andric ArrayRef<uint64_t> EdgeOffsets)
1042*5f757f3fSDimitry Andric : Config(Config), NumNodes(NodeSizes.size()) {
1043*5f757f3fSDimitry Andric initialize(NodeSizes, NodeCounts, EdgeCounts, EdgeOffsets);
1044*5f757f3fSDimitry Andric }
1045*5f757f3fSDimitry Andric
1046*5f757f3fSDimitry Andric /// Run the algorithm and return an ordered set of function clusters.
run()1047*5f757f3fSDimitry Andric std::vector<uint64_t> run() {
1048*5f757f3fSDimitry Andric // Merge pairs of chains while improving the objective.
1049*5f757f3fSDimitry Andric mergeChainPairs();
1050*5f757f3fSDimitry Andric
1051*5f757f3fSDimitry Andric // Collect nodes from all the chains.
1052*5f757f3fSDimitry Andric return concatChains();
1053*5f757f3fSDimitry Andric }
1054*5f757f3fSDimitry Andric
1055*5f757f3fSDimitry Andric private:
1056*5f757f3fSDimitry Andric /// Initialize the algorithm's data structures.
initialize(const ArrayRef<uint64_t> & NodeSizes,const ArrayRef<uint64_t> & NodeCounts,const ArrayRef<EdgeCount> & EdgeCounts,const ArrayRef<uint64_t> & EdgeOffsets)1057*5f757f3fSDimitry Andric void initialize(const ArrayRef<uint64_t> &NodeSizes,
1058*5f757f3fSDimitry Andric const ArrayRef<uint64_t> &NodeCounts,
1059*5f757f3fSDimitry Andric const ArrayRef<EdgeCount> &EdgeCounts,
1060*5f757f3fSDimitry Andric const ArrayRef<uint64_t> &EdgeOffsets) {
1061*5f757f3fSDimitry Andric // Initialize nodes.
1062*5f757f3fSDimitry Andric AllNodes.reserve(NumNodes);
1063*5f757f3fSDimitry Andric for (uint64_t Node = 0; Node < NumNodes; Node++) {
1064*5f757f3fSDimitry Andric uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);
1065*5f757f3fSDimitry Andric uint64_t ExecutionCount = NodeCounts[Node];
1066*5f757f3fSDimitry Andric AllNodes.emplace_back(Node, Size, ExecutionCount);
1067*5f757f3fSDimitry Andric TotalSamples += ExecutionCount;
1068*5f757f3fSDimitry Andric if (ExecutionCount > 0)
1069*5f757f3fSDimitry Andric TotalSize += Size;
1070*5f757f3fSDimitry Andric }
1071*5f757f3fSDimitry Andric
1072*5f757f3fSDimitry Andric // Initialize jumps between the nodes.
1073*5f757f3fSDimitry Andric SuccNodes.resize(NumNodes);
1074*5f757f3fSDimitry Andric PredNodes.resize(NumNodes);
1075*5f757f3fSDimitry Andric AllJumps.reserve(EdgeCounts.size());
1076*5f757f3fSDimitry Andric for (size_t I = 0; I < EdgeCounts.size(); I++) {
1077*5f757f3fSDimitry Andric auto [Pred, Succ, Count] = EdgeCounts[I];
1078*5f757f3fSDimitry Andric // Ignore recursive calls.
1079*5f757f3fSDimitry Andric if (Pred == Succ)
1080*5f757f3fSDimitry Andric continue;
1081*5f757f3fSDimitry Andric
1082*5f757f3fSDimitry Andric SuccNodes[Pred].push_back(Succ);
1083*5f757f3fSDimitry Andric PredNodes[Succ].push_back(Pred);
1084*5f757f3fSDimitry Andric if (Count > 0) {
1085*5f757f3fSDimitry Andric NodeT &PredNode = AllNodes[Pred];
1086*5f757f3fSDimitry Andric NodeT &SuccNode = AllNodes[Succ];
1087*5f757f3fSDimitry Andric AllJumps.emplace_back(&PredNode, &SuccNode, Count);
1088*5f757f3fSDimitry Andric AllJumps.back().Offset = EdgeOffsets[I];
1089*5f757f3fSDimitry Andric SuccNode.InJumps.push_back(&AllJumps.back());
1090*5f757f3fSDimitry Andric PredNode.OutJumps.push_back(&AllJumps.back());
1091*5f757f3fSDimitry Andric // Adjust execution counts.
1092*5f757f3fSDimitry Andric PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Count);
1093*5f757f3fSDimitry Andric SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Count);
1094*5f757f3fSDimitry Andric }
1095*5f757f3fSDimitry Andric }
1096*5f757f3fSDimitry Andric
1097*5f757f3fSDimitry Andric // Initialize chains.
1098*5f757f3fSDimitry Andric AllChains.reserve(NumNodes);
1099*5f757f3fSDimitry Andric for (NodeT &Node : AllNodes) {
1100*5f757f3fSDimitry Andric // Adjust execution counts.
1101*5f757f3fSDimitry Andric Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount());
1102*5f757f3fSDimitry Andric Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount());
1103*5f757f3fSDimitry Andric // Create chain.
1104*5f757f3fSDimitry Andric AllChains.emplace_back(Node.Index, &Node);
1105*5f757f3fSDimitry Andric Node.CurChain = &AllChains.back();
1106*5f757f3fSDimitry Andric }
1107*5f757f3fSDimitry Andric
1108*5f757f3fSDimitry Andric // Initialize chain edges.
1109*5f757f3fSDimitry Andric AllEdges.reserve(AllJumps.size());
1110*5f757f3fSDimitry Andric for (NodeT &PredNode : AllNodes) {
1111*5f757f3fSDimitry Andric for (JumpT *Jump : PredNode.OutJumps) {
1112*5f757f3fSDimitry Andric NodeT *SuccNode = Jump->Target;
1113*5f757f3fSDimitry Andric ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
1114*5f757f3fSDimitry Andric // This edge is already present in the graph.
1115*5f757f3fSDimitry Andric if (CurEdge != nullptr) {
1116*5f757f3fSDimitry Andric assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
1117*5f757f3fSDimitry Andric CurEdge->appendJump(Jump);
1118*5f757f3fSDimitry Andric continue;
1119*5f757f3fSDimitry Andric }
1120*5f757f3fSDimitry Andric // This is a new edge.
1121*5f757f3fSDimitry Andric AllEdges.emplace_back(Jump);
1122*5f757f3fSDimitry Andric PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
1123*5f757f3fSDimitry Andric SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
1124*5f757f3fSDimitry Andric }
1125*5f757f3fSDimitry Andric }
1126*5f757f3fSDimitry Andric }
1127*5f757f3fSDimitry Andric
1128*5f757f3fSDimitry Andric /// Merge pairs of chains while there is an improvement in the objective.
mergeChainPairs()1129*5f757f3fSDimitry Andric void mergeChainPairs() {
1130*5f757f3fSDimitry Andric // Create a priority queue containing all edges ordered by the merge gain.
1131*5f757f3fSDimitry Andric auto GainComparator = [](ChainEdge *L, ChainEdge *R) {
1132*5f757f3fSDimitry Andric return std::make_tuple(-L->gain(), L->srcChain()->Id, L->dstChain()->Id) <
1133*5f757f3fSDimitry Andric std::make_tuple(-R->gain(), R->srcChain()->Id, R->dstChain()->Id);
1134*5f757f3fSDimitry Andric };
1135*5f757f3fSDimitry Andric std::set<ChainEdge *, decltype(GainComparator)> Queue(GainComparator);
1136*5f757f3fSDimitry Andric
1137*5f757f3fSDimitry Andric // Insert the edges into the queue.
1138*5f757f3fSDimitry Andric [[maybe_unused]] size_t NumActiveChains = 0;
1139*5f757f3fSDimitry Andric for (NodeT &Node : AllNodes) {
1140*5f757f3fSDimitry Andric if (Node.ExecutionCount == 0)
1141*5f757f3fSDimitry Andric continue;
1142*5f757f3fSDimitry Andric ++NumActiveChains;
1143*5f757f3fSDimitry Andric for (const auto &[_, Edge] : Node.CurChain->Edges) {
1144*5f757f3fSDimitry Andric // Ignore self-edges.
1145*5f757f3fSDimitry Andric if (Edge->isSelfEdge())
1146*5f757f3fSDimitry Andric continue;
1147*5f757f3fSDimitry Andric // Ignore already processed edges.
1148*5f757f3fSDimitry Andric if (Edge->gain() != -1.0)
1149*5f757f3fSDimitry Andric continue;
1150*5f757f3fSDimitry Andric
1151*5f757f3fSDimitry Andric // Compute the gain of merging the two chains.
1152*5f757f3fSDimitry Andric MergeGainT Gain = getBestMergeGain(Edge);
1153*5f757f3fSDimitry Andric Edge->setMergeGain(Gain);
1154*5f757f3fSDimitry Andric
1155*5f757f3fSDimitry Andric if (Edge->gain() > EPS)
1156*5f757f3fSDimitry Andric Queue.insert(Edge);
1157*5f757f3fSDimitry Andric }
1158*5f757f3fSDimitry Andric }
1159*5f757f3fSDimitry Andric
1160*5f757f3fSDimitry Andric // Merge the chains while the gain of merging is positive.
1161*5f757f3fSDimitry Andric while (!Queue.empty()) {
1162*5f757f3fSDimitry Andric // Extract the best (top) edge for merging.
1163*5f757f3fSDimitry Andric ChainEdge *BestEdge = *Queue.begin();
1164*5f757f3fSDimitry Andric Queue.erase(Queue.begin());
1165*5f757f3fSDimitry Andric ChainT *BestSrcChain = BestEdge->srcChain();
1166*5f757f3fSDimitry Andric ChainT *BestDstChain = BestEdge->dstChain();
1167*5f757f3fSDimitry Andric
1168*5f757f3fSDimitry Andric // Remove outdated edges from the queue.
1169*5f757f3fSDimitry Andric for (const auto &[_, ChainEdge] : BestSrcChain->Edges)
1170*5f757f3fSDimitry Andric Queue.erase(ChainEdge);
1171*5f757f3fSDimitry Andric for (const auto &[_, ChainEdge] : BestDstChain->Edges)
1172*5f757f3fSDimitry Andric Queue.erase(ChainEdge);
1173*5f757f3fSDimitry Andric
1174*5f757f3fSDimitry Andric // Merge the best pair of chains.
1175*5f757f3fSDimitry Andric MergeGainT BestGain = BestEdge->getMergeGain();
1176*5f757f3fSDimitry Andric mergeChains(BestSrcChain, BestDstChain, BestGain.mergeOffset(),
1177*5f757f3fSDimitry Andric BestGain.mergeType());
1178*5f757f3fSDimitry Andric --NumActiveChains;
1179*5f757f3fSDimitry Andric
1180*5f757f3fSDimitry Andric // Insert newly created edges into the queue.
1181*5f757f3fSDimitry Andric for (const auto &[_, Edge] : BestSrcChain->Edges) {
1182*5f757f3fSDimitry Andric // Ignore loop edges.
1183*5f757f3fSDimitry Andric if (Edge->isSelfEdge())
1184*5f757f3fSDimitry Andric continue;
1185*5f757f3fSDimitry Andric if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
1186*5f757f3fSDimitry Andric Config.MaxChainSize)
1187*5f757f3fSDimitry Andric continue;
1188*5f757f3fSDimitry Andric
1189*5f757f3fSDimitry Andric // Compute the gain of merging the two chains.
1190*5f757f3fSDimitry Andric MergeGainT Gain = getBestMergeGain(Edge);
1191*5f757f3fSDimitry Andric Edge->setMergeGain(Gain);
1192*5f757f3fSDimitry Andric
1193*5f757f3fSDimitry Andric if (Edge->gain() > EPS)
1194*5f757f3fSDimitry Andric Queue.insert(Edge);
1195*5f757f3fSDimitry Andric }
1196*5f757f3fSDimitry Andric }
1197*5f757f3fSDimitry Andric
1198*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Cache-directed function sorting reduced the number"
1199*5f757f3fSDimitry Andric << " of chains from " << NumNodes << " to "
1200*5f757f3fSDimitry Andric << NumActiveChains << "\n");
1201*5f757f3fSDimitry Andric }
1202*5f757f3fSDimitry Andric
1203*5f757f3fSDimitry Andric /// Compute the gain of merging two chains.
1204*5f757f3fSDimitry Andric ///
1205*5f757f3fSDimitry Andric /// The function considers all possible ways of merging two chains and
1206*5f757f3fSDimitry Andric /// computes the one having the largest increase in ExtTSP objective. The
1207*5f757f3fSDimitry Andric /// result is a pair with the first element being the gain and the second
1208*5f757f3fSDimitry Andric /// element being the corresponding merging type.
getBestMergeGain(ChainEdge * Edge) const1209*5f757f3fSDimitry Andric MergeGainT getBestMergeGain(ChainEdge *Edge) const {
1210*5f757f3fSDimitry Andric assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
1211*5f757f3fSDimitry Andric // Precompute jumps between ChainPred and ChainSucc.
1212*5f757f3fSDimitry Andric MergedJumpsT Jumps(&Edge->jumps());
1213*5f757f3fSDimitry Andric ChainT *SrcChain = Edge->srcChain();
1214*5f757f3fSDimitry Andric ChainT *DstChain = Edge->dstChain();
1215*5f757f3fSDimitry Andric
1216*5f757f3fSDimitry Andric // This object holds the best currently chosen gain of merging two chains.
1217*5f757f3fSDimitry Andric MergeGainT Gain = MergeGainT();
1218*5f757f3fSDimitry Andric
1219*5f757f3fSDimitry Andric /// Given a list of merge types, try to merge two chains and update Gain
1220*5f757f3fSDimitry Andric /// with a better alternative.
1221*5f757f3fSDimitry Andric auto tryChainMerging = [&](const std::vector<MergeTypeT> &MergeTypes) {
1222*5f757f3fSDimitry Andric // Apply the merge, compute the corresponding gain, and update the best
1223*5f757f3fSDimitry Andric // value, if the merge is beneficial.
1224*5f757f3fSDimitry Andric for (const MergeTypeT &MergeType : MergeTypes) {
1225*5f757f3fSDimitry Andric MergeGainT NewGain =
1226*5f757f3fSDimitry Andric computeMergeGain(SrcChain, DstChain, Jumps, MergeType);
1227*5f757f3fSDimitry Andric
1228*5f757f3fSDimitry Andric // When forward and backward gains are the same, prioritize merging that
1229*5f757f3fSDimitry Andric // preserves the original order of the functions in the binary.
1230*5f757f3fSDimitry Andric if (std::abs(Gain.score() - NewGain.score()) < EPS) {
1231*5f757f3fSDimitry Andric if ((MergeType == MergeTypeT::X_Y && SrcChain->Id < DstChain->Id) ||
1232*5f757f3fSDimitry Andric (MergeType == MergeTypeT::Y_X && SrcChain->Id > DstChain->Id)) {
1233*5f757f3fSDimitry Andric Gain = NewGain;
1234*5f757f3fSDimitry Andric }
1235*5f757f3fSDimitry Andric } else if (NewGain.score() > Gain.score() + EPS) {
1236*5f757f3fSDimitry Andric Gain = NewGain;
1237*5f757f3fSDimitry Andric }
1238*5f757f3fSDimitry Andric }
1239*5f757f3fSDimitry Andric };
1240*5f757f3fSDimitry Andric
1241*5f757f3fSDimitry Andric // Try to concatenate two chains w/o splitting.
1242*5f757f3fSDimitry Andric tryChainMerging({MergeTypeT::X_Y, MergeTypeT::Y_X});
1243*5f757f3fSDimitry Andric
1244*5f757f3fSDimitry Andric return Gain;
1245*5f757f3fSDimitry Andric }
1246*5f757f3fSDimitry Andric
1247*5f757f3fSDimitry Andric /// Compute the score gain of merging two chains, respecting a given type.
1248*5f757f3fSDimitry Andric ///
1249*5f757f3fSDimitry Andric /// The two chains are not modified in the method.
computeMergeGain(ChainT * ChainPred,ChainT * ChainSucc,const MergedJumpsT & Jumps,MergeTypeT MergeType) const1250*5f757f3fSDimitry Andric MergeGainT computeMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
1251*5f757f3fSDimitry Andric const MergedJumpsT &Jumps,
1252*5f757f3fSDimitry Andric MergeTypeT MergeType) const {
1253*5f757f3fSDimitry Andric // This doesn't depend on the ordering of the nodes
1254*5f757f3fSDimitry Andric double FreqGain = freqBasedLocalityGain(ChainPred, ChainSucc);
1255*5f757f3fSDimitry Andric
1256*5f757f3fSDimitry Andric // Merge offset is always 0, as the chains are not split.
1257*5f757f3fSDimitry Andric size_t MergeOffset = 0;
1258*5f757f3fSDimitry Andric auto MergedBlocks =
1259*5f757f3fSDimitry Andric mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
1260*5f757f3fSDimitry Andric double DistGain = distBasedLocalityGain(MergedBlocks, Jumps);
1261*5f757f3fSDimitry Andric
1262*5f757f3fSDimitry Andric double GainScore = DistGain + Config.FrequencyScale * FreqGain;
1263*5f757f3fSDimitry Andric // Scale the result to increase the importance of merging short chains.
1264*5f757f3fSDimitry Andric if (GainScore >= 0.0)
1265*5f757f3fSDimitry Andric GainScore /= std::min(ChainPred->Size, ChainSucc->Size);
1266*5f757f3fSDimitry Andric
1267*5f757f3fSDimitry Andric return MergeGainT(GainScore, MergeOffset, MergeType);
1268*5f757f3fSDimitry Andric }
1269*5f757f3fSDimitry Andric
1270*5f757f3fSDimitry Andric /// Compute the change of the frequency locality after merging the chains.
freqBasedLocalityGain(ChainT * ChainPred,ChainT * ChainSucc) const1271*5f757f3fSDimitry Andric double freqBasedLocalityGain(ChainT *ChainPred, ChainT *ChainSucc) const {
1272*5f757f3fSDimitry Andric auto missProbability = [&](double ChainDensity) {
1273*5f757f3fSDimitry Andric double PageSamples = ChainDensity * Config.CacheSize;
1274*5f757f3fSDimitry Andric if (PageSamples >= TotalSamples)
1275*5f757f3fSDimitry Andric return 0.0;
1276*5f757f3fSDimitry Andric double P = PageSamples / TotalSamples;
1277*5f757f3fSDimitry Andric return pow(1.0 - P, static_cast<double>(Config.CacheEntries));
1278*5f757f3fSDimitry Andric };
1279*5f757f3fSDimitry Andric
1280*5f757f3fSDimitry Andric // Cache misses on the chains before merging.
1281*5f757f3fSDimitry Andric double CurScore =
1282*5f757f3fSDimitry Andric ChainPred->ExecutionCount * missProbability(ChainPred->density()) +
1283*5f757f3fSDimitry Andric ChainSucc->ExecutionCount * missProbability(ChainSucc->density());
1284*5f757f3fSDimitry Andric
1285*5f757f3fSDimitry Andric // Cache misses on the merged chain
1286*5f757f3fSDimitry Andric double MergedCounts = ChainPred->ExecutionCount + ChainSucc->ExecutionCount;
1287*5f757f3fSDimitry Andric double MergedSize = ChainPred->Size + ChainSucc->Size;
1288*5f757f3fSDimitry Andric double MergedDensity = static_cast<double>(MergedCounts) / MergedSize;
1289*5f757f3fSDimitry Andric double NewScore = MergedCounts * missProbability(MergedDensity);
1290*5f757f3fSDimitry Andric
1291*5f757f3fSDimitry Andric return CurScore - NewScore;
1292*5f757f3fSDimitry Andric }
1293*5f757f3fSDimitry Andric
1294*5f757f3fSDimitry Andric /// Compute the distance locality for a jump / call.
distScore(uint64_t SrcAddr,uint64_t DstAddr,uint64_t Count) const1295*5f757f3fSDimitry Andric double distScore(uint64_t SrcAddr, uint64_t DstAddr, uint64_t Count) const {
1296*5f757f3fSDimitry Andric uint64_t Dist = SrcAddr <= DstAddr ? DstAddr - SrcAddr : SrcAddr - DstAddr;
1297*5f757f3fSDimitry Andric double D = Dist == 0 ? 0.1 : static_cast<double>(Dist);
1298*5f757f3fSDimitry Andric return static_cast<double>(Count) * std::pow(D, -Config.DistancePower);
1299*5f757f3fSDimitry Andric }
1300*5f757f3fSDimitry Andric
1301*5f757f3fSDimitry Andric /// Compute the change of the distance locality after merging the chains.
distBasedLocalityGain(const MergedNodesT & Nodes,const MergedJumpsT & Jumps) const1302*5f757f3fSDimitry Andric double distBasedLocalityGain(const MergedNodesT &Nodes,
1303*5f757f3fSDimitry Andric const MergedJumpsT &Jumps) const {
1304*5f757f3fSDimitry Andric uint64_t CurAddr = 0;
1305*5f757f3fSDimitry Andric Nodes.forEach([&](const NodeT *Node) {
1306*5f757f3fSDimitry Andric Node->EstimatedAddr = CurAddr;
1307*5f757f3fSDimitry Andric CurAddr += Node->Size;
1308*5f757f3fSDimitry Andric });
1309*5f757f3fSDimitry Andric
1310*5f757f3fSDimitry Andric double CurScore = 0;
1311*5f757f3fSDimitry Andric double NewScore = 0;
1312*5f757f3fSDimitry Andric Jumps.forEach([&](const JumpT *Jump) {
1313*5f757f3fSDimitry Andric uint64_t SrcAddr = Jump->Source->EstimatedAddr + Jump->Offset;
1314*5f757f3fSDimitry Andric uint64_t DstAddr = Jump->Target->EstimatedAddr;
1315*5f757f3fSDimitry Andric NewScore += distScore(SrcAddr, DstAddr, Jump->ExecutionCount);
1316*5f757f3fSDimitry Andric CurScore += distScore(0, TotalSize, Jump->ExecutionCount);
1317*5f757f3fSDimitry Andric });
1318*5f757f3fSDimitry Andric return NewScore - CurScore;
1319*5f757f3fSDimitry Andric }
1320*5f757f3fSDimitry Andric
1321*5f757f3fSDimitry Andric /// Merge chain From into chain Into, update the list of active chains,
1322*5f757f3fSDimitry Andric /// adjacency information, and the corresponding cached values.
mergeChains(ChainT * Into,ChainT * From,size_t MergeOffset,MergeTypeT MergeType)1323*5f757f3fSDimitry Andric void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,
1324*5f757f3fSDimitry Andric MergeTypeT MergeType) {
1325*5f757f3fSDimitry Andric assert(Into != From && "a chain cannot be merged with itself");
1326*5f757f3fSDimitry Andric
1327*5f757f3fSDimitry Andric // Merge the nodes.
1328*5f757f3fSDimitry Andric MergedNodesT MergedNodes =
1329*5f757f3fSDimitry Andric mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
1330*5f757f3fSDimitry Andric Into->merge(From, MergedNodes.getNodes());
1331*5f757f3fSDimitry Andric
1332*5f757f3fSDimitry Andric // Merge the edges.
1333*5f757f3fSDimitry Andric Into->mergeEdges(From);
1334*5f757f3fSDimitry Andric From->clear();
1335*5f757f3fSDimitry Andric }
1336*5f757f3fSDimitry Andric
1337*5f757f3fSDimitry Andric /// Concatenate all chains into the final order.
concatChains()1338*5f757f3fSDimitry Andric std::vector<uint64_t> concatChains() {
1339*5f757f3fSDimitry Andric // Collect chains and calculate density stats for their sorting.
1340*5f757f3fSDimitry Andric std::vector<const ChainT *> SortedChains;
1341*5f757f3fSDimitry Andric DenseMap<const ChainT *, double> ChainDensity;
1342*5f757f3fSDimitry Andric for (ChainT &Chain : AllChains) {
1343*5f757f3fSDimitry Andric if (!Chain.Nodes.empty()) {
1344*5f757f3fSDimitry Andric SortedChains.push_back(&Chain);
1345*5f757f3fSDimitry Andric // Using doubles to avoid overflow of ExecutionCounts.
1346*5f757f3fSDimitry Andric double Size = 0;
1347*5f757f3fSDimitry Andric double ExecutionCount = 0;
1348*5f757f3fSDimitry Andric for (NodeT *Node : Chain.Nodes) {
1349*5f757f3fSDimitry Andric Size += static_cast<double>(Node->Size);
1350*5f757f3fSDimitry Andric ExecutionCount += static_cast<double>(Node->ExecutionCount);
1351*5f757f3fSDimitry Andric }
1352*5f757f3fSDimitry Andric assert(Size > 0 && "a chain of zero size");
1353*5f757f3fSDimitry Andric ChainDensity[&Chain] = ExecutionCount / Size;
1354*5f757f3fSDimitry Andric }
1355*5f757f3fSDimitry Andric }
1356*5f757f3fSDimitry Andric
1357*5f757f3fSDimitry Andric // Sort chains by density in the decreasing order.
1358*5f757f3fSDimitry Andric std::sort(SortedChains.begin(), SortedChains.end(),
1359*5f757f3fSDimitry Andric [&](const ChainT *L, const ChainT *R) {
1360*5f757f3fSDimitry Andric const double DL = ChainDensity[L];
1361*5f757f3fSDimitry Andric const double DR = ChainDensity[R];
1362*5f757f3fSDimitry Andric // Compare by density and break ties by chain identifiers.
1363*5f757f3fSDimitry Andric return std::make_tuple(-DL, L->Id) <
1364*5f757f3fSDimitry Andric std::make_tuple(-DR, R->Id);
1365*5f757f3fSDimitry Andric });
1366*5f757f3fSDimitry Andric
1367*5f757f3fSDimitry Andric // Collect the nodes in the order specified by their chains.
1368*5f757f3fSDimitry Andric std::vector<uint64_t> Order;
1369*5f757f3fSDimitry Andric Order.reserve(NumNodes);
1370*5f757f3fSDimitry Andric for (const ChainT *Chain : SortedChains)
1371*5f757f3fSDimitry Andric for (NodeT *Node : Chain->Nodes)
1372*5f757f3fSDimitry Andric Order.push_back(Node->Index);
1373*5f757f3fSDimitry Andric return Order;
1374*5f757f3fSDimitry Andric }
1375*5f757f3fSDimitry Andric
1376*5f757f3fSDimitry Andric private:
1377*5f757f3fSDimitry Andric /// Config for the algorithm.
1378*5f757f3fSDimitry Andric const CDSortConfig Config;
1379*5f757f3fSDimitry Andric
1380*5f757f3fSDimitry Andric /// The number of nodes in the graph.
1381*5f757f3fSDimitry Andric const size_t NumNodes;
1382*5f757f3fSDimitry Andric
1383*5f757f3fSDimitry Andric /// Successors of each node.
1384*5f757f3fSDimitry Andric std::vector<std::vector<uint64_t>> SuccNodes;
1385*5f757f3fSDimitry Andric
1386*5f757f3fSDimitry Andric /// Predecessors of each node.
1387*5f757f3fSDimitry Andric std::vector<std::vector<uint64_t>> PredNodes;
1388*5f757f3fSDimitry Andric
1389*5f757f3fSDimitry Andric /// All nodes (functions) in the graph.
1390*5f757f3fSDimitry Andric std::vector<NodeT> AllNodes;
1391*5f757f3fSDimitry Andric
1392*5f757f3fSDimitry Andric /// All jumps (function calls) between the nodes.
1393*5f757f3fSDimitry Andric std::vector<JumpT> AllJumps;
1394*5f757f3fSDimitry Andric
1395*5f757f3fSDimitry Andric /// All chains of nodes.
1396*5f757f3fSDimitry Andric std::vector<ChainT> AllChains;
1397*5f757f3fSDimitry Andric
1398*5f757f3fSDimitry Andric /// All edges between the chains.
1399*5f757f3fSDimitry Andric std::vector<ChainEdge> AllEdges;
1400*5f757f3fSDimitry Andric
1401*5f757f3fSDimitry Andric /// The total number of samples in the graph.
1402*5f757f3fSDimitry Andric uint64_t TotalSamples{0};
1403*5f757f3fSDimitry Andric
1404*5f757f3fSDimitry Andric /// The total size of the nodes in the graph.
1405*5f757f3fSDimitry Andric uint64_t TotalSize{0};
1406*5f757f3fSDimitry Andric };
1407*5f757f3fSDimitry Andric
14080eae32dcSDimitry Andric } // end of anonymous namespace
14090eae32dcSDimitry Andric
141006c3fb27SDimitry Andric std::vector<uint64_t>
computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)1411*5f757f3fSDimitry Andric codelayout::computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,
1412*5f757f3fSDimitry Andric ArrayRef<uint64_t> NodeCounts,
1413*5f757f3fSDimitry Andric ArrayRef<EdgeCount> EdgeCounts) {
1414*5f757f3fSDimitry Andric // Verify correctness of the input data.
14150eae32dcSDimitry Andric assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");
141606c3fb27SDimitry Andric assert(NodeSizes.size() > 2 && "Incorrect input");
14170eae32dcSDimitry Andric
1418*5f757f3fSDimitry Andric // Apply the reordering algorithm.
141906c3fb27SDimitry Andric ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts);
1420*5f757f3fSDimitry Andric std::vector<uint64_t> Result = Alg.run();
14210eae32dcSDimitry Andric
1422*5f757f3fSDimitry Andric // Verify correctness of the output.
14230eae32dcSDimitry Andric assert(Result.front() == 0 && "Original entry point is not preserved");
142406c3fb27SDimitry Andric assert(Result.size() == NodeSizes.size() && "Incorrect size of layout");
14250eae32dcSDimitry Andric return Result;
14260eae32dcSDimitry Andric }
14270eae32dcSDimitry Andric
calcExtTspScore(ArrayRef<uint64_t> Order,ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)1428*5f757f3fSDimitry Andric double codelayout::calcExtTspScore(ArrayRef<uint64_t> Order,
1429*5f757f3fSDimitry Andric ArrayRef<uint64_t> NodeSizes,
1430*5f757f3fSDimitry Andric ArrayRef<uint64_t> NodeCounts,
1431*5f757f3fSDimitry Andric ArrayRef<EdgeCount> EdgeCounts) {
1432*5f757f3fSDimitry Andric // Estimate addresses of the blocks in memory.
1433bdd1243dSDimitry Andric std::vector<uint64_t> Addr(NodeSizes.size(), 0);
14340eae32dcSDimitry Andric for (size_t Idx = 1; Idx < Order.size(); Idx++) {
14350eae32dcSDimitry Andric Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]];
14360eae32dcSDimitry Andric }
1437bdd1243dSDimitry Andric std::vector<uint64_t> OutDegree(NodeSizes.size(), 0);
1438*5f757f3fSDimitry Andric for (auto Edge : EdgeCounts)
1439*5f757f3fSDimitry Andric ++OutDegree[Edge.src];
14400eae32dcSDimitry Andric
1441*5f757f3fSDimitry Andric // Increase the score for each jump.
14420eae32dcSDimitry Andric double Score = 0;
1443*5f757f3fSDimitry Andric for (auto Edge : EdgeCounts) {
1444*5f757f3fSDimitry Andric bool IsConditional = OutDegree[Edge.src] > 1;
1445*5f757f3fSDimitry Andric Score += ::extTSPScore(Addr[Edge.src], NodeSizes[Edge.src], Addr[Edge.dst],
1446*5f757f3fSDimitry Andric Edge.count, IsConditional);
14470eae32dcSDimitry Andric }
14480eae32dcSDimitry Andric return Score;
14490eae32dcSDimitry Andric }
14500eae32dcSDimitry Andric
calcExtTspScore(ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)1451*5f757f3fSDimitry Andric double codelayout::calcExtTspScore(ArrayRef<uint64_t> NodeSizes,
1452*5f757f3fSDimitry Andric ArrayRef<uint64_t> NodeCounts,
1453*5f757f3fSDimitry Andric ArrayRef<EdgeCount> EdgeCounts) {
1454bdd1243dSDimitry Andric std::vector<uint64_t> Order(NodeSizes.size());
14550eae32dcSDimitry Andric for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) {
14560eae32dcSDimitry Andric Order[Idx] = Idx;
14570eae32dcSDimitry Andric }
14580eae32dcSDimitry Andric return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts);
14590eae32dcSDimitry Andric }
1460*5f757f3fSDimitry Andric
computeCacheDirectedLayout(const CDSortConfig & Config,ArrayRef<uint64_t> FuncSizes,ArrayRef<uint64_t> FuncCounts,ArrayRef<EdgeCount> CallCounts,ArrayRef<uint64_t> CallOffsets)1461*5f757f3fSDimitry Andric std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
1462*5f757f3fSDimitry Andric const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes,
1463*5f757f3fSDimitry Andric ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts,
1464*5f757f3fSDimitry Andric ArrayRef<uint64_t> CallOffsets) {
1465*5f757f3fSDimitry Andric // Verify correctness of the input data.
1466*5f757f3fSDimitry Andric assert(FuncCounts.size() == FuncSizes.size() && "Incorrect input");
1467*5f757f3fSDimitry Andric
1468*5f757f3fSDimitry Andric // Apply the reordering algorithm.
1469*5f757f3fSDimitry Andric CDSortImpl Alg(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets);
1470*5f757f3fSDimitry Andric std::vector<uint64_t> Result = Alg.run();
1471*5f757f3fSDimitry Andric assert(Result.size() == FuncSizes.size() && "Incorrect size of layout");
1472*5f757f3fSDimitry Andric return Result;
1473*5f757f3fSDimitry Andric }
1474*5f757f3fSDimitry Andric
computeCacheDirectedLayout(ArrayRef<uint64_t> FuncSizes,ArrayRef<uint64_t> FuncCounts,ArrayRef<EdgeCount> CallCounts,ArrayRef<uint64_t> CallOffsets)1475*5f757f3fSDimitry Andric std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
1476*5f757f3fSDimitry Andric ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts,
1477*5f757f3fSDimitry Andric ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets) {
1478*5f757f3fSDimitry Andric CDSortConfig Config;
1479*5f757f3fSDimitry Andric // Populate the config from the command-line options.
1480*5f757f3fSDimitry Andric if (CacheEntries.getNumOccurrences() > 0)
1481*5f757f3fSDimitry Andric Config.CacheEntries = CacheEntries;
1482*5f757f3fSDimitry Andric if (CacheSize.getNumOccurrences() > 0)
1483*5f757f3fSDimitry Andric Config.CacheSize = CacheSize;
1484*5f757f3fSDimitry Andric if (CDMaxChainSize.getNumOccurrences() > 0)
1485*5f757f3fSDimitry Andric Config.MaxChainSize = CDMaxChainSize;
1486*5f757f3fSDimitry Andric if (DistancePower.getNumOccurrences() > 0)
1487*5f757f3fSDimitry Andric Config.DistancePower = DistancePower;
1488*5f757f3fSDimitry Andric if (FrequencyScale.getNumOccurrences() > 0)
1489*5f757f3fSDimitry Andric Config.FrequencyScale = FrequencyScale;
1490*5f757f3fSDimitry Andric return computeCacheDirectedLayout(Config, FuncSizes, FuncCounts, CallCounts,
1491*5f757f3fSDimitry Andric CallOffsets);
1492*5f757f3fSDimitry Andric }
1493