xref: /llvm-project/bolt/lib/Passes/HFSort.cpp (revision d2c876993625ce9b36bdd7ccc5e0c4cb04f32fb9)
12f09f445SMaksim Panchenko //===- bolt/Passes/HFSort.cpp - Cluster functions by hotness --------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
9a73b1b72SMaksim Panchenko // Implementation of HFSort algorithm for function ordering:
10a73b1b72SMaksim Panchenko // https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
11a73b1b72SMaksim Panchenko //
12a34c753fSRafael Auler //===----------------------------------------------------------------------===//
13a34c753fSRafael Auler 
14a34c753fSRafael Auler #include "bolt/Passes/HFSort.h"
15a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
16a34c753fSRafael Auler #include "llvm/Support/Debug.h"
17a34c753fSRafael Auler #include "llvm/Support/Format.h"
18a34c753fSRafael Auler #include "llvm/Support/raw_ostream.h"
19a34c753fSRafael Auler #include <unordered_set>
20a34c753fSRafael Auler 
21a34c753fSRafael Auler #define DEBUG_TYPE "hfsort"
22a34c753fSRafael Auler 
23a34c753fSRafael Auler namespace opts {
24a34c753fSRafael Auler extern llvm::cl::opt<unsigned> Verbosity;
25a34c753fSRafael Auler }
26a34c753fSRafael Auler 
27a34c753fSRafael Auler namespace llvm {
28a34c753fSRafael Auler namespace bolt {
29a34c753fSRafael Auler 
30a34c753fSRafael Auler using NodeId = CallGraph::NodeId;
31a34c753fSRafael Auler using Arc = CallGraph::Arc;
32a34c753fSRafael Auler using Node = CallGraph::Node;
33a34c753fSRafael Auler 
34a34c753fSRafael Auler namespace {
35a34c753fSRafael Auler 
36a34c753fSRafael Auler // The number of pages to reserve for the functions with highest
37a34c753fSRafael Auler // density (samples / size).  The functions put in these pages are not
38a34c753fSRafael Auler // considered for clustering.
39a34c753fSRafael Auler constexpr uint32_t FrozenPages = 0;
40a34c753fSRafael Auler 
41a34c753fSRafael Auler // The minimum approximate probability of a callee being called from a
42a34c753fSRafael Auler // particular arc to consider merging with the caller's cluster.
43a34c753fSRafael Auler constexpr double MinArcProbability = 0.1;
44a34c753fSRafael Auler 
45a34c753fSRafael Auler // This is a factor to determine by how much a caller cluster is
46a34c753fSRafael Auler // willing to degrade it's density by merging a callee.
47a34c753fSRafael Auler constexpr int CallerDegradeFactor = 8;
48a34c753fSRafael Auler 
4940c2e0faSMaksim Panchenko } // namespace
50a34c753fSRafael Auler 
51a34c753fSRafael Auler ////////////////////////////////////////////////////////////////////////////////
52a34c753fSRafael Auler 
Cluster(NodeId Id,const Node & Func)53a34c753fSRafael Auler Cluster::Cluster(NodeId Id, const Node &Func)
5440c2e0faSMaksim Panchenko     : Samples(Func.samples()), Size(Func.size()),
55a34c753fSRafael Auler       Density((double)Samples / Size) {
56a34c753fSRafael Auler   Targets.push_back(Id);
57a34c753fSRafael Auler }
58a34c753fSRafael Auler 
Cluster(const std::vector<NodeId> & Nodes,const CallGraph & Cg)59a34c753fSRafael Auler Cluster::Cluster(const std::vector<NodeId> &Nodes, const CallGraph &Cg) {
60a34c753fSRafael Auler   Samples = 0;
61a34c753fSRafael Auler   Size = 0;
62a34c753fSRafael Auler   for (NodeId TargetId : Nodes) {
63a34c753fSRafael Auler     Targets.push_back(TargetId);
64a34c753fSRafael Auler     Samples += Cg.samples(TargetId);
65a34c753fSRafael Auler     Size += Cg.size(TargetId);
66a34c753fSRafael Auler   }
67a34c753fSRafael Auler   Density = (double)Samples / Size;
68a34c753fSRafael Auler }
69a34c753fSRafael Auler 
toString() const70a34c753fSRafael Auler std::string Cluster::toString() const {
71a34c753fSRafael Auler   std::string Str;
72a34c753fSRafael Auler   raw_string_ostream CS(Str);
73a34c753fSRafael Auler   bool PrintComma = false;
74a34c753fSRafael Auler   CS << "funcs = [";
75a34c753fSRafael Auler   for (const NodeId &Target : Targets) {
7640c2e0faSMaksim Panchenko     if (PrintComma)
7740c2e0faSMaksim Panchenko       CS << ", ";
78a34c753fSRafael Auler     CS << Target;
79a34c753fSRafael Auler     PrintComma = true;
80a34c753fSRafael Auler   }
81a34c753fSRafael Auler   CS << "]";
82a34c753fSRafael Auler   return CS.str();
83a34c753fSRafael Auler }
84a34c753fSRafael Auler 
85a34c753fSRafael Auler namespace {
86a34c753fSRafael Auler 
freezeClusters(const CallGraph & Cg,std::vector<Cluster> & Clusters)87a34c753fSRafael Auler void freezeClusters(const CallGraph &Cg, std::vector<Cluster> &Clusters) {
88a34c753fSRafael Auler   uint32_t TotalSize = 0;
89*d2c87699SAmir Ayupov   llvm::sort(Clusters, compareClustersDensity);
90a34c753fSRafael Auler   for (Cluster &C : Clusters) {
91a34c753fSRafael Auler     uint32_t NewSize = TotalSize + C.size();
9240c2e0faSMaksim Panchenko     if (NewSize > FrozenPages * HugePageSize)
9340c2e0faSMaksim Panchenko       break;
94a34c753fSRafael Auler     C.freeze();
95a34c753fSRafael Auler     TotalSize = NewSize;
9640c2e0faSMaksim Panchenko     LLVM_DEBUG(NodeId Fid = C.target(0);
9740c2e0faSMaksim Panchenko                dbgs() << format(
9840c2e0faSMaksim Panchenko                    "freezing cluster for func %d, size = %u, samples = %lu)\n",
99a34c753fSRafael Auler                    Fid, Cg.size(Fid), Cg.samples(Fid)););
100a34c753fSRafael Auler   }
101a34c753fSRafael Auler }
102a34c753fSRafael Auler 
10340c2e0faSMaksim Panchenko } // namespace
104a34c753fSRafael Auler 
reverseTargets()10540c2e0faSMaksim Panchenko void Cluster::reverseTargets() { std::reverse(Targets.begin(), Targets.end()); }
106a34c753fSRafael Auler 
merge(const Cluster & Other,const double Aw)107a34c753fSRafael Auler void Cluster::merge(const Cluster &Other, const double Aw) {
10840c2e0faSMaksim Panchenko   Targets.insert(Targets.end(), Other.Targets.begin(), Other.Targets.end());
109a34c753fSRafael Auler   Size += Other.Size;
110a34c753fSRafael Auler   Samples += Other.Samples;
111a34c753fSRafael Auler   Density = (double)Samples / Size;
112a34c753fSRafael Auler }
113a34c753fSRafael Auler 
merge(const Cluster & Other,const std::vector<CallGraph::NodeId> & Targets_)11440c2e0faSMaksim Panchenko void Cluster::merge(const Cluster &Other,
11540c2e0faSMaksim Panchenko                     const std::vector<CallGraph::NodeId> &Targets_) {
116a34c753fSRafael Auler   Targets = Targets_;
117a34c753fSRafael Auler   Size += Other.Size;
118a34c753fSRafael Auler   Samples += Other.Samples;
119a34c753fSRafael Auler   Density = (double)Samples / Size;
120a34c753fSRafael Auler }
121a34c753fSRafael Auler 
clear()122a34c753fSRafael Auler void Cluster::clear() {
123a34c753fSRafael Auler   Id = -1u;
124a34c753fSRafael Auler   Size = 0;
125a34c753fSRafael Auler   Samples = 0;
126a34c753fSRafael Auler   Density = 0.0;
127a34c753fSRafael Auler   Targets.clear();
128a34c753fSRafael Auler   Frozen = false;
129a34c753fSRafael Auler }
130a34c753fSRafael Auler 
clusterize(const CallGraph & Cg)131a34c753fSRafael Auler std::vector<Cluster> clusterize(const CallGraph &Cg) {
132a34c753fSRafael Auler   std::vector<NodeId> SortedFuncs;
133a34c753fSRafael Auler 
134a34c753fSRafael Auler   // indexed by NodeId, keeps it's current cluster
135a34c753fSRafael Auler   std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr);
136a34c753fSRafael Auler   std::vector<Cluster> Clusters;
137a34c753fSRafael Auler   Clusters.reserve(Cg.numNodes());
138a34c753fSRafael Auler 
139a34c753fSRafael Auler   for (NodeId F = 0; F < Cg.numNodes(); F++) {
14040c2e0faSMaksim Panchenko     if (Cg.samples(F) == 0)
14140c2e0faSMaksim Panchenko       continue;
142a34c753fSRafael Auler     Clusters.emplace_back(F, Cg.getNode(F));
143a34c753fSRafael Auler     SortedFuncs.push_back(F);
144a34c753fSRafael Auler   }
145a34c753fSRafael Auler 
146a34c753fSRafael Auler   freezeClusters(Cg, Clusters);
147a34c753fSRafael Auler 
148a34c753fSRafael Auler   // The size and order of Clusters is fixed until we reshuffle it immediately
149a34c753fSRafael Auler   // before returning.
150f92ab6afSAmir Ayupov   for (Cluster &Cluster : Clusters)
151a34c753fSRafael Auler     FuncCluster[Cluster.targets().front()] = &Cluster;
152a34c753fSRafael Auler 
153*d2c87699SAmir Ayupov   llvm::sort(SortedFuncs, [&](const NodeId F1, const NodeId F2) {
154a34c753fSRafael Auler     const CallGraph::Node &Func1 = Cg.getNode(F1);
155a34c753fSRafael Auler     const CallGraph::Node &Func2 = Cg.getNode(F2);
15640c2e0faSMaksim Panchenko     return Func1.samples() * Func2.size() > // TODO: is this correct?
157a34c753fSRafael Auler            Func2.samples() * Func1.size();
15840c2e0faSMaksim Panchenko   });
159a34c753fSRafael Auler 
160a34c753fSRafael Auler   // Process each function, and consider merging its cluster with the
161a34c753fSRafael Auler   // one containing its most likely predecessor.
162a34c753fSRafael Auler   for (const NodeId Fid : SortedFuncs) {
163a34c753fSRafael Auler     Cluster *Cluster = FuncCluster[Fid];
16440c2e0faSMaksim Panchenko     if (Cluster->frozen())
16540c2e0faSMaksim Panchenko       continue;
166a34c753fSRafael Auler 
167a34c753fSRafael Auler     // Find best predecessor.
168a34c753fSRafael Auler     NodeId BestPred = CallGraph::InvalidId;
169a34c753fSRafael Auler     double BestProb = 0;
170a34c753fSRafael Auler 
171a34c753fSRafael Auler     for (const NodeId Src : Cg.predecessors(Fid)) {
172a34c753fSRafael Auler       const Arc &Arc = *Cg.findArc(Src, Fid);
17340c2e0faSMaksim Panchenko       if (BestPred == CallGraph::InvalidId ||
17440c2e0faSMaksim Panchenko           Arc.normalizedWeight() > BestProb) {
175a34c753fSRafael Auler         BestPred = Arc.src();
176a34c753fSRafael Auler         BestProb = Arc.normalizedWeight();
177a34c753fSRafael Auler       }
178a34c753fSRafael Auler     }
179a34c753fSRafael Auler 
180a34c753fSRafael Auler     // Check if the merge is good for the callee.
181a34c753fSRafael Auler     //   Don't merge if the probability of getting to the callee from the
182a34c753fSRafael Auler     //   caller is too low.
18340c2e0faSMaksim Panchenko     if (BestProb < MinArcProbability)
18440c2e0faSMaksim Panchenko       continue;
185a34c753fSRafael Auler 
186a34c753fSRafael Auler     assert(BestPred != CallGraph::InvalidId);
187a34c753fSRafael Auler 
188a34c753fSRafael Auler     class Cluster *PredCluster = FuncCluster[BestPred];
189a34c753fSRafael Auler 
190a34c753fSRafael Auler     // Skip if no predCluster (predecessor w/ no samples), or if same
191a34c753fSRafael Auler     // as cluster, of it's frozen.
192a34c753fSRafael Auler     if (PredCluster == nullptr || PredCluster == Cluster ||
193f92ab6afSAmir Ayupov         PredCluster->frozen())
194a34c753fSRafael Auler       continue;
195a34c753fSRafael Auler 
196a34c753fSRafael Auler     // Skip if merged cluster would be bigger than the threshold.
19740c2e0faSMaksim Panchenko     if (Cluster->size() + PredCluster->size() > MaxClusterSize)
19840c2e0faSMaksim Panchenko       continue;
199a34c753fSRafael Auler 
200a34c753fSRafael Auler     // Check if the merge is good for the caller.
201a34c753fSRafael Auler     //   Don't merge if the caller's density is significantly better
202a34c753fSRafael Auler     //   than the density resulting from the merge.
203a34c753fSRafael Auler     const double NewDensity =
204a34c753fSRafael Auler         ((double)PredCluster->samples() + Cluster->samples()) /
205a34c753fSRafael Auler         (PredCluster->size() + Cluster->size());
206a34c753fSRafael Auler     if (PredCluster->density() > NewDensity * CallerDegradeFactor) {
207a34c753fSRafael Auler       continue;
208a34c753fSRafael Auler     }
209a34c753fSRafael Auler 
21040c2e0faSMaksim Panchenko     LLVM_DEBUG(if (opts::Verbosity > 1) {
211a34c753fSRafael Auler       dbgs() << format("merging %s -> %s: %u\n",
212a34c753fSRafael Auler                        PredCluster->toString().c_str(),
21340c2e0faSMaksim Panchenko                        Cluster->toString().c_str(), Cg.samples(Fid));
214a34c753fSRafael Auler     });
215a34c753fSRafael Auler 
216f92ab6afSAmir Ayupov     for (NodeId F : Cluster->targets())
217a34c753fSRafael Auler       FuncCluster[F] = PredCluster;
218a34c753fSRafael Auler 
219a34c753fSRafael Auler     PredCluster->merge(*Cluster);
220a34c753fSRafael Auler     Cluster->clear();
221a34c753fSRafael Auler   }
222a34c753fSRafael Auler 
223a34c753fSRafael Auler   // Return the set of Clusters that are left, which are the ones that
224a34c753fSRafael Auler   // didn't get merged (so their first func is its original func).
225a34c753fSRafael Auler   std::vector<Cluster> SortedClusters;
226a34c753fSRafael Auler   std::unordered_set<Cluster *> Visited;
227a34c753fSRafael Auler   for (const NodeId Func : SortedFuncs) {
228a34c753fSRafael Auler     Cluster *Cluster = FuncCluster[Func];
229f92ab6afSAmir Ayupov     if (!Cluster || Visited.count(Cluster) == 1 || Cluster->target(0) != Func)
230a34c753fSRafael Auler       continue;
231f92ab6afSAmir Ayupov 
232a34c753fSRafael Auler     SortedClusters.emplace_back(std::move(*Cluster));
233a34c753fSRafael Auler     Visited.insert(Cluster);
234a34c753fSRafael Auler   }
235a34c753fSRafael Auler 
236*d2c87699SAmir Ayupov   llvm::sort(SortedClusters, compareClustersDensity);
237a34c753fSRafael Auler 
238a34c753fSRafael Auler   return SortedClusters;
239a34c753fSRafael Auler }
240a34c753fSRafael Auler 
randomClusters(const CallGraph & Cg)241a34c753fSRafael Auler std::vector<Cluster> randomClusters(const CallGraph &Cg) {
242a34c753fSRafael Auler   std::vector<NodeId> FuncIds(Cg.numNodes(), 0);
243a34c753fSRafael Auler   std::vector<Cluster> Clusters;
244a34c753fSRafael Auler   Clusters.reserve(Cg.numNodes());
245a34c753fSRafael Auler 
246a34c753fSRafael Auler   for (NodeId F = 0; F < Cg.numNodes(); F++) {
24740c2e0faSMaksim Panchenko     if (Cg.samples(F) == 0)
24840c2e0faSMaksim Panchenko       continue;
249a34c753fSRafael Auler     Clusters.emplace_back(F, Cg.getNode(F));
250a34c753fSRafael Auler   }
251a34c753fSRafael Auler 
252*d2c87699SAmir Ayupov   llvm::sort(Clusters, [](const Cluster &A, const Cluster &B) {
253*d2c87699SAmir Ayupov     return A.size() < B.size();
254*d2c87699SAmir Ayupov   });
255a34c753fSRafael Auler 
256a34c753fSRafael Auler   auto pickMergeCluster = [&Clusters](const size_t Idx) {
257a34c753fSRafael Auler     size_t MaxIdx = Idx + 1;
258a34c753fSRafael Auler 
259a34c753fSRafael Auler     while (MaxIdx < Clusters.size() &&
260f92ab6afSAmir Ayupov            Clusters[Idx].size() + Clusters[MaxIdx].size() <= MaxClusterSize)
261a34c753fSRafael Auler       ++MaxIdx;
262a34c753fSRafael Auler 
263a34c753fSRafael Auler     if (MaxIdx - Idx > 1) {
264a34c753fSRafael Auler       size_t MergeIdx = (std::rand() % (MaxIdx - Idx - 1)) + Idx + 1;
26540c2e0faSMaksim Panchenko       assert(Clusters[MergeIdx].size() + Clusters[Idx].size() <=
26640c2e0faSMaksim Panchenko              MaxClusterSize);
267a34c753fSRafael Auler       return MergeIdx;
268a34c753fSRafael Auler     }
269a34c753fSRafael Auler     return Clusters.size();
270a34c753fSRafael Auler   };
271a34c753fSRafael Auler 
272a34c753fSRafael Auler   size_t Idx = 0;
273a34c753fSRafael Auler   while (Idx < Clusters.size()) {
274a34c753fSRafael Auler     size_t MergeIdx = pickMergeCluster(Idx);
275a34c753fSRafael Auler     if (MergeIdx == Clusters.size()) {
276a34c753fSRafael Auler       ++Idx;
277a34c753fSRafael Auler     } else {
278a34c753fSRafael Auler       Clusters[Idx].merge(Clusters[MergeIdx]);
279a34c753fSRafael Auler       Clusters.erase(Clusters.begin() + MergeIdx);
280a34c753fSRafael Auler     }
281a34c753fSRafael Auler   }
282a34c753fSRafael Auler 
283a34c753fSRafael Auler   return Clusters;
284a34c753fSRafael Auler }
285a34c753fSRafael Auler 
28640c2e0faSMaksim Panchenko } // namespace bolt
28740c2e0faSMaksim Panchenko } // namespace llvm
288