xref: /llvm-project/bolt/lib/Passes/HFSort.cpp (revision d2c876993625ce9b36bdd7ccc5e0c4cb04f32fb9)
1 //===- bolt/Passes/HFSort.cpp - Cluster functions by hotness --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of HFSort algorithm for function ordering:
10 // https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Passes/HFSort.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/Format.h"
18 #include "llvm/Support/raw_ostream.h"
19 #include <unordered_set>
20 
21 #define DEBUG_TYPE "hfsort"
22 
23 namespace opts {
24 extern llvm::cl::opt<unsigned> Verbosity;
25 }
26 
27 namespace llvm {
28 namespace bolt {
29 
30 using NodeId = CallGraph::NodeId;
31 using Arc = CallGraph::Arc;
32 using Node = CallGraph::Node;
33 
34 namespace {
35 
36 // The number of pages to reserve for the functions with highest
37 // density (samples / size).  The functions put in these pages are not
38 // considered for clustering.
39 constexpr uint32_t FrozenPages = 0;
40 
41 // The minimum approximate probability of a callee being called from a
42 // particular arc to consider merging with the caller's cluster.
43 constexpr double MinArcProbability = 0.1;
44 
45 // This is a factor to determine by how much a caller cluster is
46 // willing to degrade it's density by merging a callee.
47 constexpr int CallerDegradeFactor = 8;
48 
49 } // namespace
50 
51 ////////////////////////////////////////////////////////////////////////////////
52 
Cluster(NodeId Id,const Node & Func)53 Cluster::Cluster(NodeId Id, const Node &Func)
54     : Samples(Func.samples()), Size(Func.size()),
55       Density((double)Samples / Size) {
56   Targets.push_back(Id);
57 }
58 
Cluster(const std::vector<NodeId> & Nodes,const CallGraph & Cg)59 Cluster::Cluster(const std::vector<NodeId> &Nodes, const CallGraph &Cg) {
60   Samples = 0;
61   Size = 0;
62   for (NodeId TargetId : Nodes) {
63     Targets.push_back(TargetId);
64     Samples += Cg.samples(TargetId);
65     Size += Cg.size(TargetId);
66   }
67   Density = (double)Samples / Size;
68 }
69 
toString() const70 std::string Cluster::toString() const {
71   std::string Str;
72   raw_string_ostream CS(Str);
73   bool PrintComma = false;
74   CS << "funcs = [";
75   for (const NodeId &Target : Targets) {
76     if (PrintComma)
77       CS << ", ";
78     CS << Target;
79     PrintComma = true;
80   }
81   CS << "]";
82   return CS.str();
83 }
84 
85 namespace {
86 
freezeClusters(const CallGraph & Cg,std::vector<Cluster> & Clusters)87 void freezeClusters(const CallGraph &Cg, std::vector<Cluster> &Clusters) {
88   uint32_t TotalSize = 0;
89   llvm::sort(Clusters, compareClustersDensity);
90   for (Cluster &C : Clusters) {
91     uint32_t NewSize = TotalSize + C.size();
92     if (NewSize > FrozenPages * HugePageSize)
93       break;
94     C.freeze();
95     TotalSize = NewSize;
96     LLVM_DEBUG(NodeId Fid = C.target(0);
97                dbgs() << format(
98                    "freezing cluster for func %d, size = %u, samples = %lu)\n",
99                    Fid, Cg.size(Fid), Cg.samples(Fid)););
100   }
101 }
102 
103 } // namespace
104 
reverseTargets()105 void Cluster::reverseTargets() { std::reverse(Targets.begin(), Targets.end()); }
106 
merge(const Cluster & Other,const double Aw)107 void Cluster::merge(const Cluster &Other, const double Aw) {
108   Targets.insert(Targets.end(), Other.Targets.begin(), Other.Targets.end());
109   Size += Other.Size;
110   Samples += Other.Samples;
111   Density = (double)Samples / Size;
112 }
113 
merge(const Cluster & Other,const std::vector<CallGraph::NodeId> & Targets_)114 void Cluster::merge(const Cluster &Other,
115                     const std::vector<CallGraph::NodeId> &Targets_) {
116   Targets = Targets_;
117   Size += Other.Size;
118   Samples += Other.Samples;
119   Density = (double)Samples / Size;
120 }
121 
clear()122 void Cluster::clear() {
123   Id = -1u;
124   Size = 0;
125   Samples = 0;
126   Density = 0.0;
127   Targets.clear();
128   Frozen = false;
129 }
130 
clusterize(const CallGraph & Cg)131 std::vector<Cluster> clusterize(const CallGraph &Cg) {
132   std::vector<NodeId> SortedFuncs;
133 
134   // indexed by NodeId, keeps it's current cluster
135   std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr);
136   std::vector<Cluster> Clusters;
137   Clusters.reserve(Cg.numNodes());
138 
139   for (NodeId F = 0; F < Cg.numNodes(); F++) {
140     if (Cg.samples(F) == 0)
141       continue;
142     Clusters.emplace_back(F, Cg.getNode(F));
143     SortedFuncs.push_back(F);
144   }
145 
146   freezeClusters(Cg, Clusters);
147 
148   // The size and order of Clusters is fixed until we reshuffle it immediately
149   // before returning.
150   for (Cluster &Cluster : Clusters)
151     FuncCluster[Cluster.targets().front()] = &Cluster;
152 
153   llvm::sort(SortedFuncs, [&](const NodeId F1, const NodeId F2) {
154     const CallGraph::Node &Func1 = Cg.getNode(F1);
155     const CallGraph::Node &Func2 = Cg.getNode(F2);
156     return Func1.samples() * Func2.size() > // TODO: is this correct?
157            Func2.samples() * Func1.size();
158   });
159 
160   // Process each function, and consider merging its cluster with the
161   // one containing its most likely predecessor.
162   for (const NodeId Fid : SortedFuncs) {
163     Cluster *Cluster = FuncCluster[Fid];
164     if (Cluster->frozen())
165       continue;
166 
167     // Find best predecessor.
168     NodeId BestPred = CallGraph::InvalidId;
169     double BestProb = 0;
170 
171     for (const NodeId Src : Cg.predecessors(Fid)) {
172       const Arc &Arc = *Cg.findArc(Src, Fid);
173       if (BestPred == CallGraph::InvalidId ||
174           Arc.normalizedWeight() > BestProb) {
175         BestPred = Arc.src();
176         BestProb = Arc.normalizedWeight();
177       }
178     }
179 
180     // Check if the merge is good for the callee.
181     //   Don't merge if the probability of getting to the callee from the
182     //   caller is too low.
183     if (BestProb < MinArcProbability)
184       continue;
185 
186     assert(BestPred != CallGraph::InvalidId);
187 
188     class Cluster *PredCluster = FuncCluster[BestPred];
189 
190     // Skip if no predCluster (predecessor w/ no samples), or if same
191     // as cluster, of it's frozen.
192     if (PredCluster == nullptr || PredCluster == Cluster ||
193         PredCluster->frozen())
194       continue;
195 
196     // Skip if merged cluster would be bigger than the threshold.
197     if (Cluster->size() + PredCluster->size() > MaxClusterSize)
198       continue;
199 
200     // Check if the merge is good for the caller.
201     //   Don't merge if the caller's density is significantly better
202     //   than the density resulting from the merge.
203     const double NewDensity =
204         ((double)PredCluster->samples() + Cluster->samples()) /
205         (PredCluster->size() + Cluster->size());
206     if (PredCluster->density() > NewDensity * CallerDegradeFactor) {
207       continue;
208     }
209 
210     LLVM_DEBUG(if (opts::Verbosity > 1) {
211       dbgs() << format("merging %s -> %s: %u\n",
212                        PredCluster->toString().c_str(),
213                        Cluster->toString().c_str(), Cg.samples(Fid));
214     });
215 
216     for (NodeId F : Cluster->targets())
217       FuncCluster[F] = PredCluster;
218 
219     PredCluster->merge(*Cluster);
220     Cluster->clear();
221   }
222 
223   // Return the set of Clusters that are left, which are the ones that
224   // didn't get merged (so their first func is its original func).
225   std::vector<Cluster> SortedClusters;
226   std::unordered_set<Cluster *> Visited;
227   for (const NodeId Func : SortedFuncs) {
228     Cluster *Cluster = FuncCluster[Func];
229     if (!Cluster || Visited.count(Cluster) == 1 || Cluster->target(0) != Func)
230       continue;
231 
232     SortedClusters.emplace_back(std::move(*Cluster));
233     Visited.insert(Cluster);
234   }
235 
236   llvm::sort(SortedClusters, compareClustersDensity);
237 
238   return SortedClusters;
239 }
240 
randomClusters(const CallGraph & Cg)241 std::vector<Cluster> randomClusters(const CallGraph &Cg) {
242   std::vector<NodeId> FuncIds(Cg.numNodes(), 0);
243   std::vector<Cluster> Clusters;
244   Clusters.reserve(Cg.numNodes());
245 
246   for (NodeId F = 0; F < Cg.numNodes(); F++) {
247     if (Cg.samples(F) == 0)
248       continue;
249     Clusters.emplace_back(F, Cg.getNode(F));
250   }
251 
252   llvm::sort(Clusters, [](const Cluster &A, const Cluster &B) {
253     return A.size() < B.size();
254   });
255 
256   auto pickMergeCluster = [&Clusters](const size_t Idx) {
257     size_t MaxIdx = Idx + 1;
258 
259     while (MaxIdx < Clusters.size() &&
260            Clusters[Idx].size() + Clusters[MaxIdx].size() <= MaxClusterSize)
261       ++MaxIdx;
262 
263     if (MaxIdx - Idx > 1) {
264       size_t MergeIdx = (std::rand() % (MaxIdx - Idx - 1)) + Idx + 1;
265       assert(Clusters[MergeIdx].size() + Clusters[Idx].size() <=
266              MaxClusterSize);
267       return MergeIdx;
268     }
269     return Clusters.size();
270   };
271 
272   size_t Idx = 0;
273   while (Idx < Clusters.size()) {
274     size_t MergeIdx = pickMergeCluster(Idx);
275     if (MergeIdx == Clusters.size()) {
276       ++Idx;
277     } else {
278       Clusters[Idx].merge(Clusters[MergeIdx]);
279       Clusters.erase(Clusters.begin() + MergeIdx);
280     }
281   }
282 
283   return Clusters;
284 }
285 
286 } // namespace bolt
287 } // namespace llvm
288