xref: /minix3/external/bsd/llvm/dist/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2*0a6a1f1dSLionel Sambuc //
3*0a6a1f1dSLionel Sambuc //                     The LLVM Compiler Infrastructure
4*0a6a1f1dSLionel Sambuc //
5*0a6a1f1dSLionel Sambuc // This file is distributed under the University of Illinois Open Source
6*0a6a1f1dSLionel Sambuc // License. See LICENSE.TXT for details.
7*0a6a1f1dSLionel Sambuc //
8*0a6a1f1dSLionel Sambuc //===----------------------------------------------------------------------===//
9*0a6a1f1dSLionel Sambuc //
10*0a6a1f1dSLionel Sambuc // Loops should be simplified before this analysis.
11*0a6a1f1dSLionel Sambuc //
12*0a6a1f1dSLionel Sambuc //===----------------------------------------------------------------------===//
13*0a6a1f1dSLionel Sambuc 
14*0a6a1f1dSLionel Sambuc #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
15*0a6a1f1dSLionel Sambuc #include "llvm/ADT/SCCIterator.h"
16*0a6a1f1dSLionel Sambuc #include "llvm/Support/raw_ostream.h"
17*0a6a1f1dSLionel Sambuc #include <numeric>
18*0a6a1f1dSLionel Sambuc 
19*0a6a1f1dSLionel Sambuc using namespace llvm;
20*0a6a1f1dSLionel Sambuc using namespace llvm::bfi_detail;
21*0a6a1f1dSLionel Sambuc 
22*0a6a1f1dSLionel Sambuc #define DEBUG_TYPE "block-freq"
23*0a6a1f1dSLionel Sambuc 
toScaled() const24*0a6a1f1dSLionel Sambuc ScaledNumber<uint64_t> BlockMass::toScaled() const {
25*0a6a1f1dSLionel Sambuc   if (isFull())
26*0a6a1f1dSLionel Sambuc     return ScaledNumber<uint64_t>(1, 0);
27*0a6a1f1dSLionel Sambuc   return ScaledNumber<uint64_t>(getMass() + 1, -64);
28*0a6a1f1dSLionel Sambuc }
29*0a6a1f1dSLionel Sambuc 
dump() const30*0a6a1f1dSLionel Sambuc void BlockMass::dump() const { print(dbgs()); }
31*0a6a1f1dSLionel Sambuc 
getHexDigit(int N)32*0a6a1f1dSLionel Sambuc static char getHexDigit(int N) {
33*0a6a1f1dSLionel Sambuc   assert(N < 16);
34*0a6a1f1dSLionel Sambuc   if (N < 10)
35*0a6a1f1dSLionel Sambuc     return '0' + N;
36*0a6a1f1dSLionel Sambuc   return 'a' + N - 10;
37*0a6a1f1dSLionel Sambuc }
print(raw_ostream & OS) const38*0a6a1f1dSLionel Sambuc raw_ostream &BlockMass::print(raw_ostream &OS) const {
39*0a6a1f1dSLionel Sambuc   for (int Digits = 0; Digits < 16; ++Digits)
40*0a6a1f1dSLionel Sambuc     OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
41*0a6a1f1dSLionel Sambuc   return OS;
42*0a6a1f1dSLionel Sambuc }
43*0a6a1f1dSLionel Sambuc 
44*0a6a1f1dSLionel Sambuc namespace {
45*0a6a1f1dSLionel Sambuc 
46*0a6a1f1dSLionel Sambuc typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
47*0a6a1f1dSLionel Sambuc typedef BlockFrequencyInfoImplBase::Distribution Distribution;
48*0a6a1f1dSLionel Sambuc typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
49*0a6a1f1dSLionel Sambuc typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
50*0a6a1f1dSLionel Sambuc typedef BlockFrequencyInfoImplBase::LoopData LoopData;
51*0a6a1f1dSLionel Sambuc typedef BlockFrequencyInfoImplBase::Weight Weight;
52*0a6a1f1dSLionel Sambuc typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
53*0a6a1f1dSLionel Sambuc 
54*0a6a1f1dSLionel Sambuc /// \brief Dithering mass distributer.
55*0a6a1f1dSLionel Sambuc ///
56*0a6a1f1dSLionel Sambuc /// This class splits up a single mass into portions by weight, dithering to
57*0a6a1f1dSLionel Sambuc /// spread out error.  No mass is lost.  The dithering precision depends on the
58*0a6a1f1dSLionel Sambuc /// precision of the product of \a BlockMass and \a BranchProbability.
59*0a6a1f1dSLionel Sambuc ///
60*0a6a1f1dSLionel Sambuc /// The distribution algorithm follows.
61*0a6a1f1dSLionel Sambuc ///
62*0a6a1f1dSLionel Sambuc ///  1. Initialize by saving the sum of the weights in \a RemWeight and the
63*0a6a1f1dSLionel Sambuc ///     mass to distribute in \a RemMass.
64*0a6a1f1dSLionel Sambuc ///
65*0a6a1f1dSLionel Sambuc ///  2. For each portion:
66*0a6a1f1dSLionel Sambuc ///
67*0a6a1f1dSLionel Sambuc ///      1. Construct a branch probability, P, as the portion's weight divided
68*0a6a1f1dSLionel Sambuc ///         by the current value of \a RemWeight.
69*0a6a1f1dSLionel Sambuc ///      2. Calculate the portion's mass as \a RemMass times P.
70*0a6a1f1dSLionel Sambuc ///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
71*0a6a1f1dSLionel Sambuc ///         the current portion's weight and mass.
72*0a6a1f1dSLionel Sambuc struct DitheringDistributer {
73*0a6a1f1dSLionel Sambuc   uint32_t RemWeight;
74*0a6a1f1dSLionel Sambuc   BlockMass RemMass;
75*0a6a1f1dSLionel Sambuc 
76*0a6a1f1dSLionel Sambuc   DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
77*0a6a1f1dSLionel Sambuc 
78*0a6a1f1dSLionel Sambuc   BlockMass takeMass(uint32_t Weight);
79*0a6a1f1dSLionel Sambuc };
80*0a6a1f1dSLionel Sambuc 
81*0a6a1f1dSLionel Sambuc } // end namespace
82*0a6a1f1dSLionel Sambuc 
DitheringDistributer(Distribution & Dist,const BlockMass & Mass)83*0a6a1f1dSLionel Sambuc DitheringDistributer::DitheringDistributer(Distribution &Dist,
84*0a6a1f1dSLionel Sambuc                                            const BlockMass &Mass) {
85*0a6a1f1dSLionel Sambuc   Dist.normalize();
86*0a6a1f1dSLionel Sambuc   RemWeight = Dist.Total;
87*0a6a1f1dSLionel Sambuc   RemMass = Mass;
88*0a6a1f1dSLionel Sambuc }
89*0a6a1f1dSLionel Sambuc 
takeMass(uint32_t Weight)90*0a6a1f1dSLionel Sambuc BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
91*0a6a1f1dSLionel Sambuc   assert(Weight && "invalid weight");
92*0a6a1f1dSLionel Sambuc   assert(Weight <= RemWeight);
93*0a6a1f1dSLionel Sambuc   BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
94*0a6a1f1dSLionel Sambuc 
95*0a6a1f1dSLionel Sambuc   // Decrement totals (dither).
96*0a6a1f1dSLionel Sambuc   RemWeight -= Weight;
97*0a6a1f1dSLionel Sambuc   RemMass -= Mass;
98*0a6a1f1dSLionel Sambuc   return Mass;
99*0a6a1f1dSLionel Sambuc }
100*0a6a1f1dSLionel Sambuc 
add(const BlockNode & Node,uint64_t Amount,Weight::DistType Type)101*0a6a1f1dSLionel Sambuc void Distribution::add(const BlockNode &Node, uint64_t Amount,
102*0a6a1f1dSLionel Sambuc                        Weight::DistType Type) {
103*0a6a1f1dSLionel Sambuc   assert(Amount && "invalid weight of 0");
104*0a6a1f1dSLionel Sambuc   uint64_t NewTotal = Total + Amount;
105*0a6a1f1dSLionel Sambuc 
106*0a6a1f1dSLionel Sambuc   // Check for overflow.  It should be impossible to overflow twice.
107*0a6a1f1dSLionel Sambuc   bool IsOverflow = NewTotal < Total;
108*0a6a1f1dSLionel Sambuc   assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
109*0a6a1f1dSLionel Sambuc   DidOverflow |= IsOverflow;
110*0a6a1f1dSLionel Sambuc 
111*0a6a1f1dSLionel Sambuc   // Update the total.
112*0a6a1f1dSLionel Sambuc   Total = NewTotal;
113*0a6a1f1dSLionel Sambuc 
114*0a6a1f1dSLionel Sambuc   // Save the weight.
115*0a6a1f1dSLionel Sambuc   Weights.push_back(Weight(Type, Node, Amount));
116*0a6a1f1dSLionel Sambuc }
117*0a6a1f1dSLionel Sambuc 
combineWeight(Weight & W,const Weight & OtherW)118*0a6a1f1dSLionel Sambuc static void combineWeight(Weight &W, const Weight &OtherW) {
119*0a6a1f1dSLionel Sambuc   assert(OtherW.TargetNode.isValid());
120*0a6a1f1dSLionel Sambuc   if (!W.Amount) {
121*0a6a1f1dSLionel Sambuc     W = OtherW;
122*0a6a1f1dSLionel Sambuc     return;
123*0a6a1f1dSLionel Sambuc   }
124*0a6a1f1dSLionel Sambuc   assert(W.Type == OtherW.Type);
125*0a6a1f1dSLionel Sambuc   assert(W.TargetNode == OtherW.TargetNode);
126*0a6a1f1dSLionel Sambuc   assert(OtherW.Amount && "Expected non-zero weight");
127*0a6a1f1dSLionel Sambuc   if (W.Amount > W.Amount + OtherW.Amount)
128*0a6a1f1dSLionel Sambuc     // Saturate on overflow.
129*0a6a1f1dSLionel Sambuc     W.Amount = UINT64_MAX;
130*0a6a1f1dSLionel Sambuc   else
131*0a6a1f1dSLionel Sambuc     W.Amount += OtherW.Amount;
132*0a6a1f1dSLionel Sambuc }
combineWeightsBySorting(WeightList & Weights)133*0a6a1f1dSLionel Sambuc static void combineWeightsBySorting(WeightList &Weights) {
134*0a6a1f1dSLionel Sambuc   // Sort so edges to the same node are adjacent.
135*0a6a1f1dSLionel Sambuc   std::sort(Weights.begin(), Weights.end(),
136*0a6a1f1dSLionel Sambuc             [](const Weight &L,
137*0a6a1f1dSLionel Sambuc                const Weight &R) { return L.TargetNode < R.TargetNode; });
138*0a6a1f1dSLionel Sambuc 
139*0a6a1f1dSLionel Sambuc   // Combine adjacent edges.
140*0a6a1f1dSLionel Sambuc   WeightList::iterator O = Weights.begin();
141*0a6a1f1dSLionel Sambuc   for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
142*0a6a1f1dSLionel Sambuc        ++O, (I = L)) {
143*0a6a1f1dSLionel Sambuc     *O = *I;
144*0a6a1f1dSLionel Sambuc 
145*0a6a1f1dSLionel Sambuc     // Find the adjacent weights to the same node.
146*0a6a1f1dSLionel Sambuc     for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
147*0a6a1f1dSLionel Sambuc       combineWeight(*O, *L);
148*0a6a1f1dSLionel Sambuc   }
149*0a6a1f1dSLionel Sambuc 
150*0a6a1f1dSLionel Sambuc   // Erase extra entries.
151*0a6a1f1dSLionel Sambuc   Weights.erase(O, Weights.end());
152*0a6a1f1dSLionel Sambuc   return;
153*0a6a1f1dSLionel Sambuc }
combineWeightsByHashing(WeightList & Weights)154*0a6a1f1dSLionel Sambuc static void combineWeightsByHashing(WeightList &Weights) {
155*0a6a1f1dSLionel Sambuc   // Collect weights into a DenseMap.
156*0a6a1f1dSLionel Sambuc   typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
157*0a6a1f1dSLionel Sambuc   HashTable Combined(NextPowerOf2(2 * Weights.size()));
158*0a6a1f1dSLionel Sambuc   for (const Weight &W : Weights)
159*0a6a1f1dSLionel Sambuc     combineWeight(Combined[W.TargetNode.Index], W);
160*0a6a1f1dSLionel Sambuc 
161*0a6a1f1dSLionel Sambuc   // Check whether anything changed.
162*0a6a1f1dSLionel Sambuc   if (Weights.size() == Combined.size())
163*0a6a1f1dSLionel Sambuc     return;
164*0a6a1f1dSLionel Sambuc 
165*0a6a1f1dSLionel Sambuc   // Fill in the new weights.
166*0a6a1f1dSLionel Sambuc   Weights.clear();
167*0a6a1f1dSLionel Sambuc   Weights.reserve(Combined.size());
168*0a6a1f1dSLionel Sambuc   for (const auto &I : Combined)
169*0a6a1f1dSLionel Sambuc     Weights.push_back(I.second);
170*0a6a1f1dSLionel Sambuc }
combineWeights(WeightList & Weights)171*0a6a1f1dSLionel Sambuc static void combineWeights(WeightList &Weights) {
172*0a6a1f1dSLionel Sambuc   // Use a hash table for many successors to keep this linear.
173*0a6a1f1dSLionel Sambuc   if (Weights.size() > 128) {
174*0a6a1f1dSLionel Sambuc     combineWeightsByHashing(Weights);
175*0a6a1f1dSLionel Sambuc     return;
176*0a6a1f1dSLionel Sambuc   }
177*0a6a1f1dSLionel Sambuc 
178*0a6a1f1dSLionel Sambuc   combineWeightsBySorting(Weights);
179*0a6a1f1dSLionel Sambuc }
shiftRightAndRound(uint64_t N,int Shift)180*0a6a1f1dSLionel Sambuc static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
181*0a6a1f1dSLionel Sambuc   assert(Shift >= 0);
182*0a6a1f1dSLionel Sambuc   assert(Shift < 64);
183*0a6a1f1dSLionel Sambuc   if (!Shift)
184*0a6a1f1dSLionel Sambuc     return N;
185*0a6a1f1dSLionel Sambuc   return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
186*0a6a1f1dSLionel Sambuc }
normalize()187*0a6a1f1dSLionel Sambuc void Distribution::normalize() {
188*0a6a1f1dSLionel Sambuc   // Early exit for termination nodes.
189*0a6a1f1dSLionel Sambuc   if (Weights.empty())
190*0a6a1f1dSLionel Sambuc     return;
191*0a6a1f1dSLionel Sambuc 
192*0a6a1f1dSLionel Sambuc   // Only bother if there are multiple successors.
193*0a6a1f1dSLionel Sambuc   if (Weights.size() > 1)
194*0a6a1f1dSLionel Sambuc     combineWeights(Weights);
195*0a6a1f1dSLionel Sambuc 
196*0a6a1f1dSLionel Sambuc   // Early exit when combined into a single successor.
197*0a6a1f1dSLionel Sambuc   if (Weights.size() == 1) {
198*0a6a1f1dSLionel Sambuc     Total = 1;
199*0a6a1f1dSLionel Sambuc     Weights.front().Amount = 1;
200*0a6a1f1dSLionel Sambuc     return;
201*0a6a1f1dSLionel Sambuc   }
202*0a6a1f1dSLionel Sambuc 
203*0a6a1f1dSLionel Sambuc   // Determine how much to shift right so that the total fits into 32-bits.
204*0a6a1f1dSLionel Sambuc   //
205*0a6a1f1dSLionel Sambuc   // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
206*0a6a1f1dSLionel Sambuc   // for each weight can cause a 32-bit overflow.
207*0a6a1f1dSLionel Sambuc   int Shift = 0;
208*0a6a1f1dSLionel Sambuc   if (DidOverflow)
209*0a6a1f1dSLionel Sambuc     Shift = 33;
210*0a6a1f1dSLionel Sambuc   else if (Total > UINT32_MAX)
211*0a6a1f1dSLionel Sambuc     Shift = 33 - countLeadingZeros(Total);
212*0a6a1f1dSLionel Sambuc 
213*0a6a1f1dSLionel Sambuc   // Early exit if nothing needs to be scaled.
214*0a6a1f1dSLionel Sambuc   if (!Shift) {
215*0a6a1f1dSLionel Sambuc     // If we didn't overflow then combineWeights() shouldn't have changed the
216*0a6a1f1dSLionel Sambuc     // sum of the weights, but let's double-check.
217*0a6a1f1dSLionel Sambuc     assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
218*0a6a1f1dSLionel Sambuc                                     [](uint64_t Sum, const Weight &W) {
219*0a6a1f1dSLionel Sambuc                       return Sum + W.Amount;
220*0a6a1f1dSLionel Sambuc                     }) &&
221*0a6a1f1dSLionel Sambuc            "Expected total to be correct");
222*0a6a1f1dSLionel Sambuc     return;
223*0a6a1f1dSLionel Sambuc   }
224*0a6a1f1dSLionel Sambuc 
225*0a6a1f1dSLionel Sambuc   // Recompute the total through accumulation (rather than shifting it) so that
226*0a6a1f1dSLionel Sambuc   // it's accurate after shifting and any changes combineWeights() made above.
227*0a6a1f1dSLionel Sambuc   Total = 0;
228*0a6a1f1dSLionel Sambuc 
229*0a6a1f1dSLionel Sambuc   // Sum the weights to each node and shift right if necessary.
230*0a6a1f1dSLionel Sambuc   for (Weight &W : Weights) {
231*0a6a1f1dSLionel Sambuc     // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
232*0a6a1f1dSLionel Sambuc     // can round here without concern about overflow.
233*0a6a1f1dSLionel Sambuc     assert(W.TargetNode.isValid());
234*0a6a1f1dSLionel Sambuc     W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
235*0a6a1f1dSLionel Sambuc     assert(W.Amount <= UINT32_MAX);
236*0a6a1f1dSLionel Sambuc 
237*0a6a1f1dSLionel Sambuc     // Update the total.
238*0a6a1f1dSLionel Sambuc     Total += W.Amount;
239*0a6a1f1dSLionel Sambuc   }
240*0a6a1f1dSLionel Sambuc   assert(Total <= UINT32_MAX);
241*0a6a1f1dSLionel Sambuc }
242*0a6a1f1dSLionel Sambuc 
clear()243*0a6a1f1dSLionel Sambuc void BlockFrequencyInfoImplBase::clear() {
244*0a6a1f1dSLionel Sambuc   // Swap with a default-constructed std::vector, since std::vector<>::clear()
245*0a6a1f1dSLionel Sambuc   // does not actually clear heap storage.
246*0a6a1f1dSLionel Sambuc   std::vector<FrequencyData>().swap(Freqs);
247*0a6a1f1dSLionel Sambuc   std::vector<WorkingData>().swap(Working);
248*0a6a1f1dSLionel Sambuc   Loops.clear();
249*0a6a1f1dSLionel Sambuc }
250*0a6a1f1dSLionel Sambuc 
251*0a6a1f1dSLionel Sambuc /// \brief Clear all memory not needed downstream.
252*0a6a1f1dSLionel Sambuc ///
253*0a6a1f1dSLionel Sambuc /// Releases all memory not used downstream.  In particular, saves Freqs.
cleanup(BlockFrequencyInfoImplBase & BFI)254*0a6a1f1dSLionel Sambuc static void cleanup(BlockFrequencyInfoImplBase &BFI) {
255*0a6a1f1dSLionel Sambuc   std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
256*0a6a1f1dSLionel Sambuc   BFI.clear();
257*0a6a1f1dSLionel Sambuc   BFI.Freqs = std::move(SavedFreqs);
258*0a6a1f1dSLionel Sambuc }
259*0a6a1f1dSLionel Sambuc 
addToDist(Distribution & Dist,const LoopData * OuterLoop,const BlockNode & Pred,const BlockNode & Succ,uint64_t Weight)260*0a6a1f1dSLionel Sambuc bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
261*0a6a1f1dSLionel Sambuc                                            const LoopData *OuterLoop,
262*0a6a1f1dSLionel Sambuc                                            const BlockNode &Pred,
263*0a6a1f1dSLionel Sambuc                                            const BlockNode &Succ,
264*0a6a1f1dSLionel Sambuc                                            uint64_t Weight) {
265*0a6a1f1dSLionel Sambuc   if (!Weight)
266*0a6a1f1dSLionel Sambuc     Weight = 1;
267*0a6a1f1dSLionel Sambuc 
268*0a6a1f1dSLionel Sambuc   auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
269*0a6a1f1dSLionel Sambuc     return OuterLoop && OuterLoop->isHeader(Node);
270*0a6a1f1dSLionel Sambuc   };
271*0a6a1f1dSLionel Sambuc 
272*0a6a1f1dSLionel Sambuc   BlockNode Resolved = Working[Succ.Index].getResolvedNode();
273*0a6a1f1dSLionel Sambuc 
274*0a6a1f1dSLionel Sambuc #ifndef NDEBUG
275*0a6a1f1dSLionel Sambuc   auto debugSuccessor = [&](const char *Type) {
276*0a6a1f1dSLionel Sambuc     dbgs() << "  =>"
277*0a6a1f1dSLionel Sambuc            << " [" << Type << "] weight = " << Weight;
278*0a6a1f1dSLionel Sambuc     if (!isLoopHeader(Resolved))
279*0a6a1f1dSLionel Sambuc       dbgs() << ", succ = " << getBlockName(Succ);
280*0a6a1f1dSLionel Sambuc     if (Resolved != Succ)
281*0a6a1f1dSLionel Sambuc       dbgs() << ", resolved = " << getBlockName(Resolved);
282*0a6a1f1dSLionel Sambuc     dbgs() << "\n";
283*0a6a1f1dSLionel Sambuc   };
284*0a6a1f1dSLionel Sambuc   (void)debugSuccessor;
285*0a6a1f1dSLionel Sambuc #endif
286*0a6a1f1dSLionel Sambuc 
287*0a6a1f1dSLionel Sambuc   if (isLoopHeader(Resolved)) {
288*0a6a1f1dSLionel Sambuc     DEBUG(debugSuccessor("backedge"));
289*0a6a1f1dSLionel Sambuc     Dist.addBackedge(OuterLoop->getHeader(), Weight);
290*0a6a1f1dSLionel Sambuc     return true;
291*0a6a1f1dSLionel Sambuc   }
292*0a6a1f1dSLionel Sambuc 
293*0a6a1f1dSLionel Sambuc   if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
294*0a6a1f1dSLionel Sambuc     DEBUG(debugSuccessor("  exit  "));
295*0a6a1f1dSLionel Sambuc     Dist.addExit(Resolved, Weight);
296*0a6a1f1dSLionel Sambuc     return true;
297*0a6a1f1dSLionel Sambuc   }
298*0a6a1f1dSLionel Sambuc 
299*0a6a1f1dSLionel Sambuc   if (Resolved < Pred) {
300*0a6a1f1dSLionel Sambuc     if (!isLoopHeader(Pred)) {
301*0a6a1f1dSLionel Sambuc       // If OuterLoop is an irreducible loop, we can't actually handle this.
302*0a6a1f1dSLionel Sambuc       assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
303*0a6a1f1dSLionel Sambuc              "unhandled irreducible control flow");
304*0a6a1f1dSLionel Sambuc 
305*0a6a1f1dSLionel Sambuc       // Irreducible backedge.  Abort.
306*0a6a1f1dSLionel Sambuc       DEBUG(debugSuccessor("abort!!!"));
307*0a6a1f1dSLionel Sambuc       return false;
308*0a6a1f1dSLionel Sambuc     }
309*0a6a1f1dSLionel Sambuc 
310*0a6a1f1dSLionel Sambuc     // If "Pred" is a loop header, then this isn't really a backedge; rather,
311*0a6a1f1dSLionel Sambuc     // OuterLoop must be irreducible.  These false backedges can come only from
312*0a6a1f1dSLionel Sambuc     // secondary loop headers.
313*0a6a1f1dSLionel Sambuc     assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
314*0a6a1f1dSLionel Sambuc            "unhandled irreducible control flow");
315*0a6a1f1dSLionel Sambuc   }
316*0a6a1f1dSLionel Sambuc 
317*0a6a1f1dSLionel Sambuc   DEBUG(debugSuccessor(" local  "));
318*0a6a1f1dSLionel Sambuc   Dist.addLocal(Resolved, Weight);
319*0a6a1f1dSLionel Sambuc   return true;
320*0a6a1f1dSLionel Sambuc }
321*0a6a1f1dSLionel Sambuc 
addLoopSuccessorsToDist(const LoopData * OuterLoop,LoopData & Loop,Distribution & Dist)322*0a6a1f1dSLionel Sambuc bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
323*0a6a1f1dSLionel Sambuc     const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
324*0a6a1f1dSLionel Sambuc   // Copy the exit map into Dist.
325*0a6a1f1dSLionel Sambuc   for (const auto &I : Loop.Exits)
326*0a6a1f1dSLionel Sambuc     if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
327*0a6a1f1dSLionel Sambuc                    I.second.getMass()))
328*0a6a1f1dSLionel Sambuc       // Irreducible backedge.
329*0a6a1f1dSLionel Sambuc       return false;
330*0a6a1f1dSLionel Sambuc 
331*0a6a1f1dSLionel Sambuc   return true;
332*0a6a1f1dSLionel Sambuc }
333*0a6a1f1dSLionel Sambuc 
334*0a6a1f1dSLionel Sambuc /// \brief Get the maximum allowed loop scale.
335*0a6a1f1dSLionel Sambuc ///
336*0a6a1f1dSLionel Sambuc /// Gives the maximum number of estimated iterations allowed for a loop.  Very
337*0a6a1f1dSLionel Sambuc /// large numbers cause problems downstream (even within 64-bits).
getMaxLoopScale()338*0a6a1f1dSLionel Sambuc static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); }
339*0a6a1f1dSLionel Sambuc 
340*0a6a1f1dSLionel Sambuc /// \brief Compute the loop scale for a loop.
computeLoopScale(LoopData & Loop)341*0a6a1f1dSLionel Sambuc void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
342*0a6a1f1dSLionel Sambuc   // Compute loop scale.
343*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
344*0a6a1f1dSLionel Sambuc 
345*0a6a1f1dSLionel Sambuc   // LoopScale == 1 / ExitMass
346*0a6a1f1dSLionel Sambuc   // ExitMass == HeadMass - BackedgeMass
347*0a6a1f1dSLionel Sambuc   BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
348*0a6a1f1dSLionel Sambuc 
349*0a6a1f1dSLionel Sambuc   // Block scale stores the inverse of the scale.
350*0a6a1f1dSLionel Sambuc   Loop.Scale = ExitMass.toScaled().inverse();
351*0a6a1f1dSLionel Sambuc 
352*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
353*0a6a1f1dSLionel Sambuc                << " - " << Loop.BackedgeMass << ")\n"
354*0a6a1f1dSLionel Sambuc                << " - scale = " << Loop.Scale << "\n");
355*0a6a1f1dSLionel Sambuc 
356*0a6a1f1dSLionel Sambuc   if (Loop.Scale > getMaxLoopScale()) {
357*0a6a1f1dSLionel Sambuc     Loop.Scale = getMaxLoopScale();
358*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
359*0a6a1f1dSLionel Sambuc   }
360*0a6a1f1dSLionel Sambuc }
361*0a6a1f1dSLionel Sambuc 
362*0a6a1f1dSLionel Sambuc /// \brief Package up a loop.
packageLoop(LoopData & Loop)363*0a6a1f1dSLionel Sambuc void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
364*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
365*0a6a1f1dSLionel Sambuc 
366*0a6a1f1dSLionel Sambuc   // Clear the subloop exits to prevent quadratic memory usage.
367*0a6a1f1dSLionel Sambuc   for (const BlockNode &M : Loop.Nodes) {
368*0a6a1f1dSLionel Sambuc     if (auto *Loop = Working[M.Index].getPackagedLoop())
369*0a6a1f1dSLionel Sambuc       Loop->Exits.clear();
370*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
371*0a6a1f1dSLionel Sambuc   }
372*0a6a1f1dSLionel Sambuc   Loop.IsPackaged = true;
373*0a6a1f1dSLionel Sambuc }
374*0a6a1f1dSLionel Sambuc 
distributeMass(const BlockNode & Source,LoopData * OuterLoop,Distribution & Dist)375*0a6a1f1dSLionel Sambuc void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
376*0a6a1f1dSLionel Sambuc                                                 LoopData *OuterLoop,
377*0a6a1f1dSLionel Sambuc                                                 Distribution &Dist) {
378*0a6a1f1dSLionel Sambuc   BlockMass Mass = Working[Source.Index].getMass();
379*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
380*0a6a1f1dSLionel Sambuc 
381*0a6a1f1dSLionel Sambuc   // Distribute mass to successors as laid out in Dist.
382*0a6a1f1dSLionel Sambuc   DitheringDistributer D(Dist, Mass);
383*0a6a1f1dSLionel Sambuc 
384*0a6a1f1dSLionel Sambuc #ifndef NDEBUG
385*0a6a1f1dSLionel Sambuc   auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
386*0a6a1f1dSLionel Sambuc                          const char *Desc) {
387*0a6a1f1dSLionel Sambuc     dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
388*0a6a1f1dSLionel Sambuc     if (Desc)
389*0a6a1f1dSLionel Sambuc       dbgs() << " [" << Desc << "]";
390*0a6a1f1dSLionel Sambuc     if (T.isValid())
391*0a6a1f1dSLionel Sambuc       dbgs() << " to " << getBlockName(T);
392*0a6a1f1dSLionel Sambuc     dbgs() << "\n";
393*0a6a1f1dSLionel Sambuc   };
394*0a6a1f1dSLionel Sambuc   (void)debugAssign;
395*0a6a1f1dSLionel Sambuc #endif
396*0a6a1f1dSLionel Sambuc 
397*0a6a1f1dSLionel Sambuc   for (const Weight &W : Dist.Weights) {
398*0a6a1f1dSLionel Sambuc     // Check for a local edge (non-backedge and non-exit).
399*0a6a1f1dSLionel Sambuc     BlockMass Taken = D.takeMass(W.Amount);
400*0a6a1f1dSLionel Sambuc     if (W.Type == Weight::Local) {
401*0a6a1f1dSLionel Sambuc       Working[W.TargetNode.Index].getMass() += Taken;
402*0a6a1f1dSLionel Sambuc       DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
403*0a6a1f1dSLionel Sambuc       continue;
404*0a6a1f1dSLionel Sambuc     }
405*0a6a1f1dSLionel Sambuc 
406*0a6a1f1dSLionel Sambuc     // Backedges and exits only make sense if we're processing a loop.
407*0a6a1f1dSLionel Sambuc     assert(OuterLoop && "backedge or exit outside of loop");
408*0a6a1f1dSLionel Sambuc 
409*0a6a1f1dSLionel Sambuc     // Check for a backedge.
410*0a6a1f1dSLionel Sambuc     if (W.Type == Weight::Backedge) {
411*0a6a1f1dSLionel Sambuc       OuterLoop->BackedgeMass += Taken;
412*0a6a1f1dSLionel Sambuc       DEBUG(debugAssign(BlockNode(), Taken, "back"));
413*0a6a1f1dSLionel Sambuc       continue;
414*0a6a1f1dSLionel Sambuc     }
415*0a6a1f1dSLionel Sambuc 
416*0a6a1f1dSLionel Sambuc     // This must be an exit.
417*0a6a1f1dSLionel Sambuc     assert(W.Type == Weight::Exit);
418*0a6a1f1dSLionel Sambuc     OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
419*0a6a1f1dSLionel Sambuc     DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
420*0a6a1f1dSLionel Sambuc   }
421*0a6a1f1dSLionel Sambuc }
422*0a6a1f1dSLionel Sambuc 
convertFloatingToInteger(BlockFrequencyInfoImplBase & BFI,const Scaled64 & Min,const Scaled64 & Max)423*0a6a1f1dSLionel Sambuc static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
424*0a6a1f1dSLionel Sambuc                                      const Scaled64 &Min, const Scaled64 &Max) {
425*0a6a1f1dSLionel Sambuc   // Scale the Factor to a size that creates integers.  Ideally, integers would
426*0a6a1f1dSLionel Sambuc   // be scaled so that Max == UINT64_MAX so that they can be best
427*0a6a1f1dSLionel Sambuc   // differentiated.  However, the register allocator currently deals poorly
428*0a6a1f1dSLionel Sambuc   // with large numbers.  Instead, push Min up a little from 1 to give some
429*0a6a1f1dSLionel Sambuc   // room to differentiate small, unequal numbers.
430*0a6a1f1dSLionel Sambuc   //
431*0a6a1f1dSLionel Sambuc   // TODO: fix issues downstream so that ScalingFactor can be
432*0a6a1f1dSLionel Sambuc   // Scaled64(1,64)/Max.
433*0a6a1f1dSLionel Sambuc   Scaled64 ScalingFactor = Min.inverse();
434*0a6a1f1dSLionel Sambuc   if ((Max / Min).lg() < 60)
435*0a6a1f1dSLionel Sambuc     ScalingFactor <<= 3;
436*0a6a1f1dSLionel Sambuc 
437*0a6a1f1dSLionel Sambuc   // Translate the floats to integers.
438*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
439*0a6a1f1dSLionel Sambuc                << ", factor = " << ScalingFactor << "\n");
440*0a6a1f1dSLionel Sambuc   for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
441*0a6a1f1dSLionel Sambuc     Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
442*0a6a1f1dSLionel Sambuc     BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
443*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
444*0a6a1f1dSLionel Sambuc                  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
445*0a6a1f1dSLionel Sambuc                  << ", int = " << BFI.Freqs[Index].Integer << "\n");
446*0a6a1f1dSLionel Sambuc   }
447*0a6a1f1dSLionel Sambuc }
448*0a6a1f1dSLionel Sambuc 
449*0a6a1f1dSLionel Sambuc /// \brief Unwrap a loop package.
450*0a6a1f1dSLionel Sambuc ///
451*0a6a1f1dSLionel Sambuc /// Visits all the members of a loop, adjusting their BlockData according to
452*0a6a1f1dSLionel Sambuc /// the loop's pseudo-node.
unwrapLoop(BlockFrequencyInfoImplBase & BFI,LoopData & Loop)453*0a6a1f1dSLionel Sambuc static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
454*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
455*0a6a1f1dSLionel Sambuc                << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
456*0a6a1f1dSLionel Sambuc                << "\n");
457*0a6a1f1dSLionel Sambuc   Loop.Scale *= Loop.Mass.toScaled();
458*0a6a1f1dSLionel Sambuc   Loop.IsPackaged = false;
459*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
460*0a6a1f1dSLionel Sambuc 
461*0a6a1f1dSLionel Sambuc   // Propagate the head scale through the loop.  Since members are visited in
462*0a6a1f1dSLionel Sambuc   // RPO, the head scale will be updated by the loop scale first, and then the
463*0a6a1f1dSLionel Sambuc   // final head scale will be used for updated the rest of the members.
464*0a6a1f1dSLionel Sambuc   for (const BlockNode &N : Loop.Nodes) {
465*0a6a1f1dSLionel Sambuc     const auto &Working = BFI.Working[N.Index];
466*0a6a1f1dSLionel Sambuc     Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
467*0a6a1f1dSLionel Sambuc                                        : BFI.Freqs[N.Index].Scaled;
468*0a6a1f1dSLionel Sambuc     Scaled64 New = Loop.Scale * F;
469*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
470*0a6a1f1dSLionel Sambuc                  << "\n");
471*0a6a1f1dSLionel Sambuc     F = New;
472*0a6a1f1dSLionel Sambuc   }
473*0a6a1f1dSLionel Sambuc }
474*0a6a1f1dSLionel Sambuc 
unwrapLoops()475*0a6a1f1dSLionel Sambuc void BlockFrequencyInfoImplBase::unwrapLoops() {
476*0a6a1f1dSLionel Sambuc   // Set initial frequencies from loop-local masses.
477*0a6a1f1dSLionel Sambuc   for (size_t Index = 0; Index < Working.size(); ++Index)
478*0a6a1f1dSLionel Sambuc     Freqs[Index].Scaled = Working[Index].Mass.toScaled();
479*0a6a1f1dSLionel Sambuc 
480*0a6a1f1dSLionel Sambuc   for (LoopData &Loop : Loops)
481*0a6a1f1dSLionel Sambuc     unwrapLoop(*this, Loop);
482*0a6a1f1dSLionel Sambuc }
483*0a6a1f1dSLionel Sambuc 
finalizeMetrics()484*0a6a1f1dSLionel Sambuc void BlockFrequencyInfoImplBase::finalizeMetrics() {
485*0a6a1f1dSLionel Sambuc   // Unwrap loop packages in reverse post-order, tracking min and max
486*0a6a1f1dSLionel Sambuc   // frequencies.
487*0a6a1f1dSLionel Sambuc   auto Min = Scaled64::getLargest();
488*0a6a1f1dSLionel Sambuc   auto Max = Scaled64::getZero();
489*0a6a1f1dSLionel Sambuc   for (size_t Index = 0; Index < Working.size(); ++Index) {
490*0a6a1f1dSLionel Sambuc     // Update min/max scale.
491*0a6a1f1dSLionel Sambuc     Min = std::min(Min, Freqs[Index].Scaled);
492*0a6a1f1dSLionel Sambuc     Max = std::max(Max, Freqs[Index].Scaled);
493*0a6a1f1dSLionel Sambuc   }
494*0a6a1f1dSLionel Sambuc 
495*0a6a1f1dSLionel Sambuc   // Convert to integers.
496*0a6a1f1dSLionel Sambuc   convertFloatingToInteger(*this, Min, Max);
497*0a6a1f1dSLionel Sambuc 
498*0a6a1f1dSLionel Sambuc   // Clean up data structures.
499*0a6a1f1dSLionel Sambuc   cleanup(*this);
500*0a6a1f1dSLionel Sambuc 
501*0a6a1f1dSLionel Sambuc   // Print out the final stats.
502*0a6a1f1dSLionel Sambuc   DEBUG(dump());
503*0a6a1f1dSLionel Sambuc }
504*0a6a1f1dSLionel Sambuc 
505*0a6a1f1dSLionel Sambuc BlockFrequency
getBlockFreq(const BlockNode & Node) const506*0a6a1f1dSLionel Sambuc BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
507*0a6a1f1dSLionel Sambuc   if (!Node.isValid())
508*0a6a1f1dSLionel Sambuc     return 0;
509*0a6a1f1dSLionel Sambuc   return Freqs[Node.Index].Integer;
510*0a6a1f1dSLionel Sambuc }
511*0a6a1f1dSLionel Sambuc Scaled64
getFloatingBlockFreq(const BlockNode & Node) const512*0a6a1f1dSLionel Sambuc BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
513*0a6a1f1dSLionel Sambuc   if (!Node.isValid())
514*0a6a1f1dSLionel Sambuc     return Scaled64::getZero();
515*0a6a1f1dSLionel Sambuc   return Freqs[Node.Index].Scaled;
516*0a6a1f1dSLionel Sambuc }
517*0a6a1f1dSLionel Sambuc 
518*0a6a1f1dSLionel Sambuc std::string
getBlockName(const BlockNode & Node) const519*0a6a1f1dSLionel Sambuc BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
520*0a6a1f1dSLionel Sambuc   return std::string();
521*0a6a1f1dSLionel Sambuc }
522*0a6a1f1dSLionel Sambuc std::string
getLoopName(const LoopData & Loop) const523*0a6a1f1dSLionel Sambuc BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
524*0a6a1f1dSLionel Sambuc   return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
525*0a6a1f1dSLionel Sambuc }
526*0a6a1f1dSLionel Sambuc 
527*0a6a1f1dSLionel Sambuc raw_ostream &
printBlockFreq(raw_ostream & OS,const BlockNode & Node) const528*0a6a1f1dSLionel Sambuc BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
529*0a6a1f1dSLionel Sambuc                                            const BlockNode &Node) const {
530*0a6a1f1dSLionel Sambuc   return OS << getFloatingBlockFreq(Node);
531*0a6a1f1dSLionel Sambuc }
532*0a6a1f1dSLionel Sambuc 
533*0a6a1f1dSLionel Sambuc raw_ostream &
printBlockFreq(raw_ostream & OS,const BlockFrequency & Freq) const534*0a6a1f1dSLionel Sambuc BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
535*0a6a1f1dSLionel Sambuc                                            const BlockFrequency &Freq) const {
536*0a6a1f1dSLionel Sambuc   Scaled64 Block(Freq.getFrequency(), 0);
537*0a6a1f1dSLionel Sambuc   Scaled64 Entry(getEntryFreq(), 0);
538*0a6a1f1dSLionel Sambuc 
539*0a6a1f1dSLionel Sambuc   return OS << Block / Entry;
540*0a6a1f1dSLionel Sambuc }
541*0a6a1f1dSLionel Sambuc 
addNodesInLoop(const BFIBase::LoopData & OuterLoop)542*0a6a1f1dSLionel Sambuc void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
543*0a6a1f1dSLionel Sambuc   Start = OuterLoop.getHeader();
544*0a6a1f1dSLionel Sambuc   Nodes.reserve(OuterLoop.Nodes.size());
545*0a6a1f1dSLionel Sambuc   for (auto N : OuterLoop.Nodes)
546*0a6a1f1dSLionel Sambuc     addNode(N);
547*0a6a1f1dSLionel Sambuc   indexNodes();
548*0a6a1f1dSLionel Sambuc }
addNodesInFunction()549*0a6a1f1dSLionel Sambuc void IrreducibleGraph::addNodesInFunction() {
550*0a6a1f1dSLionel Sambuc   Start = 0;
551*0a6a1f1dSLionel Sambuc   for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
552*0a6a1f1dSLionel Sambuc     if (!BFI.Working[Index].isPackaged())
553*0a6a1f1dSLionel Sambuc       addNode(Index);
554*0a6a1f1dSLionel Sambuc   indexNodes();
555*0a6a1f1dSLionel Sambuc }
indexNodes()556*0a6a1f1dSLionel Sambuc void IrreducibleGraph::indexNodes() {
557*0a6a1f1dSLionel Sambuc   for (auto &I : Nodes)
558*0a6a1f1dSLionel Sambuc     Lookup[I.Node.Index] = &I;
559*0a6a1f1dSLionel Sambuc }
addEdge(IrrNode & Irr,const BlockNode & Succ,const BFIBase::LoopData * OuterLoop)560*0a6a1f1dSLionel Sambuc void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
561*0a6a1f1dSLionel Sambuc                                const BFIBase::LoopData *OuterLoop) {
562*0a6a1f1dSLionel Sambuc   if (OuterLoop && OuterLoop->isHeader(Succ))
563*0a6a1f1dSLionel Sambuc     return;
564*0a6a1f1dSLionel Sambuc   auto L = Lookup.find(Succ.Index);
565*0a6a1f1dSLionel Sambuc   if (L == Lookup.end())
566*0a6a1f1dSLionel Sambuc     return;
567*0a6a1f1dSLionel Sambuc   IrrNode &SuccIrr = *L->second;
568*0a6a1f1dSLionel Sambuc   Irr.Edges.push_back(&SuccIrr);
569*0a6a1f1dSLionel Sambuc   SuccIrr.Edges.push_front(&Irr);
570*0a6a1f1dSLionel Sambuc   ++SuccIrr.NumIn;
571*0a6a1f1dSLionel Sambuc }
572*0a6a1f1dSLionel Sambuc 
573*0a6a1f1dSLionel Sambuc namespace llvm {
574*0a6a1f1dSLionel Sambuc template <> struct GraphTraits<IrreducibleGraph> {
575*0a6a1f1dSLionel Sambuc   typedef bfi_detail::IrreducibleGraph GraphT;
576*0a6a1f1dSLionel Sambuc 
577*0a6a1f1dSLionel Sambuc   typedef const GraphT::IrrNode NodeType;
578*0a6a1f1dSLionel Sambuc   typedef GraphT::IrrNode::iterator ChildIteratorType;
579*0a6a1f1dSLionel Sambuc 
getEntryNodellvm::GraphTraits580*0a6a1f1dSLionel Sambuc   static const NodeType *getEntryNode(const GraphT &G) {
581*0a6a1f1dSLionel Sambuc     return G.StartIrr;
582*0a6a1f1dSLionel Sambuc   }
child_beginllvm::GraphTraits583*0a6a1f1dSLionel Sambuc   static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
child_endllvm::GraphTraits584*0a6a1f1dSLionel Sambuc   static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
585*0a6a1f1dSLionel Sambuc };
586*0a6a1f1dSLionel Sambuc }
587*0a6a1f1dSLionel Sambuc 
588*0a6a1f1dSLionel Sambuc /// \brief Find extra irreducible headers.
589*0a6a1f1dSLionel Sambuc ///
590*0a6a1f1dSLionel Sambuc /// Find entry blocks and other blocks with backedges, which exist when \c G
591*0a6a1f1dSLionel Sambuc /// contains irreducible sub-SCCs.
findIrreducibleHeaders(const BlockFrequencyInfoImplBase & BFI,const IrreducibleGraph & G,const std::vector<const IrreducibleGraph::IrrNode * > & SCC,LoopData::NodeList & Headers,LoopData::NodeList & Others)592*0a6a1f1dSLionel Sambuc static void findIrreducibleHeaders(
593*0a6a1f1dSLionel Sambuc     const BlockFrequencyInfoImplBase &BFI,
594*0a6a1f1dSLionel Sambuc     const IrreducibleGraph &G,
595*0a6a1f1dSLionel Sambuc     const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
596*0a6a1f1dSLionel Sambuc     LoopData::NodeList &Headers, LoopData::NodeList &Others) {
597*0a6a1f1dSLionel Sambuc   // Map from nodes in the SCC to whether it's an entry block.
598*0a6a1f1dSLionel Sambuc   SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
599*0a6a1f1dSLionel Sambuc 
600*0a6a1f1dSLionel Sambuc   // InSCC also acts the set of nodes in the graph.  Seed it.
601*0a6a1f1dSLionel Sambuc   for (const auto *I : SCC)
602*0a6a1f1dSLionel Sambuc     InSCC[I] = false;
603*0a6a1f1dSLionel Sambuc 
604*0a6a1f1dSLionel Sambuc   for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
605*0a6a1f1dSLionel Sambuc     auto &Irr = *I->first;
606*0a6a1f1dSLionel Sambuc     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
607*0a6a1f1dSLionel Sambuc       if (InSCC.count(P))
608*0a6a1f1dSLionel Sambuc         continue;
609*0a6a1f1dSLionel Sambuc 
610*0a6a1f1dSLionel Sambuc       // This is an entry block.
611*0a6a1f1dSLionel Sambuc       I->second = true;
612*0a6a1f1dSLionel Sambuc       Headers.push_back(Irr.Node);
613*0a6a1f1dSLionel Sambuc       DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node) << "\n");
614*0a6a1f1dSLionel Sambuc       break;
615*0a6a1f1dSLionel Sambuc     }
616*0a6a1f1dSLionel Sambuc   }
617*0a6a1f1dSLionel Sambuc   assert(Headers.size() >= 2 &&
618*0a6a1f1dSLionel Sambuc          "Expected irreducible CFG; -loop-info is likely invalid");
619*0a6a1f1dSLionel Sambuc   if (Headers.size() == InSCC.size()) {
620*0a6a1f1dSLionel Sambuc     // Every block is a header.
621*0a6a1f1dSLionel Sambuc     std::sort(Headers.begin(), Headers.end());
622*0a6a1f1dSLionel Sambuc     return;
623*0a6a1f1dSLionel Sambuc   }
624*0a6a1f1dSLionel Sambuc 
625*0a6a1f1dSLionel Sambuc   // Look for extra headers from irreducible sub-SCCs.
626*0a6a1f1dSLionel Sambuc   for (const auto &I : InSCC) {
627*0a6a1f1dSLionel Sambuc     // Entry blocks are already headers.
628*0a6a1f1dSLionel Sambuc     if (I.second)
629*0a6a1f1dSLionel Sambuc       continue;
630*0a6a1f1dSLionel Sambuc 
631*0a6a1f1dSLionel Sambuc     auto &Irr = *I.first;
632*0a6a1f1dSLionel Sambuc     for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
633*0a6a1f1dSLionel Sambuc       // Skip forward edges.
634*0a6a1f1dSLionel Sambuc       if (P->Node < Irr.Node)
635*0a6a1f1dSLionel Sambuc         continue;
636*0a6a1f1dSLionel Sambuc 
637*0a6a1f1dSLionel Sambuc       // Skip predecessors from entry blocks.  These can have inverted
638*0a6a1f1dSLionel Sambuc       // ordering.
639*0a6a1f1dSLionel Sambuc       if (InSCC.lookup(P))
640*0a6a1f1dSLionel Sambuc         continue;
641*0a6a1f1dSLionel Sambuc 
642*0a6a1f1dSLionel Sambuc       // Store the extra header.
643*0a6a1f1dSLionel Sambuc       Headers.push_back(Irr.Node);
644*0a6a1f1dSLionel Sambuc       DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node) << "\n");
645*0a6a1f1dSLionel Sambuc       break;
646*0a6a1f1dSLionel Sambuc     }
647*0a6a1f1dSLionel Sambuc     if (Headers.back() == Irr.Node)
648*0a6a1f1dSLionel Sambuc       // Added this as a header.
649*0a6a1f1dSLionel Sambuc       continue;
650*0a6a1f1dSLionel Sambuc 
651*0a6a1f1dSLionel Sambuc     // This is not a header.
652*0a6a1f1dSLionel Sambuc     Others.push_back(Irr.Node);
653*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
654*0a6a1f1dSLionel Sambuc   }
655*0a6a1f1dSLionel Sambuc   std::sort(Headers.begin(), Headers.end());
656*0a6a1f1dSLionel Sambuc   std::sort(Others.begin(), Others.end());
657*0a6a1f1dSLionel Sambuc }
658*0a6a1f1dSLionel Sambuc 
createIrreducibleLoop(BlockFrequencyInfoImplBase & BFI,const IrreducibleGraph & G,LoopData * OuterLoop,std::list<LoopData>::iterator Insert,const std::vector<const IrreducibleGraph::IrrNode * > & SCC)659*0a6a1f1dSLionel Sambuc static void createIrreducibleLoop(
660*0a6a1f1dSLionel Sambuc     BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
661*0a6a1f1dSLionel Sambuc     LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
662*0a6a1f1dSLionel Sambuc     const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
663*0a6a1f1dSLionel Sambuc   // Translate the SCC into RPO.
664*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << " - found-scc\n");
665*0a6a1f1dSLionel Sambuc 
666*0a6a1f1dSLionel Sambuc   LoopData::NodeList Headers;
667*0a6a1f1dSLionel Sambuc   LoopData::NodeList Others;
668*0a6a1f1dSLionel Sambuc   findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
669*0a6a1f1dSLionel Sambuc 
670*0a6a1f1dSLionel Sambuc   auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
671*0a6a1f1dSLionel Sambuc                                 Headers.end(), Others.begin(), Others.end());
672*0a6a1f1dSLionel Sambuc 
673*0a6a1f1dSLionel Sambuc   // Update loop hierarchy.
674*0a6a1f1dSLionel Sambuc   for (const auto &N : Loop->Nodes)
675*0a6a1f1dSLionel Sambuc     if (BFI.Working[N.Index].isLoopHeader())
676*0a6a1f1dSLionel Sambuc       BFI.Working[N.Index].Loop->Parent = &*Loop;
677*0a6a1f1dSLionel Sambuc     else
678*0a6a1f1dSLionel Sambuc       BFI.Working[N.Index].Loop = &*Loop;
679*0a6a1f1dSLionel Sambuc }
680*0a6a1f1dSLionel Sambuc 
681*0a6a1f1dSLionel Sambuc iterator_range<std::list<LoopData>::iterator>
analyzeIrreducible(const IrreducibleGraph & G,LoopData * OuterLoop,std::list<LoopData>::iterator Insert)682*0a6a1f1dSLionel Sambuc BlockFrequencyInfoImplBase::analyzeIrreducible(
683*0a6a1f1dSLionel Sambuc     const IrreducibleGraph &G, LoopData *OuterLoop,
684*0a6a1f1dSLionel Sambuc     std::list<LoopData>::iterator Insert) {
685*0a6a1f1dSLionel Sambuc   assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
686*0a6a1f1dSLionel Sambuc   auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
687*0a6a1f1dSLionel Sambuc 
688*0a6a1f1dSLionel Sambuc   for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
689*0a6a1f1dSLionel Sambuc     if (I->size() < 2)
690*0a6a1f1dSLionel Sambuc       continue;
691*0a6a1f1dSLionel Sambuc 
692*0a6a1f1dSLionel Sambuc     // Translate the SCC into RPO.
693*0a6a1f1dSLionel Sambuc     createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
694*0a6a1f1dSLionel Sambuc   }
695*0a6a1f1dSLionel Sambuc 
696*0a6a1f1dSLionel Sambuc   if (OuterLoop)
697*0a6a1f1dSLionel Sambuc     return make_range(std::next(Prev), Insert);
698*0a6a1f1dSLionel Sambuc   return make_range(Loops.begin(), Insert);
699*0a6a1f1dSLionel Sambuc }
700*0a6a1f1dSLionel Sambuc 
701*0a6a1f1dSLionel Sambuc void
updateLoopWithIrreducible(LoopData & OuterLoop)702*0a6a1f1dSLionel Sambuc BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
703*0a6a1f1dSLionel Sambuc   OuterLoop.Exits.clear();
704*0a6a1f1dSLionel Sambuc   OuterLoop.BackedgeMass = BlockMass::getEmpty();
705*0a6a1f1dSLionel Sambuc   auto O = OuterLoop.Nodes.begin() + 1;
706*0a6a1f1dSLionel Sambuc   for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
707*0a6a1f1dSLionel Sambuc     if (!Working[I->Index].isPackaged())
708*0a6a1f1dSLionel Sambuc       *O++ = *I;
709*0a6a1f1dSLionel Sambuc   OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
710*0a6a1f1dSLionel Sambuc }
711