10b57cec5SDimitry Andric //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Loops should be simplified before this analysis. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfoImpl.h" 140b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 150b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 160b57cec5SDimitry Andric #include "llvm/ADT/SCCIterator.h" 1706c3fb27SDimitry Andric #include "llvm/ADT/SmallString.h" 180b57cec5SDimitry Andric #include "llvm/Config/llvm-config.h" 190b57cec5SDimitry Andric #include "llvm/IR/Function.h" 200b57cec5SDimitry Andric #include "llvm/Support/BlockFrequency.h" 210b57cec5SDimitry Andric #include "llvm/Support/BranchProbability.h" 220b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 230b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 240b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 2581ad6265SDimitry Andric #include "llvm/Support/ScaledNumber.h" 260b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 270b57cec5SDimitry Andric #include <algorithm> 280b57cec5SDimitry Andric #include <cassert> 290b57cec5SDimitry Andric #include <cstddef> 300b57cec5SDimitry Andric #include <cstdint> 310b57cec5SDimitry Andric #include <iterator> 320b57cec5SDimitry Andric #include <list> 330b57cec5SDimitry Andric #include <numeric> 34bdd1243dSDimitry Andric #include <optional> 350b57cec5SDimitry Andric #include <utility> 360b57cec5SDimitry Andric #include <vector> 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric using namespace llvm; 390b57cec5SDimitry Andric using namespace llvm::bfi_detail; 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric #define DEBUG_TYPE "block-freq" 420b57cec5SDimitry Andric 43fe6060f1SDimitry Andric namespace llvm { 445ffd83dbSDimitry Andric cl::opt<bool> CheckBFIUnknownBlockQueries( 455ffd83dbSDimitry Andric "check-bfi-unknown-block-queries", 465ffd83dbSDimitry Andric cl::init(false), cl::Hidden, 475ffd83dbSDimitry Andric cl::desc("Check if block frequency is queried for an unknown block " 485ffd83dbSDimitry Andric "for debugging missed BFI updates")); 495ffd83dbSDimitry Andric 50fe6060f1SDimitry Andric cl::opt<bool> UseIterativeBFIInference( 5181ad6265SDimitry Andric "use-iterative-bfi-inference", cl::Hidden, 52fe6060f1SDimitry Andric cl::desc("Apply an iterative post-processing to infer correct BFI counts")); 53fe6060f1SDimitry Andric 54fe6060f1SDimitry Andric cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock( 55fe6060f1SDimitry Andric "iterative-bfi-max-iterations-per-block", cl::init(1000), cl::Hidden, 56fe6060f1SDimitry Andric cl::desc("Iterative inference: maximum number of update iterations " 57fe6060f1SDimitry Andric "per block")); 58fe6060f1SDimitry Andric 59fe6060f1SDimitry Andric cl::opt<double> IterativeBFIPrecision( 60fe6060f1SDimitry Andric "iterative-bfi-precision", cl::init(1e-12), cl::Hidden, 61fe6060f1SDimitry Andric cl::desc("Iterative inference: delta convergence precision; smaller values " 62fe6060f1SDimitry Andric "typically lead to better results at the cost of worsen runtime")); 6306c3fb27SDimitry Andric } // namespace llvm 64fe6060f1SDimitry Andric 650b57cec5SDimitry Andric ScaledNumber<uint64_t> BlockMass::toScaled() const { 660b57cec5SDimitry Andric if (isFull()) 670b57cec5SDimitry Andric return ScaledNumber<uint64_t>(1, 0); 680b57cec5SDimitry Andric return ScaledNumber<uint64_t>(getMass() + 1, -64); 690b57cec5SDimitry Andric } 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 720b57cec5SDimitry Andric LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); } 730b57cec5SDimitry Andric #endif 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric static char getHexDigit(int N) { 760b57cec5SDimitry Andric assert(N < 16); 770b57cec5SDimitry Andric if (N < 10) 780b57cec5SDimitry Andric return '0' + N; 790b57cec5SDimitry Andric return 'a' + N - 10; 800b57cec5SDimitry Andric } 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric raw_ostream &BlockMass::print(raw_ostream &OS) const { 830b57cec5SDimitry Andric for (int Digits = 0; Digits < 16; ++Digits) 840b57cec5SDimitry Andric OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf); 850b57cec5SDimitry Andric return OS; 860b57cec5SDimitry Andric } 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric namespace { 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric using BlockNode = BlockFrequencyInfoImplBase::BlockNode; 910b57cec5SDimitry Andric using Distribution = BlockFrequencyInfoImplBase::Distribution; 920b57cec5SDimitry Andric using WeightList = BlockFrequencyInfoImplBase::Distribution::WeightList; 930b57cec5SDimitry Andric using Scaled64 = BlockFrequencyInfoImplBase::Scaled64; 940b57cec5SDimitry Andric using LoopData = BlockFrequencyInfoImplBase::LoopData; 950b57cec5SDimitry Andric using Weight = BlockFrequencyInfoImplBase::Weight; 960b57cec5SDimitry Andric using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData; 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric /// Dithering mass distributer. 990b57cec5SDimitry Andric /// 1000b57cec5SDimitry Andric /// This class splits up a single mass into portions by weight, dithering to 1010b57cec5SDimitry Andric /// spread out error. No mass is lost. The dithering precision depends on the 1020b57cec5SDimitry Andric /// precision of the product of \a BlockMass and \a BranchProbability. 1030b57cec5SDimitry Andric /// 1040b57cec5SDimitry Andric /// The distribution algorithm follows. 1050b57cec5SDimitry Andric /// 1060b57cec5SDimitry Andric /// 1. Initialize by saving the sum of the weights in \a RemWeight and the 1070b57cec5SDimitry Andric /// mass to distribute in \a RemMass. 1080b57cec5SDimitry Andric /// 1090b57cec5SDimitry Andric /// 2. For each portion: 1100b57cec5SDimitry Andric /// 1110b57cec5SDimitry Andric /// 1. Construct a branch probability, P, as the portion's weight divided 1120b57cec5SDimitry Andric /// by the current value of \a RemWeight. 1130b57cec5SDimitry Andric /// 2. Calculate the portion's mass as \a RemMass times P. 1140b57cec5SDimitry Andric /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting 1150b57cec5SDimitry Andric /// the current portion's weight and mass. 1160b57cec5SDimitry Andric struct DitheringDistributer { 1170b57cec5SDimitry Andric uint32_t RemWeight; 1180b57cec5SDimitry Andric BlockMass RemMass; 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric DitheringDistributer(Distribution &Dist, const BlockMass &Mass); 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric BlockMass takeMass(uint32_t Weight); 1230b57cec5SDimitry Andric }; 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric } // end anonymous namespace 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric DitheringDistributer::DitheringDistributer(Distribution &Dist, 1280b57cec5SDimitry Andric const BlockMass &Mass) { 1290b57cec5SDimitry Andric Dist.normalize(); 1300b57cec5SDimitry Andric RemWeight = Dist.Total; 1310b57cec5SDimitry Andric RemMass = Mass; 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric BlockMass DitheringDistributer::takeMass(uint32_t Weight) { 1350b57cec5SDimitry Andric assert(Weight && "invalid weight"); 1360b57cec5SDimitry Andric assert(Weight <= RemWeight); 1370b57cec5SDimitry Andric BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight); 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric // Decrement totals (dither). 1400b57cec5SDimitry Andric RemWeight -= Weight; 1410b57cec5SDimitry Andric RemMass -= Mass; 1420b57cec5SDimitry Andric return Mass; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric void Distribution::add(const BlockNode &Node, uint64_t Amount, 1460b57cec5SDimitry Andric Weight::DistType Type) { 1470b57cec5SDimitry Andric assert(Amount && "invalid weight of 0"); 1480b57cec5SDimitry Andric uint64_t NewTotal = Total + Amount; 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric // Check for overflow. It should be impossible to overflow twice. 1510b57cec5SDimitry Andric bool IsOverflow = NewTotal < Total; 1520b57cec5SDimitry Andric assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow"); 1530b57cec5SDimitry Andric DidOverflow |= IsOverflow; 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric // Update the total. 1560b57cec5SDimitry Andric Total = NewTotal; 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric // Save the weight. 1590b57cec5SDimitry Andric Weights.push_back(Weight(Type, Node, Amount)); 1600b57cec5SDimitry Andric } 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric static void combineWeight(Weight &W, const Weight &OtherW) { 1630b57cec5SDimitry Andric assert(OtherW.TargetNode.isValid()); 1640b57cec5SDimitry Andric if (!W.Amount) { 1650b57cec5SDimitry Andric W = OtherW; 1660b57cec5SDimitry Andric return; 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric assert(W.Type == OtherW.Type); 1690b57cec5SDimitry Andric assert(W.TargetNode == OtherW.TargetNode); 1700b57cec5SDimitry Andric assert(OtherW.Amount && "Expected non-zero weight"); 1710b57cec5SDimitry Andric if (W.Amount > W.Amount + OtherW.Amount) 1720b57cec5SDimitry Andric // Saturate on overflow. 1730b57cec5SDimitry Andric W.Amount = UINT64_MAX; 1740b57cec5SDimitry Andric else 1750b57cec5SDimitry Andric W.Amount += OtherW.Amount; 1760b57cec5SDimitry Andric } 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric static void combineWeightsBySorting(WeightList &Weights) { 1790b57cec5SDimitry Andric // Sort so edges to the same node are adjacent. 1800b57cec5SDimitry Andric llvm::sort(Weights, [](const Weight &L, const Weight &R) { 1810b57cec5SDimitry Andric return L.TargetNode < R.TargetNode; 1820b57cec5SDimitry Andric }); 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric // Combine adjacent edges. 1850b57cec5SDimitry Andric WeightList::iterator O = Weights.begin(); 1860b57cec5SDimitry Andric for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E; 1870b57cec5SDimitry Andric ++O, (I = L)) { 1880b57cec5SDimitry Andric *O = *I; 1890b57cec5SDimitry Andric 1900b57cec5SDimitry Andric // Find the adjacent weights to the same node. 1910b57cec5SDimitry Andric for (++L; L != E && I->TargetNode == L->TargetNode; ++L) 1920b57cec5SDimitry Andric combineWeight(*O, *L); 1930b57cec5SDimitry Andric } 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric // Erase extra entries. 1960b57cec5SDimitry Andric Weights.erase(O, Weights.end()); 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric static void combineWeightsByHashing(WeightList &Weights) { 2000b57cec5SDimitry Andric // Collect weights into a DenseMap. 2010b57cec5SDimitry Andric using HashTable = DenseMap<BlockNode::IndexType, Weight>; 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric HashTable Combined(NextPowerOf2(2 * Weights.size())); 2040b57cec5SDimitry Andric for (const Weight &W : Weights) 2050b57cec5SDimitry Andric combineWeight(Combined[W.TargetNode.Index], W); 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric // Check whether anything changed. 2080b57cec5SDimitry Andric if (Weights.size() == Combined.size()) 2090b57cec5SDimitry Andric return; 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric // Fill in the new weights. 2120b57cec5SDimitry Andric Weights.clear(); 2130b57cec5SDimitry Andric Weights.reserve(Combined.size()); 2140b57cec5SDimitry Andric for (const auto &I : Combined) 2150b57cec5SDimitry Andric Weights.push_back(I.second); 2160b57cec5SDimitry Andric } 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric static void combineWeights(WeightList &Weights) { 2190b57cec5SDimitry Andric // Use a hash table for many successors to keep this linear. 2200b57cec5SDimitry Andric if (Weights.size() > 128) { 2210b57cec5SDimitry Andric combineWeightsByHashing(Weights); 2220b57cec5SDimitry Andric return; 2230b57cec5SDimitry Andric } 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric combineWeightsBySorting(Weights); 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric static uint64_t shiftRightAndRound(uint64_t N, int Shift) { 2290b57cec5SDimitry Andric assert(Shift >= 0); 2300b57cec5SDimitry Andric assert(Shift < 64); 2310b57cec5SDimitry Andric if (!Shift) 2320b57cec5SDimitry Andric return N; 2330b57cec5SDimitry Andric return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1)); 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric void Distribution::normalize() { 2370b57cec5SDimitry Andric // Early exit for termination nodes. 2380b57cec5SDimitry Andric if (Weights.empty()) 2390b57cec5SDimitry Andric return; 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric // Only bother if there are multiple successors. 2420b57cec5SDimitry Andric if (Weights.size() > 1) 2430b57cec5SDimitry Andric combineWeights(Weights); 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric // Early exit when combined into a single successor. 2460b57cec5SDimitry Andric if (Weights.size() == 1) { 2470b57cec5SDimitry Andric Total = 1; 2480b57cec5SDimitry Andric Weights.front().Amount = 1; 2490b57cec5SDimitry Andric return; 2500b57cec5SDimitry Andric } 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric // Determine how much to shift right so that the total fits into 32-bits. 2530b57cec5SDimitry Andric // 2540b57cec5SDimitry Andric // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1 2550b57cec5SDimitry Andric // for each weight can cause a 32-bit overflow. 2560b57cec5SDimitry Andric int Shift = 0; 2570b57cec5SDimitry Andric if (DidOverflow) 2580b57cec5SDimitry Andric Shift = 33; 2590b57cec5SDimitry Andric else if (Total > UINT32_MAX) 26006c3fb27SDimitry Andric Shift = 33 - llvm::countl_zero(Total); 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric // Early exit if nothing needs to be scaled. 2630b57cec5SDimitry Andric if (!Shift) { 2640b57cec5SDimitry Andric // If we didn't overflow then combineWeights() shouldn't have changed the 2650b57cec5SDimitry Andric // sum of the weights, but let's double-check. 2660b57cec5SDimitry Andric assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0), 2670b57cec5SDimitry Andric [](uint64_t Sum, const Weight &W) { 2680b57cec5SDimitry Andric return Sum + W.Amount; 2690b57cec5SDimitry Andric }) && 2700b57cec5SDimitry Andric "Expected total to be correct"); 2710b57cec5SDimitry Andric return; 2720b57cec5SDimitry Andric } 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric // Recompute the total through accumulation (rather than shifting it) so that 2750b57cec5SDimitry Andric // it's accurate after shifting and any changes combineWeights() made above. 2760b57cec5SDimitry Andric Total = 0; 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric // Sum the weights to each node and shift right if necessary. 2790b57cec5SDimitry Andric for (Weight &W : Weights) { 2800b57cec5SDimitry Andric // Scale down below UINT32_MAX. Since Shift is larger than necessary, we 2810b57cec5SDimitry Andric // can round here without concern about overflow. 2820b57cec5SDimitry Andric assert(W.TargetNode.isValid()); 2830b57cec5SDimitry Andric W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift)); 2840b57cec5SDimitry Andric assert(W.Amount <= UINT32_MAX); 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric // Update the total. 2870b57cec5SDimitry Andric Total += W.Amount; 2880b57cec5SDimitry Andric } 2890b57cec5SDimitry Andric assert(Total <= UINT32_MAX); 2900b57cec5SDimitry Andric } 2910b57cec5SDimitry Andric 2920b57cec5SDimitry Andric void BlockFrequencyInfoImplBase::clear() { 2930b57cec5SDimitry Andric // Swap with a default-constructed std::vector, since std::vector<>::clear() 2940b57cec5SDimitry Andric // does not actually clear heap storage. 2950b57cec5SDimitry Andric std::vector<FrequencyData>().swap(Freqs); 2960b57cec5SDimitry Andric IsIrrLoopHeader.clear(); 2970b57cec5SDimitry Andric std::vector<WorkingData>().swap(Working); 2980b57cec5SDimitry Andric Loops.clear(); 2990b57cec5SDimitry Andric } 3000b57cec5SDimitry Andric 3010b57cec5SDimitry Andric /// Clear all memory not needed downstream. 3020b57cec5SDimitry Andric /// 3030b57cec5SDimitry Andric /// Releases all memory not used downstream. In particular, saves Freqs. 3040b57cec5SDimitry Andric static void cleanup(BlockFrequencyInfoImplBase &BFI) { 3050b57cec5SDimitry Andric std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs)); 3060b57cec5SDimitry Andric SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader)); 3070b57cec5SDimitry Andric BFI.clear(); 3080b57cec5SDimitry Andric BFI.Freqs = std::move(SavedFreqs); 3090b57cec5SDimitry Andric BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader); 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, 3130b57cec5SDimitry Andric const LoopData *OuterLoop, 3140b57cec5SDimitry Andric const BlockNode &Pred, 3150b57cec5SDimitry Andric const BlockNode &Succ, 3160b57cec5SDimitry Andric uint64_t Weight) { 3170b57cec5SDimitry Andric if (!Weight) 3180b57cec5SDimitry Andric Weight = 1; 3190b57cec5SDimitry Andric 3200b57cec5SDimitry Andric auto isLoopHeader = [&OuterLoop](const BlockNode &Node) { 3210b57cec5SDimitry Andric return OuterLoop && OuterLoop->isHeader(Node); 3220b57cec5SDimitry Andric }; 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric BlockNode Resolved = Working[Succ.Index].getResolvedNode(); 3250b57cec5SDimitry Andric 3260b57cec5SDimitry Andric #ifndef NDEBUG 3270b57cec5SDimitry Andric auto debugSuccessor = [&](const char *Type) { 3280b57cec5SDimitry Andric dbgs() << " =>" 3290b57cec5SDimitry Andric << " [" << Type << "] weight = " << Weight; 3300b57cec5SDimitry Andric if (!isLoopHeader(Resolved)) 3310b57cec5SDimitry Andric dbgs() << ", succ = " << getBlockName(Succ); 3320b57cec5SDimitry Andric if (Resolved != Succ) 3330b57cec5SDimitry Andric dbgs() << ", resolved = " << getBlockName(Resolved); 3340b57cec5SDimitry Andric dbgs() << "\n"; 3350b57cec5SDimitry Andric }; 3360b57cec5SDimitry Andric (void)debugSuccessor; 3370b57cec5SDimitry Andric #endif 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric if (isLoopHeader(Resolved)) { 3400b57cec5SDimitry Andric LLVM_DEBUG(debugSuccessor("backedge")); 3410b57cec5SDimitry Andric Dist.addBackedge(Resolved, Weight); 3420b57cec5SDimitry Andric return true; 3430b57cec5SDimitry Andric } 3440b57cec5SDimitry Andric 3450b57cec5SDimitry Andric if (Working[Resolved.Index].getContainingLoop() != OuterLoop) { 3460b57cec5SDimitry Andric LLVM_DEBUG(debugSuccessor(" exit ")); 3470b57cec5SDimitry Andric Dist.addExit(Resolved, Weight); 3480b57cec5SDimitry Andric return true; 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric if (Resolved < Pred) { 3520b57cec5SDimitry Andric if (!isLoopHeader(Pred)) { 3530b57cec5SDimitry Andric // If OuterLoop is an irreducible loop, we can't actually handle this. 3540b57cec5SDimitry Andric assert((!OuterLoop || !OuterLoop->isIrreducible()) && 3550b57cec5SDimitry Andric "unhandled irreducible control flow"); 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric // Irreducible backedge. Abort. 3580b57cec5SDimitry Andric LLVM_DEBUG(debugSuccessor("abort!!!")); 3590b57cec5SDimitry Andric return false; 3600b57cec5SDimitry Andric } 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric // If "Pred" is a loop header, then this isn't really a backedge; rather, 3630b57cec5SDimitry Andric // OuterLoop must be irreducible. These false backedges can come only from 3640b57cec5SDimitry Andric // secondary loop headers. 3650b57cec5SDimitry Andric assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) && 3660b57cec5SDimitry Andric "unhandled irreducible control flow"); 3670b57cec5SDimitry Andric } 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric LLVM_DEBUG(debugSuccessor(" local ")); 3700b57cec5SDimitry Andric Dist.addLocal(Resolved, Weight); 3710b57cec5SDimitry Andric return true; 3720b57cec5SDimitry Andric } 3730b57cec5SDimitry Andric 3740b57cec5SDimitry Andric bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( 3750b57cec5SDimitry Andric const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) { 3760b57cec5SDimitry Andric // Copy the exit map into Dist. 3770b57cec5SDimitry Andric for (const auto &I : Loop.Exits) 3780b57cec5SDimitry Andric if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, 3790b57cec5SDimitry Andric I.second.getMass())) 3800b57cec5SDimitry Andric // Irreducible backedge. 3810b57cec5SDimitry Andric return false; 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric return true; 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric /// Compute the loop scale for a loop. 3870b57cec5SDimitry Andric void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { 3880b57cec5SDimitry Andric // Compute loop scale. 3890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); 3900b57cec5SDimitry Andric 3910b57cec5SDimitry Andric // Infinite loops need special handling. If we give the back edge an infinite 3920b57cec5SDimitry Andric // mass, they may saturate all the other scales in the function down to 1, 3930b57cec5SDimitry Andric // making all the other region temperatures look exactly the same. Choose an 3940b57cec5SDimitry Andric // arbitrary scale to avoid these issues. 3950b57cec5SDimitry Andric // 3960b57cec5SDimitry Andric // FIXME: An alternate way would be to select a symbolic scale which is later 3970b57cec5SDimitry Andric // replaced to be the maximum of all computed scales plus 1. This would 3980b57cec5SDimitry Andric // appropriately describe the loop as having a large scale, without skewing 3990b57cec5SDimitry Andric // the final frequency computation. 4000b57cec5SDimitry Andric const Scaled64 InfiniteLoopScale(1, 12); 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric // LoopScale == 1 / ExitMass 4030b57cec5SDimitry Andric // ExitMass == HeadMass - BackedgeMass 4040b57cec5SDimitry Andric BlockMass TotalBackedgeMass; 4050b57cec5SDimitry Andric for (auto &Mass : Loop.BackedgeMass) 4060b57cec5SDimitry Andric TotalBackedgeMass += Mass; 4070b57cec5SDimitry Andric BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass; 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric // Block scale stores the inverse of the scale. If this is an infinite loop, 4100b57cec5SDimitry Andric // its exit mass will be zero. In this case, use an arbitrary scale for the 4110b57cec5SDimitry Andric // loop scale. 4120b57cec5SDimitry Andric Loop.Scale = 4130b57cec5SDimitry Andric ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse(); 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" 4160b57cec5SDimitry Andric << BlockMass::getFull() << " - " << TotalBackedgeMass 4170b57cec5SDimitry Andric << ")\n" 4180b57cec5SDimitry Andric << " - scale = " << Loop.Scale << "\n"); 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric /// Package up a loop. 4220b57cec5SDimitry Andric void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) { 4230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n"); 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric // Clear the subloop exits to prevent quadratic memory usage. 4260b57cec5SDimitry Andric for (const BlockNode &M : Loop.Nodes) { 4270b57cec5SDimitry Andric if (auto *Loop = Working[M.Index].getPackagedLoop()) 4280b57cec5SDimitry Andric Loop->Exits.clear(); 4290b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n"); 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric Loop.IsPackaged = true; 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric 4340b57cec5SDimitry Andric #ifndef NDEBUG 4350b57cec5SDimitry Andric static void debugAssign(const BlockFrequencyInfoImplBase &BFI, 4360b57cec5SDimitry Andric const DitheringDistributer &D, const BlockNode &T, 4370b57cec5SDimitry Andric const BlockMass &M, const char *Desc) { 4380b57cec5SDimitry Andric dbgs() << " => assign " << M << " (" << D.RemMass << ")"; 4390b57cec5SDimitry Andric if (Desc) 4400b57cec5SDimitry Andric dbgs() << " [" << Desc << "]"; 4410b57cec5SDimitry Andric if (T.isValid()) 4420b57cec5SDimitry Andric dbgs() << " to " << BFI.getBlockName(T); 4430b57cec5SDimitry Andric dbgs() << "\n"; 4440b57cec5SDimitry Andric } 4450b57cec5SDimitry Andric #endif 4460b57cec5SDimitry Andric 4470b57cec5SDimitry Andric void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, 4480b57cec5SDimitry Andric LoopData *OuterLoop, 4490b57cec5SDimitry Andric Distribution &Dist) { 4500b57cec5SDimitry Andric BlockMass Mass = Working[Source.Index].getMass(); 4510b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n"); 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric // Distribute mass to successors as laid out in Dist. 4540b57cec5SDimitry Andric DitheringDistributer D(Dist, Mass); 4550b57cec5SDimitry Andric 4560b57cec5SDimitry Andric for (const Weight &W : Dist.Weights) { 4570b57cec5SDimitry Andric // Check for a local edge (non-backedge and non-exit). 4580b57cec5SDimitry Andric BlockMass Taken = D.takeMass(W.Amount); 4590b57cec5SDimitry Andric if (W.Type == Weight::Local) { 4600b57cec5SDimitry Andric Working[W.TargetNode.Index].getMass() += Taken; 4610b57cec5SDimitry Andric LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); 4620b57cec5SDimitry Andric continue; 4630b57cec5SDimitry Andric } 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric // Backedges and exits only make sense if we're processing a loop. 4660b57cec5SDimitry Andric assert(OuterLoop && "backedge or exit outside of loop"); 4670b57cec5SDimitry Andric 4680b57cec5SDimitry Andric // Check for a backedge. 4690b57cec5SDimitry Andric if (W.Type == Weight::Backedge) { 4700b57cec5SDimitry Andric OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken; 4710b57cec5SDimitry Andric LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back")); 4720b57cec5SDimitry Andric continue; 4730b57cec5SDimitry Andric } 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric // This must be an exit. 4760b57cec5SDimitry Andric assert(W.Type == Weight::Exit); 4770b57cec5SDimitry Andric OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken)); 4780b57cec5SDimitry Andric LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit")); 4790b57cec5SDimitry Andric } 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, 4830b57cec5SDimitry Andric const Scaled64 &Min, const Scaled64 &Max) { 484*5f757f3fSDimitry Andric // Scale the Factor to a size that creates integers. If possible scale 485*5f757f3fSDimitry Andric // integers so that Max == UINT64_MAX so that they can be best differentiated. 486*5f757f3fSDimitry Andric // Is is possible that the range between min and max cannot be accurately 487*5f757f3fSDimitry Andric // represented in a 64bit integer without either loosing precision for small 488*5f757f3fSDimitry Andric // values (so small unequal numbers all map to 1) or saturaturing big numbers 489*5f757f3fSDimitry Andric // loosing precision for big numbers (so unequal big numbers may map to 490*5f757f3fSDimitry Andric // UINT64_MAX). We choose to loose precision for small numbers. 491*5f757f3fSDimitry Andric const unsigned MaxBits = sizeof(Scaled64::DigitsType) * CHAR_BIT; 492*5f757f3fSDimitry Andric // Users often add up multiple BlockFrequency values or multiply them with 493*5f757f3fSDimitry Andric // things like instruction costs. Leave some room to avoid saturating 494*5f757f3fSDimitry Andric // operations reaching UIN64_MAX too early. 495*5f757f3fSDimitry Andric const unsigned Slack = 10; 496*5f757f3fSDimitry Andric Scaled64 ScalingFactor = Scaled64(1, MaxBits - Slack) / Max; 4970b57cec5SDimitry Andric 4980b57cec5SDimitry Andric // Translate the floats to integers. 4990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max 5000b57cec5SDimitry Andric << ", factor = " << ScalingFactor << "\n"); 501*5f757f3fSDimitry Andric (void)Min; 5020b57cec5SDimitry Andric for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) { 5030b57cec5SDimitry Andric Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor; 5040b57cec5SDimitry Andric BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>()); 5050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = " 5060b57cec5SDimitry Andric << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled 5070b57cec5SDimitry Andric << ", int = " << BFI.Freqs[Index].Integer << "\n"); 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric } 5100b57cec5SDimitry Andric 5110b57cec5SDimitry Andric /// Unwrap a loop package. 5120b57cec5SDimitry Andric /// 5130b57cec5SDimitry Andric /// Visits all the members of a loop, adjusting their BlockData according to 5140b57cec5SDimitry Andric /// the loop's pseudo-node. 5150b57cec5SDimitry Andric static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) { 5160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop) 5170b57cec5SDimitry Andric << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale 5180b57cec5SDimitry Andric << "\n"); 5190b57cec5SDimitry Andric Loop.Scale *= Loop.Mass.toScaled(); 5200b57cec5SDimitry Andric Loop.IsPackaged = false; 5210b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n"); 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric // Propagate the head scale through the loop. Since members are visited in 5240b57cec5SDimitry Andric // RPO, the head scale will be updated by the loop scale first, and then the 5250b57cec5SDimitry Andric // final head scale will be used for updated the rest of the members. 5260b57cec5SDimitry Andric for (const BlockNode &N : Loop.Nodes) { 5270b57cec5SDimitry Andric const auto &Working = BFI.Working[N.Index]; 5280b57cec5SDimitry Andric Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale 5290b57cec5SDimitry Andric : BFI.Freqs[N.Index].Scaled; 5300b57cec5SDimitry Andric Scaled64 New = Loop.Scale * F; 5310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " 5320b57cec5SDimitry Andric << New << "\n"); 5330b57cec5SDimitry Andric F = New; 5340b57cec5SDimitry Andric } 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric void BlockFrequencyInfoImplBase::unwrapLoops() { 5380b57cec5SDimitry Andric // Set initial frequencies from loop-local masses. 5390b57cec5SDimitry Andric for (size_t Index = 0; Index < Working.size(); ++Index) 5400b57cec5SDimitry Andric Freqs[Index].Scaled = Working[Index].Mass.toScaled(); 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric for (LoopData &Loop : Loops) 5430b57cec5SDimitry Andric unwrapLoop(*this, Loop); 5440b57cec5SDimitry Andric } 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric void BlockFrequencyInfoImplBase::finalizeMetrics() { 5470b57cec5SDimitry Andric // Unwrap loop packages in reverse post-order, tracking min and max 5480b57cec5SDimitry Andric // frequencies. 5490b57cec5SDimitry Andric auto Min = Scaled64::getLargest(); 5500b57cec5SDimitry Andric auto Max = Scaled64::getZero(); 5510b57cec5SDimitry Andric for (size_t Index = 0; Index < Working.size(); ++Index) { 5520b57cec5SDimitry Andric // Update min/max scale. 5530b57cec5SDimitry Andric Min = std::min(Min, Freqs[Index].Scaled); 5540b57cec5SDimitry Andric Max = std::max(Max, Freqs[Index].Scaled); 5550b57cec5SDimitry Andric } 5560b57cec5SDimitry Andric 5570b57cec5SDimitry Andric // Convert to integers. 5580b57cec5SDimitry Andric convertFloatingToInteger(*this, Min, Max); 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric // Clean up data structures. 5610b57cec5SDimitry Andric cleanup(*this); 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric // Print out the final stats. 5640b57cec5SDimitry Andric LLVM_DEBUG(dump()); 5650b57cec5SDimitry Andric } 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric BlockFrequency 5680b57cec5SDimitry Andric BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const { 5695ffd83dbSDimitry Andric if (!Node.isValid()) { 5705ffd83dbSDimitry Andric #ifndef NDEBUG 5715ffd83dbSDimitry Andric if (CheckBFIUnknownBlockQueries) { 5725ffd83dbSDimitry Andric SmallString<256> Msg; 5735ffd83dbSDimitry Andric raw_svector_ostream OS(Msg); 5745ffd83dbSDimitry Andric OS << "*** Detected BFI query for unknown block " << getBlockName(Node); 5755ffd83dbSDimitry Andric report_fatal_error(OS.str()); 5765ffd83dbSDimitry Andric } 5775ffd83dbSDimitry Andric #endif 578*5f757f3fSDimitry Andric return BlockFrequency(0); 5795ffd83dbSDimitry Andric } 580*5f757f3fSDimitry Andric return BlockFrequency(Freqs[Node.Index].Integer); 5810b57cec5SDimitry Andric } 5820b57cec5SDimitry Andric 583bdd1243dSDimitry Andric std::optional<uint64_t> 5840b57cec5SDimitry Andric BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F, 5850b57cec5SDimitry Andric const BlockNode &Node, 5860b57cec5SDimitry Andric bool AllowSynthetic) const { 587*5f757f3fSDimitry Andric return getProfileCountFromFreq(F, getBlockFreq(Node), AllowSynthetic); 5880b57cec5SDimitry Andric } 5890b57cec5SDimitry Andric 590*5f757f3fSDimitry Andric std::optional<uint64_t> BlockFrequencyInfoImplBase::getProfileCountFromFreq( 591*5f757f3fSDimitry Andric const Function &F, BlockFrequency Freq, bool AllowSynthetic) const { 5920b57cec5SDimitry Andric auto EntryCount = F.getEntryCount(AllowSynthetic); 5930b57cec5SDimitry Andric if (!EntryCount) 594bdd1243dSDimitry Andric return std::nullopt; 5950b57cec5SDimitry Andric // Use 128 bit APInt to do the arithmetic to avoid overflow. 596349cc55cSDimitry Andric APInt BlockCount(128, EntryCount->getCount()); 597*5f757f3fSDimitry Andric APInt BlockFreq(128, Freq.getFrequency()); 598*5f757f3fSDimitry Andric APInt EntryFreq(128, getEntryFreq().getFrequency()); 5990b57cec5SDimitry Andric BlockCount *= BlockFreq; 6000b57cec5SDimitry Andric // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned 6010b57cec5SDimitry Andric // lshr by 1 gives EntryFreq/2. 6020b57cec5SDimitry Andric BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq); 6030b57cec5SDimitry Andric return BlockCount.getLimitedValue(); 6040b57cec5SDimitry Andric } 6050b57cec5SDimitry Andric 6060b57cec5SDimitry Andric bool 6070b57cec5SDimitry Andric BlockFrequencyInfoImplBase::isIrrLoopHeader(const BlockNode &Node) { 6080b57cec5SDimitry Andric if (!Node.isValid()) 6090b57cec5SDimitry Andric return false; 6100b57cec5SDimitry Andric return IsIrrLoopHeader.test(Node.Index); 6110b57cec5SDimitry Andric } 6120b57cec5SDimitry Andric 6130b57cec5SDimitry Andric Scaled64 6140b57cec5SDimitry Andric BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { 6150b57cec5SDimitry Andric if (!Node.isValid()) 6160b57cec5SDimitry Andric return Scaled64::getZero(); 6170b57cec5SDimitry Andric return Freqs[Node.Index].Scaled; 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric 6200b57cec5SDimitry Andric void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node, 621*5f757f3fSDimitry Andric BlockFrequency Freq) { 6220b57cec5SDimitry Andric assert(Node.isValid() && "Expected valid node"); 6230b57cec5SDimitry Andric assert(Node.Index < Freqs.size() && "Expected legal index"); 624*5f757f3fSDimitry Andric Freqs[Node.Index].Integer = Freq.getFrequency(); 6250b57cec5SDimitry Andric } 6260b57cec5SDimitry Andric 6270b57cec5SDimitry Andric std::string 6280b57cec5SDimitry Andric BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { 6290b57cec5SDimitry Andric return {}; 6300b57cec5SDimitry Andric } 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric std::string 6330b57cec5SDimitry Andric BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const { 6340b57cec5SDimitry Andric return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*"); 6350b57cec5SDimitry Andric } 6360b57cec5SDimitry Andric 6370b57cec5SDimitry Andric void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) { 6380b57cec5SDimitry Andric Start = OuterLoop.getHeader(); 6390b57cec5SDimitry Andric Nodes.reserve(OuterLoop.Nodes.size()); 6400b57cec5SDimitry Andric for (auto N : OuterLoop.Nodes) 6410b57cec5SDimitry Andric addNode(N); 6420b57cec5SDimitry Andric indexNodes(); 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric 6450b57cec5SDimitry Andric void IrreducibleGraph::addNodesInFunction() { 6460b57cec5SDimitry Andric Start = 0; 6470b57cec5SDimitry Andric for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index) 6480b57cec5SDimitry Andric if (!BFI.Working[Index].isPackaged()) 6490b57cec5SDimitry Andric addNode(Index); 6500b57cec5SDimitry Andric indexNodes(); 6510b57cec5SDimitry Andric } 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric void IrreducibleGraph::indexNodes() { 6540b57cec5SDimitry Andric for (auto &I : Nodes) 6550b57cec5SDimitry Andric Lookup[I.Node.Index] = &I; 6560b57cec5SDimitry Andric } 6570b57cec5SDimitry Andric 6580b57cec5SDimitry Andric void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ, 6590b57cec5SDimitry Andric const BFIBase::LoopData *OuterLoop) { 6600b57cec5SDimitry Andric if (OuterLoop && OuterLoop->isHeader(Succ)) 6610b57cec5SDimitry Andric return; 6620b57cec5SDimitry Andric auto L = Lookup.find(Succ.Index); 6630b57cec5SDimitry Andric if (L == Lookup.end()) 6640b57cec5SDimitry Andric return; 6650b57cec5SDimitry Andric IrrNode &SuccIrr = *L->second; 6660b57cec5SDimitry Andric Irr.Edges.push_back(&SuccIrr); 6670b57cec5SDimitry Andric SuccIrr.Edges.push_front(&Irr); 6680b57cec5SDimitry Andric ++SuccIrr.NumIn; 6690b57cec5SDimitry Andric } 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric namespace llvm { 6720b57cec5SDimitry Andric 6730b57cec5SDimitry Andric template <> struct GraphTraits<IrreducibleGraph> { 6740b57cec5SDimitry Andric using GraphT = bfi_detail::IrreducibleGraph; 6750b57cec5SDimitry Andric using NodeRef = const GraphT::IrrNode *; 6760b57cec5SDimitry Andric using ChildIteratorType = GraphT::IrrNode::iterator; 6770b57cec5SDimitry Andric 6780b57cec5SDimitry Andric static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; } 6790b57cec5SDimitry Andric static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } 6800b57cec5SDimitry Andric static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } 6810b57cec5SDimitry Andric }; 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric } // end namespace llvm 6840b57cec5SDimitry Andric 6850b57cec5SDimitry Andric /// Find extra irreducible headers. 6860b57cec5SDimitry Andric /// 6870b57cec5SDimitry Andric /// Find entry blocks and other blocks with backedges, which exist when \c G 6880b57cec5SDimitry Andric /// contains irreducible sub-SCCs. 6890b57cec5SDimitry Andric static void findIrreducibleHeaders( 6900b57cec5SDimitry Andric const BlockFrequencyInfoImplBase &BFI, 6910b57cec5SDimitry Andric const IrreducibleGraph &G, 6920b57cec5SDimitry Andric const std::vector<const IrreducibleGraph::IrrNode *> &SCC, 6930b57cec5SDimitry Andric LoopData::NodeList &Headers, LoopData::NodeList &Others) { 6940b57cec5SDimitry Andric // Map from nodes in the SCC to whether it's an entry block. 6950b57cec5SDimitry Andric SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC; 6960b57cec5SDimitry Andric 6970b57cec5SDimitry Andric // InSCC also acts the set of nodes in the graph. Seed it. 6980b57cec5SDimitry Andric for (const auto *I : SCC) 6990b57cec5SDimitry Andric InSCC[I] = false; 7000b57cec5SDimitry Andric 7010b57cec5SDimitry Andric for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) { 7020b57cec5SDimitry Andric auto &Irr = *I->first; 7030b57cec5SDimitry Andric for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { 7040b57cec5SDimitry Andric if (InSCC.count(P)) 7050b57cec5SDimitry Andric continue; 7060b57cec5SDimitry Andric 7070b57cec5SDimitry Andric // This is an entry block. 7080b57cec5SDimitry Andric I->second = true; 7090b57cec5SDimitry Andric Headers.push_back(Irr.Node); 7100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) 7110b57cec5SDimitry Andric << "\n"); 7120b57cec5SDimitry Andric break; 7130b57cec5SDimitry Andric } 7140b57cec5SDimitry Andric } 7150b57cec5SDimitry Andric assert(Headers.size() >= 2 && 7160b57cec5SDimitry Andric "Expected irreducible CFG; -loop-info is likely invalid"); 7170b57cec5SDimitry Andric if (Headers.size() == InSCC.size()) { 7180b57cec5SDimitry Andric // Every block is a header. 7190b57cec5SDimitry Andric llvm::sort(Headers); 7200b57cec5SDimitry Andric return; 7210b57cec5SDimitry Andric } 7220b57cec5SDimitry Andric 7230b57cec5SDimitry Andric // Look for extra headers from irreducible sub-SCCs. 7240b57cec5SDimitry Andric for (const auto &I : InSCC) { 7250b57cec5SDimitry Andric // Entry blocks are already headers. 7260b57cec5SDimitry Andric if (I.second) 7270b57cec5SDimitry Andric continue; 7280b57cec5SDimitry Andric 7290b57cec5SDimitry Andric auto &Irr = *I.first; 7300b57cec5SDimitry Andric for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { 7310b57cec5SDimitry Andric // Skip forward edges. 7320b57cec5SDimitry Andric if (P->Node < Irr.Node) 7330b57cec5SDimitry Andric continue; 7340b57cec5SDimitry Andric 7350b57cec5SDimitry Andric // Skip predecessors from entry blocks. These can have inverted 7360b57cec5SDimitry Andric // ordering. 7370b57cec5SDimitry Andric if (InSCC.lookup(P)) 7380b57cec5SDimitry Andric continue; 7390b57cec5SDimitry Andric 7400b57cec5SDimitry Andric // Store the extra header. 7410b57cec5SDimitry Andric Headers.push_back(Irr.Node); 7420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) 7430b57cec5SDimitry Andric << "\n"); 7440b57cec5SDimitry Andric break; 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric if (Headers.back() == Irr.Node) 7470b57cec5SDimitry Andric // Added this as a header. 7480b57cec5SDimitry Andric continue; 7490b57cec5SDimitry Andric 7500b57cec5SDimitry Andric // This is not a header. 7510b57cec5SDimitry Andric Others.push_back(Irr.Node); 7520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n"); 7530b57cec5SDimitry Andric } 7540b57cec5SDimitry Andric llvm::sort(Headers); 7550b57cec5SDimitry Andric llvm::sort(Others); 7560b57cec5SDimitry Andric } 7570b57cec5SDimitry Andric 7580b57cec5SDimitry Andric static void createIrreducibleLoop( 7590b57cec5SDimitry Andric BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, 7600b57cec5SDimitry Andric LoopData *OuterLoop, std::list<LoopData>::iterator Insert, 7610b57cec5SDimitry Andric const std::vector<const IrreducibleGraph::IrrNode *> &SCC) { 7620b57cec5SDimitry Andric // Translate the SCC into RPO. 7630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - found-scc\n"); 7640b57cec5SDimitry Andric 7650b57cec5SDimitry Andric LoopData::NodeList Headers; 7660b57cec5SDimitry Andric LoopData::NodeList Others; 7670b57cec5SDimitry Andric findIrreducibleHeaders(BFI, G, SCC, Headers, Others); 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(), 7700b57cec5SDimitry Andric Headers.end(), Others.begin(), Others.end()); 7710b57cec5SDimitry Andric 7720b57cec5SDimitry Andric // Update loop hierarchy. 7730b57cec5SDimitry Andric for (const auto &N : Loop->Nodes) 7740b57cec5SDimitry Andric if (BFI.Working[N.Index].isLoopHeader()) 7750b57cec5SDimitry Andric BFI.Working[N.Index].Loop->Parent = &*Loop; 7760b57cec5SDimitry Andric else 7770b57cec5SDimitry Andric BFI.Working[N.Index].Loop = &*Loop; 7780b57cec5SDimitry Andric } 7790b57cec5SDimitry Andric 7800b57cec5SDimitry Andric iterator_range<std::list<LoopData>::iterator> 7810b57cec5SDimitry Andric BlockFrequencyInfoImplBase::analyzeIrreducible( 7820b57cec5SDimitry Andric const IrreducibleGraph &G, LoopData *OuterLoop, 7830b57cec5SDimitry Andric std::list<LoopData>::iterator Insert) { 7840b57cec5SDimitry Andric assert((OuterLoop == nullptr) == (Insert == Loops.begin())); 7850b57cec5SDimitry Andric auto Prev = OuterLoop ? std::prev(Insert) : Loops.end(); 7860b57cec5SDimitry Andric 7870b57cec5SDimitry Andric for (auto I = scc_begin(G); !I.isAtEnd(); ++I) { 7880b57cec5SDimitry Andric if (I->size() < 2) 7890b57cec5SDimitry Andric continue; 7900b57cec5SDimitry Andric 7910b57cec5SDimitry Andric // Translate the SCC into RPO. 7920b57cec5SDimitry Andric createIrreducibleLoop(*this, G, OuterLoop, Insert, *I); 7930b57cec5SDimitry Andric } 7940b57cec5SDimitry Andric 7950b57cec5SDimitry Andric if (OuterLoop) 7960b57cec5SDimitry Andric return make_range(std::next(Prev), Insert); 7970b57cec5SDimitry Andric return make_range(Loops.begin(), Insert); 7980b57cec5SDimitry Andric } 7990b57cec5SDimitry Andric 8000b57cec5SDimitry Andric void 8010b57cec5SDimitry Andric BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) { 8020b57cec5SDimitry Andric OuterLoop.Exits.clear(); 8030b57cec5SDimitry Andric for (auto &Mass : OuterLoop.BackedgeMass) 8040b57cec5SDimitry Andric Mass = BlockMass::getEmpty(); 8050b57cec5SDimitry Andric auto O = OuterLoop.Nodes.begin() + 1; 8060b57cec5SDimitry Andric for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I) 8070b57cec5SDimitry Andric if (!Working[I->Index].isPackaged()) 8080b57cec5SDimitry Andric *O++ = *I; 8090b57cec5SDimitry Andric OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end()); 8100b57cec5SDimitry Andric } 8110b57cec5SDimitry Andric 8120b57cec5SDimitry Andric void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) { 8130b57cec5SDimitry Andric assert(Loop.isIrreducible() && "this only makes sense on irreducible loops"); 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric // Since the loop has more than one header block, the mass flowing back into 8160b57cec5SDimitry Andric // each header will be different. Adjust the mass in each header loop to 8170b57cec5SDimitry Andric // reflect the masses flowing through back edges. 8180b57cec5SDimitry Andric // 8190b57cec5SDimitry Andric // To do this, we distribute the initial mass using the backedge masses 8200b57cec5SDimitry Andric // as weights for the distribution. 8210b57cec5SDimitry Andric BlockMass LoopMass = BlockMass::getFull(); 8220b57cec5SDimitry Andric Distribution Dist; 8230b57cec5SDimitry Andric 8240b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n"); 8250b57cec5SDimitry Andric for (uint32_t H = 0; H < Loop.NumHeaders; ++H) { 8260b57cec5SDimitry Andric auto &HeaderNode = Loop.Nodes[H]; 8270b57cec5SDimitry Andric auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)]; 8280b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - Add back edge mass for node " 8290b57cec5SDimitry Andric << getBlockName(HeaderNode) << ": " << BackedgeMass 8300b57cec5SDimitry Andric << "\n"); 8310b57cec5SDimitry Andric if (BackedgeMass.getMass() > 0) 8320b57cec5SDimitry Andric Dist.addLocal(HeaderNode, BackedgeMass.getMass()); 8330b57cec5SDimitry Andric else 8340b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n"); 8350b57cec5SDimitry Andric } 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric DitheringDistributer D(Dist, LoopMass); 8380b57cec5SDimitry Andric 8390b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass 8400b57cec5SDimitry Andric << " to headers using above weights\n"); 8410b57cec5SDimitry Andric for (const Weight &W : Dist.Weights) { 8420b57cec5SDimitry Andric BlockMass Taken = D.takeMass(W.Amount); 8430b57cec5SDimitry Andric assert(W.Type == Weight::Local && "all weights should be local"); 8440b57cec5SDimitry Andric Working[W.TargetNode.Index].getMass() = Taken; 8450b57cec5SDimitry Andric LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); 8460b57cec5SDimitry Andric } 8470b57cec5SDimitry Andric } 8480b57cec5SDimitry Andric 8490b57cec5SDimitry Andric void BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass(Distribution &Dist) { 8500b57cec5SDimitry Andric BlockMass LoopMass = BlockMass::getFull(); 8510b57cec5SDimitry Andric DitheringDistributer D(Dist, LoopMass); 8520b57cec5SDimitry Andric for (const Weight &W : Dist.Weights) { 8530b57cec5SDimitry Andric BlockMass Taken = D.takeMass(W.Amount); 8540b57cec5SDimitry Andric assert(W.Type == Weight::Local && "all weights should be local"); 8550b57cec5SDimitry Andric Working[W.TargetNode.Index].getMass() = Taken; 8560b57cec5SDimitry Andric LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); 8570b57cec5SDimitry Andric } 8580b57cec5SDimitry Andric } 859