12f09f445SMaksim Panchenko //===- bolt/Passes/PettisAndHansen.cpp ------------------------------------===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 92f09f445SMaksim Panchenko // The file implements Pettis and Hansen code-layout algorithm. 102f09f445SMaksim Panchenko // 11a34c753fSRafael Auler //===----------------------------------------------------------------------===// 12a34c753fSRafael Auler 13a34c753fSRafael Auler #include "bolt/Passes/HFSort.h" 14a34c753fSRafael Auler #include "llvm/Support/Debug.h" 15a34c753fSRafael Auler #include "llvm/Support/Format.h" 16a34c753fSRafael Auler #include "llvm/Support/raw_ostream.h" 17a34c753fSRafael Auler #include <set> 18a34c753fSRafael Auler #include <unordered_map> 19a34c753fSRafael Auler 20a34c753fSRafael Auler #define DEBUG_TYPE "hfsort" 21a34c753fSRafael Auler 22a34c753fSRafael Auler namespace llvm { 23a34c753fSRafael Auler namespace bolt { 24a34c753fSRafael Auler 25a34c753fSRafael Auler using NodeId = CallGraph::NodeId; 26a34c753fSRafael Auler using Arc = CallGraph::Arc; 27a34c753fSRafael Auler using Node = CallGraph::Node; 28a34c753fSRafael Auler 29a34c753fSRafael Auler namespace { 30a34c753fSRafael Auler class ClusterArc { 31a34c753fSRafael Auler public: 32a34c753fSRafael Auler ClusterArc(Cluster *Ca, Cluster *Cb, double W = 0) 3340c2e0faSMaksim Panchenko : C1(std::min(Ca, Cb)), C2(std::max(Ca, Cb)), Weight(W) {} 34a34c753fSRafael Auler 35a34c753fSRafael Auler friend bool operator==(const ClusterArc &Lhs, const ClusterArc &Rhs) { 36a34c753fSRafael Auler return Lhs.C1 == Rhs.C1 && Lhs.C2 == Rhs.C2; 37a34c753fSRafael Auler } 38a34c753fSRafael Auler 39a34c753fSRafael Auler Cluster *const C1; 40a34c753fSRafael Auler Cluster *const C2; 41a34c753fSRafael Auler mutable double Weight; 42a34c753fSRafael Auler }; 43a34c753fSRafael Auler 44a34c753fSRafael Auler class ClusterArcHash { 45a34c753fSRafael Auler public: 46a34c753fSRafael Auler int64_t operator()(const ClusterArc &Arc) const { 47a34c753fSRafael Auler std::hash<int64_t> Hasher; 48a34c753fSRafael Auler return hashCombine(Hasher(int64_t(Arc.C1)), int64_t(Arc.C2)); 49a34c753fSRafael Auler } 50a34c753fSRafael Auler }; 51a34c753fSRafael Auler 52a34c753fSRafael Auler using ClusterArcSet = std::unordered_set<ClusterArc, ClusterArcHash>; 53a34c753fSRafael Auler 54a34c753fSRafael Auler void orderFuncs(const CallGraph &Cg, Cluster *C1, Cluster *C2) { 55a34c753fSRafael Auler NodeId C1head = C1->targets().front(); 56a34c753fSRafael Auler NodeId C1tail = C1->targets().back(); 57a34c753fSRafael Auler NodeId C2head = C2->targets().front(); 58a34c753fSRafael Auler NodeId C2tail = C2->targets().back(); 59a34c753fSRafael Auler 60a34c753fSRafael Auler double C1headC2head = 0; 61a34c753fSRafael Auler double C1headC2tail = 0; 62a34c753fSRafael Auler double C1tailC2head = 0; 63a34c753fSRafael Auler double C1tailC2tail = 0; 64a34c753fSRafael Auler 65a34c753fSRafael Auler for (const Arc &Arc : Cg.arcs()) { 66a34c753fSRafael Auler if ((Arc.src() == C1head && Arc.dst() == C2head) || 67*f92ab6afSAmir Ayupov (Arc.dst() == C1head && Arc.src() == C2head)) 68a34c753fSRafael Auler C1headC2head += Arc.weight(); 69*f92ab6afSAmir Ayupov else if ((Arc.src() == C1head && Arc.dst() == C2tail) || 70*f92ab6afSAmir Ayupov (Arc.dst() == C1head && Arc.src() == C2tail)) 71a34c753fSRafael Auler C1headC2tail += Arc.weight(); 72*f92ab6afSAmir Ayupov else if ((Arc.src() == C1tail && Arc.dst() == C2head) || 73*f92ab6afSAmir Ayupov (Arc.dst() == C1tail && Arc.src() == C2head)) 74a34c753fSRafael Auler C1tailC2head += Arc.weight(); 75*f92ab6afSAmir Ayupov else if ((Arc.src() == C1tail && Arc.dst() == C2tail) || 76*f92ab6afSAmir Ayupov (Arc.dst() == C1tail && Arc.src() == C2tail)) 77a34c753fSRafael Auler C1tailC2tail += Arc.weight(); 78a34c753fSRafael Auler } 79a34c753fSRafael Auler 80a34c753fSRafael Auler const double Max = std::max(std::max(C1headC2head, C1headC2tail), 81a34c753fSRafael Auler std::max(C1tailC2head, C1tailC2tail)); 82a34c753fSRafael Auler 83a34c753fSRafael Auler if (C1headC2head == Max) { 84a34c753fSRafael Auler // flip C1 85a34c753fSRafael Auler C1->reverseTargets(); 86a34c753fSRafael Auler } else if (C1headC2tail == Max) { 87a34c753fSRafael Auler // flip C1 C2 88a34c753fSRafael Auler C1->reverseTargets(); 89a34c753fSRafael Auler C2->reverseTargets(); 90a34c753fSRafael Auler } else if (C1tailC2tail == Max) { 91a34c753fSRafael Auler // flip C2 92a34c753fSRafael Auler C2->reverseTargets(); 93a34c753fSRafael Auler } 94a34c753fSRafael Auler } 9540c2e0faSMaksim Panchenko } // namespace 96a34c753fSRafael Auler 97a34c753fSRafael Auler std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) { 98a34c753fSRafael Auler // indexed by NodeId, keeps its current cluster 99a34c753fSRafael Auler std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr); 100a34c753fSRafael Auler std::vector<Cluster> Clusters; 101a34c753fSRafael Auler std::vector<NodeId> Funcs; 102a34c753fSRafael Auler 103a34c753fSRafael Auler Clusters.reserve(Cg.numNodes()); 104a34c753fSRafael Auler 105a34c753fSRafael Auler for (NodeId F = 0; F < Cg.numNodes(); F++) { 10640c2e0faSMaksim Panchenko if (Cg.samples(F) == 0) 10740c2e0faSMaksim Panchenko continue; 108a34c753fSRafael Auler Clusters.emplace_back(F, Cg.getNode(F)); 109a34c753fSRafael Auler FuncCluster[F] = &Clusters.back(); 110a34c753fSRafael Auler Funcs.push_back(F); 111a34c753fSRafael Auler } 112a34c753fSRafael Auler 113a34c753fSRafael Auler ClusterArcSet Carcs; 114a34c753fSRafael Auler 115a34c753fSRafael Auler auto insertOrInc = [&](Cluster *C1, Cluster *C2, double Weight) { 116a34c753fSRafael Auler auto Res = Carcs.emplace(C1, C2, Weight); 117*f92ab6afSAmir Ayupov if (!Res.second) 118a34c753fSRafael Auler Res.first->Weight += Weight; 119a34c753fSRafael Auler }; 120a34c753fSRafael Auler 121a34c753fSRafael Auler // Create a std::vector of cluster arcs 122a34c753fSRafael Auler 123a34c753fSRafael Auler for (const Arc &Arc : Cg.arcs()) { 12440c2e0faSMaksim Panchenko if (Arc.weight() == 0) 12540c2e0faSMaksim Panchenko continue; 126a34c753fSRafael Auler 127a34c753fSRafael Auler Cluster *const S = FuncCluster[Arc.src()]; 128a34c753fSRafael Auler Cluster *const D = FuncCluster[Arc.dst()]; 129a34c753fSRafael Auler 130a34c753fSRafael Auler // ignore if s or d is nullptr 131a34c753fSRafael Auler 13240c2e0faSMaksim Panchenko if (S == nullptr || D == nullptr) 13340c2e0faSMaksim Panchenko continue; 134a34c753fSRafael Auler 135a34c753fSRafael Auler // ignore self-edges 136a34c753fSRafael Auler 13740c2e0faSMaksim Panchenko if (S == D) 13840c2e0faSMaksim Panchenko continue; 139a34c753fSRafael Auler 140a34c753fSRafael Auler insertOrInc(S, D, Arc.weight()); 141a34c753fSRafael Auler } 142a34c753fSRafael Auler 143a34c753fSRafael Auler // Find an arc with max weight and merge its nodes 144a34c753fSRafael Auler 145a34c753fSRafael Auler while (!Carcs.empty()) { 14640c2e0faSMaksim Panchenko auto Maxpos = 14740c2e0faSMaksim Panchenko std::max_element(Carcs.begin(), Carcs.end(), 148a34c753fSRafael Auler [&](const ClusterArc &Carc1, const ClusterArc &Carc2) { 149a34c753fSRafael Auler return Carc1.Weight < Carc2.Weight; 15040c2e0faSMaksim Panchenko }); 151a34c753fSRafael Auler 152a34c753fSRafael Auler ClusterArc Max = *Maxpos; 153a34c753fSRafael Auler Carcs.erase(Maxpos); 154a34c753fSRafael Auler 155a34c753fSRafael Auler Cluster *const C1 = Max.C1; 156a34c753fSRafael Auler Cluster *const C2 = Max.C2; 157a34c753fSRafael Auler 15840c2e0faSMaksim Panchenko if (C1->size() + C2->size() > MaxClusterSize) 15940c2e0faSMaksim Panchenko continue; 160a34c753fSRafael Auler 16140c2e0faSMaksim Panchenko if (C1->frozen() || C2->frozen()) 16240c2e0faSMaksim Panchenko continue; 163a34c753fSRafael Auler 164a34c753fSRafael Auler // order functions and merge cluster 165a34c753fSRafael Auler 166a34c753fSRafael Auler orderFuncs(Cg, C1, C2); 167a34c753fSRafael Auler 168a34c753fSRafael Auler LLVM_DEBUG(dbgs() << format("merging %s -> %s: %.1f\n", 169a34c753fSRafael Auler C2->toString().c_str(), C1->toString().c_str(), 170a34c753fSRafael Auler Max.Weight)); 171a34c753fSRafael Auler 172a34c753fSRafael Auler // update carcs: merge C1arcs to C2arcs 173a34c753fSRafael Auler 174a34c753fSRafael Auler std::unordered_map<ClusterArc, Cluster *, ClusterArcHash> C2arcs; 175a34c753fSRafael Auler for (const ClusterArc &Carc : Carcs) { 17640c2e0faSMaksim Panchenko if (Carc.C1 == C2) 17740c2e0faSMaksim Panchenko C2arcs.emplace(Carc, Carc.C2); 17840c2e0faSMaksim Panchenko if (Carc.C2 == C2) 17940c2e0faSMaksim Panchenko C2arcs.emplace(Carc, Carc.C1); 180a34c753fSRafael Auler } 181a34c753fSRafael Auler 182a34c753fSRafael Auler for (auto It : C2arcs) { 183a34c753fSRafael Auler Cluster *const C = It.second; 184a34c753fSRafael Auler ClusterArc const C2arc = It.first; 185a34c753fSRafael Auler 186a34c753fSRafael Auler insertOrInc(C, C1, C2arc.Weight); 187a34c753fSRafael Auler Carcs.erase(C2arc); 188a34c753fSRafael Auler } 189a34c753fSRafael Auler 190a34c753fSRafael Auler // update FuncCluster 191a34c753fSRafael Auler 192*f92ab6afSAmir Ayupov for (NodeId F : C2->targets()) 193a34c753fSRafael Auler FuncCluster[F] = C1; 194*f92ab6afSAmir Ayupov 195a34c753fSRafael Auler C1->merge(*C2, Max.Weight); 196a34c753fSRafael Auler C2->clear(); 197a34c753fSRafael Auler } 198a34c753fSRafael Auler 199a34c753fSRafael Auler // Return the set of Clusters that are left, which are the ones that 200a34c753fSRafael Auler // didn't get merged. 201a34c753fSRafael Auler 202a34c753fSRafael Auler std::set<Cluster *> LiveClusters; 203a34c753fSRafael Auler std::vector<Cluster> OutClusters; 204a34c753fSRafael Auler 205*f92ab6afSAmir Ayupov for (NodeId Fid : Funcs) 206a34c753fSRafael Auler LiveClusters.insert(FuncCluster[Fid]); 207*f92ab6afSAmir Ayupov for (Cluster *C : LiveClusters) 208a34c753fSRafael Auler OutClusters.push_back(std::move(*C)); 209a34c753fSRafael Auler 21040c2e0faSMaksim Panchenko std::sort(OutClusters.begin(), OutClusters.end(), compareClustersDensity); 211a34c753fSRafael Auler 212a34c753fSRafael Auler return OutClusters; 213a34c753fSRafael Auler } 214a34c753fSRafael Auler 21540c2e0faSMaksim Panchenko } // namespace bolt 21640c2e0faSMaksim Panchenko } // namespace llvm 217