106c3fb27SDimitry Andric //===- BalancedPartitioning.cpp -------------------------------------------===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric // 906c3fb27SDimitry Andric // This file implements BalancedPartitioning, a recursive balanced graph 1006c3fb27SDimitry Andric // partitioning algorithm. 1106c3fb27SDimitry Andric // 1206c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 1306c3fb27SDimitry Andric 1406c3fb27SDimitry Andric #include "llvm/Support/BalancedPartitioning.h" 1506c3fb27SDimitry Andric #include "llvm/Support/Debug.h" 1606c3fb27SDimitry Andric #include "llvm/Support/Format.h" 1706c3fb27SDimitry Andric #include "llvm/Support/FormatVariadic.h" 1806c3fb27SDimitry Andric #include "llvm/Support/ThreadPool.h" 1906c3fb27SDimitry Andric 2006c3fb27SDimitry Andric using namespace llvm; 2106c3fb27SDimitry Andric #define DEBUG_TYPE "balanced-partitioning" 2206c3fb27SDimitry Andric 2306c3fb27SDimitry Andric void BPFunctionNode::dump(raw_ostream &OS) const { 2406c3fb27SDimitry Andric OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id, 2506c3fb27SDimitry Andric make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket); 2606c3fb27SDimitry Andric } 2706c3fb27SDimitry Andric 2806c3fb27SDimitry Andric template <typename Func> 2906c3fb27SDimitry Andric void BalancedPartitioning::BPThreadPool::async(Func &&F) { 3006c3fb27SDimitry Andric #if LLVM_ENABLE_THREADS 3106c3fb27SDimitry Andric // This new thread could spawn more threads, so mark it as active 3206c3fb27SDimitry Andric ++NumActiveThreads; 3306c3fb27SDimitry Andric TheThreadPool.async([=]() { 3406c3fb27SDimitry Andric // Run the task 3506c3fb27SDimitry Andric F(); 3606c3fb27SDimitry Andric 3706c3fb27SDimitry Andric // This thread will no longer spawn new threads, so mark it as inactive 3806c3fb27SDimitry Andric if (--NumActiveThreads == 0) { 3906c3fb27SDimitry Andric // There are no more active threads, so mark as finished and notify 4006c3fb27SDimitry Andric { 4106c3fb27SDimitry Andric std::unique_lock<std::mutex> lock(mtx); 4206c3fb27SDimitry Andric assert(!IsFinishedSpawning); 4306c3fb27SDimitry Andric IsFinishedSpawning = true; 4406c3fb27SDimitry Andric } 4506c3fb27SDimitry Andric cv.notify_one(); 4606c3fb27SDimitry Andric } 4706c3fb27SDimitry Andric }); 4806c3fb27SDimitry Andric #else 4906c3fb27SDimitry Andric llvm_unreachable("threads are disabled"); 5006c3fb27SDimitry Andric #endif 5106c3fb27SDimitry Andric } 5206c3fb27SDimitry Andric 5306c3fb27SDimitry Andric void BalancedPartitioning::BPThreadPool::wait() { 5406c3fb27SDimitry Andric #if LLVM_ENABLE_THREADS 5506c3fb27SDimitry Andric // TODO: We could remove the mutex and condition variable and use 5606c3fb27SDimitry Andric // std::atomic::wait() instead, but that isn't available until C++20 5706c3fb27SDimitry Andric { 5806c3fb27SDimitry Andric std::unique_lock<std::mutex> lock(mtx); 5906c3fb27SDimitry Andric cv.wait(lock, [&]() { return IsFinishedSpawning; }); 6006c3fb27SDimitry Andric assert(IsFinishedSpawning && NumActiveThreads == 0); 6106c3fb27SDimitry Andric } 6206c3fb27SDimitry Andric // Now we can call ThreadPool::wait() since all tasks have been submitted 6306c3fb27SDimitry Andric TheThreadPool.wait(); 6406c3fb27SDimitry Andric #else 6506c3fb27SDimitry Andric llvm_unreachable("threads are disabled"); 6606c3fb27SDimitry Andric #endif 6706c3fb27SDimitry Andric } 6806c3fb27SDimitry Andric 6906c3fb27SDimitry Andric BalancedPartitioning::BalancedPartitioning( 7006c3fb27SDimitry Andric const BalancedPartitioningConfig &Config) 7106c3fb27SDimitry Andric : Config(Config) { 7206c3fb27SDimitry Andric // Pre-computing log2 values 7306c3fb27SDimitry Andric Log2Cache[0] = 0.0; 7406c3fb27SDimitry Andric for (unsigned I = 1; I < LOG_CACHE_SIZE; I++) 7506c3fb27SDimitry Andric Log2Cache[I] = std::log2(I); 7606c3fb27SDimitry Andric } 7706c3fb27SDimitry Andric 7806c3fb27SDimitry Andric void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const { 7906c3fb27SDimitry Andric LLVM_DEBUG( 8006c3fb27SDimitry Andric dbgs() << format( 8106c3fb27SDimitry Andric "Partitioning %d nodes using depth %d and %d iterations per split\n", 8206c3fb27SDimitry Andric Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit)); 8306c3fb27SDimitry Andric std::optional<BPThreadPool> TP; 8406c3fb27SDimitry Andric #if LLVM_ENABLE_THREADS 85*0fca6ea1SDimitry Andric DefaultThreadPool TheThreadPool; 8606c3fb27SDimitry Andric if (Config.TaskSplitDepth > 1) 8706c3fb27SDimitry Andric TP.emplace(TheThreadPool); 8806c3fb27SDimitry Andric #endif 8906c3fb27SDimitry Andric 9006c3fb27SDimitry Andric // Record the input order 9106c3fb27SDimitry Andric for (unsigned I = 0; I < Nodes.size(); I++) 9206c3fb27SDimitry Andric Nodes[I].InputOrderIndex = I; 9306c3fb27SDimitry Andric 9406c3fb27SDimitry Andric auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end()); 9506c3fb27SDimitry Andric auto BisectTask = [=, &TP]() { 9606c3fb27SDimitry Andric bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP); 9706c3fb27SDimitry Andric }; 9806c3fb27SDimitry Andric if (TP) { 9906c3fb27SDimitry Andric TP->async(std::move(BisectTask)); 10006c3fb27SDimitry Andric TP->wait(); 10106c3fb27SDimitry Andric } else { 10206c3fb27SDimitry Andric BisectTask(); 10306c3fb27SDimitry Andric } 10406c3fb27SDimitry Andric 10506c3fb27SDimitry Andric llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) { 10606c3fb27SDimitry Andric return L.Bucket < R.Bucket; 10706c3fb27SDimitry Andric }); 10806c3fb27SDimitry Andric 10906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n"); 11006c3fb27SDimitry Andric } 11106c3fb27SDimitry Andric 11206c3fb27SDimitry Andric void BalancedPartitioning::bisect(const FunctionNodeRange Nodes, 11306c3fb27SDimitry Andric unsigned RecDepth, unsigned RootBucket, 11406c3fb27SDimitry Andric unsigned Offset, 11506c3fb27SDimitry Andric std::optional<BPThreadPool> &TP) const { 11606c3fb27SDimitry Andric unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); 11706c3fb27SDimitry Andric if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) { 11806c3fb27SDimitry Andric // We've reach the lowest level of the recursion tree. Fall back to the 11906c3fb27SDimitry Andric // original order and assign to buckets. 1207a6dacacSDimitry Andric llvm::sort(Nodes, [](const auto &L, const auto &R) { 12106c3fb27SDimitry Andric return L.InputOrderIndex < R.InputOrderIndex; 12206c3fb27SDimitry Andric }); 12306c3fb27SDimitry Andric for (auto &N : Nodes) 12406c3fb27SDimitry Andric N.Bucket = Offset++; 12506c3fb27SDimitry Andric return; 12606c3fb27SDimitry Andric } 12706c3fb27SDimitry Andric 12806c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n", 12906c3fb27SDimitry Andric NumNodes, RootBucket)); 13006c3fb27SDimitry Andric 13106c3fb27SDimitry Andric std::mt19937 RNG(RootBucket); 13206c3fb27SDimitry Andric 13306c3fb27SDimitry Andric unsigned LeftBucket = 2 * RootBucket; 13406c3fb27SDimitry Andric unsigned RightBucket = 2 * RootBucket + 1; 13506c3fb27SDimitry Andric 13606c3fb27SDimitry Andric // Split into two and assign to the left and right buckets 13706c3fb27SDimitry Andric split(Nodes, LeftBucket); 13806c3fb27SDimitry Andric 139*0fca6ea1SDimitry Andric runIterations(Nodes, LeftBucket, RightBucket, RNG); 14006c3fb27SDimitry Andric 14106c3fb27SDimitry Andric // Split nodes wrt the resulting buckets 14206c3fb27SDimitry Andric auto NodesMid = 14306c3fb27SDimitry Andric llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; }); 14406c3fb27SDimitry Andric unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid); 14506c3fb27SDimitry Andric 14606c3fb27SDimitry Andric auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid); 14706c3fb27SDimitry Andric auto RightNodes = llvm::make_range(NodesMid, Nodes.end()); 14806c3fb27SDimitry Andric 14906c3fb27SDimitry Andric auto LeftRecTask = [=, &TP]() { 15006c3fb27SDimitry Andric bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP); 15106c3fb27SDimitry Andric }; 15206c3fb27SDimitry Andric auto RightRecTask = [=, &TP]() { 15306c3fb27SDimitry Andric bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP); 15406c3fb27SDimitry Andric }; 15506c3fb27SDimitry Andric 15606c3fb27SDimitry Andric if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) { 15706c3fb27SDimitry Andric TP->async(std::move(LeftRecTask)); 15806c3fb27SDimitry Andric TP->async(std::move(RightRecTask)); 15906c3fb27SDimitry Andric } else { 16006c3fb27SDimitry Andric LeftRecTask(); 16106c3fb27SDimitry Andric RightRecTask(); 16206c3fb27SDimitry Andric } 16306c3fb27SDimitry Andric } 16406c3fb27SDimitry Andric 16506c3fb27SDimitry Andric void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes, 166*0fca6ea1SDimitry Andric unsigned LeftBucket, 16706c3fb27SDimitry Andric unsigned RightBucket, 16806c3fb27SDimitry Andric std::mt19937 &RNG) const { 16906c3fb27SDimitry Andric unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); 1707a6dacacSDimitry Andric DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex; 17106c3fb27SDimitry Andric for (auto &N : Nodes) 17206c3fb27SDimitry Andric for (auto &UN : N.UtilityNodes) 1737a6dacacSDimitry Andric ++UtilityNodeIndex[UN]; 17406c3fb27SDimitry Andric // Remove utility nodes if they have just one edge or are connected to all 17506c3fb27SDimitry Andric // functions 17606c3fb27SDimitry Andric for (auto &N : Nodes) 17706c3fb27SDimitry Andric llvm::erase_if(N.UtilityNodes, [&](auto &UN) { 1787a6dacacSDimitry Andric return UtilityNodeIndex[UN] == 1 || UtilityNodeIndex[UN] == NumNodes; 17906c3fb27SDimitry Andric }); 18006c3fb27SDimitry Andric 18106c3fb27SDimitry Andric // Renumber utility nodes so they can be used to index into Signatures 1827a6dacacSDimitry Andric UtilityNodeIndex.clear(); 18306c3fb27SDimitry Andric for (auto &N : Nodes) 18406c3fb27SDimitry Andric for (auto &UN : N.UtilityNodes) 1857a6dacacSDimitry Andric UN = UtilityNodeIndex.insert({UN, UtilityNodeIndex.size()}).first->second; 18606c3fb27SDimitry Andric 18706c3fb27SDimitry Andric // Initialize signatures 18806c3fb27SDimitry Andric SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size()); 18906c3fb27SDimitry Andric for (auto &N : Nodes) { 19006c3fb27SDimitry Andric for (auto &UN : N.UtilityNodes) { 19106c3fb27SDimitry Andric assert(UN < Signatures.size()); 19206c3fb27SDimitry Andric if (N.Bucket == LeftBucket) { 19306c3fb27SDimitry Andric Signatures[UN].LeftCount++; 19406c3fb27SDimitry Andric } else { 19506c3fb27SDimitry Andric Signatures[UN].RightCount++; 19606c3fb27SDimitry Andric } 19706c3fb27SDimitry Andric } 19806c3fb27SDimitry Andric } 19906c3fb27SDimitry Andric 20006c3fb27SDimitry Andric for (unsigned I = 0; I < Config.IterationsPerSplit; I++) { 20106c3fb27SDimitry Andric unsigned NumMovedNodes = 20206c3fb27SDimitry Andric runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG); 20306c3fb27SDimitry Andric if (NumMovedNodes == 0) 20406c3fb27SDimitry Andric break; 20506c3fb27SDimitry Andric } 20606c3fb27SDimitry Andric } 20706c3fb27SDimitry Andric 20806c3fb27SDimitry Andric unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes, 20906c3fb27SDimitry Andric unsigned LeftBucket, 21006c3fb27SDimitry Andric unsigned RightBucket, 21106c3fb27SDimitry Andric SignaturesT &Signatures, 21206c3fb27SDimitry Andric std::mt19937 &RNG) const { 21306c3fb27SDimitry Andric // Init signature cost caches 21406c3fb27SDimitry Andric for (auto &Signature : Signatures) { 21506c3fb27SDimitry Andric if (Signature.CachedGainIsValid) 21606c3fb27SDimitry Andric continue; 21706c3fb27SDimitry Andric unsigned L = Signature.LeftCount; 21806c3fb27SDimitry Andric unsigned R = Signature.RightCount; 21906c3fb27SDimitry Andric assert((L > 0 || R > 0) && "incorrect signature"); 22006c3fb27SDimitry Andric float Cost = logCost(L, R); 22106c3fb27SDimitry Andric Signature.CachedGainLR = 0.f; 22206c3fb27SDimitry Andric Signature.CachedGainRL = 0.f; 22306c3fb27SDimitry Andric if (L > 0) 22406c3fb27SDimitry Andric Signature.CachedGainLR = Cost - logCost(L - 1, R + 1); 22506c3fb27SDimitry Andric if (R > 0) 22606c3fb27SDimitry Andric Signature.CachedGainRL = Cost - logCost(L + 1, R - 1); 22706c3fb27SDimitry Andric Signature.CachedGainIsValid = true; 22806c3fb27SDimitry Andric } 22906c3fb27SDimitry Andric 23006c3fb27SDimitry Andric // Compute move gains 23106c3fb27SDimitry Andric typedef std::pair<float, BPFunctionNode *> GainPair; 23206c3fb27SDimitry Andric std::vector<GainPair> Gains; 23306c3fb27SDimitry Andric for (auto &N : Nodes) { 23406c3fb27SDimitry Andric bool FromLeftToRight = (N.Bucket == LeftBucket); 23506c3fb27SDimitry Andric float Gain = moveGain(N, FromLeftToRight, Signatures); 23606c3fb27SDimitry Andric Gains.push_back(std::make_pair(Gain, &N)); 23706c3fb27SDimitry Andric } 23806c3fb27SDimitry Andric 23906c3fb27SDimitry Andric // Collect left and right gains 24006c3fb27SDimitry Andric auto LeftEnd = llvm::partition( 24106c3fb27SDimitry Andric Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; }); 24206c3fb27SDimitry Andric auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd); 24306c3fb27SDimitry Andric auto RightRange = llvm::make_range(LeftEnd, Gains.end()); 24406c3fb27SDimitry Andric 24506c3fb27SDimitry Andric // Sort gains in descending order 24606c3fb27SDimitry Andric auto LargerGain = [](const auto &L, const auto &R) { 24706c3fb27SDimitry Andric return L.first > R.first; 24806c3fb27SDimitry Andric }; 24906c3fb27SDimitry Andric llvm::stable_sort(LeftRange, LargerGain); 25006c3fb27SDimitry Andric llvm::stable_sort(RightRange, LargerGain); 25106c3fb27SDimitry Andric 25206c3fb27SDimitry Andric unsigned NumMovedDataVertices = 0; 25306c3fb27SDimitry Andric for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) { 25406c3fb27SDimitry Andric auto &[LeftGain, LeftNode] = LeftPair; 25506c3fb27SDimitry Andric auto &[RightGain, RightNode] = RightPair; 25606c3fb27SDimitry Andric // Stop when the gain is no longer beneficial 25706c3fb27SDimitry Andric if (LeftGain + RightGain <= 0.f) 25806c3fb27SDimitry Andric break; 25906c3fb27SDimitry Andric // Try to exchange the nodes between buckets 26006c3fb27SDimitry Andric if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG)) 26106c3fb27SDimitry Andric ++NumMovedDataVertices; 26206c3fb27SDimitry Andric if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG)) 26306c3fb27SDimitry Andric ++NumMovedDataVertices; 26406c3fb27SDimitry Andric } 26506c3fb27SDimitry Andric return NumMovedDataVertices; 26606c3fb27SDimitry Andric } 26706c3fb27SDimitry Andric 26806c3fb27SDimitry Andric bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N, 26906c3fb27SDimitry Andric unsigned LeftBucket, 27006c3fb27SDimitry Andric unsigned RightBucket, 27106c3fb27SDimitry Andric SignaturesT &Signatures, 27206c3fb27SDimitry Andric std::mt19937 &RNG) const { 27306c3fb27SDimitry Andric // Sometimes we skip the move. This helps to escape local optima 27406c3fb27SDimitry Andric if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <= 27506c3fb27SDimitry Andric Config.SkipProbability) 27606c3fb27SDimitry Andric return false; 27706c3fb27SDimitry Andric 27806c3fb27SDimitry Andric bool FromLeftToRight = (N.Bucket == LeftBucket); 27906c3fb27SDimitry Andric // Update the current bucket 28006c3fb27SDimitry Andric N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket); 28106c3fb27SDimitry Andric 28206c3fb27SDimitry Andric // Update signatures and invalidate gain cache 28306c3fb27SDimitry Andric if (FromLeftToRight) { 28406c3fb27SDimitry Andric for (auto &UN : N.UtilityNodes) { 28506c3fb27SDimitry Andric auto &Signature = Signatures[UN]; 28606c3fb27SDimitry Andric Signature.LeftCount--; 28706c3fb27SDimitry Andric Signature.RightCount++; 28806c3fb27SDimitry Andric Signature.CachedGainIsValid = false; 28906c3fb27SDimitry Andric } 29006c3fb27SDimitry Andric } else { 29106c3fb27SDimitry Andric for (auto &UN : N.UtilityNodes) { 29206c3fb27SDimitry Andric auto &Signature = Signatures[UN]; 29306c3fb27SDimitry Andric Signature.LeftCount++; 29406c3fb27SDimitry Andric Signature.RightCount--; 29506c3fb27SDimitry Andric Signature.CachedGainIsValid = false; 29606c3fb27SDimitry Andric } 29706c3fb27SDimitry Andric } 29806c3fb27SDimitry Andric return true; 29906c3fb27SDimitry Andric } 30006c3fb27SDimitry Andric 30106c3fb27SDimitry Andric void BalancedPartitioning::split(const FunctionNodeRange Nodes, 30206c3fb27SDimitry Andric unsigned StartBucket) const { 30306c3fb27SDimitry Andric unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); 30406c3fb27SDimitry Andric auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2; 30506c3fb27SDimitry Andric 30606c3fb27SDimitry Andric std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) { 30706c3fb27SDimitry Andric return L.InputOrderIndex < R.InputOrderIndex; 30806c3fb27SDimitry Andric }); 30906c3fb27SDimitry Andric 31006c3fb27SDimitry Andric for (auto &N : llvm::make_range(Nodes.begin(), NodesMid)) 31106c3fb27SDimitry Andric N.Bucket = StartBucket; 31206c3fb27SDimitry Andric for (auto &N : llvm::make_range(NodesMid, Nodes.end())) 31306c3fb27SDimitry Andric N.Bucket = StartBucket + 1; 31406c3fb27SDimitry Andric } 31506c3fb27SDimitry Andric 31606c3fb27SDimitry Andric float BalancedPartitioning::moveGain(const BPFunctionNode &N, 31706c3fb27SDimitry Andric bool FromLeftToRight, 31806c3fb27SDimitry Andric const SignaturesT &Signatures) { 31906c3fb27SDimitry Andric float Gain = 0.f; 32006c3fb27SDimitry Andric for (auto &UN : N.UtilityNodes) 32106c3fb27SDimitry Andric Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR 32206c3fb27SDimitry Andric : Signatures[UN].CachedGainRL); 32306c3fb27SDimitry Andric return Gain; 32406c3fb27SDimitry Andric } 32506c3fb27SDimitry Andric 32606c3fb27SDimitry Andric float BalancedPartitioning::logCost(unsigned X, unsigned Y) const { 32706c3fb27SDimitry Andric return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1)); 32806c3fb27SDimitry Andric } 32906c3fb27SDimitry Andric 33006c3fb27SDimitry Andric float BalancedPartitioning::log2Cached(unsigned i) const { 33106c3fb27SDimitry Andric return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i); 33206c3fb27SDimitry Andric } 333