12f09f445SMaksim Panchenko //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 92f09f445SMaksim Panchenko // Implementation of the class that manages parallel work on BinaryFunctions. 102f09f445SMaksim Panchenko // 11a34c753fSRafael Auler //===----------------------------------------------------------------------===// 12a34c753fSRafael Auler 13a34c753fSRafael Auler #include "bolt/Core/ParallelUtilities.h" 14a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h" 15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h" 16e8ce5f1eSNico Weber #include "llvm/Support/RWMutex.h" 17a34c753fSRafael Auler #include "llvm/Support/ThreadPool.h" 18a34c753fSRafael Auler #include "llvm/Support/Timer.h" 19a34c753fSRafael Auler #include <mutex> 20a34c753fSRafael Auler 21a34c753fSRafael Auler #define DEBUG_TYPE "par-utils" 22a34c753fSRafael Auler 23a34c753fSRafael Auler namespace opts { 24a34c753fSRafael Auler extern cl::OptionCategory BoltCategory; 25a34c753fSRafael Auler 26a34c753fSRafael Auler cl::opt<unsigned> 27a34c753fSRafael Auler ThreadCount("thread-count", 28a34c753fSRafael Auler cl::desc("number of threads"), 29a34c753fSRafael Auler cl::init(hardware_concurrency().compute_thread_count()), 30a34c753fSRafael Auler cl::cat(BoltCategory)); 31a34c753fSRafael Auler 32a34c753fSRafael Auler cl::opt<bool> 33a34c753fSRafael Auler NoThreads("no-threads", 34a34c753fSRafael Auler cl::desc("disable multithreading"), 35a34c753fSRafael Auler cl::init(false), 36a34c753fSRafael Auler cl::cat(BoltCategory)); 37a34c753fSRafael Auler 38a34c753fSRafael Auler cl::opt<unsigned> 39a34c753fSRafael Auler TaskCount("tasks-per-thread", 40a34c753fSRafael Auler cl::desc("number of tasks to be created per thread"), 41a34c753fSRafael Auler cl::init(20), 42a34c753fSRafael Auler cl::cat(BoltCategory)); 43a34c753fSRafael Auler 44a34c753fSRafael Auler } // namespace opts 45a34c753fSRafael Auler 46a34c753fSRafael Auler namespace llvm { 47a34c753fSRafael Auler namespace bolt { 48a34c753fSRafael Auler namespace ParallelUtilities { 49a34c753fSRafael Auler 50a34c753fSRafael Auler namespace { 51a34c753fSRafael Auler /// A single thread pool that is used to run parallel tasks 52*6aad62cfSSayhaan Siddiqui std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr; 53a34c753fSRafael Auler 54a34c753fSRafael Auler unsigned computeCostFor(const BinaryFunction &BF, 55a34c753fSRafael Auler const PredicateTy &SkipPredicate, 56a34c753fSRafael Auler const SchedulingPolicy &SchedPolicy) { 57a34c753fSRafael Auler if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 58a34c753fSRafael Auler return 1; 59a34c753fSRafael Auler 60a34c753fSRafael Auler if (SkipPredicate && SkipPredicate(BF)) 61a34c753fSRafael Auler return 0; 62a34c753fSRafael Auler 63a34c753fSRafael Auler switch (SchedPolicy) { 64a34c753fSRafael Auler case SchedulingPolicy::SP_CONSTANT: 65a34c753fSRafael Auler return 1; 66a34c753fSRafael Auler case SchedulingPolicy::SP_INST_LINEAR: 67a34c753fSRafael Auler return BF.getSize(); 68a34c753fSRafael Auler case SchedulingPolicy::SP_INST_QUADRATIC: 69a34c753fSRafael Auler return BF.getSize() * BF.getSize(); 70a34c753fSRafael Auler case SchedulingPolicy::SP_BB_LINEAR: 71a34c753fSRafael Auler return BF.size(); 72a34c753fSRafael Auler case SchedulingPolicy::SP_BB_QUADRATIC: 73a34c753fSRafael Auler return BF.size() * BF.size(); 74a34c753fSRafael Auler default: 75a34c753fSRafael Auler llvm_unreachable("unsupported scheduling policy"); 76a34c753fSRafael Auler } 77a34c753fSRafael Auler } 78a34c753fSRafael Auler 79a34c753fSRafael Auler inline unsigned estimateTotalCost(const BinaryContext &BC, 80a34c753fSRafael Auler const PredicateTy &SkipPredicate, 81a34c753fSRafael Auler SchedulingPolicy &SchedPolicy) { 82a34c753fSRafael Auler if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 83a34c753fSRafael Auler return BC.getBinaryFunctions().size(); 84a34c753fSRafael Auler 85a34c753fSRafael Auler unsigned TotalCost = 0; 86a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) { 87a34c753fSRafael Auler const BinaryFunction &BF = BFI.second; 88a34c753fSRafael Auler TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 89a34c753fSRafael Auler } 90a34c753fSRafael Auler 91a34c753fSRafael Auler // Switch to trivial scheduling if total estimated work is zero 92a34c753fSRafael Auler if (TotalCost == 0) { 9352cf0711SAmir Ayupov BC.outs() 9452cf0711SAmir Ayupov << "BOLT-WARNING: Running parallel work of 0 estimated cost, will " 95a34c753fSRafael Auler "switch to trivial scheduling.\n"; 96a34c753fSRafael Auler 97a34c753fSRafael Auler SchedPolicy = SP_TRIVIAL; 98a34c753fSRafael Auler TotalCost = BC.getBinaryFunctions().size(); 99a34c753fSRafael Auler } 100a34c753fSRafael Auler return TotalCost; 101a34c753fSRafael Auler } 102a34c753fSRafael Auler 103a34c753fSRafael Auler } // namespace 104a34c753fSRafael Auler 105*6aad62cfSSayhaan Siddiqui ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) { 106a34c753fSRafael Auler if (ThreadPoolPtr.get()) 107a34c753fSRafael Auler return *ThreadPoolPtr; 108a34c753fSRafael Auler 109*6aad62cfSSayhaan Siddiqui if (ThreadsCount > 1) 110716042a6SMehdi Amini ThreadPoolPtr = std::make_unique<DefaultThreadPool>( 111*6aad62cfSSayhaan Siddiqui llvm::hardware_concurrency(ThreadsCount)); 112*6aad62cfSSayhaan Siddiqui else 113*6aad62cfSSayhaan Siddiqui ThreadPoolPtr = std::make_unique<SingleThreadExecutor>(); 114a34c753fSRafael Auler return *ThreadPoolPtr; 115a34c753fSRafael Auler } 116a34c753fSRafael Auler 117a34c753fSRafael Auler void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy, 118a34c753fSRafael Auler WorkFuncTy WorkFunction, PredicateTy SkipPredicate, 119a34c753fSRafael Auler std::string LogName, bool ForceSequential, 120a34c753fSRafael Auler unsigned TasksPerThread) { 121a34c753fSRafael Auler if (BC.getBinaryFunctions().size() == 0) 122a34c753fSRafael Auler return; 123a34c753fSRafael Auler 124a34c753fSRafael Auler auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 125a34c753fSRafael Auler std::map<uint64_t, BinaryFunction>::iterator BlockEnd) { 126a34c753fSRafael Auler Timer T(LogName, LogName); 127a34c753fSRafael Auler LLVM_DEBUG(T.startTimer()); 128a34c753fSRafael Auler 129a34c753fSRafael Auler for (auto It = BlockBegin; It != BlockEnd; ++It) { 130a34c753fSRafael Auler BinaryFunction &BF = It->second; 131a34c753fSRafael Auler if (SkipPredicate && SkipPredicate(BF)) 132a34c753fSRafael Auler continue; 133a34c753fSRafael Auler 134a34c753fSRafael Auler WorkFunction(BF); 135a34c753fSRafael Auler } 136a34c753fSRafael Auler LLVM_DEBUG(T.stopTimer()); 137a34c753fSRafael Auler }; 138a34c753fSRafael Auler 139a34c753fSRafael Auler if (opts::NoThreads || ForceSequential) { 140a34c753fSRafael Auler runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end()); 141a34c753fSRafael Auler return; 142a34c753fSRafael Auler } 143a34c753fSRafael Auler 144a34c753fSRafael Auler // Estimate the overall runtime cost using the scheduling policy 145a34c753fSRafael Auler const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 146a34c753fSRafael Auler const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 147a34c753fSRafael Auler const unsigned BlockCost = 148a34c753fSRafael Auler TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 149a34c753fSRafael Auler 150a34c753fSRafael Auler // Divide work into blocks of equal cost 1514a4fb930SMehdi Amini ThreadPoolInterface &Pool = getThreadPool(); 152a34c753fSRafael Auler auto BlockBegin = BC.getBinaryFunctions().begin(); 153a34c753fSRafael Auler unsigned CurrentCost = 0; 154a34c753fSRafael Auler 155a34c753fSRafael Auler for (auto It = BC.getBinaryFunctions().begin(); 156a34c753fSRafael Auler It != BC.getBinaryFunctions().end(); ++It) { 157a34c753fSRafael Auler BinaryFunction &BF = It->second; 158a34c753fSRafael Auler CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 159a34c753fSRafael Auler 160a34c753fSRafael Auler if (CurrentCost >= BlockCost) { 161a34c753fSRafael Auler Pool.async(runBlock, BlockBegin, std::next(It)); 162a34c753fSRafael Auler BlockBegin = std::next(It); 163a34c753fSRafael Auler CurrentCost = 0; 164a34c753fSRafael Auler } 165a34c753fSRafael Auler } 166a34c753fSRafael Auler Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end()); 167a34c753fSRafael Auler Pool.wait(); 168a34c753fSRafael Auler } 169a34c753fSRafael Auler 170a34c753fSRafael Auler void runOnEachFunctionWithUniqueAllocId( 171a34c753fSRafael Auler BinaryContext &BC, SchedulingPolicy SchedPolicy, 172a34c753fSRafael Auler WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate, 173a34c753fSRafael Auler std::string LogName, bool ForceSequential, unsigned TasksPerThread) { 174a34c753fSRafael Auler if (BC.getBinaryFunctions().size() == 0) 175a34c753fSRafael Auler return; 176a34c753fSRafael Auler 177e8ce5f1eSNico Weber llvm::sys::RWMutex MainLock; 178a34c753fSRafael Auler auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 179a34c753fSRafael Auler std::map<uint64_t, BinaryFunction>::iterator BlockEnd, 180a34c753fSRafael Auler MCPlusBuilder::AllocatorIdTy AllocId) { 181a34c753fSRafael Auler Timer T(LogName, LogName); 182a34c753fSRafael Auler LLVM_DEBUG(T.startTimer()); 183e8ce5f1eSNico Weber std::shared_lock<llvm::sys::RWMutex> Lock(MainLock); 184a34c753fSRafael Auler for (auto It = BlockBegin; It != BlockEnd; ++It) { 185a34c753fSRafael Auler BinaryFunction &BF = It->second; 186a34c753fSRafael Auler if (SkipPredicate && SkipPredicate(BF)) 187a34c753fSRafael Auler continue; 188a34c753fSRafael Auler 189a34c753fSRafael Auler WorkFunction(BF, AllocId); 190a34c753fSRafael Auler } 191a34c753fSRafael Auler LLVM_DEBUG(T.stopTimer()); 192a34c753fSRafael Auler }; 193a34c753fSRafael Auler 194554459a0SKristof Beyls unsigned AllocId = 1; 195554459a0SKristof Beyls auto EnsureAllocatorExists = [&BC](unsigned AllocId) { 196554459a0SKristof Beyls if (!BC.MIB->checkAllocatorExists(AllocId)) { 197554459a0SKristof Beyls MCPlusBuilder::AllocatorIdTy Id = 198554459a0SKristof Beyls BC.MIB->initializeNewAnnotationAllocator(); 199554459a0SKristof Beyls (void)Id; 200554459a0SKristof Beyls assert(AllocId == Id && "unexpected allocator id created"); 201554459a0SKristof Beyls } 202554459a0SKristof Beyls }; 203554459a0SKristof Beyls 204a34c753fSRafael Auler if (opts::NoThreads || ForceSequential) { 205554459a0SKristof Beyls EnsureAllocatorExists(AllocId); 206554459a0SKristof Beyls runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 207554459a0SKristof Beyls AllocId); 208a34c753fSRafael Auler return; 209a34c753fSRafael Auler } 210a34c753fSRafael Auler // This lock is used to postpone task execution 211e8ce5f1eSNico Weber std::unique_lock<llvm::sys::RWMutex> Lock(MainLock); 212a34c753fSRafael Auler 213a34c753fSRafael Auler // Estimate the overall runtime cost using the scheduling policy 214a34c753fSRafael Auler const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 215a34c753fSRafael Auler const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 216a34c753fSRafael Auler const unsigned BlockCost = 217a34c753fSRafael Auler TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 218a34c753fSRafael Auler 219a34c753fSRafael Auler // Divide work into blocks of equal cost 2204a4fb930SMehdi Amini ThreadPoolInterface &Pool = getThreadPool(); 221a34c753fSRafael Auler auto BlockBegin = BC.getBinaryFunctions().begin(); 222a34c753fSRafael Auler unsigned CurrentCost = 0; 223a34c753fSRafael Auler for (auto It = BC.getBinaryFunctions().begin(); 224a34c753fSRafael Auler It != BC.getBinaryFunctions().end(); ++It) { 225a34c753fSRafael Auler BinaryFunction &BF = It->second; 226a34c753fSRafael Auler CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 227a34c753fSRafael Auler 228a34c753fSRafael Auler if (CurrentCost >= BlockCost) { 229554459a0SKristof Beyls EnsureAllocatorExists(AllocId); 230a34c753fSRafael Auler Pool.async(runBlock, BlockBegin, std::next(It), AllocId); 231a34c753fSRafael Auler AllocId++; 232a34c753fSRafael Auler BlockBegin = std::next(It); 233a34c753fSRafael Auler CurrentCost = 0; 234a34c753fSRafael Auler } 235a34c753fSRafael Auler } 236a34c753fSRafael Auler 237e2326590SKristof Beyls EnsureAllocatorExists(AllocId); 238a34c753fSRafael Auler 239a34c753fSRafael Auler Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId); 240a34c753fSRafael Auler Lock.unlock(); 241a34c753fSRafael Auler Pool.wait(); 242a34c753fSRafael Auler } 243a34c753fSRafael Auler 244a34c753fSRafael Auler } // namespace ParallelUtilities 245a34c753fSRafael Auler } // namespace bolt 246a34c753fSRafael Auler } // namespace llvm 247