1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the class that manages parallel work on BinaryFunctions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/ParallelUtilities.h" 14 #include "bolt/Core/BinaryContext.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/Support/RWMutex.h" 17 #include "llvm/Support/ThreadPool.h" 18 #include "llvm/Support/Timer.h" 19 #include <mutex> 20 21 #define DEBUG_TYPE "par-utils" 22 23 namespace opts { 24 extern cl::OptionCategory BoltCategory; 25 26 cl::opt<unsigned> 27 ThreadCount("thread-count", 28 cl::desc("number of threads"), 29 cl::init(hardware_concurrency().compute_thread_count()), 30 cl::cat(BoltCategory)); 31 32 cl::opt<bool> 33 NoThreads("no-threads", 34 cl::desc("disable multithreading"), 35 cl::init(false), 36 cl::cat(BoltCategory)); 37 38 cl::opt<unsigned> 39 TaskCount("tasks-per-thread", 40 cl::desc("number of tasks to be created per thread"), 41 cl::init(20), 42 cl::cat(BoltCategory)); 43 44 } // namespace opts 45 46 namespace llvm { 47 namespace bolt { 48 namespace ParallelUtilities { 49 50 namespace { 51 /// A single thread pool that is used to run parallel tasks 52 std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr; 53 54 unsigned computeCostFor(const BinaryFunction &BF, 55 const PredicateTy &SkipPredicate, 56 const SchedulingPolicy &SchedPolicy) { 57 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 58 return 1; 59 60 if (SkipPredicate && SkipPredicate(BF)) 61 return 0; 62 63 switch (SchedPolicy) { 64 case SchedulingPolicy::SP_CONSTANT: 65 return 1; 66 case SchedulingPolicy::SP_INST_LINEAR: 67 return BF.getSize(); 68 case SchedulingPolicy::SP_INST_QUADRATIC: 69 return BF.getSize() * BF.getSize(); 70 case SchedulingPolicy::SP_BB_LINEAR: 71 return BF.size(); 72 case SchedulingPolicy::SP_BB_QUADRATIC: 73 return BF.size() * BF.size(); 74 default: 75 llvm_unreachable("unsupported scheduling policy"); 76 } 77 } 78 79 inline unsigned estimateTotalCost(const BinaryContext &BC, 80 const PredicateTy &SkipPredicate, 81 SchedulingPolicy &SchedPolicy) { 82 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 83 return BC.getBinaryFunctions().size(); 84 85 unsigned TotalCost = 0; 86 for (auto &BFI : BC.getBinaryFunctions()) { 87 const BinaryFunction &BF = BFI.second; 88 TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 89 } 90 91 // Switch to trivial scheduling if total estimated work is zero 92 if (TotalCost == 0) { 93 BC.outs() 94 << "BOLT-WARNING: Running parallel work of 0 estimated cost, will " 95 "switch to trivial scheduling.\n"; 96 97 SchedPolicy = SP_TRIVIAL; 98 TotalCost = BC.getBinaryFunctions().size(); 99 } 100 return TotalCost; 101 } 102 103 } // namespace 104 105 ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) { 106 if (ThreadPoolPtr.get()) 107 return *ThreadPoolPtr; 108 109 if (ThreadsCount > 1) 110 ThreadPoolPtr = std::make_unique<DefaultThreadPool>( 111 llvm::hardware_concurrency(ThreadsCount)); 112 else 113 ThreadPoolPtr = std::make_unique<SingleThreadExecutor>(); 114 return *ThreadPoolPtr; 115 } 116 117 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy, 118 WorkFuncTy WorkFunction, PredicateTy SkipPredicate, 119 std::string LogName, bool ForceSequential, 120 unsigned TasksPerThread) { 121 if (BC.getBinaryFunctions().size() == 0) 122 return; 123 124 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 125 std::map<uint64_t, BinaryFunction>::iterator BlockEnd) { 126 Timer T(LogName, LogName); 127 LLVM_DEBUG(T.startTimer()); 128 129 for (auto It = BlockBegin; It != BlockEnd; ++It) { 130 BinaryFunction &BF = It->second; 131 if (SkipPredicate && SkipPredicate(BF)) 132 continue; 133 134 WorkFunction(BF); 135 } 136 LLVM_DEBUG(T.stopTimer()); 137 }; 138 139 if (opts::NoThreads || ForceSequential) { 140 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end()); 141 return; 142 } 143 144 // Estimate the overall runtime cost using the scheduling policy 145 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 146 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 147 const unsigned BlockCost = 148 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 149 150 // Divide work into blocks of equal cost 151 ThreadPoolInterface &Pool = getThreadPool(); 152 auto BlockBegin = BC.getBinaryFunctions().begin(); 153 unsigned CurrentCost = 0; 154 155 for (auto It = BC.getBinaryFunctions().begin(); 156 It != BC.getBinaryFunctions().end(); ++It) { 157 BinaryFunction &BF = It->second; 158 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 159 160 if (CurrentCost >= BlockCost) { 161 Pool.async(runBlock, BlockBegin, std::next(It)); 162 BlockBegin = std::next(It); 163 CurrentCost = 0; 164 } 165 } 166 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end()); 167 Pool.wait(); 168 } 169 170 void runOnEachFunctionWithUniqueAllocId( 171 BinaryContext &BC, SchedulingPolicy SchedPolicy, 172 WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate, 173 std::string LogName, bool ForceSequential, unsigned TasksPerThread) { 174 if (BC.getBinaryFunctions().size() == 0) 175 return; 176 177 llvm::sys::RWMutex MainLock; 178 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 179 std::map<uint64_t, BinaryFunction>::iterator BlockEnd, 180 MCPlusBuilder::AllocatorIdTy AllocId) { 181 Timer T(LogName, LogName); 182 LLVM_DEBUG(T.startTimer()); 183 std::shared_lock<llvm::sys::RWMutex> Lock(MainLock); 184 for (auto It = BlockBegin; It != BlockEnd; ++It) { 185 BinaryFunction &BF = It->second; 186 if (SkipPredicate && SkipPredicate(BF)) 187 continue; 188 189 WorkFunction(BF, AllocId); 190 } 191 LLVM_DEBUG(T.stopTimer()); 192 }; 193 194 unsigned AllocId = 1; 195 auto EnsureAllocatorExists = [&BC](unsigned AllocId) { 196 if (!BC.MIB->checkAllocatorExists(AllocId)) { 197 MCPlusBuilder::AllocatorIdTy Id = 198 BC.MIB->initializeNewAnnotationAllocator(); 199 (void)Id; 200 assert(AllocId == Id && "unexpected allocator id created"); 201 } 202 }; 203 204 if (opts::NoThreads || ForceSequential) { 205 EnsureAllocatorExists(AllocId); 206 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 207 AllocId); 208 return; 209 } 210 // This lock is used to postpone task execution 211 std::unique_lock<llvm::sys::RWMutex> Lock(MainLock); 212 213 // Estimate the overall runtime cost using the scheduling policy 214 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 215 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 216 const unsigned BlockCost = 217 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 218 219 // Divide work into blocks of equal cost 220 ThreadPoolInterface &Pool = getThreadPool(); 221 auto BlockBegin = BC.getBinaryFunctions().begin(); 222 unsigned CurrentCost = 0; 223 for (auto It = BC.getBinaryFunctions().begin(); 224 It != BC.getBinaryFunctions().end(); ++It) { 225 BinaryFunction &BF = It->second; 226 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 227 228 if (CurrentCost >= BlockCost) { 229 EnsureAllocatorExists(AllocId); 230 Pool.async(runBlock, BlockBegin, std::next(It), AllocId); 231 AllocId++; 232 BlockBegin = std::next(It); 233 CurrentCost = 0; 234 } 235 } 236 237 EnsureAllocatorExists(AllocId); 238 239 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId); 240 Lock.unlock(); 241 Pool.wait(); 242 } 243 244 } // namespace ParallelUtilities 245 } // namespace bolt 246 } // namespace llvm 247