1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the class that manages parallel work on BinaryFunctions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/ParallelUtilities.h" 14 #include "bolt/Core/BinaryContext.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/Support/RWMutex.h" 17 #include "llvm/Support/ThreadPool.h" 18 #include "llvm/Support/Timer.h" 19 #include <mutex> 20 21 #define DEBUG_TYPE "par-utils" 22 23 namespace opts { 24 extern cl::OptionCategory BoltCategory; 25 26 cl::opt<unsigned> 27 ThreadCount("thread-count", 28 cl::desc("number of threads"), 29 cl::init(hardware_concurrency().compute_thread_count()), 30 cl::cat(BoltCategory)); 31 32 cl::opt<bool> 33 NoThreads("no-threads", 34 cl::desc("disable multithreading"), 35 cl::init(false), 36 cl::cat(BoltCategory)); 37 38 cl::opt<unsigned> 39 TaskCount("tasks-per-thread", 40 cl::desc("number of tasks to be created per thread"), 41 cl::init(20), 42 cl::cat(BoltCategory)); 43 44 } // namespace opts 45 46 namespace llvm { 47 namespace bolt { 48 namespace ParallelUtilities { 49 50 namespace { 51 /// A single thread pool that is used to run parallel tasks 52 std::unique_ptr<DefaultThreadPool> ThreadPoolPtr; 53 54 unsigned computeCostFor(const BinaryFunction &BF, 55 const PredicateTy &SkipPredicate, 56 const SchedulingPolicy &SchedPolicy) { 57 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 58 return 1; 59 60 if (SkipPredicate && SkipPredicate(BF)) 61 return 0; 62 63 switch (SchedPolicy) { 64 case SchedulingPolicy::SP_CONSTANT: 65 return 1; 66 case SchedulingPolicy::SP_INST_LINEAR: 67 return BF.getSize(); 68 case SchedulingPolicy::SP_INST_QUADRATIC: 69 return BF.getSize() * BF.getSize(); 70 case SchedulingPolicy::SP_BB_LINEAR: 71 return BF.size(); 72 case SchedulingPolicy::SP_BB_QUADRATIC: 73 return BF.size() * BF.size(); 74 default: 75 llvm_unreachable("unsupported scheduling policy"); 76 } 77 } 78 79 inline unsigned estimateTotalCost(const BinaryContext &BC, 80 const PredicateTy &SkipPredicate, 81 SchedulingPolicy &SchedPolicy) { 82 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 83 return BC.getBinaryFunctions().size(); 84 85 unsigned TotalCost = 0; 86 for (auto &BFI : BC.getBinaryFunctions()) { 87 const BinaryFunction &BF = BFI.second; 88 TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 89 } 90 91 // Switch to trivial scheduling if total estimated work is zero 92 if (TotalCost == 0) { 93 BC.outs() 94 << "BOLT-WARNING: Running parallel work of 0 estimated cost, will " 95 "switch to trivial scheduling.\n"; 96 97 SchedPolicy = SP_TRIVIAL; 98 TotalCost = BC.getBinaryFunctions().size(); 99 } 100 return TotalCost; 101 } 102 103 } // namespace 104 105 ThreadPoolInterface &getThreadPool() { 106 if (ThreadPoolPtr.get()) 107 return *ThreadPoolPtr; 108 109 ThreadPoolPtr = std::make_unique<DefaultThreadPool>( 110 llvm::hardware_concurrency(opts::ThreadCount)); 111 return *ThreadPoolPtr; 112 } 113 114 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy, 115 WorkFuncTy WorkFunction, PredicateTy SkipPredicate, 116 std::string LogName, bool ForceSequential, 117 unsigned TasksPerThread) { 118 if (BC.getBinaryFunctions().size() == 0) 119 return; 120 121 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 122 std::map<uint64_t, BinaryFunction>::iterator BlockEnd) { 123 Timer T(LogName, LogName); 124 LLVM_DEBUG(T.startTimer()); 125 126 for (auto It = BlockBegin; It != BlockEnd; ++It) { 127 BinaryFunction &BF = It->second; 128 if (SkipPredicate && SkipPredicate(BF)) 129 continue; 130 131 WorkFunction(BF); 132 } 133 LLVM_DEBUG(T.stopTimer()); 134 }; 135 136 if (opts::NoThreads || ForceSequential) { 137 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end()); 138 return; 139 } 140 141 // Estimate the overall runtime cost using the scheduling policy 142 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 143 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 144 const unsigned BlockCost = 145 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 146 147 // Divide work into blocks of equal cost 148 ThreadPoolInterface &Pool = getThreadPool(); 149 auto BlockBegin = BC.getBinaryFunctions().begin(); 150 unsigned CurrentCost = 0; 151 152 for (auto It = BC.getBinaryFunctions().begin(); 153 It != BC.getBinaryFunctions().end(); ++It) { 154 BinaryFunction &BF = It->second; 155 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 156 157 if (CurrentCost >= BlockCost) { 158 Pool.async(runBlock, BlockBegin, std::next(It)); 159 BlockBegin = std::next(It); 160 CurrentCost = 0; 161 } 162 } 163 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end()); 164 Pool.wait(); 165 } 166 167 void runOnEachFunctionWithUniqueAllocId( 168 BinaryContext &BC, SchedulingPolicy SchedPolicy, 169 WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate, 170 std::string LogName, bool ForceSequential, unsigned TasksPerThread) { 171 if (BC.getBinaryFunctions().size() == 0) 172 return; 173 174 llvm::sys::RWMutex MainLock; 175 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 176 std::map<uint64_t, BinaryFunction>::iterator BlockEnd, 177 MCPlusBuilder::AllocatorIdTy AllocId) { 178 Timer T(LogName, LogName); 179 LLVM_DEBUG(T.startTimer()); 180 std::shared_lock<llvm::sys::RWMutex> Lock(MainLock); 181 for (auto It = BlockBegin; It != BlockEnd; ++It) { 182 BinaryFunction &BF = It->second; 183 if (SkipPredicate && SkipPredicate(BF)) 184 continue; 185 186 WorkFunction(BF, AllocId); 187 } 188 LLVM_DEBUG(T.stopTimer()); 189 }; 190 191 unsigned AllocId = 1; 192 auto EnsureAllocatorExists = [&BC](unsigned AllocId) { 193 if (!BC.MIB->checkAllocatorExists(AllocId)) { 194 MCPlusBuilder::AllocatorIdTy Id = 195 BC.MIB->initializeNewAnnotationAllocator(); 196 (void)Id; 197 assert(AllocId == Id && "unexpected allocator id created"); 198 } 199 }; 200 201 if (opts::NoThreads || ForceSequential) { 202 EnsureAllocatorExists(AllocId); 203 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 204 AllocId); 205 return; 206 } 207 // This lock is used to postpone task execution 208 std::unique_lock<llvm::sys::RWMutex> Lock(MainLock); 209 210 // Estimate the overall runtime cost using the scheduling policy 211 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 212 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 213 const unsigned BlockCost = 214 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 215 216 // Divide work into blocks of equal cost 217 ThreadPoolInterface &Pool = getThreadPool(); 218 auto BlockBegin = BC.getBinaryFunctions().begin(); 219 unsigned CurrentCost = 0; 220 for (auto It = BC.getBinaryFunctions().begin(); 221 It != BC.getBinaryFunctions().end(); ++It) { 222 BinaryFunction &BF = It->second; 223 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 224 225 if (CurrentCost >= BlockCost) { 226 EnsureAllocatorExists(AllocId); 227 Pool.async(runBlock, BlockBegin, std::next(It), AllocId); 228 AllocId++; 229 BlockBegin = std::next(It); 230 CurrentCost = 0; 231 } 232 } 233 234 EnsureAllocatorExists(AllocId); 235 236 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId); 237 Lock.unlock(); 238 Pool.wait(); 239 } 240 241 } // namespace ParallelUtilities 242 } // namespace bolt 243 } // namespace llvm 244