1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the class that manages parallel work on BinaryFunctions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/ParallelUtilities.h" 14 #include "bolt/Core/BinaryContext.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/Support/RWMutex.h" 17 #include "llvm/Support/ThreadPool.h" 18 #include "llvm/Support/Timer.h" 19 #include <mutex> 20 21 #define DEBUG_TYPE "par-utils" 22 23 namespace opts { 24 extern cl::OptionCategory BoltCategory; 25 26 cl::opt<unsigned> 27 ThreadCount("thread-count", 28 cl::desc("number of threads"), 29 cl::init(hardware_concurrency().compute_thread_count()), 30 cl::cat(BoltCategory)); 31 32 cl::opt<bool> 33 NoThreads("no-threads", 34 cl::desc("disable multithreading"), 35 cl::init(false), 36 cl::cat(BoltCategory)); 37 38 cl::opt<unsigned> 39 TaskCount("tasks-per-thread", 40 cl::desc("number of tasks to be created per thread"), 41 cl::init(20), 42 cl::cat(BoltCategory)); 43 44 } // namespace opts 45 46 namespace llvm { 47 namespace bolt { 48 namespace ParallelUtilities { 49 50 namespace { 51 /// A single thread pool that is used to run parallel tasks 52 std::unique_ptr<ThreadPool> ThreadPoolPtr; 53 54 unsigned computeCostFor(const BinaryFunction &BF, 55 const PredicateTy &SkipPredicate, 56 const SchedulingPolicy &SchedPolicy) { 57 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 58 return 1; 59 60 if (SkipPredicate && SkipPredicate(BF)) 61 return 0; 62 63 switch (SchedPolicy) { 64 case SchedulingPolicy::SP_CONSTANT: 65 return 1; 66 case SchedulingPolicy::SP_INST_LINEAR: 67 return BF.getSize(); 68 case SchedulingPolicy::SP_INST_QUADRATIC: 69 return BF.getSize() * BF.getSize(); 70 case SchedulingPolicy::SP_BB_LINEAR: 71 return BF.size(); 72 case SchedulingPolicy::SP_BB_QUADRATIC: 73 return BF.size() * BF.size(); 74 default: 75 llvm_unreachable("unsupported scheduling policy"); 76 } 77 } 78 79 inline unsigned estimateTotalCost(const BinaryContext &BC, 80 const PredicateTy &SkipPredicate, 81 SchedulingPolicy &SchedPolicy) { 82 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 83 return BC.getBinaryFunctions().size(); 84 85 unsigned TotalCost = 0; 86 for (auto &BFI : BC.getBinaryFunctions()) { 87 const BinaryFunction &BF = BFI.second; 88 TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 89 } 90 91 // Switch to trivial scheduling if total estimated work is zero 92 if (TotalCost == 0) { 93 BC.outs() 94 << "BOLT-WARNING: Running parallel work of 0 estimated cost, will " 95 "switch to trivial scheduling.\n"; 96 97 SchedPolicy = SP_TRIVIAL; 98 TotalCost = BC.getBinaryFunctions().size(); 99 } 100 return TotalCost; 101 } 102 103 } // namespace 104 105 ThreadPool &getThreadPool() { 106 if (ThreadPoolPtr.get()) 107 return *ThreadPoolPtr; 108 109 ThreadPoolPtr = std::make_unique<ThreadPool>( 110 llvm::hardware_concurrency(opts::ThreadCount)); 111 return *ThreadPoolPtr; 112 } 113 114 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy, 115 WorkFuncTy WorkFunction, PredicateTy SkipPredicate, 116 std::string LogName, bool ForceSequential, 117 unsigned TasksPerThread) { 118 if (BC.getBinaryFunctions().size() == 0) 119 return; 120 121 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 122 std::map<uint64_t, BinaryFunction>::iterator BlockEnd) { 123 Timer T(LogName, LogName); 124 LLVM_DEBUG(T.startTimer()); 125 126 for (auto It = BlockBegin; It != BlockEnd; ++It) { 127 BinaryFunction &BF = It->second; 128 if (SkipPredicate && SkipPredicate(BF)) 129 continue; 130 131 WorkFunction(BF); 132 } 133 LLVM_DEBUG(T.stopTimer()); 134 }; 135 136 if (opts::NoThreads || ForceSequential) { 137 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end()); 138 return; 139 } 140 141 // Estimate the overall runtime cost using the scheduling policy 142 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 143 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 144 const unsigned BlockCost = 145 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 146 147 // Divide work into blocks of equal cost 148 ThreadPool &Pool = getThreadPool(); 149 auto BlockBegin = BC.getBinaryFunctions().begin(); 150 unsigned CurrentCost = 0; 151 152 for (auto It = BC.getBinaryFunctions().begin(); 153 It != BC.getBinaryFunctions().end(); ++It) { 154 BinaryFunction &BF = It->second; 155 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 156 157 if (CurrentCost >= BlockCost) { 158 Pool.async(runBlock, BlockBegin, std::next(It)); 159 BlockBegin = std::next(It); 160 CurrentCost = 0; 161 } 162 } 163 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end()); 164 Pool.wait(); 165 } 166 167 void runOnEachFunctionWithUniqueAllocId( 168 BinaryContext &BC, SchedulingPolicy SchedPolicy, 169 WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate, 170 std::string LogName, bool ForceSequential, unsigned TasksPerThread) { 171 if (BC.getBinaryFunctions().size() == 0) 172 return; 173 174 llvm::sys::RWMutex MainLock; 175 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 176 std::map<uint64_t, BinaryFunction>::iterator BlockEnd, 177 MCPlusBuilder::AllocatorIdTy AllocId) { 178 Timer T(LogName, LogName); 179 LLVM_DEBUG(T.startTimer()); 180 std::shared_lock<llvm::sys::RWMutex> Lock(MainLock); 181 for (auto It = BlockBegin; It != BlockEnd; ++It) { 182 BinaryFunction &BF = It->second; 183 if (SkipPredicate && SkipPredicate(BF)) 184 continue; 185 186 WorkFunction(BF, AllocId); 187 } 188 LLVM_DEBUG(T.stopTimer()); 189 }; 190 191 if (opts::NoThreads || ForceSequential) { 192 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0); 193 return; 194 } 195 // This lock is used to postpone task execution 196 std::unique_lock<llvm::sys::RWMutex> Lock(MainLock); 197 198 // Estimate the overall runtime cost using the scheduling policy 199 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 200 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 201 const unsigned BlockCost = 202 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 203 204 // Divide work into blocks of equal cost 205 ThreadPool &Pool = getThreadPool(); 206 auto BlockBegin = BC.getBinaryFunctions().begin(); 207 unsigned CurrentCost = 0; 208 unsigned AllocId = 1; 209 for (auto It = BC.getBinaryFunctions().begin(); 210 It != BC.getBinaryFunctions().end(); ++It) { 211 BinaryFunction &BF = It->second; 212 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 213 214 if (CurrentCost >= BlockCost) { 215 if (!BC.MIB->checkAllocatorExists(AllocId)) { 216 MCPlusBuilder::AllocatorIdTy Id = 217 BC.MIB->initializeNewAnnotationAllocator(); 218 (void)Id; 219 assert(AllocId == Id && "unexpected allocator id created"); 220 } 221 Pool.async(runBlock, BlockBegin, std::next(It), AllocId); 222 AllocId++; 223 BlockBegin = std::next(It); 224 CurrentCost = 0; 225 } 226 } 227 228 if (!BC.MIB->checkAllocatorExists(AllocId)) { 229 MCPlusBuilder::AllocatorIdTy Id = 230 BC.MIB->initializeNewAnnotationAllocator(); 231 (void)Id; 232 assert(AllocId == Id && "unexpected allocator id created"); 233 } 234 235 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId); 236 Lock.unlock(); 237 Pool.wait(); 238 } 239 240 } // namespace ParallelUtilities 241 } // namespace bolt 242 } // namespace llvm 243