1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the class that manages parallel work on BinaryFunctions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/ParallelUtilities.h" 14 #include "bolt/Core/BinaryContext.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/Support/ThreadPool.h" 17 #include "llvm/Support/Timer.h" 18 #include <mutex> 19 #include <shared_mutex> 20 21 #define DEBUG_TYPE "par-utils" 22 23 namespace opts { 24 extern cl::OptionCategory BoltCategory; 25 26 cl::opt<unsigned> 27 ThreadCount("thread-count", 28 cl::desc("number of threads"), 29 cl::init(hardware_concurrency().compute_thread_count()), 30 cl::cat(BoltCategory)); 31 32 cl::opt<bool> 33 NoThreads("no-threads", 34 cl::desc("disable multithreading"), 35 cl::init(false), 36 cl::cat(BoltCategory)); 37 38 cl::opt<unsigned> 39 TaskCount("tasks-per-thread", 40 cl::desc("number of tasks to be created per thread"), 41 cl::init(20), 42 cl::cat(BoltCategory)); 43 44 } // namespace opts 45 46 namespace llvm { 47 namespace bolt { 48 namespace ParallelUtilities { 49 50 namespace { 51 /// A single thread pool that is used to run parallel tasks 52 std::unique_ptr<ThreadPool> ThreadPoolPtr; 53 54 unsigned computeCostFor(const BinaryFunction &BF, 55 const PredicateTy &SkipPredicate, 56 const SchedulingPolicy &SchedPolicy) { 57 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 58 return 1; 59 60 if (SkipPredicate && SkipPredicate(BF)) 61 return 0; 62 63 switch (SchedPolicy) { 64 case SchedulingPolicy::SP_CONSTANT: 65 return 1; 66 case SchedulingPolicy::SP_INST_LINEAR: 67 return BF.getSize(); 68 case SchedulingPolicy::SP_INST_QUADRATIC: 69 return BF.getSize() * BF.getSize(); 70 case SchedulingPolicy::SP_BB_LINEAR: 71 return BF.size(); 72 case SchedulingPolicy::SP_BB_QUADRATIC: 73 return BF.size() * BF.size(); 74 default: 75 llvm_unreachable("unsupported scheduling policy"); 76 } 77 } 78 79 inline unsigned estimateTotalCost(const BinaryContext &BC, 80 const PredicateTy &SkipPredicate, 81 SchedulingPolicy &SchedPolicy) { 82 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 83 return BC.getBinaryFunctions().size(); 84 85 unsigned TotalCost = 0; 86 for (auto &BFI : BC.getBinaryFunctions()) { 87 const BinaryFunction &BF = BFI.second; 88 TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 89 } 90 91 // Switch to trivial scheduling if total estimated work is zero 92 if (TotalCost == 0) { 93 outs() << "BOLT-WARNING: Running parallel work of 0 estimated cost, will " 94 "switch to trivial scheduling.\n"; 95 96 SchedPolicy = SP_TRIVIAL; 97 TotalCost = BC.getBinaryFunctions().size(); 98 } 99 return TotalCost; 100 } 101 102 } // namespace 103 104 ThreadPool &getThreadPool() { 105 if (ThreadPoolPtr.get()) 106 return *ThreadPoolPtr; 107 108 ThreadPoolPtr = std::make_unique<ThreadPool>( 109 llvm::hardware_concurrency(opts::ThreadCount)); 110 return *ThreadPoolPtr; 111 } 112 113 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy, 114 WorkFuncTy WorkFunction, PredicateTy SkipPredicate, 115 std::string LogName, bool ForceSequential, 116 unsigned TasksPerThread) { 117 if (BC.getBinaryFunctions().size() == 0) 118 return; 119 120 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 121 std::map<uint64_t, BinaryFunction>::iterator BlockEnd) { 122 Timer T(LogName, LogName); 123 LLVM_DEBUG(T.startTimer()); 124 125 for (auto It = BlockBegin; It != BlockEnd; ++It) { 126 BinaryFunction &BF = It->second; 127 if (SkipPredicate && SkipPredicate(BF)) 128 continue; 129 130 WorkFunction(BF); 131 } 132 LLVM_DEBUG(T.stopTimer()); 133 }; 134 135 if (opts::NoThreads || ForceSequential) { 136 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end()); 137 return; 138 } 139 140 // Estimate the overall runtime cost using the scheduling policy 141 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 142 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 143 const unsigned BlockCost = 144 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 145 146 // Divide work into blocks of equal cost 147 ThreadPool &Pool = getThreadPool(); 148 auto BlockBegin = BC.getBinaryFunctions().begin(); 149 unsigned CurrentCost = 0; 150 151 for (auto It = BC.getBinaryFunctions().begin(); 152 It != BC.getBinaryFunctions().end(); ++It) { 153 BinaryFunction &BF = It->second; 154 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 155 156 if (CurrentCost >= BlockCost) { 157 Pool.async(runBlock, BlockBegin, std::next(It)); 158 BlockBegin = std::next(It); 159 CurrentCost = 0; 160 } 161 } 162 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end()); 163 Pool.wait(); 164 } 165 166 void runOnEachFunctionWithUniqueAllocId( 167 BinaryContext &BC, SchedulingPolicy SchedPolicy, 168 WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate, 169 std::string LogName, bool ForceSequential, unsigned TasksPerThread) { 170 if (BC.getBinaryFunctions().size() == 0) 171 return; 172 173 std::shared_timed_mutex MainLock; 174 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 175 std::map<uint64_t, BinaryFunction>::iterator BlockEnd, 176 MCPlusBuilder::AllocatorIdTy AllocId) { 177 Timer T(LogName, LogName); 178 LLVM_DEBUG(T.startTimer()); 179 std::shared_lock<std::shared_timed_mutex> Lock(MainLock); 180 for (auto It = BlockBegin; It != BlockEnd; ++It) { 181 BinaryFunction &BF = It->second; 182 if (SkipPredicate && SkipPredicate(BF)) 183 continue; 184 185 WorkFunction(BF, AllocId); 186 } 187 LLVM_DEBUG(T.stopTimer()); 188 }; 189 190 if (opts::NoThreads || ForceSequential) { 191 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0); 192 return; 193 } 194 // This lock is used to postpone task execution 195 std::unique_lock<std::shared_timed_mutex> Lock(MainLock); 196 197 // Estimate the overall runtime cost using the scheduling policy 198 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 199 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 200 const unsigned BlockCost = 201 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 202 203 // Divide work into blocks of equal cost 204 ThreadPool &Pool = getThreadPool(); 205 auto BlockBegin = BC.getBinaryFunctions().begin(); 206 unsigned CurrentCost = 0; 207 unsigned AllocId = 1; 208 for (auto It = BC.getBinaryFunctions().begin(); 209 It != BC.getBinaryFunctions().end(); ++It) { 210 BinaryFunction &BF = It->second; 211 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 212 213 if (CurrentCost >= BlockCost) { 214 if (!BC.MIB->checkAllocatorExists(AllocId)) { 215 MCPlusBuilder::AllocatorIdTy Id = 216 BC.MIB->initializeNewAnnotationAllocator(); 217 (void)Id; 218 assert(AllocId == Id && "unexpected allocator id created"); 219 } 220 Pool.async(runBlock, BlockBegin, std::next(It), AllocId); 221 AllocId++; 222 BlockBegin = std::next(It); 223 CurrentCost = 0; 224 } 225 } 226 227 if (!BC.MIB->checkAllocatorExists(AllocId)) { 228 MCPlusBuilder::AllocatorIdTy Id = 229 BC.MIB->initializeNewAnnotationAllocator(); 230 (void)Id; 231 assert(AllocId == Id && "unexpected allocator id created"); 232 } 233 234 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId); 235 Lock.unlock(); 236 Pool.wait(); 237 } 238 239 } // namespace ParallelUtilities 240 } // namespace bolt 241 } // namespace llvm 242