1 //===--- ParallelUtilities.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "bolt/Core/ParallelUtilities.h" 12 #include "bolt/Core/BinaryContext.h" 13 #include "bolt/Core/BinaryFunction.h" 14 #include "llvm/Support/ThreadPool.h" 15 #include "llvm/Support/Timer.h" 16 #include <mutex> 17 #include <shared_mutex> 18 19 #define DEBUG_TYPE "par-utils" 20 21 namespace opts { 22 extern cl::OptionCategory BoltCategory; 23 24 cl::opt<unsigned> 25 ThreadCount("thread-count", 26 cl::desc("number of threads"), 27 cl::init(hardware_concurrency().compute_thread_count()), 28 cl::cat(BoltCategory)); 29 30 cl::opt<bool> 31 NoThreads("no-threads", 32 cl::desc("disable multithreading"), 33 cl::init(false), 34 cl::cat(BoltCategory)); 35 36 cl::opt<unsigned> 37 TaskCount("tasks-per-thread", 38 cl::desc("number of tasks to be created per thread"), 39 cl::init(20), 40 cl::cat(BoltCategory)); 41 42 } // namespace opts 43 44 namespace llvm { 45 namespace bolt { 46 namespace ParallelUtilities { 47 48 namespace { 49 /// A single thread pool that is used to run parallel tasks 50 std::unique_ptr<ThreadPool> ThreadPoolPtr; 51 52 unsigned computeCostFor(const BinaryFunction &BF, 53 const PredicateTy &SkipPredicate, 54 const SchedulingPolicy &SchedPolicy) { 55 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 56 return 1; 57 58 if (SkipPredicate && SkipPredicate(BF)) 59 return 0; 60 61 switch (SchedPolicy) { 62 case SchedulingPolicy::SP_CONSTANT: 63 return 1; 64 case SchedulingPolicy::SP_INST_LINEAR: 65 return BF.getSize(); 66 case SchedulingPolicy::SP_INST_QUADRATIC: 67 return BF.getSize() * BF.getSize(); 68 case SchedulingPolicy::SP_BB_LINEAR: 69 return BF.size(); 70 case SchedulingPolicy::SP_BB_QUADRATIC: 71 return BF.size() * BF.size(); 72 default: 73 llvm_unreachable("unsupported scheduling policy"); 74 } 75 } 76 77 inline unsigned estimateTotalCost(const BinaryContext &BC, 78 const PredicateTy &SkipPredicate, 79 SchedulingPolicy &SchedPolicy) { 80 if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) 81 return BC.getBinaryFunctions().size(); 82 83 unsigned TotalCost = 0; 84 for (auto &BFI : BC.getBinaryFunctions()) { 85 const BinaryFunction &BF = BFI.second; 86 TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 87 } 88 89 // Switch to trivial scheduling if total estimated work is zero 90 if (TotalCost == 0) { 91 outs() << "BOLT-WARNING: Running parallel work of 0 estimated cost, will " 92 "switch to trivial scheduling.\n"; 93 94 SchedPolicy = SP_TRIVIAL; 95 TotalCost = BC.getBinaryFunctions().size(); 96 } 97 return TotalCost; 98 } 99 100 } // namespace 101 102 ThreadPool &getThreadPool() { 103 if (ThreadPoolPtr.get()) 104 return *ThreadPoolPtr; 105 106 ThreadPoolPtr = std::make_unique<ThreadPool>( 107 llvm::hardware_concurrency(opts::ThreadCount)); 108 return *ThreadPoolPtr; 109 } 110 111 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy, 112 WorkFuncTy WorkFunction, PredicateTy SkipPredicate, 113 std::string LogName, bool ForceSequential, 114 unsigned TasksPerThread) { 115 if (BC.getBinaryFunctions().size() == 0) 116 return; 117 118 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 119 std::map<uint64_t, BinaryFunction>::iterator BlockEnd) { 120 Timer T(LogName, LogName); 121 LLVM_DEBUG(T.startTimer()); 122 123 for (auto It = BlockBegin; It != BlockEnd; ++It) { 124 BinaryFunction &BF = It->second; 125 if (SkipPredicate && SkipPredicate(BF)) 126 continue; 127 128 WorkFunction(BF); 129 } 130 LLVM_DEBUG(T.stopTimer()); 131 }; 132 133 if (opts::NoThreads || ForceSequential) { 134 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end()); 135 return; 136 } 137 138 // Estimate the overall runtime cost using the scheduling policy 139 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 140 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 141 const unsigned BlockCost = 142 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 143 144 // Divide work into blocks of equal cost 145 ThreadPool &Pool = getThreadPool(); 146 auto BlockBegin = BC.getBinaryFunctions().begin(); 147 unsigned CurrentCost = 0; 148 149 for (auto It = BC.getBinaryFunctions().begin(); 150 It != BC.getBinaryFunctions().end(); ++It) { 151 BinaryFunction &BF = It->second; 152 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 153 154 if (CurrentCost >= BlockCost) { 155 Pool.async(runBlock, BlockBegin, std::next(It)); 156 BlockBegin = std::next(It); 157 CurrentCost = 0; 158 } 159 } 160 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end()); 161 Pool.wait(); 162 } 163 164 void runOnEachFunctionWithUniqueAllocId( 165 BinaryContext &BC, SchedulingPolicy SchedPolicy, 166 WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate, 167 std::string LogName, bool ForceSequential, unsigned TasksPerThread) { 168 if (BC.getBinaryFunctions().size() == 0) 169 return; 170 171 std::shared_timed_mutex MainLock; 172 auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, 173 std::map<uint64_t, BinaryFunction>::iterator BlockEnd, 174 MCPlusBuilder::AllocatorIdTy AllocId) { 175 Timer T(LogName, LogName); 176 LLVM_DEBUG(T.startTimer()); 177 std::shared_lock<std::shared_timed_mutex> Lock(MainLock); 178 for (auto It = BlockBegin; It != BlockEnd; ++It) { 179 BinaryFunction &BF = It->second; 180 if (SkipPredicate && SkipPredicate(BF)) 181 continue; 182 183 WorkFunction(BF, AllocId); 184 } 185 LLVM_DEBUG(T.stopTimer()); 186 }; 187 188 if (opts::NoThreads || ForceSequential) { 189 runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0); 190 return; 191 } 192 // This lock is used to postpone task execution 193 std::unique_lock<std::shared_timed_mutex> Lock(MainLock); 194 195 // Estimate the overall runtime cost using the scheduling policy 196 const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); 197 const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; 198 const unsigned BlockCost = 199 TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; 200 201 // Divide work into blocks of equal cost 202 ThreadPool &Pool = getThreadPool(); 203 auto BlockBegin = BC.getBinaryFunctions().begin(); 204 unsigned CurrentCost = 0; 205 unsigned AllocId = 1; 206 for (auto It = BC.getBinaryFunctions().begin(); 207 It != BC.getBinaryFunctions().end(); ++It) { 208 BinaryFunction &BF = It->second; 209 CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); 210 211 if (CurrentCost >= BlockCost) { 212 if (!BC.MIB->checkAllocatorExists(AllocId)) { 213 MCPlusBuilder::AllocatorIdTy Id = 214 BC.MIB->initializeNewAnnotationAllocator(); 215 (void)Id; 216 assert(AllocId == Id && "unexpected allocator id created"); 217 } 218 Pool.async(runBlock, BlockBegin, std::next(It), AllocId); 219 AllocId++; 220 BlockBegin = std::next(It); 221 CurrentCost = 0; 222 } 223 } 224 225 if (!BC.MIB->checkAllocatorExists(AllocId)) { 226 MCPlusBuilder::AllocatorIdTy Id = 227 BC.MIB->initializeNewAnnotationAllocator(); 228 (void)Id; 229 assert(AllocId == Id && "unexpected allocator id created"); 230 } 231 232 Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId); 233 Lock.unlock(); 234 Pool.wait(); 235 } 236 237 } // namespace ParallelUtilities 238 } // namespace bolt 239 } // namespace llvm 240