xref: /llvm-project/bolt/lib/Core/ParallelUtilities.cpp (revision a34c753fe709a624f5b087397fb05adeac2311e4)
1 //===--- ParallelUtilities.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/Core/ParallelUtilities.h"
12 #include "bolt/Core/BinaryContext.h"
13 #include "bolt/Core/BinaryFunction.h"
14 #include "llvm/Support/ThreadPool.h"
15 #include "llvm/Support/Timer.h"
16 #include <mutex>
17 #include <shared_mutex>
18 
19 #define DEBUG_TYPE "par-utils"
20 
21 namespace opts {
22 extern cl::OptionCategory BoltCategory;
23 
24 cl::opt<unsigned>
25 ThreadCount("thread-count",
26   cl::desc("number of threads"),
27   cl::init(hardware_concurrency().compute_thread_count()),
28   cl::cat(BoltCategory));
29 
30 cl::opt<bool>
31 NoThreads("no-threads",
32   cl::desc("disable multithreading"),
33   cl::init(false),
34   cl::cat(BoltCategory));
35 
36 cl::opt<unsigned>
37 TaskCount("tasks-per-thread",
38   cl::desc("number of tasks to be created per thread"),
39   cl::init(20),
40   cl::cat(BoltCategory));
41 
42 } // namespace opts
43 
44 namespace llvm {
45 namespace bolt {
46 namespace ParallelUtilities {
47 
48 namespace {
49 /// A single thread pool that is used to run parallel tasks
50 std::unique_ptr<ThreadPool> ThreadPoolPtr;
51 
52 unsigned computeCostFor(const BinaryFunction &BF,
53                         const PredicateTy &SkipPredicate,
54                         const SchedulingPolicy &SchedPolicy) {
55   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
56     return 1;
57 
58   if (SkipPredicate && SkipPredicate(BF))
59     return 0;
60 
61   switch (SchedPolicy) {
62   case SchedulingPolicy::SP_CONSTANT:
63     return 1;
64   case SchedulingPolicy::SP_INST_LINEAR:
65     return BF.getSize();
66   case SchedulingPolicy::SP_INST_QUADRATIC:
67     return BF.getSize() * BF.getSize();
68   case SchedulingPolicy::SP_BB_LINEAR:
69     return BF.size();
70   case SchedulingPolicy::SP_BB_QUADRATIC:
71     return BF.size() * BF.size();
72   default:
73     llvm_unreachable("unsupported scheduling policy");
74   }
75 }
76 
77 inline unsigned estimateTotalCost(const BinaryContext &BC,
78                                   const PredicateTy &SkipPredicate,
79                                   SchedulingPolicy &SchedPolicy) {
80   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
81     return BC.getBinaryFunctions().size();
82 
83   unsigned TotalCost = 0;
84   for (auto &BFI : BC.getBinaryFunctions()) {
85     const BinaryFunction &BF = BFI.second;
86     TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
87   }
88 
89   // Switch to trivial scheduling if total estimated work is zero
90   if (TotalCost == 0) {
91     outs() << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
92               "switch to  trivial scheduling.\n";
93 
94     SchedPolicy = SP_TRIVIAL;
95     TotalCost = BC.getBinaryFunctions().size();
96   }
97   return TotalCost;
98 }
99 
100 } // namespace
101 
102 ThreadPool &getThreadPool() {
103   if (ThreadPoolPtr.get())
104     return *ThreadPoolPtr;
105 
106   ThreadPoolPtr = std::make_unique<ThreadPool>(
107       llvm::hardware_concurrency(opts::ThreadCount));
108   return *ThreadPoolPtr;
109 }
110 
111 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
112                        WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
113                        std::string LogName, bool ForceSequential,
114                        unsigned TasksPerThread) {
115   if (BC.getBinaryFunctions().size() == 0)
116     return;
117 
118   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
119                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
120     Timer T(LogName, LogName);
121     LLVM_DEBUG(T.startTimer());
122 
123     for (auto It = BlockBegin; It != BlockEnd; ++It) {
124       BinaryFunction &BF = It->second;
125       if (SkipPredicate && SkipPredicate(BF))
126         continue;
127 
128       WorkFunction(BF);
129     }
130     LLVM_DEBUG(T.stopTimer());
131   };
132 
133   if (opts::NoThreads || ForceSequential) {
134     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
135     return;
136   }
137 
138   // Estimate the overall runtime cost using the scheduling policy
139   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
140   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
141   const unsigned BlockCost =
142       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
143 
144   // Divide work into blocks of equal cost
145   ThreadPool &Pool = getThreadPool();
146   auto BlockBegin = BC.getBinaryFunctions().begin();
147   unsigned CurrentCost = 0;
148 
149   for (auto It = BC.getBinaryFunctions().begin();
150        It != BC.getBinaryFunctions().end(); ++It) {
151     BinaryFunction &BF = It->second;
152     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
153 
154     if (CurrentCost >= BlockCost) {
155       Pool.async(runBlock, BlockBegin, std::next(It));
156       BlockBegin = std::next(It);
157       CurrentCost = 0;
158     }
159   }
160   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
161   Pool.wait();
162 }
163 
164 void runOnEachFunctionWithUniqueAllocId(
165     BinaryContext &BC, SchedulingPolicy SchedPolicy,
166     WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
167     std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
168   if (BC.getBinaryFunctions().size() == 0)
169     return;
170 
171   std::shared_timed_mutex MainLock;
172   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
173                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
174                       MCPlusBuilder::AllocatorIdTy AllocId) {
175     Timer T(LogName, LogName);
176     LLVM_DEBUG(T.startTimer());
177     std::shared_lock<std::shared_timed_mutex> Lock(MainLock);
178     for (auto It = BlockBegin; It != BlockEnd; ++It) {
179       BinaryFunction &BF = It->second;
180       if (SkipPredicate && SkipPredicate(BF))
181         continue;
182 
183       WorkFunction(BF, AllocId);
184     }
185     LLVM_DEBUG(T.stopTimer());
186   };
187 
188   if (opts::NoThreads || ForceSequential) {
189     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0);
190     return;
191   }
192   // This lock is used to postpone task execution
193   std::unique_lock<std::shared_timed_mutex> Lock(MainLock);
194 
195   // Estimate the overall runtime cost using the scheduling policy
196   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
197   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
198   const unsigned BlockCost =
199       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
200 
201   // Divide work into blocks of equal cost
202   ThreadPool &Pool = getThreadPool();
203   auto BlockBegin = BC.getBinaryFunctions().begin();
204   unsigned CurrentCost = 0;
205   unsigned AllocId = 1;
206   for (auto It = BC.getBinaryFunctions().begin();
207        It != BC.getBinaryFunctions().end(); ++It) {
208     BinaryFunction &BF = It->second;
209     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
210 
211     if (CurrentCost >= BlockCost) {
212       if (!BC.MIB->checkAllocatorExists(AllocId)) {
213         MCPlusBuilder::AllocatorIdTy Id =
214             BC.MIB->initializeNewAnnotationAllocator();
215         (void)Id;
216         assert(AllocId == Id && "unexpected allocator id created");
217       }
218       Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
219       AllocId++;
220       BlockBegin = std::next(It);
221       CurrentCost = 0;
222     }
223   }
224 
225   if (!BC.MIB->checkAllocatorExists(AllocId)) {
226     MCPlusBuilder::AllocatorIdTy Id =
227         BC.MIB->initializeNewAnnotationAllocator();
228     (void)Id;
229     assert(AllocId == Id && "unexpected allocator id created");
230   }
231 
232   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
233   Lock.unlock();
234   Pool.wait();
235 }
236 
237 } // namespace ParallelUtilities
238 } // namespace bolt
239 } // namespace llvm
240