xref: /llvm-project/bolt/lib/Core/ParallelUtilities.cpp (revision 4c106cfdf7cf7eec861ad3983a3dd9a9e8f3a8ae)
1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the class that manages parallel work on BinaryFunctions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/ThreadPool.h"
17 #include "llvm/Support/Timer.h"
18 #include <mutex>
19 #include <shared_mutex>
20 
21 #define DEBUG_TYPE "par-utils"
22 
23 namespace opts {
24 extern cl::OptionCategory BoltCategory;
25 
26 cl::opt<unsigned>
27 ThreadCount("thread-count",
28   cl::desc("number of threads"),
29   cl::init(hardware_concurrency().compute_thread_count()),
30   cl::cat(BoltCategory));
31 
32 cl::opt<bool>
33 NoThreads("no-threads",
34   cl::desc("disable multithreading"),
35   cl::init(false),
36   cl::cat(BoltCategory));
37 
38 cl::opt<unsigned>
39 TaskCount("tasks-per-thread",
40   cl::desc("number of tasks to be created per thread"),
41   cl::init(20),
42   cl::cat(BoltCategory));
43 
44 } // namespace opts
45 
46 namespace llvm {
47 namespace bolt {
48 namespace ParallelUtilities {
49 
50 namespace {
51 /// A single thread pool that is used to run parallel tasks
52 std::unique_ptr<ThreadPool> ThreadPoolPtr;
53 
54 unsigned computeCostFor(const BinaryFunction &BF,
55                         const PredicateTy &SkipPredicate,
56                         const SchedulingPolicy &SchedPolicy) {
57   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58     return 1;
59 
60   if (SkipPredicate && SkipPredicate(BF))
61     return 0;
62 
63   switch (SchedPolicy) {
64   case SchedulingPolicy::SP_CONSTANT:
65     return 1;
66   case SchedulingPolicy::SP_INST_LINEAR:
67     return BF.getSize();
68   case SchedulingPolicy::SP_INST_QUADRATIC:
69     return BF.getSize() * BF.getSize();
70   case SchedulingPolicy::SP_BB_LINEAR:
71     return BF.size();
72   case SchedulingPolicy::SP_BB_QUADRATIC:
73     return BF.size() * BF.size();
74   default:
75     llvm_unreachable("unsupported scheduling policy");
76   }
77 }
78 
79 inline unsigned estimateTotalCost(const BinaryContext &BC,
80                                   const PredicateTy &SkipPredicate,
81                                   SchedulingPolicy &SchedPolicy) {
82   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83     return BC.getBinaryFunctions().size();
84 
85   unsigned TotalCost = 0;
86   for (auto &BFI : BC.getBinaryFunctions()) {
87     const BinaryFunction &BF = BFI.second;
88     TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
89   }
90 
91   // Switch to trivial scheduling if total estimated work is zero
92   if (TotalCost == 0) {
93     outs() << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
94               "switch to  trivial scheduling.\n";
95 
96     SchedPolicy = SP_TRIVIAL;
97     TotalCost = BC.getBinaryFunctions().size();
98   }
99   return TotalCost;
100 }
101 
102 } // namespace
103 
104 ThreadPool &getThreadPool() {
105   if (ThreadPoolPtr.get())
106     return *ThreadPoolPtr;
107 
108   ThreadPoolPtr = std::make_unique<ThreadPool>(
109       llvm::hardware_concurrency(opts::ThreadCount));
110   return *ThreadPoolPtr;
111 }
112 
113 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
114                        WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
115                        std::string LogName, bool ForceSequential,
116                        unsigned TasksPerThread) {
117   if (BC.getBinaryFunctions().size() == 0)
118     return;
119 
120   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
121                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
122     Timer T(LogName, LogName);
123     LLVM_DEBUG(T.startTimer());
124 
125     for (auto It = BlockBegin; It != BlockEnd; ++It) {
126       BinaryFunction &BF = It->second;
127       if (SkipPredicate && SkipPredicate(BF))
128         continue;
129 
130       WorkFunction(BF);
131     }
132     LLVM_DEBUG(T.stopTimer());
133   };
134 
135   if (opts::NoThreads || ForceSequential) {
136     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
137     return;
138   }
139 
140   // Estimate the overall runtime cost using the scheduling policy
141   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
142   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
143   const unsigned BlockCost =
144       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
145 
146   // Divide work into blocks of equal cost
147   ThreadPool &Pool = getThreadPool();
148   auto BlockBegin = BC.getBinaryFunctions().begin();
149   unsigned CurrentCost = 0;
150 
151   for (auto It = BC.getBinaryFunctions().begin();
152        It != BC.getBinaryFunctions().end(); ++It) {
153     BinaryFunction &BF = It->second;
154     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
155 
156     if (CurrentCost >= BlockCost) {
157       Pool.async(runBlock, BlockBegin, std::next(It));
158       BlockBegin = std::next(It);
159       CurrentCost = 0;
160     }
161   }
162   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
163   Pool.wait();
164 }
165 
166 void runOnEachFunctionWithUniqueAllocId(
167     BinaryContext &BC, SchedulingPolicy SchedPolicy,
168     WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
169     std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
170   if (BC.getBinaryFunctions().size() == 0)
171     return;
172 
173   std::shared_timed_mutex MainLock;
174   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
175                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
176                       MCPlusBuilder::AllocatorIdTy AllocId) {
177     Timer T(LogName, LogName);
178     LLVM_DEBUG(T.startTimer());
179     std::shared_lock<std::shared_timed_mutex> Lock(MainLock);
180     for (auto It = BlockBegin; It != BlockEnd; ++It) {
181       BinaryFunction &BF = It->second;
182       if (SkipPredicate && SkipPredicate(BF))
183         continue;
184 
185       WorkFunction(BF, AllocId);
186     }
187     LLVM_DEBUG(T.stopTimer());
188   };
189 
190   if (opts::NoThreads || ForceSequential) {
191     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0);
192     return;
193   }
194   // This lock is used to postpone task execution
195   std::unique_lock<std::shared_timed_mutex> Lock(MainLock);
196 
197   // Estimate the overall runtime cost using the scheduling policy
198   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
199   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
200   const unsigned BlockCost =
201       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
202 
203   // Divide work into blocks of equal cost
204   ThreadPool &Pool = getThreadPool();
205   auto BlockBegin = BC.getBinaryFunctions().begin();
206   unsigned CurrentCost = 0;
207   unsigned AllocId = 1;
208   for (auto It = BC.getBinaryFunctions().begin();
209        It != BC.getBinaryFunctions().end(); ++It) {
210     BinaryFunction &BF = It->second;
211     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
212 
213     if (CurrentCost >= BlockCost) {
214       if (!BC.MIB->checkAllocatorExists(AllocId)) {
215         MCPlusBuilder::AllocatorIdTy Id =
216             BC.MIB->initializeNewAnnotationAllocator();
217         (void)Id;
218         assert(AllocId == Id && "unexpected allocator id created");
219       }
220       Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
221       AllocId++;
222       BlockBegin = std::next(It);
223       CurrentCost = 0;
224     }
225   }
226 
227   if (!BC.MIB->checkAllocatorExists(AllocId)) {
228     MCPlusBuilder::AllocatorIdTy Id =
229         BC.MIB->initializeNewAnnotationAllocator();
230     (void)Id;
231     assert(AllocId == Id && "unexpected allocator id created");
232   }
233 
234   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
235   Lock.unlock();
236   Pool.wait();
237 }
238 
239 } // namespace ParallelUtilities
240 } // namespace bolt
241 } // namespace llvm
242