xref: /llvm-project/bolt/lib/Core/ParallelUtilities.cpp (revision 6aad62cf5b7f91f4b02266cf72469e2c8e28dbef)
1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the class that manages parallel work on BinaryFunctions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/RWMutex.h"
17 #include "llvm/Support/ThreadPool.h"
18 #include "llvm/Support/Timer.h"
19 #include <mutex>
20 
21 #define DEBUG_TYPE "par-utils"
22 
23 namespace opts {
24 extern cl::OptionCategory BoltCategory;
25 
26 cl::opt<unsigned>
27 ThreadCount("thread-count",
28   cl::desc("number of threads"),
29   cl::init(hardware_concurrency().compute_thread_count()),
30   cl::cat(BoltCategory));
31 
32 cl::opt<bool>
33 NoThreads("no-threads",
34   cl::desc("disable multithreading"),
35   cl::init(false),
36   cl::cat(BoltCategory));
37 
38 cl::opt<unsigned>
39 TaskCount("tasks-per-thread",
40   cl::desc("number of tasks to be created per thread"),
41   cl::init(20),
42   cl::cat(BoltCategory));
43 
44 } // namespace opts
45 
46 namespace llvm {
47 namespace bolt {
48 namespace ParallelUtilities {
49 
50 namespace {
51 /// A single thread pool that is used to run parallel tasks
52 std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr;
53 
54 unsigned computeCostFor(const BinaryFunction &BF,
55                         const PredicateTy &SkipPredicate,
56                         const SchedulingPolicy &SchedPolicy) {
57   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58     return 1;
59 
60   if (SkipPredicate && SkipPredicate(BF))
61     return 0;
62 
63   switch (SchedPolicy) {
64   case SchedulingPolicy::SP_CONSTANT:
65     return 1;
66   case SchedulingPolicy::SP_INST_LINEAR:
67     return BF.getSize();
68   case SchedulingPolicy::SP_INST_QUADRATIC:
69     return BF.getSize() * BF.getSize();
70   case SchedulingPolicy::SP_BB_LINEAR:
71     return BF.size();
72   case SchedulingPolicy::SP_BB_QUADRATIC:
73     return BF.size() * BF.size();
74   default:
75     llvm_unreachable("unsupported scheduling policy");
76   }
77 }
78 
79 inline unsigned estimateTotalCost(const BinaryContext &BC,
80                                   const PredicateTy &SkipPredicate,
81                                   SchedulingPolicy &SchedPolicy) {
82   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83     return BC.getBinaryFunctions().size();
84 
85   unsigned TotalCost = 0;
86   for (auto &BFI : BC.getBinaryFunctions()) {
87     const BinaryFunction &BF = BFI.second;
88     TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
89   }
90 
91   // Switch to trivial scheduling if total estimated work is zero
92   if (TotalCost == 0) {
93     BC.outs()
94         << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
95            "switch to  trivial scheduling.\n";
96 
97     SchedPolicy = SP_TRIVIAL;
98     TotalCost = BC.getBinaryFunctions().size();
99   }
100   return TotalCost;
101 }
102 
103 } // namespace
104 
105 ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) {
106   if (ThreadPoolPtr.get())
107     return *ThreadPoolPtr;
108 
109   if (ThreadsCount > 1)
110     ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
111         llvm::hardware_concurrency(ThreadsCount));
112   else
113     ThreadPoolPtr = std::make_unique<SingleThreadExecutor>();
114   return *ThreadPoolPtr;
115 }
116 
117 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
118                        WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
119                        std::string LogName, bool ForceSequential,
120                        unsigned TasksPerThread) {
121   if (BC.getBinaryFunctions().size() == 0)
122     return;
123 
124   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
125                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
126     Timer T(LogName, LogName);
127     LLVM_DEBUG(T.startTimer());
128 
129     for (auto It = BlockBegin; It != BlockEnd; ++It) {
130       BinaryFunction &BF = It->second;
131       if (SkipPredicate && SkipPredicate(BF))
132         continue;
133 
134       WorkFunction(BF);
135     }
136     LLVM_DEBUG(T.stopTimer());
137   };
138 
139   if (opts::NoThreads || ForceSequential) {
140     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
141     return;
142   }
143 
144   // Estimate the overall runtime cost using the scheduling policy
145   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
146   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
147   const unsigned BlockCost =
148       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
149 
150   // Divide work into blocks of equal cost
151   ThreadPoolInterface &Pool = getThreadPool();
152   auto BlockBegin = BC.getBinaryFunctions().begin();
153   unsigned CurrentCost = 0;
154 
155   for (auto It = BC.getBinaryFunctions().begin();
156        It != BC.getBinaryFunctions().end(); ++It) {
157     BinaryFunction &BF = It->second;
158     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
159 
160     if (CurrentCost >= BlockCost) {
161       Pool.async(runBlock, BlockBegin, std::next(It));
162       BlockBegin = std::next(It);
163       CurrentCost = 0;
164     }
165   }
166   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
167   Pool.wait();
168 }
169 
170 void runOnEachFunctionWithUniqueAllocId(
171     BinaryContext &BC, SchedulingPolicy SchedPolicy,
172     WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
173     std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
174   if (BC.getBinaryFunctions().size() == 0)
175     return;
176 
177   llvm::sys::RWMutex MainLock;
178   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
179                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
180                       MCPlusBuilder::AllocatorIdTy AllocId) {
181     Timer T(LogName, LogName);
182     LLVM_DEBUG(T.startTimer());
183     std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
184     for (auto It = BlockBegin; It != BlockEnd; ++It) {
185       BinaryFunction &BF = It->second;
186       if (SkipPredicate && SkipPredicate(BF))
187         continue;
188 
189       WorkFunction(BF, AllocId);
190     }
191     LLVM_DEBUG(T.stopTimer());
192   };
193 
194   unsigned AllocId = 1;
195   auto EnsureAllocatorExists = [&BC](unsigned AllocId) {
196     if (!BC.MIB->checkAllocatorExists(AllocId)) {
197       MCPlusBuilder::AllocatorIdTy Id =
198           BC.MIB->initializeNewAnnotationAllocator();
199       (void)Id;
200       assert(AllocId == Id && "unexpected allocator id created");
201     }
202   };
203 
204   if (opts::NoThreads || ForceSequential) {
205     EnsureAllocatorExists(AllocId);
206     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(),
207              AllocId);
208     return;
209   }
210   // This lock is used to postpone task execution
211   std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
212 
213   // Estimate the overall runtime cost using the scheduling policy
214   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
215   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
216   const unsigned BlockCost =
217       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
218 
219   // Divide work into blocks of equal cost
220   ThreadPoolInterface &Pool = getThreadPool();
221   auto BlockBegin = BC.getBinaryFunctions().begin();
222   unsigned CurrentCost = 0;
223   for (auto It = BC.getBinaryFunctions().begin();
224        It != BC.getBinaryFunctions().end(); ++It) {
225     BinaryFunction &BF = It->second;
226     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
227 
228     if (CurrentCost >= BlockCost) {
229       EnsureAllocatorExists(AllocId);
230       Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
231       AllocId++;
232       BlockBegin = std::next(It);
233       CurrentCost = 0;
234     }
235   }
236 
237   EnsureAllocatorExists(AllocId);
238 
239   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
240   Lock.unlock();
241   Pool.wait();
242 }
243 
244 } // namespace ParallelUtilities
245 } // namespace bolt
246 } // namespace llvm
247