xref: /llvm-project/bolt/lib/Core/ParallelUtilities.cpp (revision e232659028365b51feb001565884b3b8e62cc2a9)
1 //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the class that manages parallel work on BinaryFunctions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/RWMutex.h"
17 #include "llvm/Support/ThreadPool.h"
18 #include "llvm/Support/Timer.h"
19 #include <mutex>
20 
21 #define DEBUG_TYPE "par-utils"
22 
23 namespace opts {
24 extern cl::OptionCategory BoltCategory;
25 
26 cl::opt<unsigned>
27 ThreadCount("thread-count",
28   cl::desc("number of threads"),
29   cl::init(hardware_concurrency().compute_thread_count()),
30   cl::cat(BoltCategory));
31 
32 cl::opt<bool>
33 NoThreads("no-threads",
34   cl::desc("disable multithreading"),
35   cl::init(false),
36   cl::cat(BoltCategory));
37 
38 cl::opt<unsigned>
39 TaskCount("tasks-per-thread",
40   cl::desc("number of tasks to be created per thread"),
41   cl::init(20),
42   cl::cat(BoltCategory));
43 
44 } // namespace opts
45 
46 namespace llvm {
47 namespace bolt {
48 namespace ParallelUtilities {
49 
50 namespace {
51 /// A single thread pool that is used to run parallel tasks
52 std::unique_ptr<DefaultThreadPool> ThreadPoolPtr;
53 
54 unsigned computeCostFor(const BinaryFunction &BF,
55                         const PredicateTy &SkipPredicate,
56                         const SchedulingPolicy &SchedPolicy) {
57   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58     return 1;
59 
60   if (SkipPredicate && SkipPredicate(BF))
61     return 0;
62 
63   switch (SchedPolicy) {
64   case SchedulingPolicy::SP_CONSTANT:
65     return 1;
66   case SchedulingPolicy::SP_INST_LINEAR:
67     return BF.getSize();
68   case SchedulingPolicy::SP_INST_QUADRATIC:
69     return BF.getSize() * BF.getSize();
70   case SchedulingPolicy::SP_BB_LINEAR:
71     return BF.size();
72   case SchedulingPolicy::SP_BB_QUADRATIC:
73     return BF.size() * BF.size();
74   default:
75     llvm_unreachable("unsupported scheduling policy");
76   }
77 }
78 
79 inline unsigned estimateTotalCost(const BinaryContext &BC,
80                                   const PredicateTy &SkipPredicate,
81                                   SchedulingPolicy &SchedPolicy) {
82   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83     return BC.getBinaryFunctions().size();
84 
85   unsigned TotalCost = 0;
86   for (auto &BFI : BC.getBinaryFunctions()) {
87     const BinaryFunction &BF = BFI.second;
88     TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
89   }
90 
91   // Switch to trivial scheduling if total estimated work is zero
92   if (TotalCost == 0) {
93     BC.outs()
94         << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
95            "switch to  trivial scheduling.\n";
96 
97     SchedPolicy = SP_TRIVIAL;
98     TotalCost = BC.getBinaryFunctions().size();
99   }
100   return TotalCost;
101 }
102 
103 } // namespace
104 
105 ThreadPoolInterface &getThreadPool() {
106   if (ThreadPoolPtr.get())
107     return *ThreadPoolPtr;
108 
109   ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
110       llvm::hardware_concurrency(opts::ThreadCount));
111   return *ThreadPoolPtr;
112 }
113 
114 void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
115                        WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
116                        std::string LogName, bool ForceSequential,
117                        unsigned TasksPerThread) {
118   if (BC.getBinaryFunctions().size() == 0)
119     return;
120 
121   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
122                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
123     Timer T(LogName, LogName);
124     LLVM_DEBUG(T.startTimer());
125 
126     for (auto It = BlockBegin; It != BlockEnd; ++It) {
127       BinaryFunction &BF = It->second;
128       if (SkipPredicate && SkipPredicate(BF))
129         continue;
130 
131       WorkFunction(BF);
132     }
133     LLVM_DEBUG(T.stopTimer());
134   };
135 
136   if (opts::NoThreads || ForceSequential) {
137     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
138     return;
139   }
140 
141   // Estimate the overall runtime cost using the scheduling policy
142   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
143   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
144   const unsigned BlockCost =
145       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
146 
147   // Divide work into blocks of equal cost
148   ThreadPoolInterface &Pool = getThreadPool();
149   auto BlockBegin = BC.getBinaryFunctions().begin();
150   unsigned CurrentCost = 0;
151 
152   for (auto It = BC.getBinaryFunctions().begin();
153        It != BC.getBinaryFunctions().end(); ++It) {
154     BinaryFunction &BF = It->second;
155     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
156 
157     if (CurrentCost >= BlockCost) {
158       Pool.async(runBlock, BlockBegin, std::next(It));
159       BlockBegin = std::next(It);
160       CurrentCost = 0;
161     }
162   }
163   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
164   Pool.wait();
165 }
166 
167 void runOnEachFunctionWithUniqueAllocId(
168     BinaryContext &BC, SchedulingPolicy SchedPolicy,
169     WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
170     std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
171   if (BC.getBinaryFunctions().size() == 0)
172     return;
173 
174   llvm::sys::RWMutex MainLock;
175   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
176                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
177                       MCPlusBuilder::AllocatorIdTy AllocId) {
178     Timer T(LogName, LogName);
179     LLVM_DEBUG(T.startTimer());
180     std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
181     for (auto It = BlockBegin; It != BlockEnd; ++It) {
182       BinaryFunction &BF = It->second;
183       if (SkipPredicate && SkipPredicate(BF))
184         continue;
185 
186       WorkFunction(BF, AllocId);
187     }
188     LLVM_DEBUG(T.stopTimer());
189   };
190 
191   unsigned AllocId = 1;
192   auto EnsureAllocatorExists = [&BC](unsigned AllocId) {
193     if (!BC.MIB->checkAllocatorExists(AllocId)) {
194       MCPlusBuilder::AllocatorIdTy Id =
195           BC.MIB->initializeNewAnnotationAllocator();
196       (void)Id;
197       assert(AllocId == Id && "unexpected allocator id created");
198     }
199   };
200 
201   if (opts::NoThreads || ForceSequential) {
202     EnsureAllocatorExists(AllocId);
203     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(),
204              AllocId);
205     return;
206   }
207   // This lock is used to postpone task execution
208   std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
209 
210   // Estimate the overall runtime cost using the scheduling policy
211   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
212   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
213   const unsigned BlockCost =
214       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
215 
216   // Divide work into blocks of equal cost
217   ThreadPoolInterface &Pool = getThreadPool();
218   auto BlockBegin = BC.getBinaryFunctions().begin();
219   unsigned CurrentCost = 0;
220   for (auto It = BC.getBinaryFunctions().begin();
221        It != BC.getBinaryFunctions().end(); ++It) {
222     BinaryFunction &BF = It->second;
223     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
224 
225     if (CurrentCost >= BlockCost) {
226       EnsureAllocatorExists(AllocId);
227       Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
228       AllocId++;
229       BlockBegin = std::next(It);
230       CurrentCost = 0;
231     }
232   }
233 
234   EnsureAllocatorExists(AllocId);
235 
236   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
237   Lock.unlock();
238   Pool.wait();
239 }
240 
241 } // namespace ParallelUtilities
242 } // namespace bolt
243 } // namespace llvm
244