xref: /llvm-project/bolt/lib/Core/ParallelUtilities.cpp (revision 6aad62cf5b7f91f4b02266cf72469e2c8e28dbef)
12f09f445SMaksim Panchenko //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
92f09f445SMaksim Panchenko // Implementation of the class that manages parallel work on BinaryFunctions.
102f09f445SMaksim Panchenko //
11a34c753fSRafael Auler //===----------------------------------------------------------------------===//
12a34c753fSRafael Auler 
13a34c753fSRafael Auler #include "bolt/Core/ParallelUtilities.h"
14a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
16e8ce5f1eSNico Weber #include "llvm/Support/RWMutex.h"
17a34c753fSRafael Auler #include "llvm/Support/ThreadPool.h"
18a34c753fSRafael Auler #include "llvm/Support/Timer.h"
19a34c753fSRafael Auler #include <mutex>
20a34c753fSRafael Auler 
21a34c753fSRafael Auler #define DEBUG_TYPE "par-utils"
22a34c753fSRafael Auler 
23a34c753fSRafael Auler namespace opts {
24a34c753fSRafael Auler extern cl::OptionCategory BoltCategory;
25a34c753fSRafael Auler 
26a34c753fSRafael Auler cl::opt<unsigned>
27a34c753fSRafael Auler ThreadCount("thread-count",
28a34c753fSRafael Auler   cl::desc("number of threads"),
29a34c753fSRafael Auler   cl::init(hardware_concurrency().compute_thread_count()),
30a34c753fSRafael Auler   cl::cat(BoltCategory));
31a34c753fSRafael Auler 
32a34c753fSRafael Auler cl::opt<bool>
33a34c753fSRafael Auler NoThreads("no-threads",
34a34c753fSRafael Auler   cl::desc("disable multithreading"),
35a34c753fSRafael Auler   cl::init(false),
36a34c753fSRafael Auler   cl::cat(BoltCategory));
37a34c753fSRafael Auler 
38a34c753fSRafael Auler cl::opt<unsigned>
39a34c753fSRafael Auler TaskCount("tasks-per-thread",
40a34c753fSRafael Auler   cl::desc("number of tasks to be created per thread"),
41a34c753fSRafael Auler   cl::init(20),
42a34c753fSRafael Auler   cl::cat(BoltCategory));
43a34c753fSRafael Auler 
44a34c753fSRafael Auler } // namespace opts
45a34c753fSRafael Auler 
46a34c753fSRafael Auler namespace llvm {
47a34c753fSRafael Auler namespace bolt {
48a34c753fSRafael Auler namespace ParallelUtilities {
49a34c753fSRafael Auler 
50a34c753fSRafael Auler namespace {
51a34c753fSRafael Auler /// A single thread pool that is used to run parallel tasks
52*6aad62cfSSayhaan Siddiqui std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr;
53a34c753fSRafael Auler 
54a34c753fSRafael Auler unsigned computeCostFor(const BinaryFunction &BF,
55a34c753fSRafael Auler                         const PredicateTy &SkipPredicate,
56a34c753fSRafael Auler                         const SchedulingPolicy &SchedPolicy) {
57a34c753fSRafael Auler   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58a34c753fSRafael Auler     return 1;
59a34c753fSRafael Auler 
60a34c753fSRafael Auler   if (SkipPredicate && SkipPredicate(BF))
61a34c753fSRafael Auler     return 0;
62a34c753fSRafael Auler 
63a34c753fSRafael Auler   switch (SchedPolicy) {
64a34c753fSRafael Auler   case SchedulingPolicy::SP_CONSTANT:
65a34c753fSRafael Auler     return 1;
66a34c753fSRafael Auler   case SchedulingPolicy::SP_INST_LINEAR:
67a34c753fSRafael Auler     return BF.getSize();
68a34c753fSRafael Auler   case SchedulingPolicy::SP_INST_QUADRATIC:
69a34c753fSRafael Auler     return BF.getSize() * BF.getSize();
70a34c753fSRafael Auler   case SchedulingPolicy::SP_BB_LINEAR:
71a34c753fSRafael Auler     return BF.size();
72a34c753fSRafael Auler   case SchedulingPolicy::SP_BB_QUADRATIC:
73a34c753fSRafael Auler     return BF.size() * BF.size();
74a34c753fSRafael Auler   default:
75a34c753fSRafael Auler     llvm_unreachable("unsupported scheduling policy");
76a34c753fSRafael Auler   }
77a34c753fSRafael Auler }
78a34c753fSRafael Auler 
79a34c753fSRafael Auler inline unsigned estimateTotalCost(const BinaryContext &BC,
80a34c753fSRafael Auler                                   const PredicateTy &SkipPredicate,
81a34c753fSRafael Auler                                   SchedulingPolicy &SchedPolicy) {
82a34c753fSRafael Auler   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83a34c753fSRafael Auler     return BC.getBinaryFunctions().size();
84a34c753fSRafael Auler 
85a34c753fSRafael Auler   unsigned TotalCost = 0;
86a34c753fSRafael Auler   for (auto &BFI : BC.getBinaryFunctions()) {
87a34c753fSRafael Auler     const BinaryFunction &BF = BFI.second;
88a34c753fSRafael Auler     TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
89a34c753fSRafael Auler   }
90a34c753fSRafael Auler 
91a34c753fSRafael Auler   // Switch to trivial scheduling if total estimated work is zero
92a34c753fSRafael Auler   if (TotalCost == 0) {
9352cf0711SAmir Ayupov     BC.outs()
9452cf0711SAmir Ayupov         << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
95a34c753fSRafael Auler            "switch to  trivial scheduling.\n";
96a34c753fSRafael Auler 
97a34c753fSRafael Auler     SchedPolicy = SP_TRIVIAL;
98a34c753fSRafael Auler     TotalCost = BC.getBinaryFunctions().size();
99a34c753fSRafael Auler   }
100a34c753fSRafael Auler   return TotalCost;
101a34c753fSRafael Auler }
102a34c753fSRafael Auler 
103a34c753fSRafael Auler } // namespace
104a34c753fSRafael Auler 
105*6aad62cfSSayhaan Siddiqui ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) {
106a34c753fSRafael Auler   if (ThreadPoolPtr.get())
107a34c753fSRafael Auler     return *ThreadPoolPtr;
108a34c753fSRafael Auler 
109*6aad62cfSSayhaan Siddiqui   if (ThreadsCount > 1)
110716042a6SMehdi Amini     ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
111*6aad62cfSSayhaan Siddiqui         llvm::hardware_concurrency(ThreadsCount));
112*6aad62cfSSayhaan Siddiqui   else
113*6aad62cfSSayhaan Siddiqui     ThreadPoolPtr = std::make_unique<SingleThreadExecutor>();
114a34c753fSRafael Auler   return *ThreadPoolPtr;
115a34c753fSRafael Auler }
116a34c753fSRafael Auler 
117a34c753fSRafael Auler void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
118a34c753fSRafael Auler                        WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
119a34c753fSRafael Auler                        std::string LogName, bool ForceSequential,
120a34c753fSRafael Auler                        unsigned TasksPerThread) {
121a34c753fSRafael Auler   if (BC.getBinaryFunctions().size() == 0)
122a34c753fSRafael Auler     return;
123a34c753fSRafael Auler 
124a34c753fSRafael Auler   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
125a34c753fSRafael Auler                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
126a34c753fSRafael Auler     Timer T(LogName, LogName);
127a34c753fSRafael Auler     LLVM_DEBUG(T.startTimer());
128a34c753fSRafael Auler 
129a34c753fSRafael Auler     for (auto It = BlockBegin; It != BlockEnd; ++It) {
130a34c753fSRafael Auler       BinaryFunction &BF = It->second;
131a34c753fSRafael Auler       if (SkipPredicate && SkipPredicate(BF))
132a34c753fSRafael Auler         continue;
133a34c753fSRafael Auler 
134a34c753fSRafael Auler       WorkFunction(BF);
135a34c753fSRafael Auler     }
136a34c753fSRafael Auler     LLVM_DEBUG(T.stopTimer());
137a34c753fSRafael Auler   };
138a34c753fSRafael Auler 
139a34c753fSRafael Auler   if (opts::NoThreads || ForceSequential) {
140a34c753fSRafael Auler     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
141a34c753fSRafael Auler     return;
142a34c753fSRafael Auler   }
143a34c753fSRafael Auler 
144a34c753fSRafael Auler   // Estimate the overall runtime cost using the scheduling policy
145a34c753fSRafael Auler   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
146a34c753fSRafael Auler   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
147a34c753fSRafael Auler   const unsigned BlockCost =
148a34c753fSRafael Auler       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
149a34c753fSRafael Auler 
150a34c753fSRafael Auler   // Divide work into blocks of equal cost
1514a4fb930SMehdi Amini   ThreadPoolInterface &Pool = getThreadPool();
152a34c753fSRafael Auler   auto BlockBegin = BC.getBinaryFunctions().begin();
153a34c753fSRafael Auler   unsigned CurrentCost = 0;
154a34c753fSRafael Auler 
155a34c753fSRafael Auler   for (auto It = BC.getBinaryFunctions().begin();
156a34c753fSRafael Auler        It != BC.getBinaryFunctions().end(); ++It) {
157a34c753fSRafael Auler     BinaryFunction &BF = It->second;
158a34c753fSRafael Auler     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
159a34c753fSRafael Auler 
160a34c753fSRafael Auler     if (CurrentCost >= BlockCost) {
161a34c753fSRafael Auler       Pool.async(runBlock, BlockBegin, std::next(It));
162a34c753fSRafael Auler       BlockBegin = std::next(It);
163a34c753fSRafael Auler       CurrentCost = 0;
164a34c753fSRafael Auler     }
165a34c753fSRafael Auler   }
166a34c753fSRafael Auler   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
167a34c753fSRafael Auler   Pool.wait();
168a34c753fSRafael Auler }
169a34c753fSRafael Auler 
170a34c753fSRafael Auler void runOnEachFunctionWithUniqueAllocId(
171a34c753fSRafael Auler     BinaryContext &BC, SchedulingPolicy SchedPolicy,
172a34c753fSRafael Auler     WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
173a34c753fSRafael Auler     std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
174a34c753fSRafael Auler   if (BC.getBinaryFunctions().size() == 0)
175a34c753fSRafael Auler     return;
176a34c753fSRafael Auler 
177e8ce5f1eSNico Weber   llvm::sys::RWMutex MainLock;
178a34c753fSRafael Auler   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
179a34c753fSRafael Auler                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
180a34c753fSRafael Auler                       MCPlusBuilder::AllocatorIdTy AllocId) {
181a34c753fSRafael Auler     Timer T(LogName, LogName);
182a34c753fSRafael Auler     LLVM_DEBUG(T.startTimer());
183e8ce5f1eSNico Weber     std::shared_lock<llvm::sys::RWMutex> Lock(MainLock);
184a34c753fSRafael Auler     for (auto It = BlockBegin; It != BlockEnd; ++It) {
185a34c753fSRafael Auler       BinaryFunction &BF = It->second;
186a34c753fSRafael Auler       if (SkipPredicate && SkipPredicate(BF))
187a34c753fSRafael Auler         continue;
188a34c753fSRafael Auler 
189a34c753fSRafael Auler       WorkFunction(BF, AllocId);
190a34c753fSRafael Auler     }
191a34c753fSRafael Auler     LLVM_DEBUG(T.stopTimer());
192a34c753fSRafael Auler   };
193a34c753fSRafael Auler 
194554459a0SKristof Beyls   unsigned AllocId = 1;
195554459a0SKristof Beyls   auto EnsureAllocatorExists = [&BC](unsigned AllocId) {
196554459a0SKristof Beyls     if (!BC.MIB->checkAllocatorExists(AllocId)) {
197554459a0SKristof Beyls       MCPlusBuilder::AllocatorIdTy Id =
198554459a0SKristof Beyls           BC.MIB->initializeNewAnnotationAllocator();
199554459a0SKristof Beyls       (void)Id;
200554459a0SKristof Beyls       assert(AllocId == Id && "unexpected allocator id created");
201554459a0SKristof Beyls     }
202554459a0SKristof Beyls   };
203554459a0SKristof Beyls 
204a34c753fSRafael Auler   if (opts::NoThreads || ForceSequential) {
205554459a0SKristof Beyls     EnsureAllocatorExists(AllocId);
206554459a0SKristof Beyls     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(),
207554459a0SKristof Beyls              AllocId);
208a34c753fSRafael Auler     return;
209a34c753fSRafael Auler   }
210a34c753fSRafael Auler   // This lock is used to postpone task execution
211e8ce5f1eSNico Weber   std::unique_lock<llvm::sys::RWMutex> Lock(MainLock);
212a34c753fSRafael Auler 
213a34c753fSRafael Auler   // Estimate the overall runtime cost using the scheduling policy
214a34c753fSRafael Auler   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
215a34c753fSRafael Auler   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
216a34c753fSRafael Auler   const unsigned BlockCost =
217a34c753fSRafael Auler       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
218a34c753fSRafael Auler 
219a34c753fSRafael Auler   // Divide work into blocks of equal cost
2204a4fb930SMehdi Amini   ThreadPoolInterface &Pool = getThreadPool();
221a34c753fSRafael Auler   auto BlockBegin = BC.getBinaryFunctions().begin();
222a34c753fSRafael Auler   unsigned CurrentCost = 0;
223a34c753fSRafael Auler   for (auto It = BC.getBinaryFunctions().begin();
224a34c753fSRafael Auler        It != BC.getBinaryFunctions().end(); ++It) {
225a34c753fSRafael Auler     BinaryFunction &BF = It->second;
226a34c753fSRafael Auler     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
227a34c753fSRafael Auler 
228a34c753fSRafael Auler     if (CurrentCost >= BlockCost) {
229554459a0SKristof Beyls       EnsureAllocatorExists(AllocId);
230a34c753fSRafael Auler       Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
231a34c753fSRafael Auler       AllocId++;
232a34c753fSRafael Auler       BlockBegin = std::next(It);
233a34c753fSRafael Auler       CurrentCost = 0;
234a34c753fSRafael Auler     }
235a34c753fSRafael Auler   }
236a34c753fSRafael Auler 
237e2326590SKristof Beyls   EnsureAllocatorExists(AllocId);
238a34c753fSRafael Auler 
239a34c753fSRafael Auler   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
240a34c753fSRafael Auler   Lock.unlock();
241a34c753fSRafael Auler   Pool.wait();
242a34c753fSRafael Auler }
243a34c753fSRafael Auler 
244a34c753fSRafael Auler } // namespace ParallelUtilities
245a34c753fSRafael Auler } // namespace bolt
246a34c753fSRafael Auler } // namespace llvm
247