xref: /llvm-project/bolt/lib/Passes/Instrumentation.cpp (revision 76fdc2e52719c7164e3f2cb05158a74df2b3d22e)
1 //===- bolt/Passes/Instrumentation.cpp ------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Instrumentation class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/Instrumentation.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/RWMutex.h"
20 #include <queue>
21 #include <stack>
22 #include <unordered_set>
23 
24 #define DEBUG_TYPE "bolt-instrumentation"
25 
26 using namespace llvm;
27 
28 namespace opts {
29 extern cl::OptionCategory BoltInstrCategory;
30 
31 cl::opt<std::string> InstrumentationFilename(
32     "instrumentation-file",
33     cl::desc("file name where instrumented profile will be saved (default: "
34              "/tmp/prof.fdata)"),
35     cl::init("/tmp/prof.fdata"), cl::Optional, cl::cat(BoltInstrCategory));
36 
37 cl::opt<std::string> InstrumentationBinpath(
38     "instrumentation-binpath",
39     cl::desc("path to instrumented binary in case if /proc/self/map_files "
40              "is not accessible due to access restriction issues"),
41     cl::Optional, cl::cat(BoltInstrCategory));
42 
43 cl::opt<bool> InstrumentationFileAppendPID(
44     "instrumentation-file-append-pid",
45     cl::desc("append PID to saved profile file name (default: false)"),
46     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
47 
48 cl::opt<bool> ConservativeInstrumentation(
49     "conservative-instrumentation",
50     cl::desc("disable instrumentation optimizations that sacrifice profile "
51              "accuracy (for debugging, default: false)"),
52     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
53 
54 cl::opt<uint32_t> InstrumentationSleepTime(
55     "instrumentation-sleep-time",
56     cl::desc("interval between profile writes (default: 0 = write only at "
57              "program end).  This is useful for service workloads when you "
58              "want to dump profile every X minutes or if you are killing the "
59              "program and the profile is not being dumped at the end."),
60     cl::init(0), cl::Optional, cl::cat(BoltInstrCategory));
61 
62 cl::opt<bool> InstrumentationNoCountersClear(
63     "instrumentation-no-counters-clear",
64     cl::desc("Don't clear counters across dumps "
65              "(use with instrumentation-sleep-time option)"),
66     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
67 
68 cl::opt<bool> InstrumentationWaitForks(
69     "instrumentation-wait-forks",
70     cl::desc("Wait until all forks of instrumented process will finish "
71              "(use with instrumentation-sleep-time option)"),
72     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
73 
74 cl::opt<bool>
75     InstrumentHotOnly("instrument-hot-only",
76                       cl::desc("only insert instrumentation on hot functions "
77                                "(needs profile, default: false)"),
78                       cl::init(false), cl::Optional,
79                       cl::cat(BoltInstrCategory));
80 
81 cl::opt<bool> InstrumentCalls("instrument-calls",
82                               cl::desc("record profile for inter-function "
83                                        "control flow activity (default: true)"),
84                               cl::init(true), cl::Optional,
85                               cl::cat(BoltInstrCategory));
86 } // namespace opts
87 
88 namespace llvm {
89 namespace bolt {
90 
91 static bool hasAArch64ExclusiveMemop(
92     BinaryFunction &Function,
93     std::unordered_set<const BinaryBasicBlock *> &BBToSkip) {
94   // FIXME ARMv8-a architecture reference manual says that software must avoid
95   // having any explicit memory accesses between exclusive load and associated
96   // store instruction. So for now skip instrumentation for basic blocks that
97   // have these instructions, since it might lead to runtime deadlock.
98   BinaryContext &BC = Function.getBinaryContext();
99   std::queue<std::pair<BinaryBasicBlock *, bool>> BBQueue; // {BB, isLoad}
100   std::unordered_set<BinaryBasicBlock *> Visited;
101 
102   if (Function.getLayout().block_begin() == Function.getLayout().block_end())
103     return 0;
104 
105   BinaryBasicBlock *BBfirst = *Function.getLayout().block_begin();
106   BBQueue.push({BBfirst, false});
107 
108   while (!BBQueue.empty()) {
109     BinaryBasicBlock *BB = BBQueue.front().first;
110     bool IsLoad = BBQueue.front().second;
111     BBQueue.pop();
112     if (Visited.find(BB) != Visited.end())
113       continue;
114     Visited.insert(BB);
115 
116     for (const MCInst &Inst : *BB) {
117       // Two loads one after another - skip whole function
118       if (BC.MIB->isAArch64ExclusiveLoad(Inst) && IsLoad) {
119         if (opts::Verbosity >= 2) {
120           outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
121                  << " has two exclusive loads. Ignoring the function.\n";
122         }
123         return true;
124       }
125 
126       if (BC.MIB->isAArch64ExclusiveLoad(Inst))
127         IsLoad = true;
128 
129       if (IsLoad && BBToSkip.find(BB) == BBToSkip.end()) {
130         BBToSkip.insert(BB);
131         if (opts::Verbosity >= 2) {
132           outs() << "BOLT-INSTRUMENTER: skip BB " << BB->getName()
133                  << " due to exclusive instruction in function "
134                  << Function.getPrintName() << "\n";
135         }
136       }
137 
138       if (!IsLoad && BC.MIB->isAArch64ExclusiveStore(Inst)) {
139         if (opts::Verbosity >= 2) {
140           outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
141                  << " has exclusive store without corresponding load. Ignoring "
142                     "the function.\n";
143         }
144         return true;
145       }
146 
147       if (IsLoad && (BC.MIB->isAArch64ExclusiveStore(Inst) ||
148                      BC.MIB->isAArch64ExclusiveClear(Inst)))
149         IsLoad = false;
150     }
151 
152     if (IsLoad && BB->succ_size() == 0) {
153       if (opts::Verbosity >= 2) {
154         outs()
155             << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
156             << " has exclusive load in trailing BB. Ignoring the function.\n";
157       }
158       return true;
159     }
160 
161     for (BinaryBasicBlock *BBS : BB->successors())
162       BBQueue.push({BBS, IsLoad});
163   }
164 
165   if (BBToSkip.size() == Visited.size()) {
166     if (opts::Verbosity >= 2) {
167       outs() << "BOLT-INSTRUMENTER: all BBs are marked with true. Ignoring the "
168                 "function "
169              << Function.getPrintName() << "\n";
170     }
171     return true;
172   }
173 
174   return false;
175 }
176 
177 uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) {
178   auto Iter = FuncToStringIdx.find(&Function);
179   if (Iter != FuncToStringIdx.end())
180     return Iter->second;
181   size_t Idx = Summary->StringTable.size();
182   FuncToStringIdx.emplace(std::make_pair(&Function, Idx));
183   Summary->StringTable.append(getEscapedName(Function.getOneName()));
184   Summary->StringTable.append(1, '\0');
185   return Idx;
186 }
187 
188 bool Instrumentation::createCallDescription(FunctionDescription &FuncDesc,
189                                             const BinaryFunction &FromFunction,
190                                             uint32_t From, uint32_t FromNodeID,
191                                             const BinaryFunction &ToFunction,
192                                             uint32_t To, bool IsInvoke) {
193   CallDescription CD;
194   // Ordinarily, we don't augment direct calls with an explicit counter, except
195   // when forced to do so or when we know this callee could be throwing
196   // exceptions, in which case there is no other way to accurately record its
197   // frequency.
198   bool ForceInstrumentation = opts::ConservativeInstrumentation || IsInvoke;
199   CD.FromLoc.FuncString = getFunctionNameIndex(FromFunction);
200   CD.FromLoc.Offset = From;
201   CD.FromNode = FromNodeID;
202   CD.Target = &ToFunction;
203   CD.ToLoc.FuncString = getFunctionNameIndex(ToFunction);
204   CD.ToLoc.Offset = To;
205   CD.Counter = ForceInstrumentation ? Summary->Counters.size() : 0xffffffff;
206   if (ForceInstrumentation)
207     ++DirectCallCounters;
208   FuncDesc.Calls.emplace_back(CD);
209   return ForceInstrumentation;
210 }
211 
212 void Instrumentation::createIndCallDescription(
213     const BinaryFunction &FromFunction, uint32_t From) {
214   IndCallDescription ICD;
215   ICD.FromLoc.FuncString = getFunctionNameIndex(FromFunction);
216   ICD.FromLoc.Offset = From;
217   Summary->IndCallDescriptions.emplace_back(ICD);
218 }
219 
220 void Instrumentation::createIndCallTargetDescription(
221     const BinaryFunction &ToFunction, uint32_t To) {
222   IndCallTargetDescription ICD;
223   ICD.ToLoc.FuncString = getFunctionNameIndex(ToFunction);
224   ICD.ToLoc.Offset = To;
225   ICD.Target = &ToFunction;
226   Summary->IndCallTargetDescriptions.emplace_back(ICD);
227 }
228 
229 bool Instrumentation::createEdgeDescription(FunctionDescription &FuncDesc,
230                                             const BinaryFunction &FromFunction,
231                                             uint32_t From, uint32_t FromNodeID,
232                                             const BinaryFunction &ToFunction,
233                                             uint32_t To, uint32_t ToNodeID,
234                                             bool Instrumented) {
235   EdgeDescription ED;
236   auto Result = FuncDesc.EdgesSet.insert(std::make_pair(FromNodeID, ToNodeID));
237   // Avoid creating duplicated edge descriptions. This happens in CFGs where a
238   // block jumps to its fall-through.
239   if (Result.second == false)
240     return false;
241   ED.FromLoc.FuncString = getFunctionNameIndex(FromFunction);
242   ED.FromLoc.Offset = From;
243   ED.FromNode = FromNodeID;
244   ED.ToLoc.FuncString = getFunctionNameIndex(ToFunction);
245   ED.ToLoc.Offset = To;
246   ED.ToNode = ToNodeID;
247   ED.Counter = Instrumented ? Summary->Counters.size() : 0xffffffff;
248   if (Instrumented)
249     ++BranchCounters;
250   FuncDesc.Edges.emplace_back(ED);
251   return Instrumented;
252 }
253 
254 void Instrumentation::createLeafNodeDescription(FunctionDescription &FuncDesc,
255                                                 uint32_t Node) {
256   InstrumentedNode IN;
257   IN.Node = Node;
258   IN.Counter = Summary->Counters.size();
259   ++LeafNodeCounters;
260   FuncDesc.LeafNodes.emplace_back(IN);
261 }
262 
263 InstructionListType
264 Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) {
265   auto L = BC.scopeLock();
266   MCSymbol *Label = BC.Ctx->createNamedTempSymbol("InstrEntry");
267   Summary->Counters.emplace_back(Label);
268   return BC.MIB->createInstrIncMemory(Label, BC.Ctx.get(), IsLeaf,
269                                       BC.AsmInfo->getCodePointerSize());
270 }
271 
272 // Helper instruction sequence insertion function
273 static BinaryBasicBlock::iterator
274 insertInstructions(InstructionListType &Instrs, BinaryBasicBlock &BB,
275                    BinaryBasicBlock::iterator Iter) {
276   for (MCInst &NewInst : Instrs) {
277     Iter = BB.insertInstruction(Iter, NewInst);
278     ++Iter;
279   }
280   return Iter;
281 }
282 
283 void Instrumentation::instrumentLeafNode(BinaryBasicBlock &BB,
284                                          BinaryBasicBlock::iterator Iter,
285                                          bool IsLeaf,
286                                          FunctionDescription &FuncDesc,
287                                          uint32_t Node) {
288   createLeafNodeDescription(FuncDesc, Node);
289   InstructionListType CounterInstrs = createInstrumentationSnippet(
290       BB.getFunction()->getBinaryContext(), IsLeaf);
291   insertInstructions(CounterInstrs, BB, Iter);
292 }
293 
294 void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
295                                                BinaryBasicBlock::iterator &Iter,
296                                                BinaryFunction &FromFunction,
297                                                uint32_t From) {
298   auto L = FromFunction.getBinaryContext().scopeLock();
299   const size_t IndCallSiteID = Summary->IndCallDescriptions.size();
300   createIndCallDescription(FromFunction, From);
301 
302   BinaryContext &BC = FromFunction.getBinaryContext();
303   bool IsTailCall = BC.MIB->isTailCall(*Iter);
304   InstructionListType CounterInstrs = BC.MIB->createInstrumentedIndirectCall(
305       std::move(*Iter),
306       IsTailCall ? IndTailCallHandlerExitBBFunction->getSymbol()
307                  : IndCallHandlerExitBBFunction->getSymbol(),
308       IndCallSiteID, &*BC.Ctx);
309 
310   Iter = BB.eraseInstruction(Iter);
311   Iter = insertInstructions(CounterInstrs, BB, Iter);
312   --Iter;
313 }
314 
315 bool Instrumentation::instrumentOneTarget(
316     SplitWorklistTy &SplitWorklist, SplitInstrsTy &SplitInstrs,
317     BinaryBasicBlock::iterator &Iter, BinaryFunction &FromFunction,
318     BinaryBasicBlock &FromBB, uint32_t From, BinaryFunction &ToFunc,
319     BinaryBasicBlock *TargetBB, uint32_t ToOffset, bool IsLeaf, bool IsInvoke,
320     FunctionDescription *FuncDesc, uint32_t FromNodeID, uint32_t ToNodeID) {
321   BinaryContext &BC = FromFunction.getBinaryContext();
322   {
323     auto L = BC.scopeLock();
324     bool Created = true;
325     if (!TargetBB)
326       Created = createCallDescription(*FuncDesc, FromFunction, From, FromNodeID,
327                                       ToFunc, ToOffset, IsInvoke);
328     else
329       Created = createEdgeDescription(*FuncDesc, FromFunction, From, FromNodeID,
330                                       ToFunc, ToOffset, ToNodeID,
331                                       /*Instrumented=*/true);
332     if (!Created)
333       return false;
334   }
335 
336   InstructionListType CounterInstrs = createInstrumentationSnippet(BC, IsLeaf);
337 
338   const MCInst &Inst = *Iter;
339   if (BC.MIB->isCall(Inst)) {
340     // This code handles both
341     // - (regular) inter-function calls (cross-function control transfer),
342     // - (rare) intra-function calls (function-local control transfer)
343     Iter = insertInstructions(CounterInstrs, FromBB, Iter);
344     return true;
345   }
346 
347   if (!TargetBB || !FuncDesc)
348     return false;
349 
350   // Indirect branch, conditional branches or fall-throughs
351   // Regular cond branch, put counter at start of target block
352   //
353   // N.B.: (FromBB != TargetBBs) checks below handle conditional jumps where
354   // we can't put the instrumentation counter in this block because not all
355   // paths that reach it at this point will be taken and going to the target.
356   if (TargetBB->pred_size() == 1 && &FromBB != TargetBB &&
357       !TargetBB->isEntryPoint()) {
358     insertInstructions(CounterInstrs, *TargetBB, TargetBB->begin());
359     return true;
360   }
361   if (FromBB.succ_size() == 1 && &FromBB != TargetBB) {
362     Iter = insertInstructions(CounterInstrs, FromBB, Iter);
363     return true;
364   }
365   // Critical edge, create BB and put counter there
366   SplitWorklist.emplace_back(&FromBB, TargetBB);
367   SplitInstrs.emplace_back(std::move(CounterInstrs));
368   return true;
369 }
370 
371 void Instrumentation::instrumentFunction(BinaryFunction &Function,
372                                          MCPlusBuilder::AllocatorIdTy AllocId) {
373   if (Function.hasUnknownControlFlow())
374     return;
375 
376   BinaryContext &BC = Function.getBinaryContext();
377   if (BC.isMachO() && Function.hasName("___GLOBAL_init_65535/1"))
378     return;
379 
380   std::unordered_set<const BinaryBasicBlock *> BBToSkip;
381   if (BC.isAArch64() && hasAArch64ExclusiveMemop(Function, BBToSkip))
382     return;
383 
384   SplitWorklistTy SplitWorklist;
385   SplitInstrsTy SplitInstrs;
386 
387   FunctionDescription *FuncDesc = nullptr;
388   {
389     std::unique_lock<llvm::sys::RWMutex> L(FDMutex);
390     Summary->FunctionDescriptions.emplace_back();
391     FuncDesc = &Summary->FunctionDescriptions.back();
392   }
393 
394   FuncDesc->Function = &Function;
395   Function.disambiguateJumpTables(AllocId);
396   Function.deleteConservativeEdges();
397 
398   std::unordered_map<const BinaryBasicBlock *, uint32_t> BBToID;
399   uint32_t Id = 0;
400   for (auto BBI = Function.begin(); BBI != Function.end(); ++BBI) {
401     BBToID[&*BBI] = Id++;
402   }
403   std::unordered_set<const BinaryBasicBlock *> VisitedSet;
404   // DFS to establish edges we will use for a spanning tree. Edges in the
405   // spanning tree can be instrumentation-free since their count can be
406   // inferred by solving flow equations on a bottom-up traversal of the tree.
407   // Exit basic blocks are always instrumented so we start the traversal with
408   // a minimum number of defined variables to make the equation solvable.
409   std::stack<std::pair<const BinaryBasicBlock *, BinaryBasicBlock *>> Stack;
410   std::unordered_map<const BinaryBasicBlock *,
411                      std::set<const BinaryBasicBlock *>>
412       STOutSet;
413   for (auto BBI = Function.getLayout().block_rbegin();
414        BBI != Function.getLayout().block_rend(); ++BBI) {
415     if ((*BBI)->isEntryPoint() || (*BBI)->isLandingPad()) {
416       Stack.push(std::make_pair(nullptr, *BBI));
417       if (opts::InstrumentCalls && (*BBI)->isEntryPoint()) {
418         EntryNode E;
419         E.Node = BBToID[&**BBI];
420         E.Address = (*BBI)->getInputOffset();
421         FuncDesc->EntryNodes.emplace_back(E);
422         createIndCallTargetDescription(Function, (*BBI)->getInputOffset());
423       }
424     }
425   }
426 
427   // Modified version of BinaryFunction::dfs() to build a spanning tree
428   if (!opts::ConservativeInstrumentation) {
429     while (!Stack.empty()) {
430       BinaryBasicBlock *BB;
431       const BinaryBasicBlock *Pred;
432       std::tie(Pred, BB) = Stack.top();
433       Stack.pop();
434       if (llvm::is_contained(VisitedSet, BB))
435         continue;
436 
437       VisitedSet.insert(BB);
438       if (Pred)
439         STOutSet[Pred].insert(BB);
440 
441       for (BinaryBasicBlock *SuccBB : BB->successors())
442         Stack.push(std::make_pair(BB, SuccBB));
443     }
444   }
445 
446   // Determine whether this is a leaf function, which needs special
447   // instructions to protect the red zone
448   bool IsLeafFunction = true;
449   DenseSet<const BinaryBasicBlock *> InvokeBlocks;
450   for (const BinaryBasicBlock &BB : Function) {
451     for (const MCInst &Inst : BB) {
452       if (BC.MIB->isCall(Inst)) {
453         if (BC.MIB->isInvoke(Inst))
454           InvokeBlocks.insert(&BB);
455         if (!BC.MIB->isTailCall(Inst))
456           IsLeafFunction = false;
457       }
458     }
459   }
460 
461   for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
462     BinaryBasicBlock &BB = *BBI;
463 
464     // Skip BBs with exclusive load/stores
465     if (BBToSkip.find(&BB) != BBToSkip.end())
466       continue;
467 
468     bool HasUnconditionalBranch = false;
469     bool HasJumpTable = false;
470     bool IsInvokeBlock = InvokeBlocks.count(&BB) > 0;
471 
472     for (auto I = BB.begin(); I != BB.end(); ++I) {
473       const MCInst &Inst = *I;
474       if (!BC.MIB->getOffset(Inst))
475         continue;
476 
477       const bool IsJumpTable = Function.getJumpTable(Inst);
478       if (IsJumpTable)
479         HasJumpTable = true;
480       else if (BC.MIB->isUnconditionalBranch(Inst))
481         HasUnconditionalBranch = true;
482       else if ((!BC.MIB->isCall(Inst) && !BC.MIB->isConditionalBranch(Inst)) ||
483                !BC.MIB->isReversibleBranch(Inst))
484         continue;
485 
486       const uint32_t FromOffset = *BC.MIB->getOffset(Inst);
487       const MCSymbol *Target = BC.MIB->getTargetSymbol(Inst);
488       BinaryBasicBlock *TargetBB = Function.getBasicBlockForLabel(Target);
489       uint32_t ToOffset = TargetBB ? TargetBB->getInputOffset() : 0;
490       BinaryFunction *TargetFunc =
491           TargetBB ? &Function : BC.getFunctionForSymbol(Target);
492       if (TargetFunc && BC.MIB->isCall(Inst)) {
493         if (opts::InstrumentCalls) {
494           const BinaryBasicBlock *ForeignBB =
495               TargetFunc->getBasicBlockForLabel(Target);
496           if (ForeignBB)
497             ToOffset = ForeignBB->getInputOffset();
498           instrumentOneTarget(SplitWorklist, SplitInstrs, I, Function, BB,
499                               FromOffset, *TargetFunc, TargetBB, ToOffset,
500                               IsLeafFunction, IsInvokeBlock, FuncDesc,
501                               BBToID[&BB]);
502         }
503         continue;
504       }
505       if (TargetFunc) {
506         // Do not instrument edges in the spanning tree
507         if (llvm::is_contained(STOutSet[&BB], TargetBB)) {
508           auto L = BC.scopeLock();
509           createEdgeDescription(*FuncDesc, Function, FromOffset, BBToID[&BB],
510                                 Function, ToOffset, BBToID[TargetBB],
511                                 /*Instrumented=*/false);
512           continue;
513         }
514         instrumentOneTarget(SplitWorklist, SplitInstrs, I, Function, BB,
515                             FromOffset, *TargetFunc, TargetBB, ToOffset,
516                             IsLeafFunction, IsInvokeBlock, FuncDesc,
517                             BBToID[&BB], BBToID[TargetBB]);
518         continue;
519       }
520 
521       if (IsJumpTable) {
522         for (BinaryBasicBlock *&Succ : BB.successors()) {
523           // Do not instrument edges in the spanning tree
524           if (llvm::is_contained(STOutSet[&BB], &*Succ)) {
525             auto L = BC.scopeLock();
526             createEdgeDescription(*FuncDesc, Function, FromOffset, BBToID[&BB],
527                                   Function, Succ->getInputOffset(),
528                                   BBToID[&*Succ], /*Instrumented=*/false);
529             continue;
530           }
531           instrumentOneTarget(
532               SplitWorklist, SplitInstrs, I, Function, BB, FromOffset, Function,
533               &*Succ, Succ->getInputOffset(), IsLeafFunction, IsInvokeBlock,
534               FuncDesc, BBToID[&BB], BBToID[&*Succ]);
535         }
536         continue;
537       }
538 
539       // Handle indirect calls -- could be direct calls with unknown targets
540       // or secondary entry points of known functions, so check it is indirect
541       // to be sure.
542       if (opts::InstrumentCalls && BC.MIB->isIndirectCall(*I))
543         instrumentIndirectTarget(BB, I, Function, FromOffset);
544 
545     } // End of instructions loop
546 
547     // Instrument fallthroughs (when the direct jump instruction is missing)
548     if (!HasUnconditionalBranch && !HasJumpTable && BB.succ_size() > 0 &&
549         BB.size() > 0) {
550       BinaryBasicBlock *FTBB = BB.getFallthrough();
551       assert(FTBB && "expected valid fall-through basic block");
552       auto I = BB.begin();
553       auto LastInstr = BB.end();
554       --LastInstr;
555       while (LastInstr != I && BC.MIB->isPseudo(*LastInstr))
556         --LastInstr;
557       uint32_t FromOffset = 0;
558       // The last instruction in the BB should have an annotation, except
559       // if it was branching to the end of the function as a result of
560       // __builtin_unreachable(), in which case it was deleted by fixBranches.
561       // Ignore this case. FIXME: force fixBranches() to preserve the offset.
562       if (!BC.MIB->getOffset(*LastInstr))
563         continue;
564       FromOffset = *BC.MIB->getOffset(*LastInstr);
565 
566       // Do not instrument edges in the spanning tree
567       if (llvm::is_contained(STOutSet[&BB], FTBB)) {
568         auto L = BC.scopeLock();
569         createEdgeDescription(*FuncDesc, Function, FromOffset, BBToID[&BB],
570                               Function, FTBB->getInputOffset(), BBToID[FTBB],
571                               /*Instrumented=*/false);
572         continue;
573       }
574       instrumentOneTarget(SplitWorklist, SplitInstrs, I, Function, BB,
575                           FromOffset, Function, FTBB, FTBB->getInputOffset(),
576                           IsLeafFunction, IsInvokeBlock, FuncDesc, BBToID[&BB],
577                           BBToID[FTBB]);
578     }
579   } // End of BBs loop
580 
581   // Instrument spanning tree leaves
582   if (!opts::ConservativeInstrumentation) {
583     for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
584       BinaryBasicBlock &BB = *BBI;
585       if (STOutSet[&BB].size() == 0)
586         instrumentLeafNode(BB, BB.begin(), IsLeafFunction, *FuncDesc,
587                            BBToID[&BB]);
588     }
589   }
590 
591   // Consume list of critical edges: split them and add instrumentation to the
592   // newly created BBs
593   auto Iter = SplitInstrs.begin();
594   for (std::pair<BinaryBasicBlock *, BinaryBasicBlock *> &BBPair :
595        SplitWorklist) {
596     BinaryBasicBlock *NewBB = Function.splitEdge(BBPair.first, BBPair.second);
597     NewBB->addInstructions(Iter->begin(), Iter->end());
598     ++Iter;
599   }
600 
601   // Unused now
602   FuncDesc->EdgesSet.clear();
603 }
604 
605 Error Instrumentation::runOnFunctions(BinaryContext &BC) {
606   const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/false,
607                                                  /*IsText=*/false,
608                                                  /*IsAllocatable=*/true);
609   BC.registerOrUpdateSection(".bolt.instr.counters", ELF::SHT_PROGBITS, Flags,
610                              nullptr, 0, 1);
611 
612   BC.registerOrUpdateNoteSection(".bolt.instr.tables", nullptr, 0,
613                                  /*Alignment=*/1,
614                                  /*IsReadOnly=*/true, ELF::SHT_NOTE);
615 
616   Summary->IndCallCounterFuncPtr =
617       BC.Ctx->getOrCreateSymbol("__bolt_ind_call_counter_func_pointer");
618   Summary->IndTailCallCounterFuncPtr =
619       BC.Ctx->getOrCreateSymbol("__bolt_ind_tailcall_counter_func_pointer");
620 
621   createAuxiliaryFunctions(BC);
622 
623   ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
624     return (!BF.isSimple() || BF.isIgnored() ||
625             (opts::InstrumentHotOnly && !BF.getKnownExecutionCount()));
626   };
627 
628   ParallelUtilities::WorkFuncWithAllocTy WorkFun =
629       [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) {
630         instrumentFunction(BF, AllocatorId);
631       };
632 
633   ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
634       BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFun,
635       SkipPredicate, "instrumentation", /* ForceSequential=*/true);
636 
637   if (BC.isMachO()) {
638     if (BC.StartFunctionAddress) {
639       BinaryFunction *Main =
640           BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
641       assert(Main && "Entry point function not found");
642       BinaryBasicBlock &BB = Main->front();
643 
644       ErrorOr<BinarySection &> SetupSection =
645           BC.getUniqueSectionByName("I__setup");
646       if (!SetupSection)
647         return createFatalBOLTError("Cannot find I__setup section\n");
648 
649       MCSymbol *Target = BC.registerNameAtAddress(
650           "__bolt_instr_setup", SetupSection->getAddress(), 0, 0);
651       MCInst NewInst;
652       BC.MIB->createCall(NewInst, Target, BC.Ctx.get());
653       BB.insertInstruction(BB.begin(), std::move(NewInst));
654     } else {
655       BC.errs() << "BOLT-WARNING: Entry point not found\n";
656     }
657 
658     if (BinaryData *BD = BC.getBinaryDataByName("___GLOBAL_init_65535/1")) {
659       BinaryFunction *Ctor = BC.getBinaryFunctionAtAddress(BD->getAddress());
660       assert(Ctor && "___GLOBAL_init_65535 function not found");
661       BinaryBasicBlock &BB = Ctor->front();
662       ErrorOr<BinarySection &> FiniSection =
663           BC.getUniqueSectionByName("I__fini");
664       if (!FiniSection)
665         return createFatalBOLTError("Cannot find I__fini section");
666 
667       MCSymbol *Target = BC.registerNameAtAddress(
668           "__bolt_instr_fini", FiniSection->getAddress(), 0, 0);
669       auto IsLEA = [&BC](const MCInst &Inst) { return BC.MIB->isLEA64r(Inst); };
670       const auto LEA = std::find_if(
671           std::next(llvm::find_if(reverse(BB), IsLEA)), BB.rend(), IsLEA);
672       LEA->getOperand(4).setExpr(
673           MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *BC.Ctx));
674     } else {
675       BC.errs() << "BOLT-WARNING: ___GLOBAL_init_65535 not found\n";
676     }
677   }
678 
679   setupRuntimeLibrary(BC);
680   return Error::success();
681 }
682 
683 void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) {
684   auto createSimpleFunction =
685       [&](StringRef Title, InstructionListType Instrs) -> BinaryFunction * {
686     BinaryFunction *Func = BC.createInjectedBinaryFunction(std::string(Title));
687 
688     std::vector<std::unique_ptr<BinaryBasicBlock>> BBs;
689     BBs.emplace_back(Func->createBasicBlock());
690     BBs.back()->addInstructions(Instrs.begin(), Instrs.end());
691     BBs.back()->setCFIState(0);
692     Func->insertBasicBlocks(nullptr, std::move(BBs),
693                             /*UpdateLayout=*/true,
694                             /*UpdateCFIState=*/false);
695     Func->updateState(BinaryFunction::State::CFG_Finalized);
696     return Func;
697   };
698 
699   // Here we are creating a set of functions to handle BB entry/exit.
700   // IndCallHandlerExitBB contains instructions to finish handling traffic to an
701   // indirect call. We pass it to createInstrumentedIndCallHandlerEntryBB(),
702   // which will check if a pointer to runtime library traffic accounting
703   // function was initialized (it is done during initialization of runtime
704   // library). If it is so - calls it. Then this routine returns to normal
705   // execution by jumping to exit BB.
706   BinaryFunction *IndCallHandlerExitBB =
707       createSimpleFunction("__bolt_instr_ind_call_handler",
708                            BC.MIB->createInstrumentedIndCallHandlerExitBB());
709 
710   IndCallHandlerExitBBFunction =
711       createSimpleFunction("__bolt_instr_ind_call_handler_func",
712                            BC.MIB->createInstrumentedIndCallHandlerEntryBB(
713                                Summary->IndCallCounterFuncPtr,
714                                IndCallHandlerExitBB->getSymbol(), &*BC.Ctx));
715 
716   BinaryFunction *IndTailCallHandlerExitBB = createSimpleFunction(
717       "__bolt_instr_ind_tail_call_handler",
718       BC.MIB->createInstrumentedIndTailCallHandlerExitBB());
719 
720   IndTailCallHandlerExitBBFunction = createSimpleFunction(
721       "__bolt_instr_ind_tailcall_handler_func",
722       BC.MIB->createInstrumentedIndCallHandlerEntryBB(
723           Summary->IndTailCallCounterFuncPtr,
724           IndTailCallHandlerExitBB->getSymbol(), &*BC.Ctx));
725 
726   createSimpleFunction("__bolt_num_counters_getter",
727                        BC.MIB->createNumCountersGetter(BC.Ctx.get()));
728   createSimpleFunction("__bolt_instr_locations_getter",
729                        BC.MIB->createInstrLocationsGetter(BC.Ctx.get()));
730   createSimpleFunction("__bolt_instr_tables_getter",
731                        BC.MIB->createInstrTablesGetter(BC.Ctx.get()));
732   createSimpleFunction("__bolt_instr_num_funcs_getter",
733                        BC.MIB->createInstrNumFuncsGetter(BC.Ctx.get()));
734 
735   if (BC.isELF()) {
736     if (BC.StartFunctionAddress) {
737       BinaryFunction *Start =
738           BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
739       assert(Start && "Entry point function not found");
740       const MCSymbol *StartSym = Start->getSymbol();
741       createSimpleFunction(
742           "__bolt_start_trampoline",
743           BC.MIB->createSymbolTrampoline(StartSym, BC.Ctx.get()));
744     }
745     if (BC.FiniFunctionAddress) {
746       BinaryFunction *Fini =
747           BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress);
748       assert(Fini && "Finalization function not found");
749       const MCSymbol *FiniSym = Fini->getSymbol();
750       createSimpleFunction(
751           "__bolt_fini_trampoline",
752           BC.MIB->createSymbolTrampoline(FiniSym, BC.Ctx.get()));
753     } else {
754       // Create dummy return function for trampoline to avoid issues
755       // with unknown symbol in runtime library. E.g. for static PIE
756       // executable
757       createSimpleFunction("__bolt_fini_trampoline",
758                            BC.MIB->createDummyReturnFunction(BC.Ctx.get()));
759     }
760   }
761 }
762 
763 void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) {
764   uint32_t FuncDescSize = Summary->getFDSize();
765 
766   BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call site descriptors: "
767             << Summary->IndCallDescriptions.size() << "\n";
768   BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call target descriptors: "
769             << Summary->IndCallTargetDescriptions.size() << "\n";
770   BC.outs() << "BOLT-INSTRUMENTER: Number of function descriptors: "
771             << Summary->FunctionDescriptions.size() << "\n";
772   BC.outs() << "BOLT-INSTRUMENTER: Number of branch counters: "
773             << BranchCounters << "\n";
774   BC.outs() << "BOLT-INSTRUMENTER: Number of ST leaf node counters: "
775             << LeafNodeCounters << "\n";
776   BC.outs() << "BOLT-INSTRUMENTER: Number of direct call counters: "
777             << DirectCallCounters << "\n";
778   BC.outs() << "BOLT-INSTRUMENTER: Total number of counters: "
779             << Summary->Counters.size() << "\n";
780   BC.outs() << "BOLT-INSTRUMENTER: Total size of counters: "
781             << (Summary->Counters.size() * 8)
782             << " bytes (static alloc memory)\n";
783   BC.outs() << "BOLT-INSTRUMENTER: Total size of string table emitted: "
784             << Summary->StringTable.size() << " bytes in file\n";
785   BC.outs() << "BOLT-INSTRUMENTER: Total size of descriptors: "
786             << (FuncDescSize +
787                 Summary->IndCallDescriptions.size() *
788                     sizeof(IndCallDescription) +
789                 Summary->IndCallTargetDescriptions.size() *
790                     sizeof(IndCallTargetDescription))
791             << " bytes in file\n";
792   BC.outs() << "BOLT-INSTRUMENTER: Profile will be saved to file "
793             << opts::InstrumentationFilename << "\n";
794 
795   InstrumentationRuntimeLibrary *RtLibrary =
796       static_cast<InstrumentationRuntimeLibrary *>(BC.getRuntimeLibrary());
797   assert(RtLibrary && "instrumentation runtime library object must be set");
798   RtLibrary->setSummary(std::move(Summary));
799 }
800 } // namespace bolt
801 } // namespace llvm
802