xref: /llvm-project/bolt/lib/Passes/Instrumentation.cpp (revision a9cd49d50e88aa142e6d57de90321d8810e1027f)
1 //===- bolt/Passes/Instrumentation.cpp ------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Instrumentation class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/Instrumentation.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
16 #include "bolt/Utils/Utils.h"
17 #include "llvm/Support/CommandLine.h"
18 #include <stack>
19 
20 #define DEBUG_TYPE "bolt-instrumentation"
21 
22 using namespace llvm;
23 
24 namespace opts {
25 extern cl::OptionCategory BoltInstrCategory;
26 
27 cl::opt<std::string> InstrumentationFilename(
28     "instrumentation-file",
29     cl::desc("file name where instrumented profile will be saved (default: "
30              "/tmp/prof.fdata)"),
31     cl::init("/tmp/prof.fdata"), cl::Optional, cl::cat(BoltInstrCategory));
32 
33 cl::opt<std::string> InstrumentationBinpath(
34     "instrumentation-binpath",
35     cl::desc("path to instumented binary in case if /proc/self/map_files "
36              "is not accessible due to access restriction issues"),
37     cl::Optional, cl::cat(BoltInstrCategory));
38 
39 cl::opt<bool> InstrumentationFileAppendPID(
40     "instrumentation-file-append-pid",
41     cl::desc("append PID to saved profile file name (default: false)"),
42     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
43 
44 cl::opt<bool> ConservativeInstrumentation(
45     "conservative-instrumentation",
46     cl::desc("disable instrumentation optimizations that sacrifice profile "
47              "accuracy (for debugging, default: false)"),
48     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
49 
50 cl::opt<uint32_t> InstrumentationSleepTime(
51     "instrumentation-sleep-time",
52     cl::desc("interval between profile writes (default: 0 = write only at "
53              "program end).  This is useful for service workloads when you "
54              "want to dump profile every X minutes or if you are killing the "
55              "program and the profile is not being dumped at the end."),
56     cl::init(0), cl::Optional, cl::cat(BoltInstrCategory));
57 
58 cl::opt<bool> InstrumentationNoCountersClear(
59     "instrumentation-no-counters-clear",
60     cl::desc("Don't clear counters across dumps "
61              "(use with instrumentation-sleep-time option)"),
62     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
63 
64 cl::opt<bool> InstrumentationWaitForks(
65     "instrumentation-wait-forks",
66     cl::desc("Wait until all forks of instrumented process will finish "
67              "(use with instrumentation-sleep-time option)"),
68     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
69 
70 cl::opt<bool>
71     InstrumentHotOnly("instrument-hot-only",
72                       cl::desc("only insert instrumentation on hot functions "
73                                "(needs profile, default: false)"),
74                       cl::init(false), cl::Optional,
75                       cl::cat(BoltInstrCategory));
76 
77 cl::opt<bool> InstrumentCalls("instrument-calls",
78                               cl::desc("record profile for inter-function "
79                                        "control flow activity (default: true)"),
80                               cl::init(true), cl::Optional,
81                               cl::cat(BoltInstrCategory));
82 } // namespace opts
83 
84 namespace llvm {
85 namespace bolt {
86 
87 uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) {
88   auto Iter = FuncToStringIdx.find(&Function);
89   if (Iter != FuncToStringIdx.end())
90     return Iter->second;
91   size_t Idx = Summary->StringTable.size();
92   FuncToStringIdx.emplace(std::make_pair(&Function, Idx));
93   Summary->StringTable.append(getEscapedName(Function.getOneName()));
94   Summary->StringTable.append(1, '\0');
95   return Idx;
96 }
97 
98 bool Instrumentation::createCallDescription(FunctionDescription &FuncDesc,
99                                             const BinaryFunction &FromFunction,
100                                             uint32_t From, uint32_t FromNodeID,
101                                             const BinaryFunction &ToFunction,
102                                             uint32_t To, bool IsInvoke) {
103   CallDescription CD;
104   // Ordinarily, we don't augment direct calls with an explicit counter, except
105   // when forced to do so or when we know this callee could be throwing
106   // exceptions, in which case there is no other way to accurately record its
107   // frequency.
108   bool ForceInstrumentation = opts::ConservativeInstrumentation || IsInvoke;
109   CD.FromLoc.FuncString = getFunctionNameIndex(FromFunction);
110   CD.FromLoc.Offset = From;
111   CD.FromNode = FromNodeID;
112   CD.Target = &ToFunction;
113   CD.ToLoc.FuncString = getFunctionNameIndex(ToFunction);
114   CD.ToLoc.Offset = To;
115   CD.Counter = ForceInstrumentation ? Summary->Counters.size() : 0xffffffff;
116   if (ForceInstrumentation)
117     ++DirectCallCounters;
118   FuncDesc.Calls.emplace_back(CD);
119   return ForceInstrumentation;
120 }
121 
122 void Instrumentation::createIndCallDescription(
123     const BinaryFunction &FromFunction, uint32_t From) {
124   IndCallDescription ICD;
125   ICD.FromLoc.FuncString = getFunctionNameIndex(FromFunction);
126   ICD.FromLoc.Offset = From;
127   Summary->IndCallDescriptions.emplace_back(ICD);
128 }
129 
130 void Instrumentation::createIndCallTargetDescription(
131     const BinaryFunction &ToFunction, uint32_t To) {
132   IndCallTargetDescription ICD;
133   ICD.ToLoc.FuncString = getFunctionNameIndex(ToFunction);
134   ICD.ToLoc.Offset = To;
135   ICD.Target = &ToFunction;
136   Summary->IndCallTargetDescriptions.emplace_back(ICD);
137 }
138 
139 bool Instrumentation::createEdgeDescription(FunctionDescription &FuncDesc,
140                                             const BinaryFunction &FromFunction,
141                                             uint32_t From, uint32_t FromNodeID,
142                                             const BinaryFunction &ToFunction,
143                                             uint32_t To, uint32_t ToNodeID,
144                                             bool Instrumented) {
145   EdgeDescription ED;
146   auto Result = FuncDesc.EdgesSet.insert(std::make_pair(FromNodeID, ToNodeID));
147   // Avoid creating duplicated edge descriptions. This happens in CFGs where a
148   // block jumps to its fall-through.
149   if (Result.second == false)
150     return false;
151   ED.FromLoc.FuncString = getFunctionNameIndex(FromFunction);
152   ED.FromLoc.Offset = From;
153   ED.FromNode = FromNodeID;
154   ED.ToLoc.FuncString = getFunctionNameIndex(ToFunction);
155   ED.ToLoc.Offset = To;
156   ED.ToNode = ToNodeID;
157   ED.Counter = Instrumented ? Summary->Counters.size() : 0xffffffff;
158   if (Instrumented)
159     ++BranchCounters;
160   FuncDesc.Edges.emplace_back(ED);
161   return Instrumented;
162 }
163 
164 void Instrumentation::createLeafNodeDescription(FunctionDescription &FuncDesc,
165                                                 uint32_t Node) {
166   InstrumentedNode IN;
167   IN.Node = Node;
168   IN.Counter = Summary->Counters.size();
169   ++LeafNodeCounters;
170   FuncDesc.LeafNodes.emplace_back(IN);
171 }
172 
173 InstructionListType
174 Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) {
175   auto L = BC.scopeLock();
176   MCSymbol *Label;
177   Label = BC.Ctx->createNamedTempSymbol("InstrEntry");
178   Summary->Counters.emplace_back(Label);
179   InstructionListType CounterInstrs;
180   BC.MIB->createInstrIncMemory(CounterInstrs, Label, &*BC.Ctx, IsLeaf);
181   return CounterInstrs;
182 }
183 
184 namespace {
185 
186 // Helper instruction sequence insertion function
187 BinaryBasicBlock::iterator insertInstructions(InstructionListType &Instrs,
188                                               BinaryBasicBlock &BB,
189                                               BinaryBasicBlock::iterator Iter) {
190   for (MCInst &NewInst : Instrs) {
191     Iter = BB.insertInstruction(Iter, NewInst);
192     ++Iter;
193   }
194   return Iter;
195 }
196 } // namespace
197 
198 void Instrumentation::instrumentLeafNode(BinaryBasicBlock &BB,
199                                          BinaryBasicBlock::iterator Iter,
200                                          bool IsLeaf,
201                                          FunctionDescription &FuncDesc,
202                                          uint32_t Node) {
203   createLeafNodeDescription(FuncDesc, Node);
204   InstructionListType CounterInstrs = createInstrumentationSnippet(
205       BB.getFunction()->getBinaryContext(), IsLeaf);
206   insertInstructions(CounterInstrs, BB, Iter);
207 }
208 
209 void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
210                                                BinaryBasicBlock::iterator &Iter,
211                                                BinaryFunction &FromFunction,
212                                                uint32_t From) {
213   auto L = FromFunction.getBinaryContext().scopeLock();
214   const size_t IndCallSiteID = Summary->IndCallDescriptions.size();
215   createIndCallDescription(FromFunction, From);
216 
217   BinaryContext &BC = FromFunction.getBinaryContext();
218   bool IsTailCall = BC.MIB->isTailCall(*Iter);
219   InstructionListType CounterInstrs = BC.MIB->createInstrumentedIndirectCall(
220       *Iter, IsTailCall,
221       IsTailCall ? IndTailCallHandlerExitBBFunction->getSymbol()
222                  : IndCallHandlerExitBBFunction->getSymbol(),
223       IndCallSiteID, &*BC.Ctx);
224 
225   Iter = BB.eraseInstruction(Iter);
226   Iter = insertInstructions(CounterInstrs, BB, Iter);
227   --Iter;
228 }
229 
230 bool Instrumentation::instrumentOneTarget(
231     SplitWorklistTy &SplitWorklist, SplitInstrsTy &SplitInstrs,
232     BinaryBasicBlock::iterator &Iter, BinaryFunction &FromFunction,
233     BinaryBasicBlock &FromBB, uint32_t From, BinaryFunction &ToFunc,
234     BinaryBasicBlock *TargetBB, uint32_t ToOffset, bool IsLeaf, bool IsInvoke,
235     FunctionDescription *FuncDesc, uint32_t FromNodeID, uint32_t ToNodeID) {
236   {
237     auto L = FromFunction.getBinaryContext().scopeLock();
238     bool Created = true;
239     if (!TargetBB)
240       Created = createCallDescription(*FuncDesc, FromFunction, From, FromNodeID,
241                                       ToFunc, ToOffset, IsInvoke);
242     else
243       Created = createEdgeDescription(*FuncDesc, FromFunction, From, FromNodeID,
244                                       ToFunc, ToOffset, ToNodeID,
245                                       /*Instrumented=*/true);
246     if (!Created)
247       return false;
248   }
249 
250   InstructionListType CounterInstrs =
251       createInstrumentationSnippet(FromFunction.getBinaryContext(), IsLeaf);
252 
253   BinaryContext &BC = FromFunction.getBinaryContext();
254   const MCInst &Inst = *Iter;
255   if (BC.MIB->isCall(Inst)) {
256     // This code handles both
257     // - (regular) inter-function calls (cross-function control transfer),
258     // - (rare) intra-function calls (function-local control transfer)
259     Iter = insertInstructions(CounterInstrs, FromBB, Iter);
260     return true;
261   }
262 
263   if (!TargetBB || !FuncDesc)
264     return false;
265 
266   // Indirect branch, conditional branches or fall-throughs
267   // Regular cond branch, put counter at start of target block
268   //
269   // N.B.: (FromBB != TargetBBs) checks below handle conditional jumps where
270   // we can't put the instrumentation counter in this block because not all
271   // paths that reach it at this point will be taken and going to the target.
272   if (TargetBB->pred_size() == 1 && &FromBB != TargetBB &&
273       !TargetBB->isEntryPoint()) {
274     insertInstructions(CounterInstrs, *TargetBB, TargetBB->begin());
275     return true;
276   }
277   if (FromBB.succ_size() == 1 && &FromBB != TargetBB) {
278     Iter = insertInstructions(CounterInstrs, FromBB, Iter);
279     return true;
280   }
281   // Critical edge, create BB and put counter there
282   SplitWorklist.emplace_back(&FromBB, TargetBB);
283   SplitInstrs.emplace_back(std::move(CounterInstrs));
284   return true;
285 }
286 
287 void Instrumentation::instrumentFunction(BinaryFunction &Function,
288                                          MCPlusBuilder::AllocatorIdTy AllocId) {
289   if (Function.hasUnknownControlFlow())
290     return;
291 
292   BinaryContext &BC = Function.getBinaryContext();
293   if (BC.isMachO() && Function.hasName("___GLOBAL_init_65535/1"))
294     return;
295 
296   SplitWorklistTy SplitWorklist;
297   SplitInstrsTy SplitInstrs;
298 
299   FunctionDescription *FuncDesc = nullptr;
300   {
301     std::unique_lock<std::shared_timed_mutex> L(FDMutex);
302     Summary->FunctionDescriptions.emplace_back();
303     FuncDesc = &Summary->FunctionDescriptions.back();
304   }
305 
306   FuncDesc->Function = &Function;
307   Function.disambiguateJumpTables(AllocId);
308   Function.deleteConservativeEdges();
309 
310   std::unordered_map<const BinaryBasicBlock *, uint32_t> BBToID;
311   uint32_t Id = 0;
312   for (auto BBI = Function.begin(); BBI != Function.end(); ++BBI) {
313     BBToID[&*BBI] = Id++;
314   }
315   std::unordered_set<const BinaryBasicBlock *> VisitedSet;
316   // DFS to establish edges we will use for a spanning tree. Edges in the
317   // spanning tree can be instrumentation-free since their count can be
318   // inferred by solving flow equations on a bottom-up traversal of the tree.
319   // Exit basic blocks are always instrumented so we start the traversal with
320   // a minimum number of defined variables to make the equation solvable.
321   std::stack<std::pair<const BinaryBasicBlock *, BinaryBasicBlock *>> Stack;
322   std::unordered_map<const BinaryBasicBlock *,
323                      std::set<const BinaryBasicBlock *>>
324       STOutSet;
325   for (auto BBI = Function.layout_rbegin(); BBI != Function.layout_rend();
326        ++BBI) {
327     if ((*BBI)->isEntryPoint() || (*BBI)->isLandingPad()) {
328       Stack.push(std::make_pair(nullptr, *BBI));
329       if (opts::InstrumentCalls && (*BBI)->isEntryPoint()) {
330         EntryNode E;
331         E.Node = BBToID[&**BBI];
332         E.Address = (*BBI)->getInputOffset();
333         FuncDesc->EntryNodes.emplace_back(E);
334         createIndCallTargetDescription(Function, (*BBI)->getInputOffset());
335       }
336     }
337   }
338 
339   // Modified version of BinaryFunction::dfs() to build a spanning tree
340   if (!opts::ConservativeInstrumentation) {
341     while (!Stack.empty()) {
342       BinaryBasicBlock *BB;
343       const BinaryBasicBlock *Pred;
344       std::tie(Pred, BB) = Stack.top();
345       Stack.pop();
346       if (VisitedSet.find(BB) != VisitedSet.end())
347         continue;
348 
349       VisitedSet.insert(BB);
350       if (Pred)
351         STOutSet[Pred].insert(BB);
352 
353       for (BinaryBasicBlock *SuccBB : BB->successors())
354         Stack.push(std::make_pair(BB, SuccBB));
355     }
356   }
357 
358   // Determine whether this is a leaf function, which needs special
359   // instructions to protect the red zone
360   bool IsLeafFunction = true;
361   DenseSet<const BinaryBasicBlock *> InvokeBlocks;
362   for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
363     for (auto I = BBI->begin(), E = BBI->end(); I != E; ++I) {
364       if (BC.MIB->isCall(*I)) {
365         if (BC.MIB->isInvoke(*I))
366           InvokeBlocks.insert(&*BBI);
367         IsLeafFunction = false;
368       }
369     }
370   }
371 
372   for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
373     BinaryBasicBlock &BB = *BBI;
374     bool HasUnconditionalBranch = false;
375     bool HasJumpTable = false;
376     bool IsInvokeBlock = InvokeBlocks.count(&BB) > 0;
377 
378     for (auto I = BB.begin(); I != BB.end(); ++I) {
379       const MCInst &Inst = *I;
380       if (!BC.MIB->getOffset(Inst))
381         continue;
382 
383       const bool IsJumpTable = Function.getJumpTable(Inst);
384       if (IsJumpTable)
385         HasJumpTable = true;
386       else if (BC.MIB->isUnconditionalBranch(Inst))
387         HasUnconditionalBranch = true;
388       else if ((!BC.MIB->isCall(Inst) && !BC.MIB->isConditionalBranch(Inst)) ||
389                BC.MIB->isUnsupportedBranch(Inst.getOpcode()))
390         continue;
391 
392       const uint32_t FromOffset = *BC.MIB->getOffset(Inst);
393       const MCSymbol *Target = BC.MIB->getTargetSymbol(Inst);
394       BinaryBasicBlock *TargetBB = Function.getBasicBlockForLabel(Target);
395       uint32_t ToOffset = TargetBB ? TargetBB->getInputOffset() : 0;
396       BinaryFunction *TargetFunc =
397           TargetBB ? &Function : BC.getFunctionForSymbol(Target);
398       if (TargetFunc && BC.MIB->isCall(Inst)) {
399         if (opts::InstrumentCalls) {
400           const BinaryBasicBlock *ForeignBB =
401               TargetFunc->getBasicBlockForLabel(Target);
402           if (ForeignBB)
403             ToOffset = ForeignBB->getInputOffset();
404           instrumentOneTarget(SplitWorklist, SplitInstrs, I, Function, BB,
405                               FromOffset, *TargetFunc, TargetBB, ToOffset,
406                               IsLeafFunction, IsInvokeBlock, FuncDesc,
407                               BBToID[&BB]);
408         }
409         continue;
410       }
411       if (TargetFunc) {
412         // Do not instrument edges in the spanning tree
413         if (STOutSet[&BB].find(TargetBB) != STOutSet[&BB].end()) {
414           auto L = BC.scopeLock();
415           createEdgeDescription(*FuncDesc, Function, FromOffset, BBToID[&BB],
416                                 Function, ToOffset, BBToID[TargetBB],
417                                 /*Instrumented=*/false);
418           continue;
419         }
420         instrumentOneTarget(SplitWorklist, SplitInstrs, I, Function, BB,
421                             FromOffset, *TargetFunc, TargetBB, ToOffset,
422                             IsLeafFunction, IsInvokeBlock, FuncDesc,
423                             BBToID[&BB], BBToID[TargetBB]);
424         continue;
425       }
426 
427       if (IsJumpTable) {
428         for (BinaryBasicBlock *&Succ : BB.successors()) {
429           // Do not instrument edges in the spanning tree
430           if (STOutSet[&BB].find(&*Succ) != STOutSet[&BB].end()) {
431             auto L = BC.scopeLock();
432             createEdgeDescription(*FuncDesc, Function, FromOffset, BBToID[&BB],
433                                   Function, Succ->getInputOffset(),
434                                   BBToID[&*Succ], /*Instrumented=*/false);
435             continue;
436           }
437           instrumentOneTarget(
438               SplitWorklist, SplitInstrs, I, Function, BB, FromOffset, Function,
439               &*Succ, Succ->getInputOffset(), IsLeafFunction, IsInvokeBlock,
440               FuncDesc, BBToID[&BB], BBToID[&*Succ]);
441         }
442         continue;
443       }
444 
445       // Handle indirect calls -- could be direct calls with unknown targets
446       // or secondary entry points of known functions, so check it is indirect
447       // to be sure.
448       if (opts::InstrumentCalls && BC.MIB->isIndirectCall(*I))
449         instrumentIndirectTarget(BB, I, Function, FromOffset);
450 
451     } // End of instructions loop
452 
453     // Instrument fallthroughs (when the direct jump instruction is missing)
454     if (!HasUnconditionalBranch && !HasJumpTable && BB.succ_size() > 0 &&
455         BB.size() > 0) {
456       BinaryBasicBlock *FTBB = BB.getFallthrough();
457       assert(FTBB && "expected valid fall-through basic block");
458       auto I = BB.begin();
459       auto LastInstr = BB.end();
460       --LastInstr;
461       while (LastInstr != I && BC.MIB->isPseudo(*LastInstr))
462         --LastInstr;
463       uint32_t FromOffset = 0;
464       // The last instruction in the BB should have an annotation, except
465       // if it was branching to the end of the function as a result of
466       // __builtin_unreachable(), in which case it was deleted by fixBranches.
467       // Ignore this case. FIXME: force fixBranches() to preserve the offset.
468       if (!BC.MIB->getOffset(*LastInstr))
469         continue;
470       FromOffset = *BC.MIB->getOffset(*LastInstr);
471 
472       // Do not instrument edges in the spanning tree
473       if (STOutSet[&BB].find(FTBB) != STOutSet[&BB].end()) {
474         auto L = BC.scopeLock();
475         createEdgeDescription(*FuncDesc, Function, FromOffset, BBToID[&BB],
476                               Function, FTBB->getInputOffset(), BBToID[FTBB],
477                               /*Instrumented=*/false);
478         continue;
479       }
480       instrumentOneTarget(SplitWorklist, SplitInstrs, I, Function, BB,
481                           FromOffset, Function, FTBB, FTBB->getInputOffset(),
482                           IsLeafFunction, IsInvokeBlock, FuncDesc, BBToID[&BB],
483                           BBToID[FTBB]);
484     }
485   } // End of BBs loop
486 
487   // Instrument spanning tree leaves
488   if (!opts::ConservativeInstrumentation) {
489     for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
490       BinaryBasicBlock &BB = *BBI;
491       if (STOutSet[&BB].size() == 0)
492         instrumentLeafNode(BB, BB.begin(), IsLeafFunction, *FuncDesc,
493                            BBToID[&BB]);
494     }
495   }
496 
497   // Consume list of critical edges: split them and add instrumentation to the
498   // newly created BBs
499   auto Iter = SplitInstrs.begin();
500   for (std::pair<BinaryBasicBlock *, BinaryBasicBlock *> &BBPair :
501        SplitWorklist) {
502     BinaryBasicBlock *NewBB = Function.splitEdge(BBPair.first, BBPair.second);
503     NewBB->addInstructions(Iter->begin(), Iter->end());
504     ++Iter;
505   }
506 
507   // Unused now
508   FuncDesc->EdgesSet.clear();
509 }
510 
511 void Instrumentation::runOnFunctions(BinaryContext &BC) {
512   if (!BC.isX86())
513     return;
514 
515   const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/false,
516                                                  /*IsText=*/false,
517                                                  /*IsAllocatable=*/true);
518   BC.registerOrUpdateSection(".bolt.instr.counters", ELF::SHT_PROGBITS, Flags,
519                              nullptr, 0, 1);
520 
521   BC.registerOrUpdateNoteSection(".bolt.instr.tables", nullptr, 0,
522                                  /*Alignment=*/1,
523                                  /*IsReadOnly=*/true, ELF::SHT_NOTE);
524 
525   Summary->IndCallCounterFuncPtr =
526       BC.Ctx->getOrCreateSymbol("__bolt_ind_call_counter_func_pointer");
527   Summary->IndTailCallCounterFuncPtr =
528       BC.Ctx->getOrCreateSymbol("__bolt_ind_tailcall_counter_func_pointer");
529 
530   createAuxiliaryFunctions(BC);
531 
532   ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
533     return (!BF.isSimple() || BF.isIgnored() ||
534             (opts::InstrumentHotOnly && !BF.getKnownExecutionCount()));
535   };
536 
537   ParallelUtilities::WorkFuncWithAllocTy WorkFun =
538       [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) {
539         instrumentFunction(BF, AllocatorId);
540       };
541 
542   ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
543       BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFun,
544       SkipPredicate, "instrumentation", /* ForceSequential=*/true);
545 
546   if (BC.isMachO()) {
547     if (BC.StartFunctionAddress) {
548       BinaryFunction *Main =
549           BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
550       assert(Main && "Entry point function not found");
551       BinaryBasicBlock &BB = Main->front();
552 
553       ErrorOr<BinarySection &> SetupSection =
554           BC.getUniqueSectionByName("I__setup");
555       if (!SetupSection) {
556         llvm::errs() << "Cannot find I__setup section\n";
557         exit(1);
558       }
559       MCSymbol *Target = BC.registerNameAtAddress(
560           "__bolt_instr_setup", SetupSection->getAddress(), 0, 0);
561       MCInst NewInst;
562       BC.MIB->createCall(NewInst, Target, BC.Ctx.get());
563       BB.insertInstruction(BB.begin(), std::move(NewInst));
564     } else {
565       llvm::errs() << "BOLT-WARNING: Entry point not found\n";
566     }
567 
568     if (BinaryData *BD = BC.getBinaryDataByName("___GLOBAL_init_65535/1")) {
569       BinaryFunction *Ctor = BC.getBinaryFunctionAtAddress(BD->getAddress());
570       assert(Ctor && "___GLOBAL_init_65535 function not found");
571       BinaryBasicBlock &BB = Ctor->front();
572       ErrorOr<BinarySection &> FiniSection =
573           BC.getUniqueSectionByName("I__fini");
574       if (!FiniSection) {
575         llvm::errs() << "Cannot find I__fini section\n";
576         exit(1);
577       }
578       MCSymbol *Target = BC.registerNameAtAddress(
579           "__bolt_instr_fini", FiniSection->getAddress(), 0, 0);
580       auto IsLEA = [&BC](const MCInst &Inst) { return BC.MIB->isLEA64r(Inst); };
581       const auto LEA =
582           std::find_if(std::next(std::find_if(BB.rbegin(), BB.rend(), IsLEA)),
583                        BB.rend(), IsLEA);
584       LEA->getOperand(4).setExpr(
585           MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *BC.Ctx));
586     } else {
587       llvm::errs() << "BOLT-WARNING: ___GLOBAL_init_65535 not found\n";
588     }
589   }
590 
591   setupRuntimeLibrary(BC);
592 }
593 
594 void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) {
595   auto createSimpleFunction =
596       [&](StringRef Title, InstructionListType Instrs) -> BinaryFunction * {
597     BinaryFunction *Func = BC.createInjectedBinaryFunction(std::string(Title));
598 
599     std::vector<std::unique_ptr<BinaryBasicBlock>> BBs;
600     BBs.emplace_back(
601         Func->createBasicBlock(BinaryBasicBlock::INVALID_OFFSET, nullptr));
602     BBs.back()->addInstructions(Instrs.begin(), Instrs.end());
603     BBs.back()->setCFIState(0);
604     Func->insertBasicBlocks(nullptr, std::move(BBs),
605                             /*UpdateLayout=*/true,
606                             /*UpdateCFIState=*/false);
607     Func->updateState(BinaryFunction::State::CFG_Finalized);
608     return Func;
609   };
610 
611   // Here we are creating a set of functions to handle BB entry/exit.
612   // IndCallHandlerExitBB contains instructions to finish handling traffic to an
613   // indirect call. We pass it to createInstrumentedIndCallHandlerEntryBB(),
614   // which will check if a pointer to runtime library traffic accounting
615   // function was initialized (it is done during initialization of runtime
616   // library). If it is so - calls it. Then this routine returns to normal
617   // execution by jumping to exit BB.
618   BinaryFunction *IndCallHandlerExitBB =
619       createSimpleFunction("__bolt_instr_ind_call_handler",
620                            BC.MIB->createInstrumentedIndCallHandlerExitBB());
621 
622   IndCallHandlerExitBBFunction =
623       createSimpleFunction("__bolt_instr_ind_call_handler_func",
624                            BC.MIB->createInstrumentedIndCallHandlerEntryBB(
625                                Summary->IndCallCounterFuncPtr,
626                                IndCallHandlerExitBB->getSymbol(), &*BC.Ctx));
627 
628   BinaryFunction *IndTailCallHandlerExitBB = createSimpleFunction(
629       "__bolt_instr_ind_tail_call_handler",
630       BC.MIB->createInstrumentedIndTailCallHandlerExitBB());
631 
632   IndTailCallHandlerExitBBFunction = createSimpleFunction(
633       "__bolt_instr_ind_tailcall_handler_func",
634       BC.MIB->createInstrumentedIndCallHandlerEntryBB(
635           Summary->IndTailCallCounterFuncPtr,
636           IndTailCallHandlerExitBB->getSymbol(), &*BC.Ctx));
637 
638   createSimpleFunction("__bolt_num_counters_getter",
639                        BC.MIB->createNumCountersGetter(BC.Ctx.get()));
640   createSimpleFunction("__bolt_instr_locations_getter",
641                        BC.MIB->createInstrLocationsGetter(BC.Ctx.get()));
642   createSimpleFunction("__bolt_instr_tables_getter",
643                        BC.MIB->createInstrTablesGetter(BC.Ctx.get()));
644   createSimpleFunction("__bolt_instr_num_funcs_getter",
645                        BC.MIB->createInstrNumFuncsGetter(BC.Ctx.get()));
646 
647   if (BC.isELF()) {
648     if (BC.StartFunctionAddress) {
649       BinaryFunction *Start =
650           BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
651       assert(Start && "Entry point function not found");
652       const MCSymbol *StartSym = Start->getSymbol();
653       createSimpleFunction(
654           "__bolt_start_trampoline",
655           BC.MIB->createSymbolTrampoline(StartSym, BC.Ctx.get()));
656     }
657     if (BC.FiniFunctionAddress) {
658       BinaryFunction *Fini =
659           BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress);
660       assert(Fini && "Finalization function not found");
661       const MCSymbol *FiniSym = Fini->getSymbol();
662       createSimpleFunction(
663           "__bolt_fini_trampoline",
664           BC.MIB->createSymbolTrampoline(FiniSym, BC.Ctx.get()));
665     } else {
666       // Create dummy return function for trampoline to avoid issues
667       // with unknown symbol in runtime library. E.g. for static PIE
668       // executable
669       createSimpleFunction("__bolt_fini_trampoline",
670                            BC.MIB->createDummyReturnFunction(BC.Ctx.get()));
671     }
672   }
673 }
674 
675 void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) {
676   uint32_t FuncDescSize = Summary->getFDSize();
677 
678   outs() << "BOLT-INSTRUMENTER: Number of indirect call site descriptors: "
679          << Summary->IndCallDescriptions.size() << "\n";
680   outs() << "BOLT-INSTRUMENTER: Number of indirect call target descriptors: "
681          << Summary->IndCallTargetDescriptions.size() << "\n";
682   outs() << "BOLT-INSTRUMENTER: Number of function descriptors: "
683          << Summary->FunctionDescriptions.size() << "\n";
684   outs() << "BOLT-INSTRUMENTER: Number of branch counters: " << BranchCounters
685          << "\n";
686   outs() << "BOLT-INSTRUMENTER: Number of ST leaf node counters: "
687          << LeafNodeCounters << "\n";
688   outs() << "BOLT-INSTRUMENTER: Number of direct call counters: "
689          << DirectCallCounters << "\n";
690   outs() << "BOLT-INSTRUMENTER: Total number of counters: "
691          << Summary->Counters.size() << "\n";
692   outs() << "BOLT-INSTRUMENTER: Total size of counters: "
693          << (Summary->Counters.size() * 8) << " bytes (static alloc memory)\n";
694   outs() << "BOLT-INSTRUMENTER: Total size of string table emitted: "
695          << Summary->StringTable.size() << " bytes in file\n";
696   outs() << "BOLT-INSTRUMENTER: Total size of descriptors: "
697          << (FuncDescSize +
698              Summary->IndCallDescriptions.size() * sizeof(IndCallDescription) +
699              Summary->IndCallTargetDescriptions.size() *
700                  sizeof(IndCallTargetDescription))
701          << " bytes in file\n";
702   outs() << "BOLT-INSTRUMENTER: Profile will be saved to file "
703          << opts::InstrumentationFilename << "\n";
704 
705   InstrumentationRuntimeLibrary *RtLibrary =
706       static_cast<InstrumentationRuntimeLibrary *>(BC.getRuntimeLibrary());
707   assert(RtLibrary && "instrumentation runtime library object must be set");
708   RtLibrary->setSummary(std::move(Summary));
709 }
710 } // namespace bolt
711 } // namespace llvm
712