xref: /llvm-project/bolt/lib/Passes/Instrumentation.cpp (revision 23c834092eb4d52a606144960a253391de10e2ef)
1 //===- bolt/Passes/Instrumentation.cpp ------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Instrumentation class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/Instrumentation.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/RWMutex.h"
20 #include <queue>
21 #include <stack>
22 #include <unordered_set>
23 
24 #define DEBUG_TYPE "bolt-instrumentation"
25 
26 using namespace llvm;
27 
28 namespace opts {
29 extern cl::OptionCategory BoltInstrCategory;
30 
31 cl::opt<std::string> InstrumentationFilename(
32     "instrumentation-file",
33     cl::desc("file name where instrumented profile will be saved (default: "
34              "/tmp/prof.fdata)"),
35     cl::init("/tmp/prof.fdata"), cl::Optional, cl::cat(BoltInstrCategory));
36 
37 cl::opt<std::string> InstrumentationBinpath(
38     "instrumentation-binpath",
39     cl::desc("path to instrumented binary in case if /proc/self/map_files "
40              "is not accessible due to access restriction issues"),
41     cl::Optional, cl::cat(BoltInstrCategory));
42 
43 cl::opt<bool> InstrumentationFileAppendPID(
44     "instrumentation-file-append-pid",
45     cl::desc("append PID to saved profile file name (default: false)"),
46     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
47 
48 cl::opt<bool> ConservativeInstrumentation(
49     "conservative-instrumentation",
50     cl::desc("disable instrumentation optimizations that sacrifice profile "
51              "accuracy (for debugging, default: false)"),
52     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
53 
54 cl::opt<uint32_t> InstrumentationSleepTime(
55     "instrumentation-sleep-time",
56     cl::desc("interval between profile writes (default: 0 = write only at "
57              "program end).  This is useful for service workloads when you "
58              "want to dump profile every X minutes or if you are killing the "
59              "program and the profile is not being dumped at the end."),
60     cl::init(0), cl::Optional, cl::cat(BoltInstrCategory));
61 
62 cl::opt<bool> InstrumentationNoCountersClear(
63     "instrumentation-no-counters-clear",
64     cl::desc("Don't clear counters across dumps "
65              "(use with instrumentation-sleep-time option)"),
66     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
67 
68 cl::opt<bool> InstrumentationWaitForks(
69     "instrumentation-wait-forks",
70     cl::desc("Wait until all forks of instrumented process will finish "
71              "(use with instrumentation-sleep-time option)"),
72     cl::init(false), cl::Optional, cl::cat(BoltInstrCategory));
73 
74 cl::opt<bool>
75     InstrumentHotOnly("instrument-hot-only",
76                       cl::desc("only insert instrumentation on hot functions "
77                                "(needs profile, default: false)"),
78                       cl::init(false), cl::Optional,
79                       cl::cat(BoltInstrCategory));
80 
81 cl::opt<bool> InstrumentCalls("instrument-calls",
82                               cl::desc("record profile for inter-function "
83                                        "control flow activity (default: true)"),
84                               cl::init(true), cl::Optional,
85                               cl::cat(BoltInstrCategory));
86 } // namespace opts
87 
88 namespace llvm {
89 namespace bolt {
90 
91 static bool hasAArch64ExclusiveMemop(
92     BinaryFunction &Function,
93     std::unordered_set<const BinaryBasicBlock *> &BBToSkip) {
94   // FIXME ARMv8-a architecture reference manual says that software must avoid
95   // having any explicit memory accesses between exclusive load and associated
96   // store instruction. So for now skip instrumentation for basic blocks that
97   // have these instructions, since it might lead to runtime deadlock.
98   BinaryContext &BC = Function.getBinaryContext();
99   std::queue<std::pair<BinaryBasicBlock *, bool>> BBQueue; // {BB, isLoad}
100   std::unordered_set<BinaryBasicBlock *> Visited;
101 
102   if (Function.getLayout().block_begin() == Function.getLayout().block_end())
103     return 0;
104 
105   BinaryBasicBlock *BBfirst = *Function.getLayout().block_begin();
106   BBQueue.push({BBfirst, false});
107 
108   while (!BBQueue.empty()) {
109     BinaryBasicBlock *BB = BBQueue.front().first;
110     bool IsLoad = BBQueue.front().second;
111     BBQueue.pop();
112     if (!Visited.insert(BB).second)
113       continue;
114 
115     for (const MCInst &Inst : *BB) {
116       // Two loads one after another - skip whole function
117       if (BC.MIB->isAArch64ExclusiveLoad(Inst) && IsLoad) {
118         if (opts::Verbosity >= 2) {
119           outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
120                  << " has two exclusive loads. Ignoring the function.\n";
121         }
122         return true;
123       }
124 
125       if (BC.MIB->isAArch64ExclusiveLoad(Inst))
126         IsLoad = true;
127 
128       if (IsLoad && BBToSkip.insert(BB).second) {
129         if (opts::Verbosity >= 2) {
130           outs() << "BOLT-INSTRUMENTER: skip BB " << BB->getName()
131                  << " due to exclusive instruction in function "
132                  << Function.getPrintName() << "\n";
133         }
134       }
135 
136       if (!IsLoad && BC.MIB->isAArch64ExclusiveStore(Inst)) {
137         if (opts::Verbosity >= 2) {
138           outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
139                  << " has exclusive store without corresponding load. Ignoring "
140                     "the function.\n";
141         }
142         return true;
143       }
144 
145       if (IsLoad && (BC.MIB->isAArch64ExclusiveStore(Inst) ||
146                      BC.MIB->isAArch64ExclusiveClear(Inst)))
147         IsLoad = false;
148     }
149 
150     if (IsLoad && BB->succ_size() == 0) {
151       if (opts::Verbosity >= 2) {
152         outs()
153             << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
154             << " has exclusive load in trailing BB. Ignoring the function.\n";
155       }
156       return true;
157     }
158 
159     for (BinaryBasicBlock *BBS : BB->successors())
160       BBQueue.push({BBS, IsLoad});
161   }
162 
163   if (BBToSkip.size() == Visited.size()) {
164     if (opts::Verbosity >= 2) {
165       outs() << "BOLT-INSTRUMENTER: all BBs are marked with true. Ignoring the "
166                 "function "
167              << Function.getPrintName() << "\n";
168     }
169     return true;
170   }
171 
172   return false;
173 }
174 
175 uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) {
176   auto Iter = FuncToStringIdx.find(&Function);
177   if (Iter != FuncToStringIdx.end())
178     return Iter->second;
179   size_t Idx = Summary->StringTable.size();
180   FuncToStringIdx.emplace(std::make_pair(&Function, Idx));
181   Summary->StringTable.append(getEscapedName(Function.getOneName()));
182   Summary->StringTable.append(1, '\0');
183   return Idx;
184 }
185 
186 bool Instrumentation::createCallDescription(FunctionDescription &FuncDesc,
187                                             const BinaryFunction &FromFunction,
188                                             uint32_t From, uint32_t FromNodeID,
189                                             const BinaryFunction &ToFunction,
190                                             uint32_t To, bool IsInvoke) {
191   CallDescription CD;
192   // Ordinarily, we don't augment direct calls with an explicit counter, except
193   // when forced to do so or when we know this callee could be throwing
194   // exceptions, in which case there is no other way to accurately record its
195   // frequency.
196   bool ForceInstrumentation = opts::ConservativeInstrumentation || IsInvoke;
197   CD.FromLoc.FuncString = getFunctionNameIndex(FromFunction);
198   CD.FromLoc.Offset = From;
199   CD.FromNode = FromNodeID;
200   CD.Target = &ToFunction;
201   CD.ToLoc.FuncString = getFunctionNameIndex(ToFunction);
202   CD.ToLoc.Offset = To;
203   CD.Counter = ForceInstrumentation ? Summary->Counters.size() : 0xffffffff;
204   if (ForceInstrumentation)
205     ++DirectCallCounters;
206   FuncDesc.Calls.emplace_back(CD);
207   return ForceInstrumentation;
208 }
209 
210 void Instrumentation::createIndCallDescription(
211     const BinaryFunction &FromFunction, uint32_t From) {
212   IndCallDescription ICD;
213   ICD.FromLoc.FuncString = getFunctionNameIndex(FromFunction);
214   ICD.FromLoc.Offset = From;
215   Summary->IndCallDescriptions.emplace_back(ICD);
216 }
217 
218 void Instrumentation::createIndCallTargetDescription(
219     const BinaryFunction &ToFunction, uint32_t To) {
220   IndCallTargetDescription ICD;
221   ICD.ToLoc.FuncString = getFunctionNameIndex(ToFunction);
222   ICD.ToLoc.Offset = To;
223   ICD.Target = &ToFunction;
224   Summary->IndCallTargetDescriptions.emplace_back(ICD);
225 }
226 
227 bool Instrumentation::createEdgeDescription(FunctionDescription &FuncDesc,
228                                             const BinaryFunction &FromFunction,
229                                             uint32_t From, uint32_t FromNodeID,
230                                             const BinaryFunction &ToFunction,
231                                             uint32_t To, uint32_t ToNodeID,
232                                             bool Instrumented) {
233   EdgeDescription ED;
234   auto Result = FuncDesc.EdgesSet.insert(std::make_pair(FromNodeID, ToNodeID));
235   // Avoid creating duplicated edge descriptions. This happens in CFGs where a
236   // block jumps to its fall-through.
237   if (Result.second == false)
238     return false;
239   ED.FromLoc.FuncString = getFunctionNameIndex(FromFunction);
240   ED.FromLoc.Offset = From;
241   ED.FromNode = FromNodeID;
242   ED.ToLoc.FuncString = getFunctionNameIndex(ToFunction);
243   ED.ToLoc.Offset = To;
244   ED.ToNode = ToNodeID;
245   ED.Counter = Instrumented ? Summary->Counters.size() : 0xffffffff;
246   if (Instrumented)
247     ++BranchCounters;
248   FuncDesc.Edges.emplace_back(ED);
249   return Instrumented;
250 }
251 
252 void Instrumentation::createLeafNodeDescription(FunctionDescription &FuncDesc,
253                                                 uint32_t Node) {
254   InstrumentedNode IN;
255   IN.Node = Node;
256   IN.Counter = Summary->Counters.size();
257   ++LeafNodeCounters;
258   FuncDesc.LeafNodes.emplace_back(IN);
259 }
260 
261 InstructionListType
262 Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) {
263   auto L = BC.scopeLock();
264   MCSymbol *Label = BC.Ctx->createNamedTempSymbol("InstrEntry");
265   Summary->Counters.emplace_back(Label);
266   return BC.MIB->createInstrIncMemory(Label, BC.Ctx.get(), IsLeaf,
267                                       BC.AsmInfo->getCodePointerSize());
268 }
269 
270 // Helper instruction sequence insertion function
271 static BinaryBasicBlock::iterator
272 insertInstructions(InstructionListType &Instrs, BinaryBasicBlock &BB,
273                    BinaryBasicBlock::iterator Iter) {
274   for (MCInst &NewInst : Instrs) {
275     Iter = BB.insertInstruction(Iter, NewInst);
276     ++Iter;
277   }
278   return Iter;
279 }
280 
281 void Instrumentation::instrumentLeafNode(BinaryBasicBlock &BB,
282                                          BinaryBasicBlock::iterator Iter,
283                                          bool IsLeaf,
284                                          FunctionDescription &FuncDesc,
285                                          uint32_t Node) {
286   createLeafNodeDescription(FuncDesc, Node);
287   InstructionListType CounterInstrs = createInstrumentationSnippet(
288       BB.getFunction()->getBinaryContext(), IsLeaf);
289   insertInstructions(CounterInstrs, BB, Iter);
290 }
291 
292 void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
293                                                BinaryBasicBlock::iterator &Iter,
294                                                BinaryFunction &FromFunction,
295                                                uint32_t From) {
296   auto L = FromFunction.getBinaryContext().scopeLock();
297   const size_t IndCallSiteID = Summary->IndCallDescriptions.size();
298   createIndCallDescription(FromFunction, From);
299 
300   BinaryContext &BC = FromFunction.getBinaryContext();
301   bool IsTailCall = BC.MIB->isTailCall(*Iter);
302   InstructionListType CounterInstrs = BC.MIB->createInstrumentedIndirectCall(
303       std::move(*Iter),
304       IsTailCall ? IndTailCallHandlerExitBBFunction->getSymbol()
305                  : IndCallHandlerExitBBFunction->getSymbol(),
306       IndCallSiteID, &*BC.Ctx);
307 
308   Iter = BB.eraseInstruction(Iter);
309   Iter = insertInstructions(CounterInstrs, BB, Iter);
310   --Iter;
311 }
312 
313 bool Instrumentation::instrumentOneTarget(
314     SplitWorklistTy &SplitWorklist, SplitInstrsTy &SplitInstrs,
315     BinaryBasicBlock::iterator &Iter, BinaryFunction &FromFunction,
316     BinaryBasicBlock &FromBB, uint32_t From, BinaryFunction &ToFunc,
317     BinaryBasicBlock *TargetBB, uint32_t ToOffset, bool IsLeaf, bool IsInvoke,
318     FunctionDescription *FuncDesc, uint32_t FromNodeID, uint32_t ToNodeID) {
319   BinaryContext &BC = FromFunction.getBinaryContext();
320   {
321     auto L = BC.scopeLock();
322     bool Created = true;
323     if (!TargetBB)
324       Created = createCallDescription(*FuncDesc, FromFunction, From, FromNodeID,
325                                       ToFunc, ToOffset, IsInvoke);
326     else
327       Created = createEdgeDescription(*FuncDesc, FromFunction, From, FromNodeID,
328                                       ToFunc, ToOffset, ToNodeID,
329                                       /*Instrumented=*/true);
330     if (!Created)
331       return false;
332   }
333 
334   InstructionListType CounterInstrs = createInstrumentationSnippet(BC, IsLeaf);
335 
336   const MCInst &Inst = *Iter;
337   if (BC.MIB->isCall(Inst)) {
338     // This code handles both
339     // - (regular) inter-function calls (cross-function control transfer),
340     // - (rare) intra-function calls (function-local control transfer)
341     Iter = insertInstructions(CounterInstrs, FromBB, Iter);
342     return true;
343   }
344 
345   if (!TargetBB || !FuncDesc)
346     return false;
347 
348   // Indirect branch, conditional branches or fall-throughs
349   // Regular cond branch, put counter at start of target block
350   //
351   // N.B.: (FromBB != TargetBBs) checks below handle conditional jumps where
352   // we can't put the instrumentation counter in this block because not all
353   // paths that reach it at this point will be taken and going to the target.
354   if (TargetBB->pred_size() == 1 && &FromBB != TargetBB &&
355       !TargetBB->isEntryPoint()) {
356     insertInstructions(CounterInstrs, *TargetBB, TargetBB->begin());
357     return true;
358   }
359   if (FromBB.succ_size() == 1 && &FromBB != TargetBB) {
360     Iter = insertInstructions(CounterInstrs, FromBB, Iter);
361     return true;
362   }
363   // Critical edge, create BB and put counter there
364   SplitWorklist.emplace_back(&FromBB, TargetBB);
365   SplitInstrs.emplace_back(std::move(CounterInstrs));
366   return true;
367 }
368 
369 void Instrumentation::instrumentFunction(BinaryFunction &Function,
370                                          MCPlusBuilder::AllocatorIdTy AllocId) {
371   if (Function.hasUnknownControlFlow())
372     return;
373 
374   BinaryContext &BC = Function.getBinaryContext();
375   if (BC.isMachO() && Function.hasName("___GLOBAL_init_65535/1"))
376     return;
377 
378   std::unordered_set<const BinaryBasicBlock *> BBToSkip;
379   if (BC.isAArch64() && hasAArch64ExclusiveMemop(Function, BBToSkip))
380     return;
381 
382   SplitWorklistTy SplitWorklist;
383   SplitInstrsTy SplitInstrs;
384 
385   FunctionDescription *FuncDesc = nullptr;
386   {
387     std::unique_lock<llvm::sys::RWMutex> L(FDMutex);
388     Summary->FunctionDescriptions.emplace_back();
389     FuncDesc = &Summary->FunctionDescriptions.back();
390   }
391 
392   FuncDesc->Function = &Function;
393   Function.disambiguateJumpTables(AllocId);
394   Function.deleteConservativeEdges();
395 
396   std::unordered_map<const BinaryBasicBlock *, uint32_t> BBToID;
397   uint32_t Id = 0;
398   for (auto BBI = Function.begin(); BBI != Function.end(); ++BBI) {
399     BBToID[&*BBI] = Id++;
400   }
401   std::unordered_set<const BinaryBasicBlock *> VisitedSet;
402   // DFS to establish edges we will use for a spanning tree. Edges in the
403   // spanning tree can be instrumentation-free since their count can be
404   // inferred by solving flow equations on a bottom-up traversal of the tree.
405   // Exit basic blocks are always instrumented so we start the traversal with
406   // a minimum number of defined variables to make the equation solvable.
407   std::stack<std::pair<const BinaryBasicBlock *, BinaryBasicBlock *>> Stack;
408   std::unordered_map<const BinaryBasicBlock *,
409                      std::set<const BinaryBasicBlock *>>
410       STOutSet;
411   for (auto BBI = Function.getLayout().block_rbegin();
412        BBI != Function.getLayout().block_rend(); ++BBI) {
413     if ((*BBI)->isEntryPoint() || (*BBI)->isLandingPad()) {
414       Stack.push(std::make_pair(nullptr, *BBI));
415       if (opts::InstrumentCalls && (*BBI)->isEntryPoint()) {
416         EntryNode E;
417         E.Node = BBToID[&**BBI];
418         E.Address = (*BBI)->getInputOffset();
419         FuncDesc->EntryNodes.emplace_back(E);
420         createIndCallTargetDescription(Function, (*BBI)->getInputOffset());
421       }
422     }
423   }
424 
425   // Modified version of BinaryFunction::dfs() to build a spanning tree
426   if (!opts::ConservativeInstrumentation) {
427     while (!Stack.empty()) {
428       BinaryBasicBlock *BB;
429       const BinaryBasicBlock *Pred;
430       std::tie(Pred, BB) = Stack.top();
431       Stack.pop();
432       if (llvm::is_contained(VisitedSet, BB))
433         continue;
434 
435       VisitedSet.insert(BB);
436       if (Pred)
437         STOutSet[Pred].insert(BB);
438 
439       for (BinaryBasicBlock *SuccBB : BB->successors())
440         Stack.push(std::make_pair(BB, SuccBB));
441     }
442   }
443 
444   // Determine whether this is a leaf function, which needs special
445   // instructions to protect the red zone
446   bool IsLeafFunction = true;
447   DenseSet<const BinaryBasicBlock *> InvokeBlocks;
448   for (const BinaryBasicBlock &BB : Function) {
449     for (const MCInst &Inst : BB) {
450       if (BC.MIB->isCall(Inst)) {
451         if (BC.MIB->isInvoke(Inst))
452           InvokeBlocks.insert(&BB);
453         if (!BC.MIB->isTailCall(Inst))
454           IsLeafFunction = false;
455       }
456     }
457   }
458 
459   for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
460     BinaryBasicBlock &BB = *BBI;
461 
462     // Skip BBs with exclusive load/stores
463     if (BBToSkip.find(&BB) != BBToSkip.end())
464       continue;
465 
466     bool HasUnconditionalBranch = false;
467     bool HasJumpTable = false;
468     bool IsInvokeBlock = InvokeBlocks.count(&BB) > 0;
469 
470     for (auto I = BB.begin(); I != BB.end(); ++I) {
471       const MCInst &Inst = *I;
472       if (!BC.MIB->getOffset(Inst))
473         continue;
474 
475       const bool IsJumpTable = Function.getJumpTable(Inst);
476       if (IsJumpTable)
477         HasJumpTable = true;
478       else if (BC.MIB->isUnconditionalBranch(Inst))
479         HasUnconditionalBranch = true;
480       else if (!(BC.MIB->isCall(Inst) || BC.MIB->isConditionalBranch(Inst)))
481         continue;
482 
483       const uint32_t FromOffset = *BC.MIB->getOffset(Inst);
484       const MCSymbol *Target = BC.MIB->getTargetSymbol(Inst);
485       BinaryBasicBlock *TargetBB = Function.getBasicBlockForLabel(Target);
486       uint32_t ToOffset = TargetBB ? TargetBB->getInputOffset() : 0;
487       BinaryFunction *TargetFunc =
488           TargetBB ? &Function : BC.getFunctionForSymbol(Target);
489       if (TargetFunc && BC.MIB->isCall(Inst)) {
490         if (opts::InstrumentCalls) {
491           const BinaryBasicBlock *ForeignBB =
492               TargetFunc->getBasicBlockForLabel(Target);
493           if (ForeignBB)
494             ToOffset = ForeignBB->getInputOffset();
495           instrumentOneTarget(SplitWorklist, SplitInstrs, I, Function, BB,
496                               FromOffset, *TargetFunc, TargetBB, ToOffset,
497                               IsLeafFunction, IsInvokeBlock, FuncDesc,
498                               BBToID[&BB]);
499         }
500         continue;
501       }
502       if (TargetFunc) {
503         // Do not instrument edges in the spanning tree
504         if (llvm::is_contained(STOutSet[&BB], TargetBB)) {
505           auto L = BC.scopeLock();
506           createEdgeDescription(*FuncDesc, Function, FromOffset, BBToID[&BB],
507                                 Function, ToOffset, BBToID[TargetBB],
508                                 /*Instrumented=*/false);
509           continue;
510         }
511         instrumentOneTarget(SplitWorklist, SplitInstrs, I, Function, BB,
512                             FromOffset, *TargetFunc, TargetBB, ToOffset,
513                             IsLeafFunction, IsInvokeBlock, FuncDesc,
514                             BBToID[&BB], BBToID[TargetBB]);
515         continue;
516       }
517 
518       if (IsJumpTable) {
519         for (BinaryBasicBlock *&Succ : BB.successors()) {
520           // Do not instrument edges in the spanning tree
521           if (llvm::is_contained(STOutSet[&BB], &*Succ)) {
522             auto L = BC.scopeLock();
523             createEdgeDescription(*FuncDesc, Function, FromOffset, BBToID[&BB],
524                                   Function, Succ->getInputOffset(),
525                                   BBToID[&*Succ], /*Instrumented=*/false);
526             continue;
527           }
528           instrumentOneTarget(
529               SplitWorklist, SplitInstrs, I, Function, BB, FromOffset, Function,
530               &*Succ, Succ->getInputOffset(), IsLeafFunction, IsInvokeBlock,
531               FuncDesc, BBToID[&BB], BBToID[&*Succ]);
532         }
533         continue;
534       }
535 
536       // Handle indirect calls -- could be direct calls with unknown targets
537       // or secondary entry points of known functions, so check it is indirect
538       // to be sure.
539       if (opts::InstrumentCalls && BC.MIB->isIndirectCall(*I))
540         instrumentIndirectTarget(BB, I, Function, FromOffset);
541 
542     } // End of instructions loop
543 
544     // Instrument fallthroughs (when the direct jump instruction is missing)
545     if (!HasUnconditionalBranch && !HasJumpTable && BB.succ_size() > 0 &&
546         BB.size() > 0) {
547       BinaryBasicBlock *FTBB = BB.getFallthrough();
548       assert(FTBB && "expected valid fall-through basic block");
549       auto I = BB.begin();
550       auto LastInstr = BB.end();
551       --LastInstr;
552       while (LastInstr != I && BC.MIB->isPseudo(*LastInstr))
553         --LastInstr;
554       uint32_t FromOffset = 0;
555       // The last instruction in the BB should have an annotation, except
556       // if it was branching to the end of the function as a result of
557       // __builtin_unreachable(), in which case it was deleted by fixBranches.
558       // Ignore this case. FIXME: force fixBranches() to preserve the offset.
559       if (!BC.MIB->getOffset(*LastInstr))
560         continue;
561       FromOffset = *BC.MIB->getOffset(*LastInstr);
562 
563       // Do not instrument edges in the spanning tree
564       if (llvm::is_contained(STOutSet[&BB], FTBB)) {
565         auto L = BC.scopeLock();
566         createEdgeDescription(*FuncDesc, Function, FromOffset, BBToID[&BB],
567                               Function, FTBB->getInputOffset(), BBToID[FTBB],
568                               /*Instrumented=*/false);
569         continue;
570       }
571       instrumentOneTarget(SplitWorklist, SplitInstrs, I, Function, BB,
572                           FromOffset, Function, FTBB, FTBB->getInputOffset(),
573                           IsLeafFunction, IsInvokeBlock, FuncDesc, BBToID[&BB],
574                           BBToID[FTBB]);
575     }
576   } // End of BBs loop
577 
578   // Instrument spanning tree leaves
579   if (!opts::ConservativeInstrumentation) {
580     for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
581       BinaryBasicBlock &BB = *BBI;
582       if (STOutSet[&BB].size() == 0)
583         instrumentLeafNode(BB, BB.begin(), IsLeafFunction, *FuncDesc,
584                            BBToID[&BB]);
585     }
586   }
587 
588   // Consume list of critical edges: split them and add instrumentation to the
589   // newly created BBs
590   auto Iter = SplitInstrs.begin();
591   for (std::pair<BinaryBasicBlock *, BinaryBasicBlock *> &BBPair :
592        SplitWorklist) {
593     BinaryBasicBlock *NewBB = Function.splitEdge(BBPair.first, BBPair.second);
594     NewBB->addInstructions(Iter->begin(), Iter->end());
595     ++Iter;
596   }
597 
598   // Unused now
599   FuncDesc->EdgesSet.clear();
600 }
601 
602 Error Instrumentation::runOnFunctions(BinaryContext &BC) {
603   const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/false,
604                                                  /*IsText=*/false,
605                                                  /*IsAllocatable=*/true);
606   BC.registerOrUpdateSection(".bolt.instr.counters", ELF::SHT_PROGBITS, Flags,
607                              nullptr, 0, 1);
608 
609   BC.registerOrUpdateNoteSection(".bolt.instr.tables", nullptr, 0,
610                                  /*Alignment=*/1,
611                                  /*IsReadOnly=*/true, ELF::SHT_NOTE);
612 
613   Summary->IndCallCounterFuncPtr =
614       BC.Ctx->getOrCreateSymbol("__bolt_ind_call_counter_func_pointer");
615   Summary->IndTailCallCounterFuncPtr =
616       BC.Ctx->getOrCreateSymbol("__bolt_ind_tailcall_counter_func_pointer");
617 
618   createAuxiliaryFunctions(BC);
619 
620   ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
621     return (!BF.isSimple() || BF.isIgnored() ||
622             (opts::InstrumentHotOnly && !BF.getKnownExecutionCount()));
623   };
624 
625   ParallelUtilities::WorkFuncWithAllocTy WorkFun =
626       [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) {
627         instrumentFunction(BF, AllocatorId);
628       };
629 
630   ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
631       BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFun,
632       SkipPredicate, "instrumentation", /* ForceSequential=*/true);
633 
634   if (BC.isMachO()) {
635     if (BC.StartFunctionAddress) {
636       BinaryFunction *Main =
637           BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
638       assert(Main && "Entry point function not found");
639       BinaryBasicBlock &BB = Main->front();
640 
641       ErrorOr<BinarySection &> SetupSection =
642           BC.getUniqueSectionByName("I__setup");
643       if (!SetupSection)
644         return createFatalBOLTError("Cannot find I__setup section\n");
645 
646       MCSymbol *Target = BC.registerNameAtAddress(
647           "__bolt_instr_setup", SetupSection->getAddress(), 0, 0);
648       MCInst NewInst;
649       BC.MIB->createCall(NewInst, Target, BC.Ctx.get());
650       BB.insertInstruction(BB.begin(), std::move(NewInst));
651     } else {
652       BC.errs() << "BOLT-WARNING: Entry point not found\n";
653     }
654 
655     if (BinaryData *BD = BC.getBinaryDataByName("___GLOBAL_init_65535/1")) {
656       BinaryFunction *Ctor = BC.getBinaryFunctionAtAddress(BD->getAddress());
657       assert(Ctor && "___GLOBAL_init_65535 function not found");
658       BinaryBasicBlock &BB = Ctor->front();
659       ErrorOr<BinarySection &> FiniSection =
660           BC.getUniqueSectionByName("I__fini");
661       if (!FiniSection)
662         return createFatalBOLTError("Cannot find I__fini section");
663 
664       MCSymbol *Target = BC.registerNameAtAddress(
665           "__bolt_instr_fini", FiniSection->getAddress(), 0, 0);
666       auto IsLEA = [&BC](const MCInst &Inst) { return BC.MIB->isLEA64r(Inst); };
667       const auto LEA = std::find_if(
668           std::next(llvm::find_if(reverse(BB), IsLEA)), BB.rend(), IsLEA);
669       LEA->getOperand(4).setExpr(
670           MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *BC.Ctx));
671     } else {
672       BC.errs() << "BOLT-WARNING: ___GLOBAL_init_65535 not found\n";
673     }
674   }
675 
676   setupRuntimeLibrary(BC);
677   return Error::success();
678 }
679 
680 void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) {
681   auto createSimpleFunction =
682       [&](StringRef Title, InstructionListType Instrs) -> BinaryFunction * {
683     BinaryFunction *Func = BC.createInjectedBinaryFunction(std::string(Title));
684 
685     std::vector<std::unique_ptr<BinaryBasicBlock>> BBs;
686     BBs.emplace_back(Func->createBasicBlock());
687     BBs.back()->addInstructions(Instrs.begin(), Instrs.end());
688     BBs.back()->setCFIState(0);
689     Func->insertBasicBlocks(nullptr, std::move(BBs),
690                             /*UpdateLayout=*/true,
691                             /*UpdateCFIState=*/false);
692     Func->updateState(BinaryFunction::State::CFG_Finalized);
693     return Func;
694   };
695 
696   // Here we are creating a set of functions to handle BB entry/exit.
697   // IndCallHandlerExitBB contains instructions to finish handling traffic to an
698   // indirect call. We pass it to createInstrumentedIndCallHandlerEntryBB(),
699   // which will check if a pointer to runtime library traffic accounting
700   // function was initialized (it is done during initialization of runtime
701   // library). If it is so - calls it. Then this routine returns to normal
702   // execution by jumping to exit BB.
703   BinaryFunction *IndCallHandlerExitBB =
704       createSimpleFunction("__bolt_instr_ind_call_handler",
705                            BC.MIB->createInstrumentedIndCallHandlerExitBB());
706 
707   IndCallHandlerExitBBFunction =
708       createSimpleFunction("__bolt_instr_ind_call_handler_func",
709                            BC.MIB->createInstrumentedIndCallHandlerEntryBB(
710                                Summary->IndCallCounterFuncPtr,
711                                IndCallHandlerExitBB->getSymbol(), &*BC.Ctx));
712 
713   BinaryFunction *IndTailCallHandlerExitBB = createSimpleFunction(
714       "__bolt_instr_ind_tail_call_handler",
715       BC.MIB->createInstrumentedIndTailCallHandlerExitBB());
716 
717   IndTailCallHandlerExitBBFunction = createSimpleFunction(
718       "__bolt_instr_ind_tailcall_handler_func",
719       BC.MIB->createInstrumentedIndCallHandlerEntryBB(
720           Summary->IndTailCallCounterFuncPtr,
721           IndTailCallHandlerExitBB->getSymbol(), &*BC.Ctx));
722 
723   createSimpleFunction("__bolt_num_counters_getter",
724                        BC.MIB->createNumCountersGetter(BC.Ctx.get()));
725   createSimpleFunction("__bolt_instr_locations_getter",
726                        BC.MIB->createInstrLocationsGetter(BC.Ctx.get()));
727   createSimpleFunction("__bolt_instr_tables_getter",
728                        BC.MIB->createInstrTablesGetter(BC.Ctx.get()));
729   createSimpleFunction("__bolt_instr_num_funcs_getter",
730                        BC.MIB->createInstrNumFuncsGetter(BC.Ctx.get()));
731 
732   if (BC.isELF()) {
733     if (BC.StartFunctionAddress) {
734       BinaryFunction *Start =
735           BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress);
736       assert(Start && "Entry point function not found");
737       const MCSymbol *StartSym = Start->getSymbol();
738       createSimpleFunction(
739           "__bolt_start_trampoline",
740           BC.MIB->createSymbolTrampoline(StartSym, BC.Ctx.get()));
741     }
742     if (BC.FiniFunctionAddress) {
743       BinaryFunction *Fini =
744           BC.getBinaryFunctionAtAddress(*BC.FiniFunctionAddress);
745       assert(Fini && "Finalization function not found");
746       const MCSymbol *FiniSym = Fini->getSymbol();
747       createSimpleFunction(
748           "__bolt_fini_trampoline",
749           BC.MIB->createSymbolTrampoline(FiniSym, BC.Ctx.get()));
750     } else {
751       // Create dummy return function for trampoline to avoid issues
752       // with unknown symbol in runtime library. E.g. for static PIE
753       // executable
754       createSimpleFunction("__bolt_fini_trampoline",
755                            BC.MIB->createReturnInstructionList(BC.Ctx.get()));
756     }
757   }
758 }
759 
760 void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) {
761   uint32_t FuncDescSize = Summary->getFDSize();
762 
763   BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call site descriptors: "
764             << Summary->IndCallDescriptions.size() << "\n";
765   BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call target descriptors: "
766             << Summary->IndCallTargetDescriptions.size() << "\n";
767   BC.outs() << "BOLT-INSTRUMENTER: Number of function descriptors: "
768             << Summary->FunctionDescriptions.size() << "\n";
769   BC.outs() << "BOLT-INSTRUMENTER: Number of branch counters: "
770             << BranchCounters << "\n";
771   BC.outs() << "BOLT-INSTRUMENTER: Number of ST leaf node counters: "
772             << LeafNodeCounters << "\n";
773   BC.outs() << "BOLT-INSTRUMENTER: Number of direct call counters: "
774             << DirectCallCounters << "\n";
775   BC.outs() << "BOLT-INSTRUMENTER: Total number of counters: "
776             << Summary->Counters.size() << "\n";
777   BC.outs() << "BOLT-INSTRUMENTER: Total size of counters: "
778             << (Summary->Counters.size() * 8)
779             << " bytes (static alloc memory)\n";
780   BC.outs() << "BOLT-INSTRUMENTER: Total size of string table emitted: "
781             << Summary->StringTable.size() << " bytes in file\n";
782   BC.outs() << "BOLT-INSTRUMENTER: Total size of descriptors: "
783             << (FuncDescSize +
784                 Summary->IndCallDescriptions.size() *
785                     sizeof(IndCallDescription) +
786                 Summary->IndCallTargetDescriptions.size() *
787                     sizeof(IndCallTargetDescription))
788             << " bytes in file\n";
789   BC.outs() << "BOLT-INSTRUMENTER: Profile will be saved to file "
790             << opts::InstrumentationFilename << "\n";
791 
792   InstrumentationRuntimeLibrary *RtLibrary =
793       static_cast<InstrumentationRuntimeLibrary *>(BC.getRuntimeLibrary());
794   assert(RtLibrary && "instrumentation runtime library object must be set");
795   RtLibrary->setSummary(std::move(Summary));
796 }
797 } // namespace bolt
798 } // namespace llvm
799