xref: /llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp (revision 416f1c465db62d829283f6902ef35e027e127aa7)
1 //===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This pass implements GCOV-style profiling. When this pass is run it emits
// "gcno" files next to the existing source, and instruments the code so that
// it records the edges between blocks that run and emits a complementary
// "gcda" file on exit.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Sequence.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/BranchProbabilityInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/IR/DebugInfo.h"
25 #include "llvm/IR/DebugLoc.h"
26 #include "llvm/IR/EHPersonalities.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/InstIterator.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/ProfileData/InstrProf.h"
33 #include "llvm/Support/CRC.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/Regex.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Transforms/Instrumentation/CFGMST.h"
41 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
42 #include "llvm/Transforms/Utils/Instrumentation.h"
43 #include "llvm/Transforms/Utils/ModuleUtils.h"
44 #include <algorithm>
45 #include <memory>
46 #include <string>
47 #include <utility>
48 
49 using namespace llvm;
50 namespace endian = llvm::support::endian;
51 
52 #define DEBUG_TYPE "insert-gcov-profiling"
53 
// GCOV constants used by this file when emitting notes records.
enum : uint32_t {
  // Arc flag passed to GCOVBlock::addEdge for edges that have no
  // instrumentation block assigned (Edge::Place is null).
  GCOV_ARC_ON_TREE = 1 << 0,

  // Record tags; each one is written as the first word of its record.
  GCOV_TAG_FUNCTION = 0x01000000,
  GCOV_TAG_BLOCKS = 0x01410000,
  GCOV_TAG_ARCS = 0x01430000,
  GCOV_TAG_LINES = 0x01450000,
};
62 
// Hidden option providing the gcov version string that
// GCOVOptions::getDefault() copies into GCOVOptions::Version. getDefault()
// reports a fatal error unless the value is exactly four characters long.
static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version",
                                               cl::init("0000"), cl::Hidden,
                                               cl::ValueRequired);

// Hidden option that GCOVOptions::getDefault() copies into
// GCOVOptions::Atomic.
static cl::opt<bool> AtomicCounter("gcov-atomic-counter", cl::Hidden,
                                   cl::desc("Make counter updates atomic"));
69 
70 // Returns the number of words which will be used to represent this string.
71 static unsigned wordsOfString(StringRef s) {
72   // Length + NUL-terminated string + 0~3 padding NULs.
73   return (s.size() / 4) + 2;
74 }
75 
76 GCOVOptions GCOVOptions::getDefault() {
77   GCOVOptions Options;
78   Options.EmitNotes = true;
79   Options.EmitData = true;
80   Options.NoRedZone = false;
81   Options.Atomic = AtomicCounter;
82 
83   if (DefaultGCOVVersion.size() != 4) {
84     llvm::report_fatal_error(Twine("Invalid -default-gcov-version: ") +
85                              DefaultGCOVVersion, /*GenCrashDiag=*/false);
86   }
87   memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
88   return Options;
89 }
90 
91 namespace {
92 class GCOVFunction;
93 
94 class GCOVProfiler {
95 public:
96   GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
97   GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
98   bool
99   runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
100               function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
101               std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
102 
103   void write(uint32_t i) {
104     char Bytes[4];
105     endian::write32(Bytes, i, Endian);
106     os->write(Bytes, 4);
107   }
108   void writeString(StringRef s) {
109     write(wordsOfString(s) - 1);
110     os->write(s.data(), s.size());
111     os->write_zeros(4 - s.size() % 4);
112   }
113   void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
114 
115 private:
116   // Create the .gcno files for the Module based on DebugInfo.
117   bool
118   emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork,
119                    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
120                    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
121                    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI);
122 
123   Function *createInternalFunction(FunctionType *FTy, StringRef Name,
124                                    StringRef MangledType = "");
125 
126   void emitGlobalConstructor(
127       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
128   void emitModuleInitFunctionPtrs(
129       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
130 
131   bool isFunctionInstrumented(const Function &F);
132   std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
133   static bool doesFilenameMatchARegex(StringRef Filename,
134                                       std::vector<Regex> &Regexes);
135 
136   // Get pointers to the functions in the runtime library.
137   FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
138   FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
139   FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
140   FunctionCallee getSummaryInfoFunc();
141   FunctionCallee getEndFileFunc();
142 
143   // Add the function to write out all our counters to the global destructor
144   // list.
145   Function *
146   insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
147   Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
148 
149   bool AddFlushBeforeForkAndExec();
150 
151   enum class GCovFileType { GCNO, GCDA };
152   std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
153 
154   GCOVOptions Options;
155   llvm::endianness Endian;
156   raw_ostream *os;
157   int Version = 0;
158 
159   // Checksum, produced by hash of EdgeDestinations
160   SmallVector<uint32_t, 4> FileChecksums;
161 
162   Module *M = nullptr;
163   std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
164   LLVMContext *Ctx = nullptr;
165   SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
166   std::vector<Regex> FilterRe;
167   std::vector<Regex> ExcludeRe;
168   DenseSet<const BasicBlock *> ExecBlocks;
169   StringMap<bool> InstrumentedFiles;
170 };
171 
172 struct BBInfo {
173   BBInfo *Group;
174   uint32_t Index;
175   uint32_t Rank = 0;
176 
177   BBInfo(unsigned Index) : Group(this), Index(Index) {}
178   std::string infoString() const {
179     return (Twine("Index=") + Twine(Index)).str();
180   }
181 };
182 
183 struct Edge {
184   // This class implements the CFG edges. Note the CFG can be a multi-graph.
185   // So there might be multiple edges with same SrcBB and DestBB.
186   const BasicBlock *SrcBB;
187   const BasicBlock *DestBB;
188   uint64_t Weight;
189   BasicBlock *Place = nullptr;
190   uint32_t SrcNumber, DstNumber;
191   bool InMST = false;
192   bool Removed = false;
193   bool IsCritical = false;
194 
195   Edge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
196       : SrcBB(Src), DestBB(Dest), Weight(W) {}
197 
198   // Return the information string of an edge.
199   std::string infoString() const {
200     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
201             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight))
202         .str();
203   }
204 };
205 }
206 
207 static StringRef getFunctionName(const DISubprogram *SP) {
208   if (!SP->getLinkageName().empty())
209     return SP->getLinkageName();
210   return SP->getName();
211 }
212 
213 /// Extract a filename for a DISubprogram.
214 ///
215 /// Prefer relative paths in the coverage notes. Clang also may split
216 /// up absolute paths into a directory and filename component. When
217 /// the relative path doesn't exist, reconstruct the absolute path.
218 static SmallString<128> getFilename(const DISubprogram *SP) {
219   SmallString<128> Path;
220   StringRef RelPath = SP->getFilename();
221   if (sys::fs::exists(RelPath))
222     Path = RelPath;
223   else
224     sys::path::append(Path, SP->getDirectory(), SP->getFilename());
225   return Path;
226 }
227 
228 namespace {
229   class GCOVRecord {
230   protected:
231     GCOVProfiler *P;
232 
233     GCOVRecord(GCOVProfiler *P) : P(P) {}
234 
235     void write(uint32_t i) { P->write(i); }
236     void writeString(StringRef s) { P->writeString(s); }
237     void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); }
238   };
239 
240   class GCOVFunction;
241   class GCOVBlock;
242 
243   // Constructed only by requesting it from a GCOVBlock, this object stores a
244   // list of line numbers and a single filename, representing lines that belong
245   // to the block.
246   class GCOVLines : public GCOVRecord {
247    public:
248     void addLine(uint32_t Line) {
249       assert(Line != 0 && "Line zero is not a valid real line number.");
250       Lines.push_back(Line);
251     }
252 
253     uint32_t length() const {
254       return 1 + wordsOfString(Filename) + Lines.size();
255     }
256 
257     void writeOut() {
258       write(0);
259       writeString(Filename);
260       for (uint32_t L : Lines)
261         write(L);
262     }
263 
264     GCOVLines(GCOVProfiler *P, StringRef F)
265         : GCOVRecord(P), Filename(std::string(F)) {}
266 
267   private:
268     std::string Filename;
269     SmallVector<uint32_t, 32> Lines;
270   };
271 
272 
273   // Represent a basic block in GCOV. Each block has a unique number in the
274   // function, number of lines belonging to each block, and a set of edges to
275   // other blocks.
276   class GCOVBlock : public GCOVRecord {
277    public:
278     GCOVLines &getFile(StringRef Filename) {
279       return LinesByFile.try_emplace(Filename, P, Filename).first->second;
280     }
281 
282     void addEdge(GCOVBlock &Successor, uint32_t Flags) {
283       OutEdges.emplace_back(&Successor, Flags);
284     }
285 
286     void writeOut() {
287       uint32_t Len = 3;
288       SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
289       for (auto &I : LinesByFile) {
290         Len += I.second.length();
291         SortedLinesByFile.push_back(&I);
292       }
293 
294       write(GCOV_TAG_LINES);
295       write(Len);
296       write(Number);
297 
298       llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
299                                        StringMapEntry<GCOVLines> *RHS) {
300         return LHS->getKey() < RHS->getKey();
301       });
302       for (auto &I : SortedLinesByFile)
303         I->getValue().writeOut();
304       write(0);
305       write(0);
306     }
307 
308     GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
309       // Only allow copy before edges and lines have been added. After that,
310       // there are inter-block pointers (eg: edges) that won't take kindly to
311       // blocks being copied or moved around.
312       assert(LinesByFile.empty());
313       assert(OutEdges.empty());
314     }
315 
316     uint32_t Number;
317     SmallVector<std::pair<GCOVBlock *, uint32_t>, 4> OutEdges;
318 
319   private:
320     friend class GCOVFunction;
321 
322     GCOVBlock(GCOVProfiler *P, uint32_t Number)
323         : GCOVRecord(P), Number(Number) {}
324 
325     StringMap<GCOVLines> LinesByFile;
326   };
327 
328   // A function has a unique identifier, a checksum (we leave as zero) and a
329   // set of blocks and a map of edges between blocks. This is the only GCOV
330   // object users can construct, the blocks and lines will be rooted here.
331   class GCOVFunction : public GCOVRecord {
332   public:
333     GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP,
334                  unsigned EndLine, uint32_t Ident, int Version)
335         : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident),
336           Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) {
337       LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
338       uint32_t i = 2;
339       for (BasicBlock &BB : *F)
340         Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++)));
341 
342       std::string FunctionNameAndLine;
343       raw_string_ostream FNLOS(FunctionNameAndLine);
344       FNLOS << getFunctionName(SP) << SP->getLine();
345       FuncChecksum = hash_value(FunctionNameAndLine);
346     }
347 
348     GCOVBlock &getBlock(const BasicBlock *BB) {
349       return Blocks.find(const_cast<BasicBlock *>(BB))->second;
350     }
351 
352     GCOVBlock &getEntryBlock() { return EntryBlock; }
353     GCOVBlock &getReturnBlock() {
354       return ReturnBlock;
355     }
356 
357     uint32_t getFuncChecksum() const {
358       return FuncChecksum;
359     }
360 
361     void writeOut(uint32_t CfgChecksum) {
362       write(GCOV_TAG_FUNCTION);
363       SmallString<128> Filename = getFilename(SP);
364       uint32_t BlockLen = 3 + wordsOfString(getFunctionName(SP));
365       BlockLen += 1 + wordsOfString(Filename) + 4;
366 
367       write(BlockLen);
368       write(Ident);
369       write(FuncChecksum);
370       write(CfgChecksum);
371       writeString(getFunctionName(SP));
372 
373       write(SP->isArtificial()); // artificial
374       writeString(Filename);
375       write(SP->getLine()); // start_line
376       write(0);             // start_column
377       // EndLine is the last line with !dbg. It is not the } line as in GCC,
378       // but good enough.
379       write(EndLine);
380       write(0); // end_column
381 
382       // Emit count of blocks.
383       write(GCOV_TAG_BLOCKS);
384       write(1);
385       write(Blocks.size() + 2);
386       LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n");
387 
388       // Emit edges between blocks.
389       const uint32_t Outgoing = EntryBlock.OutEdges.size();
390       if (Outgoing) {
391         write(GCOV_TAG_ARCS);
392         write(Outgoing * 2 + 1);
393         write(EntryBlock.Number);
394         for (const auto &E : EntryBlock.OutEdges) {
395           write(E.first->Number);
396           write(E.second);
397         }
398       }
399       for (auto &It : Blocks) {
400         const GCOVBlock &Block = It.second;
401         if (Block.OutEdges.empty()) continue;
402 
403         write(GCOV_TAG_ARCS);
404         write(Block.OutEdges.size() * 2 + 1);
405         write(Block.Number);
406         for (const auto &E : Block.OutEdges) {
407           write(E.first->Number);
408           write(E.second);
409         }
410       }
411 
412       // Emit lines for each block.
413       for (auto &It : Blocks)
414         It.second.writeOut();
415     }
416 
417   public:
418     const DISubprogram *SP;
419     unsigned EndLine;
420     uint32_t Ident;
421     uint32_t FuncChecksum;
422     int Version;
423     MapVector<BasicBlock *, GCOVBlock> Blocks;
424     GCOVBlock EntryBlock;
425     GCOVBlock ReturnBlock;
426   };
427 }
428 
429 // RegexesStr is a string containing differents regex separated by a semi-colon.
430 // For example "foo\..*$;bar\..*$".
431 std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
432   std::vector<Regex> Regexes;
433   while (!RegexesStr.empty()) {
434     std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
435     if (!HeadTail.first.empty()) {
436       Regex Re(HeadTail.first);
437       std::string Err;
438       if (!Re.isValid(Err)) {
439         Ctx->emitError(Twine("Regex ") + HeadTail.first +
440                        " is not valid: " + Err);
441       }
442       Regexes.emplace_back(std::move(Re));
443     }
444     RegexesStr = HeadTail.second;
445   }
446   return Regexes;
447 }
448 
449 bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
450                                            std::vector<Regex> &Regexes) {
451   for (Regex &Re : Regexes)
452     if (Re.match(Filename))
453       return true;
454   return false;
455 }
456 
457 bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
458   if (FilterRe.empty() && ExcludeRe.empty()) {
459     return true;
460   }
461   SmallString<128> Filename = getFilename(F.getSubprogram());
462   auto It = InstrumentedFiles.find(Filename);
463   if (It != InstrumentedFiles.end()) {
464     return It->second;
465   }
466 
467   SmallString<256> RealPath;
468   StringRef RealFilename;
469 
470   // Path can be
471   // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
472   // such a case we must get the real_path.
473   if (sys::fs::real_path(Filename, RealPath)) {
474     // real_path can fail with path like "foo.c".
475     RealFilename = Filename;
476   } else {
477     RealFilename = RealPath;
478   }
479 
480   bool ShouldInstrument;
481   if (FilterRe.empty()) {
482     ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
483   } else if (ExcludeRe.empty()) {
484     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
485   } else {
486     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
487                        !doesFilenameMatchARegex(RealFilename, ExcludeRe);
488   }
489   InstrumentedFiles[Filename] = ShouldInstrument;
490   return ShouldInstrument;
491 }
492 
493 std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
494                                      GCovFileType OutputType) {
495   bool Notes = OutputType == GCovFileType::GCNO;
496 
497   if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
498     for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
499       MDNode *N = GCov->getOperand(i);
500       bool ThreeElement = N->getNumOperands() == 3;
501       if (!ThreeElement && N->getNumOperands() != 2)
502         continue;
503       if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
504         continue;
505 
506       if (ThreeElement) {
507         // These nodes have no mangling to apply, it's stored mangled in the
508         // bitcode.
509         MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
510         MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
511         if (!NotesFile || !DataFile)
512           continue;
513         return std::string(Notes ? NotesFile->getString()
514                                  : DataFile->getString());
515       }
516 
517       MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
518       if (!GCovFile)
519         continue;
520 
521       SmallString<128> Filename = GCovFile->getString();
522       sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
523       return std::string(Filename);
524     }
525   }
526 
527   SmallString<128> Filename = CU->getFilename();
528   sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
529   StringRef FName = sys::path::filename(Filename);
530   SmallString<128> CurPath;
531   if (sys::fs::current_path(CurPath))
532     return std::string(FName);
533   sys::path::append(CurPath, FName);
534   return std::string(CurPath);
535 }
536 
537 bool GCOVProfiler::runOnModule(
538     Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
539     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
540     std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
541   this->M = &M;
542   this->GetTLI = std::move(GetTLI);
543   Ctx = &M.getContext();
544 
545   NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
546   if (!CUNode || (!Options.EmitNotes && !Options.EmitData))
547     return false;
548 
549   bool HasExecOrFork = AddFlushBeforeForkAndExec();
550 
551   FilterRe = createRegexesFromString(Options.Filter);
552   ExcludeRe = createRegexesFromString(Options.Exclude);
553   emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI);
554   return true;
555 }
556 
557 PreservedAnalyses GCOVProfilerPass::run(Module &M,
558                                         ModuleAnalysisManager &AM) {
559 
560   GCOVProfiler Profiler(GCOVOpts);
561   FunctionAnalysisManager &FAM =
562       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
563 
564   auto GetBFI = [&FAM](Function &F) {
565     return &FAM.getResult<BlockFrequencyAnalysis>(F);
566   };
567   auto GetBPI = [&FAM](Function &F) {
568     return &FAM.getResult<BranchProbabilityAnalysis>(F);
569   };
570   auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
571     return FAM.getResult<TargetLibraryAnalysis>(F);
572   };
573 
574   if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI))
575     return PreservedAnalyses::all();
576 
577   return PreservedAnalyses::none();
578 }
579 
580 static bool functionHasLines(const Function &F, unsigned &EndLine) {
581   // Check whether this function actually has any source lines. Not only
582   // do these waste space, they also can crash gcov.
583   EndLine = 0;
584   for (const auto &BB : F) {
585     for (const auto &I : BB) {
586       // Debug intrinsic locations correspond to the location of the
587       // declaration, not necessarily any statements or expressions.
588       if (isa<DbgInfoIntrinsic>(&I)) continue;
589 
590       const DebugLoc &Loc = I.getDebugLoc();
591       if (!Loc)
592         continue;
593 
594       // Artificial lines such as calls to the global constructors.
595       if (Loc.getLine() == 0) continue;
596       EndLine = std::max(EndLine, Loc.getLine());
597 
598       return true;
599     }
600   }
601   return false;
602 }
603 
604 static bool isUsingScopeBasedEH(Function &F) {
605   if (!F.hasPersonalityFn()) return false;
606 
607   EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
608   return isScopedEHPersonality(Personality);
609 }
610 
611 bool GCOVProfiler::AddFlushBeforeForkAndExec() {
612   const TargetLibraryInfo *TLI = nullptr;
613   SmallVector<CallInst *, 2> Forks;
614   SmallVector<CallInst *, 2> Execs;
615   for (auto &F : M->functions()) {
616     TLI = TLI == nullptr ? &GetTLI(F) : TLI;
617     for (auto &I : instructions(F)) {
618       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
619         if (Function *Callee = CI->getCalledFunction()) {
620           LibFunc LF;
621           if (TLI->getLibFunc(*Callee, LF)) {
622             if (LF == LibFunc_fork) {
623 #if !defined(_WIN32)
624               Forks.push_back(CI);
625 #endif
626             } else if (LF == LibFunc_execl || LF == LibFunc_execle ||
627                        LF == LibFunc_execlp || LF == LibFunc_execv ||
628                        LF == LibFunc_execvp || LF == LibFunc_execve ||
629                        LF == LibFunc_execvpe || LF == LibFunc_execvP) {
630               Execs.push_back(CI);
631             }
632           }
633         }
634       }
635     }
636   }
637 
638   for (auto *F : Forks) {
639     IRBuilder<> Builder(F);
640     BasicBlock *Parent = F->getParent();
641     auto NextInst = ++F->getIterator();
642 
643     // We've a fork so just reset the counters in the child process
644     FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false);
645     FunctionCallee GCOVFork = M->getOrInsertFunction(
646         "__gcov_fork", FTy,
647         TLI->getAttrList(Ctx, {}, /*Signed=*/true, /*Ret=*/true));
648     F->setCalledFunction(GCOVFork);
649 
650     // We split just after the fork to have a counter for the lines after
651     // Anyway there's a bug:
652     // void foo() { fork(); }
653     // void bar() { foo(); blah(); }
654     // then "blah();" will be called 2 times but showed as 1
655     // because "blah()" belongs to the same block as "foo();"
656     Parent->splitBasicBlock(NextInst);
657 
658     // back() is a br instruction with a debug location
659     // equals to the one from NextAfterFork
660     // So to avoid to have two debug locs on two blocks just change it
661     DebugLoc Loc = F->getDebugLoc();
662     Parent->back().setDebugLoc(Loc);
663   }
664 
665   for (auto *E : Execs) {
666     IRBuilder<> Builder(E);
667     BasicBlock *Parent = E->getParent();
668     auto NextInst = ++E->getIterator();
669 
670     // Since the process is replaced by a new one we need to write out gcdas
671     // No need to reset the counters since they'll be lost after the exec**
672     FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
673     FunctionCallee WriteoutF =
674         M->getOrInsertFunction("llvm_writeout_files", FTy);
675     Builder.CreateCall(WriteoutF);
676 
677     DebugLoc Loc = E->getDebugLoc();
678     Builder.SetInsertPoint(&*NextInst);
679     // If the exec** fails we must reset the counters since they've been
680     // dumped
681     FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy);
682     Builder.CreateCall(ResetF)->setDebugLoc(Loc);
683     ExecBlocks.insert(Parent);
684     Parent->splitBasicBlock(NextInst);
685     Parent->back().setDebugLoc(Loc);
686   }
687 
688   return !Forks.empty() || !Execs.empty();
689 }
690 
691 static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
692                               const DenseSet<const BasicBlock *> &ExecBlocks) {
693   if (E.InMST || E.Removed)
694     return nullptr;
695 
696   BasicBlock *SrcBB = const_cast<BasicBlock *>(E.SrcBB);
697   BasicBlock *DestBB = const_cast<BasicBlock *>(E.DestBB);
698   // For a fake edge, instrument the real BB.
699   if (SrcBB == nullptr)
700     return DestBB;
701   if (DestBB == nullptr)
702     return SrcBB;
703 
704   auto CanInstrument = [](BasicBlock *BB) -> BasicBlock * {
705     // There are basic blocks (such as catchswitch) cannot be instrumented.
706     // If the returned first insertion point is the end of BB, skip this BB.
707     if (BB->getFirstInsertionPt() == BB->end())
708       return nullptr;
709     return BB;
710   };
711 
712   // Instrument the SrcBB if it has a single successor,
713   // otherwise, the DestBB if this is not a critical edge.
714   Instruction *TI = SrcBB->getTerminator();
715   if (TI->getNumSuccessors() <= 1 && !ExecBlocks.count(SrcBB))
716     return CanInstrument(SrcBB);
717   if (!E.IsCritical)
718     return CanInstrument(DestBB);
719 
720   // Some IndirectBr critical edges cannot be split by the previous
721   // SplitIndirectBrCriticalEdges call. Bail out.
722   const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
723   BasicBlock *InstrBB =
724       isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
725   if (!InstrBB)
726     return nullptr;
727 
728   MST.addEdge(SrcBB, InstrBB, 0);
729   MST.addEdge(InstrBB, DestBB, 0).InMST = true;
730   E.Removed = true;
731 
732   return CanInstrument(InstrBB);
733 }
734 
735 #ifndef NDEBUG
736 static void dumpEdges(CFGMST<Edge, BBInfo> &MST, GCOVFunction &GF) {
737   size_t ID = 0;
738   for (const auto &E : make_pointee_range(MST.allEdges())) {
739     GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock();
740     GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock();
741     dbgs() << "  Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number
742            << E.infoString() << "\n";
743   }
744 }
745 #endif
746 
747 bool GCOVProfiler::emitProfileNotes(
748     NamedMDNode *CUNode, bool HasExecOrFork,
749     function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
750     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
751     function_ref<const TargetLibraryInfo &(Function &F)> GetTLI) {
752   {
753     uint8_t c3 = Options.Version[0];
754     uint8_t c2 = Options.Version[1];
755     uint8_t c1 = Options.Version[2];
756     Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0'
757                         : (c3 - '0') * 10 + c1 - '0';
758   }
759   // Emit .gcno files that are compatible with GCC 11.1.
760   if (Version < 111) {
761     Version = 111;
762     memcpy(Options.Version, "B11*", 4);
763   }
764 
765   bool EmitGCDA = Options.EmitData;
766   for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
767     // Each compile unit gets its own .gcno file. This means that whether we run
768     // this pass over the original .o's as they're produced, or run it after
769     // LTO, we'll generate the same .gcno files.
770 
771     auto *CU = cast<DICompileUnit>(CUNode->getOperand(i));
772 
773     // Skip module skeleton (and module) CUs.
774     if (CU->getDWOId())
775       continue;
776 
777     std::vector<uint8_t> EdgeDestinations;
778     SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
779 
780     Endian = M->getDataLayout().isLittleEndian() ? llvm::endianness::little
781                                                  : llvm::endianness::big;
782     unsigned FunctionIdent = 0;
783     for (auto &F : M->functions()) {
784       DISubprogram *SP = F.getSubprogram();
785       unsigned EndLine;
786       if (!SP) continue;
787       if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F))
788         continue;
789       // TODO: Functions using scope-based EH are currently not supported.
790       if (isUsingScopeBasedEH(F)) continue;
791       if (F.hasFnAttribute(llvm::Attribute::NoProfile))
792         continue;
793       if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
794         continue;
795 
796       // Add the function line number to the lines of the entry block
797       // to have a counter for the function definition.
798       uint32_t Line = SP->getLine();
799       auto Filename = getFilename(SP);
800 
801       BranchProbabilityInfo *BPI = GetBPI(F);
802       BlockFrequencyInfo *BFI = GetBFI(F);
803 
804       // Split indirectbr critical edges here before computing the MST rather
805       // than later in getInstrBB() to avoid invalidating it.
806       SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
807                                    BFI);
808 
809       CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry=*/false,
810                                /*InstrumentLoopEntries=*/false, BPI, BFI);
811 
812       // getInstrBB can split basic blocks and push elements to AllEdges.
813       for (size_t I : llvm::seq<size_t>(0, MST.numEdges())) {
814         auto &E = *MST.allEdges()[I];
815         // For now, disable spanning tree optimization when fork or exec* is
816         // used.
817         if (HasExecOrFork)
818           E.InMST = false;
819         E.Place = getInstrBB(MST, E, ExecBlocks);
820       }
821       // Basic blocks in F are finalized at this point.
822       BasicBlock &EntryBlock = F.getEntryBlock();
823       Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine,
824                                                      FunctionIdent++, Version));
825       GCOVFunction &Func = *Funcs.back();
826 
827       // Some non-tree edges are IndirectBr which cannot be split. Ignore them
828       // as well.
829       llvm::erase_if(MST.allEdges(), [](std::unique_ptr<Edge> &E) {
830         return E->Removed || (!E->InMST && !E->Place);
831       });
832       const size_t Measured =
833           std::stable_partition(
834               MST.allEdges().begin(), MST.allEdges().end(),
835               [](std::unique_ptr<Edge> &E) { return E->Place; }) -
836           MST.allEdges().begin();
837       for (size_t I : llvm::seq<size_t>(0, Measured)) {
838         Edge &E = *MST.allEdges()[I];
839         GCOVBlock &Src =
840             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
841         GCOVBlock &Dst =
842             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
843         E.SrcNumber = Src.Number;
844         E.DstNumber = Dst.Number;
845       }
846       std::stable_sort(
847           MST.allEdges().begin(), MST.allEdges().begin() + Measured,
848           [](const std::unique_ptr<Edge> &L, const std::unique_ptr<Edge> &R) {
849             return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
850                                                 : L->DstNumber < R->DstNumber;
851           });
852 
853       for (const Edge &E : make_pointee_range(MST.allEdges())) {
854         GCOVBlock &Src =
855             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
856         GCOVBlock &Dst =
857             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
858         Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE));
859       }
860 
861       // Artificial functions such as global initializers
862       if (!SP->isArtificial())
863         Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
864 
865       LLVM_DEBUG(dumpEdges(MST, Func));
866 
867       for (auto &GB : Func.Blocks) {
868         const BasicBlock &BB = *GB.first;
869         auto &Block = GB.second;
870         for (auto Succ : Block.OutEdges) {
871           uint32_t Idx = Succ.first->Number;
872           do EdgeDestinations.push_back(Idx & 255);
873           while ((Idx >>= 8) > 0);
874         }
875 
876         for (const auto &I : BB) {
877           // Debug intrinsic locations correspond to the location of the
878           // declaration, not necessarily any statements or expressions.
879           if (isa<DbgInfoIntrinsic>(&I)) continue;
880 
881           const DebugLoc &Loc = I.getDebugLoc();
882           if (!Loc)
883             continue;
884 
885           // Artificial lines such as calls to the global constructors.
886           if (Loc.getLine() == 0 || Loc.isImplicitCode())
887             continue;
888 
889           if (Line == Loc.getLine()) continue;
890           Line = Loc.getLine();
891           MDNode *Scope = Loc.getScope();
892           // TODO: Handle blocks from another file due to #line, #include, etc.
893           if (isa<DILexicalBlockFile>(Scope) || SP != getDISubprogram(Scope))
894             continue;
895 
896           GCOVLines &Lines = Block.getFile(Filename);
897           Lines.addLine(Loc.getLine());
898         }
899         Line = 0;
900       }
901       if (EmitGCDA) {
902         DISubprogram *SP = F.getSubprogram();
903         ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured);
904         GlobalVariable *Counters = new GlobalVariable(
905             *M, CounterTy, false, GlobalValue::InternalLinkage,
906             Constant::getNullValue(CounterTy), "__llvm_gcov_ctr");
907         const llvm::Triple &Triple = llvm::Triple(M->getTargetTriple());
908         if (Triple.getObjectFormat() == llvm::Triple::XCOFF)
909           Counters->setSection("__llvm_gcov_ctr_section");
910         CountersBySP.emplace_back(Counters, SP);
911 
912         for (size_t I : llvm::seq<size_t>(0, Measured)) {
913           const Edge &E = *MST.allEdges()[I];
914           IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
915           Value *V = Builder.CreateConstInBoundsGEP2_64(
916               Counters->getValueType(), Counters, 0, I);
917           // Disable sanitizers to decrease size bloat. We don't expect
918           // sanitizers to catch interesting issues.
919           Instruction *Inst;
920           if (Options.Atomic) {
921             Inst = Builder.CreateAtomicRMW(AtomicRMWInst::Add, V,
922                                            Builder.getInt64(1), MaybeAlign(),
923                                            AtomicOrdering::Monotonic);
924           } else {
925             LoadInst *OldCount =
926                 Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
927             OldCount->setNoSanitizeMetadata();
928             Value *NewCount = Builder.CreateAdd(OldCount, Builder.getInt64(1));
929             Inst = Builder.CreateStore(NewCount, V);
930           }
931           Inst->setNoSanitizeMetadata();
932         }
933       }
934     }
935 
936     char Tmp[4];
937     JamCRC JC;
938     JC.update(EdgeDestinations);
939     uint32_t Stamp = JC.getCRC();
940     FileChecksums.push_back(Stamp);
941 
942     if (Options.EmitNotes) {
943       std::error_code EC;
944       raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
945                          sys::fs::OF_None);
946       if (EC) {
947         Ctx->emitError(
948             Twine("failed to open coverage notes file for writing: ") +
949             EC.message());
950         continue;
951       }
952       os = &out;
953       if (Endian == llvm::endianness::big) {
954         out.write("gcno", 4);
955         out.write(Options.Version, 4);
956       } else {
957         out.write("oncg", 4);
958         std::reverse_copy(Options.Version, Options.Version + 4, Tmp);
959         out.write(Tmp, 4);
960       }
961       write(Stamp);
962       writeString("."); // unuseful current_working_directory
963       write(0);         // unuseful has_unexecuted_blocks
964 
965       for (auto &Func : Funcs)
966         Func->writeOut(Stamp);
967 
968       write(0);
969       write(0);
970       out.close();
971     }
972 
973     if (EmitGCDA) {
974       const llvm::Triple &Triple = llvm::Triple(M->getTargetTriple());
975       if (Triple.getObjectFormat() == llvm::Triple::XCOFF)
976         emitModuleInitFunctionPtrs(CountersBySP);
977       else
978         emitGlobalConstructor(CountersBySP);
979       EmitGCDA = false;
980     }
981   }
982   return true;
983 }
984 
985 Function *GCOVProfiler::createInternalFunction(FunctionType *FTy,
986                                                StringRef Name,
987                                                StringRef MangledType /*=""*/) {
988   Function *F = Function::createWithDefaultAttr(
989       FTy, GlobalValue::InternalLinkage, 0, Name, M);
990   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
991   F->addFnAttr(Attribute::NoUnwind);
992   if (Options.NoRedZone)
993     F->addFnAttr(Attribute::NoRedZone);
994   if (!MangledType.empty())
995     setKCFIType(*M, *F, MangledType);
996   return F;
997 }
998 
999 void GCOVProfiler::emitGlobalConstructor(
1000     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
1001   Function *WriteoutF = insertCounterWriteout(CountersBySP);
1002   Function *ResetF = insertReset(CountersBySP);
1003 
1004   // Create a small bit of code that registers the "__llvm_gcov_writeout" to
1005   // be executed at exit and the "__llvm_gcov_reset" function to be executed
1006   // when "__gcov_flush" is called.
1007   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1008   Function *F = createInternalFunction(FTy, "__llvm_gcov_init", "_ZTSFvvE");
1009   F->addFnAttr(Attribute::NoInline);
1010 
1011   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
1012   IRBuilder<> Builder(BB);
1013 
1014   FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1015   auto *PFTy = PointerType::get(*Ctx, 0);
1016   FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false);
1017 
1018   // Initialize the environment and register the local writeout, flush and
1019   // reset functions.
1020   FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
1021   Builder.CreateCall(GCOVInit, {WriteoutF, ResetF});
1022   Builder.CreateRetVoid();
1023 
1024   appendToGlobalCtors(*M, F, 0);
1025 }
1026 
1027 void GCOVProfiler::emitModuleInitFunctionPtrs(
1028     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
1029   Function *WriteoutF = insertCounterWriteout(CountersBySP);
1030   Function *ResetF = insertReset(CountersBySP);
1031 
1032   // Instead of creating a function call and add it to the constructors list,
1033   // create a global variable in the __llvm_covinit section so the functions
1034   // can be registered by a constructor in the runtime.
1035 
1036   auto &Ctx = M->getContext();
1037 
1038   Type *InitFuncDataTy[] = {
1039 #define COVINIT_FUNC(Type, LLVMType, Name, Init) LLVMType,
1040 #include "llvm/ProfileData/InstrProfData.inc"
1041   };
1042 
1043   auto STy = StructType::get(Ctx, ArrayRef(InitFuncDataTy));
1044 
1045   Constant *InitFuncPtrs[] = {
1046 #define COVINIT_FUNC(Type, LLVMType, Name, Init) Init,
1047 #include "llvm/ProfileData/InstrProfData.inc"
1048   };
1049 
1050   auto *CovInitGV =
1051       new GlobalVariable(*M, STy, false, GlobalValue::PrivateLinkage, nullptr,
1052                          "__llvm_covinit_functions");
1053   CovInitGV->setInitializer(ConstantStruct::get(STy, InitFuncPtrs));
1054   CovInitGV->setVisibility(GlobalValue::VisibilityTypes::DefaultVisibility);
1055   CovInitGV->setSection(getInstrProfSectionName(
1056       IPSK_covinit, Triple(M->getTargetTriple()).getObjectFormat()));
1057   CovInitGV->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
1058   CovInitGV->setConstant(true);
1059 }
1060 
1061 FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
1062   Type *Args[] = {
1063       PointerType::getUnqual(*Ctx), // const char *orig_filename
1064       Type::getInt32Ty(*Ctx),       // uint32_t version
1065       Type::getInt32Ty(*Ctx),       // uint32_t checksum
1066   };
1067   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1068   return M->getOrInsertFunction("llvm_gcda_start_file", FTy,
1069                                 TLI->getAttrList(Ctx, {1, 2}, /*Signed=*/false));
1070 }
1071 
1072 FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
1073   Type *Args[] = {
1074     Type::getInt32Ty(*Ctx),    // uint32_t ident
1075     Type::getInt32Ty(*Ctx),    // uint32_t func_checksum
1076     Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum
1077   };
1078   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1079   return M->getOrInsertFunction("llvm_gcda_emit_function", FTy,
1080                              TLI->getAttrList(Ctx, {0, 1, 2}, /*Signed=*/false));
1081 }
1082 
1083 FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
1084   Type *Args[] = {
1085       Type::getInt32Ty(*Ctx),       // uint32_t num_counters
1086       PointerType::getUnqual(*Ctx), // uint64_t *counters
1087   };
1088   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1089   return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy,
1090                                 TLI->getAttrList(Ctx, {0}, /*Signed=*/false));
1091 }
1092 
1093 FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
1094   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1095   return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
1096 }
1097 
1098 FunctionCallee GCOVProfiler::getEndFileFunc() {
1099   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1100   return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
1101 }
1102 
1103 Function *GCOVProfiler::insertCounterWriteout(
1104     ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
1105   FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1106   Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
1107   if (!WriteoutF)
1108     WriteoutF =
1109         createInternalFunction(WriteoutFTy, "__llvm_gcov_writeout", "_ZTSFvvE");
1110   WriteoutF->addFnAttr(Attribute::NoInline);
1111 
1112   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
1113   IRBuilder<> Builder(BB);
1114 
1115   auto *TLI = &GetTLI(*WriteoutF);
1116 
1117   FunctionCallee StartFile = getStartFileFunc(TLI);
1118   FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
1119   FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
1120   FunctionCallee SummaryInfo = getSummaryInfoFunc();
1121   FunctionCallee EndFile = getEndFileFunc();
1122 
1123   NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
1124   if (!CUNodes) {
1125     Builder.CreateRetVoid();
1126     return WriteoutF;
1127   }
1128 
1129   // Collect the relevant data into a large constant data structure that we can
1130   // walk to write out everything.
1131   StructType *StartFileCallArgsTy = StructType::create(
1132       {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
1133       "start_file_args_ty");
1134   StructType *EmitFunctionCallArgsTy = StructType::create(
1135       {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
1136       "emit_function_args_ty");
1137   auto *PtrTy = Builder.getPtrTy();
1138   StructType *EmitArcsCallArgsTy =
1139       StructType::create({Builder.getInt32Ty(), PtrTy}, "emit_arcs_args_ty");
1140   StructType *FileInfoTy = StructType::create(
1141       {StartFileCallArgsTy, Builder.getInt32Ty(), PtrTy, PtrTy}, "file_info");
1142 
1143   Constant *Zero32 = Builder.getInt32(0);
1144   // Build an explicit array of two zeros for use in ConstantExpr GEP building.
1145   Constant *TwoZero32s[] = {Zero32, Zero32};
1146 
1147   SmallVector<Constant *, 8> FileInfos;
1148   for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) {
1149     auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i));
1150 
1151     // Skip module skeleton (and module) CUs.
1152     if (CU->getDWOId())
1153       continue;
1154 
1155     std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
1156     uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
1157     auto *StartFileCallArgs = ConstantStruct::get(
1158         StartFileCallArgsTy,
1159         {Builder.CreateGlobalString(FilenameGcda),
1160          Builder.getInt32(endian::read32be(Options.Version)),
1161          Builder.getInt32(CfgChecksum)});
1162 
1163     SmallVector<Constant *, 8> EmitFunctionCallArgsArray;
1164     SmallVector<Constant *, 8> EmitArcsCallArgsArray;
1165     for (int j : llvm::seq<int>(0, CountersBySP.size())) {
1166       uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
1167       EmitFunctionCallArgsArray.push_back(ConstantStruct::get(
1168           EmitFunctionCallArgsTy,
1169           {Builder.getInt32(j),
1170            Builder.getInt32(FuncChecksum),
1171            Builder.getInt32(CfgChecksum)}));
1172 
1173       GlobalVariable *GV = CountersBySP[j].first;
1174       unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements();
1175       EmitArcsCallArgsArray.push_back(ConstantStruct::get(
1176           EmitArcsCallArgsTy,
1177           {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr(
1178                                        GV->getValueType(), GV, TwoZero32s)}));
1179     }
1180     // Create global arrays for the two emit calls.
1181     int CountersSize = CountersBySP.size();
1182     assert(CountersSize == (int)EmitFunctionCallArgsArray.size() &&
1183            "Mismatched array size!");
1184     assert(CountersSize == (int)EmitArcsCallArgsArray.size() &&
1185            "Mismatched array size!");
1186     auto *EmitFunctionCallArgsArrayTy =
1187         ArrayType::get(EmitFunctionCallArgsTy, CountersSize);
1188     auto *EmitFunctionCallArgsArrayGV = new GlobalVariable(
1189         *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true,
1190         GlobalValue::InternalLinkage,
1191         ConstantArray::get(EmitFunctionCallArgsArrayTy,
1192                            EmitFunctionCallArgsArray),
1193         Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i));
1194     auto *EmitArcsCallArgsArrayTy =
1195         ArrayType::get(EmitArcsCallArgsTy, CountersSize);
1196     EmitFunctionCallArgsArrayGV->setUnnamedAddr(
1197         GlobalValue::UnnamedAddr::Global);
1198     auto *EmitArcsCallArgsArrayGV = new GlobalVariable(
1199         *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true,
1200         GlobalValue::InternalLinkage,
1201         ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray),
1202         Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i));
1203     EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1204 
1205     FileInfos.push_back(ConstantStruct::get(
1206         FileInfoTy,
1207         {StartFileCallArgs, Builder.getInt32(CountersSize),
1208          ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy,
1209                                                 EmitFunctionCallArgsArrayGV,
1210                                                 TwoZero32s),
1211          ConstantExpr::getInBoundsGetElementPtr(
1212              EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)}));
1213   }
1214 
1215   // If we didn't find anything to actually emit, bail on out.
1216   if (FileInfos.empty()) {
1217     Builder.CreateRetVoid();
1218     return WriteoutF;
1219   }
1220 
1221   // To simplify code, we cap the number of file infos we write out to fit
1222   // easily in a 32-bit signed integer. This gives consistent behavior between
1223   // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit
1224   // operations on 32-bit systems. It also seems unreasonable to try to handle
1225   // more than 2 billion files.
1226   if ((int64_t)FileInfos.size() > (int64_t)INT_MAX)
1227     FileInfos.resize(INT_MAX);
1228 
1229   // Create a global for the entire data structure so we can walk it more
1230   // easily.
1231   auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size());
1232   auto *FileInfoArrayGV = new GlobalVariable(
1233       *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage,
1234       ConstantArray::get(FileInfoArrayTy, FileInfos),
1235       "__llvm_internal_gcov_emit_file_info");
1236   FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1237 
1238   // Create the CFG for walking this data structure.
1239   auto *FileLoopHeader =
1240       BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF);
1241   auto *CounterLoopHeader =
1242       BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF);
1243   auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF);
1244   auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF);
1245 
1246   // We always have at least one file, so just branch to the header.
1247   Builder.CreateBr(FileLoopHeader);
1248 
1249   // The index into the files structure is our loop induction variable.
1250   Builder.SetInsertPoint(FileLoopHeader);
1251   PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
1252                                   "file_idx");
1253   IV->addIncoming(Builder.getInt32(0), BB);
1254   auto *FileInfoPtr = Builder.CreateInBoundsGEP(
1255       FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
1256   auto *StartFileCallArgsPtr =
1257       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args");
1258   auto *StartFileCall = Builder.CreateCall(
1259       StartFile,
1260       {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
1261                           Builder.CreateStructGEP(StartFileCallArgsTy,
1262                                                   StartFileCallArgsPtr, 0),
1263                           "filename"),
1264        Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
1265                           Builder.CreateStructGEP(StartFileCallArgsTy,
1266                                                   StartFileCallArgsPtr, 1),
1267                           "version"),
1268        Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
1269                           Builder.CreateStructGEP(StartFileCallArgsTy,
1270                                                   StartFileCallArgsPtr, 2),
1271                           "stamp")});
1272   if (auto AK = TLI->getExtAttrForI32Param(false))
1273     StartFileCall->addParamAttr(2, AK);
1274   auto *NumCounters = Builder.CreateLoad(
1275       FileInfoTy->getElementType(1),
1276       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs");
1277   auto *EmitFunctionCallArgsArray =
1278       Builder.CreateLoad(FileInfoTy->getElementType(2),
1279                          Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2),
1280                          "emit_function_args");
1281   auto *EmitArcsCallArgsArray = Builder.CreateLoad(
1282       FileInfoTy->getElementType(3),
1283       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args");
1284   auto *EnterCounterLoopCond =
1285       Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
1286   Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
1287 
1288   Builder.SetInsertPoint(CounterLoopHeader);
1289   auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
1290                                "ctr_idx");
1291   JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
1292   auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
1293       EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
1294   auto *EmitFunctionCall = Builder.CreateCall(
1295       EmitFunction,
1296       {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
1297                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1298                                                   EmitFunctionCallArgsPtr, 0),
1299                           "ident"),
1300        Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
1301                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1302                                                   EmitFunctionCallArgsPtr, 1),
1303                           "func_checkssum"),
1304        Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
1305                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1306                                                   EmitFunctionCallArgsPtr, 2),
1307                           "cfg_checksum")});
1308   if (auto AK = TLI->getExtAttrForI32Param(false)) {
1309     EmitFunctionCall->addParamAttr(0, AK);
1310     EmitFunctionCall->addParamAttr(1, AK);
1311     EmitFunctionCall->addParamAttr(2, AK);
1312   }
1313   auto *EmitArcsCallArgsPtr =
1314       Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
1315   auto *EmitArcsCall = Builder.CreateCall(
1316       EmitArcs,
1317       {Builder.CreateLoad(
1318            EmitArcsCallArgsTy->getElementType(0),
1319            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0),
1320            "num_counters"),
1321        Builder.CreateLoad(
1322            EmitArcsCallArgsTy->getElementType(1),
1323            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1),
1324            "counters")});
1325   if (auto AK = TLI->getExtAttrForI32Param(false))
1326     EmitArcsCall->addParamAttr(0, AK);
1327   auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
1328   auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters);
1329   Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch);
1330   JV->addIncoming(NextJV, CounterLoopHeader);
1331 
1332   Builder.SetInsertPoint(FileLoopLatch);
1333   Builder.CreateCall(SummaryInfo, {});
1334   Builder.CreateCall(EndFile, {});
1335   auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx");
1336   auto *FileLoopCond =
1337       Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
1338   Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
1339   IV->addIncoming(NextIV, FileLoopLatch);
1340 
1341   Builder.SetInsertPoint(ExitBB);
1342   Builder.CreateRetVoid();
1343 
1344   return WriteoutF;
1345 }
1346 
1347 Function *GCOVProfiler::insertReset(
1348     ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) {
1349   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1350   Function *ResetF = M->getFunction("__llvm_gcov_reset");
1351   if (!ResetF)
1352     ResetF = createInternalFunction(FTy, "__llvm_gcov_reset", "_ZTSFvvE");
1353   ResetF->addFnAttr(Attribute::NoInline);
1354 
1355   BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
1356   IRBuilder<> Builder(Entry);
1357   LLVMContext &C = Entry->getContext();
1358 
1359   // Zero out the counters.
1360   for (const auto &I : CountersBySP) {
1361     GlobalVariable *GV = I.first;
1362     auto *GVTy = cast<ArrayType>(GV->getValueType());
1363     Builder.CreateMemSet(GV, Constant::getNullValue(Type::getInt8Ty(C)),
1364                          GVTy->getNumElements() *
1365                              GVTy->getElementType()->getScalarSizeInBits() / 8,
1366                          GV->getAlign());
1367   }
1368 
1369   Type *RetTy = ResetF->getReturnType();
1370   if (RetTy->isVoidTy())
1371     Builder.CreateRetVoid();
1372   else if (RetTy->isIntegerTy())
1373     // Used if __llvm_gcov_reset was implicitly declared.
1374     Builder.CreateRet(ConstantInt::get(RetTy, 0));
1375   else
1376     report_fatal_error("invalid return type for __llvm_gcov_reset");
1377 
1378   return ResetF;
1379 }
1380