xref: /freebsd-src/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/ProfileSummary.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/Compression.h"
30 #include "llvm/Support/ErrorOr.h"
31 #include "llvm/Support/LEB128.h"
32 #include "llvm/Support/LineIterator.h"
33 #include "llvm/Support/MD5.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cstddef>
38 #include <cstdint>
39 #include <limits>
40 #include <memory>
41 #include <system_error>
42 #include <vector>
43 
44 using namespace llvm;
45 using namespace sampleprof;
46 
47 /// Dump the function profile for \p FName.
48 ///
49 /// \param FName Name of the function to print.
50 /// \param OS Stream to emit the output to.
51 void SampleProfileReader::dumpFunctionProfile(StringRef FName,
52                                               raw_ostream &OS) {
53   OS << "Function: " << FName << ": " << Profiles[FName];
54 }
55 
56 /// Dump all the function profiles found on stream \p OS.
57 void SampleProfileReader::dump(raw_ostream &OS) {
58   for (const auto &I : Profiles)
59     dumpFunctionProfile(I.getKey(), OS);
60 }
61 
62 /// Parse \p Input as function head.
63 ///
64 /// Parse one line of \p Input, and update function name in \p FName,
65 /// function's total sample count in \p NumSamples, function's entry
66 /// count in \p NumHeadSamples.
67 ///
68 /// \returns true if parsing is successful.
69 static bool ParseHead(const StringRef &Input, StringRef &FName,
70                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
71   if (Input[0] == ' ')
72     return false;
73   size_t n2 = Input.rfind(':');
74   size_t n1 = Input.rfind(':', n2 - 1);
75   FName = Input.substr(0, n1);
76   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
77     return false;
78   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
79     return false;
80   return true;
81 }
82 
/// Returns true if line offset \p L is representable in 16 bits, i.e. it is
/// no larger than 0xffff.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLegalOffset = 0xffff;
  return L <= MaxLegalOffset;
}
85 
86 /// Parse \p Input that contains metadata.
87 /// Possible metadata:
88 /// - CFG Checksum information:
89 ///     !CFGChecksum: 12345
90 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
91 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash) {
92   if (!Input.startswith("!CFGChecksum:"))
93     return false;
94 
95   StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
96   return !CFGInfo.getAsInteger(10, FunctionHash);
97 }
98 
/// Kind of an indented (non-header) line in the text profile, as classified
/// by ParseLine.
enum class LineType {
  /// `offset[.disc]: callee:count` - header of an inlined callsite profile.
  CallSiteProfile,
  /// `offset[.disc]: count [target:count]*` - samples for a source location.
  BodyProfile,
  /// A line whose first non-space character is '!', e.g. `!CFGChecksum: N`.
  Metadata,
};
104 
105 /// Parse \p Input as line sample.
106 ///
107 /// \param Input input line.
108 /// \param LineTy Type of this line.
109 /// \param Depth the depth of the inline stack.
110 /// \param NumSamples total samples of the line/inlined callsite.
111 /// \param LineOffset line offset to the start of the function.
112 /// \param Discriminator discriminator of the line.
113 /// \param TargetCountMap map from indirect call target to count.
114 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
115 ///
116 /// returns true if parsing is successful.
117 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
118                       uint64_t &NumSamples, uint32_t &LineOffset,
119                       uint32_t &Discriminator, StringRef &CalleeName,
120                       DenseMap<StringRef, uint64_t> &TargetCountMap,
121                       uint64_t &FunctionHash) {
122   for (Depth = 0; Input[Depth] == ' '; Depth++)
123     ;
124   if (Depth == 0)
125     return false;
126 
127   if (Depth == 1 && Input[Depth] == '!') {
128     LineTy = LineType::Metadata;
129     return parseMetadata(Input.substr(Depth), FunctionHash);
130   }
131 
132   size_t n1 = Input.find(':');
133   StringRef Loc = Input.substr(Depth, n1 - Depth);
134   size_t n2 = Loc.find('.');
135   if (n2 == StringRef::npos) {
136     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
137       return false;
138     Discriminator = 0;
139   } else {
140     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
141       return false;
142     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
143       return false;
144   }
145 
146   StringRef Rest = Input.substr(n1 + 2);
147   if (isDigit(Rest[0])) {
148     LineTy = LineType::BodyProfile;
149     size_t n3 = Rest.find(' ');
150     if (n3 == StringRef::npos) {
151       if (Rest.getAsInteger(10, NumSamples))
152         return false;
153     } else {
154       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
155         return false;
156     }
157     // Find call targets and their sample counts.
158     // Note: In some cases, there are symbols in the profile which are not
159     // mangled. To accommodate such cases, use colon + integer pairs as the
160     // anchor points.
161     // An example:
162     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
163     // ":1000" and ":437" are used as anchor points so the string above will
164     // be interpreted as
165     // target: _M_construct<char *>
166     // count: 1000
167     // target: string_view<std::allocator<char> >
168     // count: 437
169     while (n3 != StringRef::npos) {
170       n3 += Rest.substr(n3).find_first_not_of(' ');
171       Rest = Rest.substr(n3);
172       n3 = Rest.find_first_of(':');
173       if (n3 == StringRef::npos || n3 == 0)
174         return false;
175 
176       StringRef Target;
177       uint64_t count, n4;
178       while (true) {
179         // Get the segment after the current colon.
180         StringRef AfterColon = Rest.substr(n3 + 1);
181         // Get the target symbol before the current colon.
182         Target = Rest.substr(0, n3);
183         // Check if the word after the current colon is an integer.
184         n4 = AfterColon.find_first_of(' ');
185         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
186         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
187         if (!WordAfterColon.getAsInteger(10, count))
188           break;
189 
190         // Try to find the next colon.
191         uint64_t n5 = AfterColon.find_first_of(':');
192         if (n5 == StringRef::npos)
193           return false;
194         n3 += n5 + 1;
195       }
196 
197       // An anchor point is found. Save the {target, count} pair
198       TargetCountMap[Target] = count;
199       if (n4 == Rest.size())
200         break;
201       // Change n3 to the next blank space after colon + integer pair.
202       n3 = n4;
203     }
204   } else {
205     LineTy = LineType::CallSiteProfile;
206     size_t n3 = Rest.find_last_of(':');
207     CalleeName = Rest.substr(0, n3);
208     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
209       return false;
210   }
211   return true;
212 }
213 
214 /// Load samples from a text file.
215 ///
216 /// See the documentation at the top of the file for an explanation of
217 /// the expected format.
218 ///
219 /// \returns true if the file was loaded successfully, false otherwise.
220 std::error_code SampleProfileReaderText::readImpl() {
221   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
222   sampleprof_error Result = sampleprof_error::success;
223 
224   InlineCallStack InlineStack;
225   int CSProfileCount = 0;
226   int RegularProfileCount = 0;
227   uint32_t ProbeProfileCount = 0;
228 
229   // SeenMetadata tracks whether we have processed metadata for the current
230   // top-level function profile.
231   bool SeenMetadata = false;
232 
233   for (; !LineIt.is_at_eof(); ++LineIt) {
234     if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
235       continue;
236     // Read the header of each function.
237     //
238     // Note that for function identifiers we are actually expecting
239     // mangled names, but we may not always get them. This happens when
240     // the compiler decides not to emit the function (e.g., it was inlined
241     // and removed). In this case, the binary will not have the linkage
242     // name for the function, so the profiler will emit the function's
243     // unmangled name, which may contain characters like ':' and '>' in its
244     // name (member functions, templates, etc).
245     //
246     // The only requirement we place on the identifier, then, is that it
247     // should not begin with a number.
248     if ((*LineIt)[0] != ' ') {
249       uint64_t NumSamples, NumHeadSamples;
250       StringRef FName;
251       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
252         reportError(LineIt.line_number(),
253                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
254         return sampleprof_error::malformed;
255       }
256       SeenMetadata = false;
257       SampleContext FContext(FName);
258       if (FContext.hasContext())
259         ++CSProfileCount;
260       else
261         ++RegularProfileCount;
262       Profiles[FContext] = FunctionSamples();
263       FunctionSamples &FProfile = Profiles[FContext];
264       FProfile.setName(FContext.getName());
265       FProfile.setContext(FContext);
266       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
267       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
268       InlineStack.clear();
269       InlineStack.push_back(&FProfile);
270     } else {
271       uint64_t NumSamples;
272       StringRef FName;
273       DenseMap<StringRef, uint64_t> TargetCountMap;
274       uint32_t Depth, LineOffset, Discriminator;
275       LineType LineTy;
276       uint64_t FunctionHash;
277       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
278                      Discriminator, FName, TargetCountMap, FunctionHash)) {
279         reportError(LineIt.line_number(),
280                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
281                         *LineIt);
282         return sampleprof_error::malformed;
283       }
284       if (SeenMetadata && LineTy != LineType::Metadata) {
285         // Metadata must be put at the end of a function profile.
286         reportError(LineIt.line_number(),
287                     "Found non-metadata after metadata: " + *LineIt);
288         return sampleprof_error::malformed;
289       }
290       while (InlineStack.size() > Depth) {
291         InlineStack.pop_back();
292       }
293       switch (LineTy) {
294       case LineType::CallSiteProfile: {
295         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
296             LineLocation(LineOffset, Discriminator))[std::string(FName)];
297         FSamples.setName(FName);
298         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
299         InlineStack.push_back(&FSamples);
300         break;
301       }
302       case LineType::BodyProfile: {
303         while (InlineStack.size() > Depth) {
304           InlineStack.pop_back();
305         }
306         FunctionSamples &FProfile = *InlineStack.back();
307         for (const auto &name_count : TargetCountMap) {
308           MergeResult(Result, FProfile.addCalledTargetSamples(
309                                   LineOffset, Discriminator, name_count.first,
310                                   name_count.second));
311         }
312         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
313                                                     NumSamples));
314         break;
315       }
316       case LineType::Metadata: {
317         FunctionSamples &FProfile = *InlineStack.back();
318         FProfile.setFunctionHash(FunctionHash);
319         ++ProbeProfileCount;
320         SeenMetadata = true;
321         break;
322       }
323       }
324     }
325   }
326 
327   assert((RegularProfileCount == 0 || CSProfileCount == 0) &&
328          "Cannot have both context-sensitive and regular profile");
329   ProfileIsCS = (CSProfileCount > 0);
330   assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
331          "Cannot have both probe-based profiles and regular profiles");
332   ProfileIsProbeBased = (ProbeProfileCount > 0);
333   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
334 
335   if (Result == sampleprof_error::success)
336     computeSummary();
337 
338   return Result;
339 }
340 
341 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
342   bool result = false;
343 
344   // Check that the first non-comment line is a valid function header.
345   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
346   if (!LineIt.is_at_eof()) {
347     if ((*LineIt)[0] != ' ') {
348       uint64_t NumSamples, NumHeadSamples;
349       StringRef FName;
350       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
351     }
352   }
353 
354   return result;
355 }
356 
357 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
358   unsigned NumBytesRead = 0;
359   std::error_code EC;
360   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
361 
362   if (Val > std::numeric_limits<T>::max())
363     EC = sampleprof_error::malformed;
364   else if (Data + NumBytesRead > End)
365     EC = sampleprof_error::truncated;
366   else
367     EC = sampleprof_error::success;
368 
369   if (EC) {
370     reportError(0, EC.message());
371     return EC;
372   }
373 
374   Data += NumBytesRead;
375   return static_cast<T>(Val);
376 }
377 
378 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
379   std::error_code EC;
380   StringRef Str(reinterpret_cast<const char *>(Data));
381   if (Data + Str.size() + 1 > End) {
382     EC = sampleprof_error::truncated;
383     reportError(0, EC.message());
384     return EC;
385   }
386 
387   Data += Str.size() + 1;
388   return Str;
389 }
390 
391 template <typename T>
392 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
393   std::error_code EC;
394 
395   if (Data + sizeof(T) > End) {
396     EC = sampleprof_error::truncated;
397     reportError(0, EC.message());
398     return EC;
399   }
400 
401   using namespace support;
402   T Val = endian::readNext<T, little, unaligned>(Data);
403   return Val;
404 }
405 
406 template <typename T>
407 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
408   std::error_code EC;
409   auto Idx = readNumber<uint32_t>();
410   if (std::error_code EC = Idx.getError())
411     return EC;
412   if (*Idx >= Table.size())
413     return sampleprof_error::truncated_name_table;
414   return *Idx;
415 }
416 
417 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
418   auto Idx = readStringIndex(NameTable);
419   if (std::error_code EC = Idx.getError())
420     return EC;
421 
422   return NameTable[*Idx];
423 }
424 
425 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
426   if (!FixedLengthMD5)
427     return SampleProfileReaderBinary::readStringFromTable();
428 
429   // read NameTable index.
430   auto Idx = readStringIndex(NameTable);
431   if (std::error_code EC = Idx.getError())
432     return EC;
433 
434   // Check whether the name to be accessed has been accessed before,
435   // if not, read it from memory directly.
436   StringRef &SR = NameTable[*Idx];
437   if (SR.empty()) {
438     const uint8_t *SavedData = Data;
439     Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
440     auto FID = readUnencodedNumber<uint64_t>();
441     if (std::error_code EC = FID.getError())
442       return EC;
443     // Save the string converted from uint64_t in MD5StringBuf. All the
444     // references to the name are all StringRefs refering to the string
445     // in MD5StringBuf.
446     MD5StringBuf->push_back(std::to_string(*FID));
447     SR = MD5StringBuf->back();
448     Data = SavedData;
449   }
450   return SR;
451 }
452 
453 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
454   auto Idx = readStringIndex(NameTable);
455   if (std::error_code EC = Idx.getError())
456     return EC;
457 
458   return StringRef(NameTable[*Idx]);
459 }
460 
461 std::error_code
462 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
463   auto NumSamples = readNumber<uint64_t>();
464   if (std::error_code EC = NumSamples.getError())
465     return EC;
466   FProfile.addTotalSamples(*NumSamples);
467 
468   // Read the samples in the body.
469   auto NumRecords = readNumber<uint32_t>();
470   if (std::error_code EC = NumRecords.getError())
471     return EC;
472 
473   for (uint32_t I = 0; I < *NumRecords; ++I) {
474     auto LineOffset = readNumber<uint64_t>();
475     if (std::error_code EC = LineOffset.getError())
476       return EC;
477 
478     if (!isOffsetLegal(*LineOffset)) {
479       return std::error_code();
480     }
481 
482     auto Discriminator = readNumber<uint64_t>();
483     if (std::error_code EC = Discriminator.getError())
484       return EC;
485 
486     auto NumSamples = readNumber<uint64_t>();
487     if (std::error_code EC = NumSamples.getError())
488       return EC;
489 
490     auto NumCalls = readNumber<uint32_t>();
491     if (std::error_code EC = NumCalls.getError())
492       return EC;
493 
494     for (uint32_t J = 0; J < *NumCalls; ++J) {
495       auto CalledFunction(readStringFromTable());
496       if (std::error_code EC = CalledFunction.getError())
497         return EC;
498 
499       auto CalledFunctionSamples = readNumber<uint64_t>();
500       if (std::error_code EC = CalledFunctionSamples.getError())
501         return EC;
502 
503       FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
504                                       *CalledFunction, *CalledFunctionSamples);
505     }
506 
507     FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
508   }
509 
510   // Read all the samples for inlined function calls.
511   auto NumCallsites = readNumber<uint32_t>();
512   if (std::error_code EC = NumCallsites.getError())
513     return EC;
514 
515   for (uint32_t J = 0; J < *NumCallsites; ++J) {
516     auto LineOffset = readNumber<uint64_t>();
517     if (std::error_code EC = LineOffset.getError())
518       return EC;
519 
520     auto Discriminator = readNumber<uint64_t>();
521     if (std::error_code EC = Discriminator.getError())
522       return EC;
523 
524     auto FName(readStringFromTable());
525     if (std::error_code EC = FName.getError())
526       return EC;
527 
528     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
529         LineLocation(*LineOffset, *Discriminator))[std::string(*FName)];
530     CalleeProfile.setName(*FName);
531     if (std::error_code EC = readProfile(CalleeProfile))
532       return EC;
533   }
534 
535   return sampleprof_error::success;
536 }
537 
538 std::error_code
539 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
540   Data = Start;
541   auto NumHeadSamples = readNumber<uint64_t>();
542   if (std::error_code EC = NumHeadSamples.getError())
543     return EC;
544 
545   auto FName(readStringFromTable());
546   if (std::error_code EC = FName.getError())
547     return EC;
548 
549   Profiles[*FName] = FunctionSamples();
550   FunctionSamples &FProfile = Profiles[*FName];
551   FProfile.setName(*FName);
552 
553   FProfile.addHeadSamples(*NumHeadSamples);
554 
555   if (std::error_code EC = readProfile(FProfile))
556     return EC;
557   return sampleprof_error::success;
558 }
559 
560 std::error_code SampleProfileReaderBinary::readImpl() {
561   while (!at_eof()) {
562     if (std::error_code EC = readFuncProfile(Data))
563       return EC;
564   }
565 
566   return sampleprof_error::success;
567 }
568 
569 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
570     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
571   Data = Start;
572   End = Start + Size;
573   switch (Entry.Type) {
574   case SecProfSummary:
575     if (std::error_code EC = readSummary())
576       return EC;
577     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
578       Summary->setPartialProfile(true);
579     break;
580   case SecNameTable: {
581     FixedLengthMD5 =
582         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
583     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
584     assert((!FixedLengthMD5 || UseMD5) &&
585            "If FixedLengthMD5 is true, UseMD5 has to be true");
586     if (std::error_code EC = readNameTableSec(UseMD5))
587       return EC;
588     break;
589   }
590   case SecLBRProfile:
591     if (std::error_code EC = readFuncProfiles())
592       return EC;
593     break;
594   case SecFuncOffsetTable:
595     if (std::error_code EC = readFuncOffsetTable())
596       return EC;
597     break;
598   case SecFuncMetadata:
599     ProfileIsProbeBased =
600         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
601     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
602     if (std::error_code EC = readFuncMetadata())
603       return EC;
604     break;
605   case SecProfileSymbolList:
606     if (std::error_code EC = readProfileSymbolList())
607       return EC;
608     break;
609   default:
610     if (std::error_code EC = readCustomSection(Entry))
611       return EC;
612     break;
613   }
614   return sampleprof_error::success;
615 }
616 
617 void SampleProfileReaderExtBinaryBase::collectFuncsFrom(const Module &M) {
618   UseAllFuncs = false;
619   FuncsToUse.clear();
620   for (auto &F : M)
621     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
622 }
623 
624 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
625   // If there are more than one FuncOffsetTable, the profile read associated
626   // with previous FuncOffsetTable has to be done before next FuncOffsetTable
627   // is read.
628   FuncOffsetTable.clear();
629 
630   auto Size = readNumber<uint64_t>();
631   if (std::error_code EC = Size.getError())
632     return EC;
633 
634   FuncOffsetTable.reserve(*Size);
635   for (uint32_t I = 0; I < *Size; ++I) {
636     auto FName(readStringFromTable());
637     if (std::error_code EC = FName.getError())
638       return EC;
639 
640     auto Offset = readNumber<uint64_t>();
641     if (std::error_code EC = Offset.getError())
642       return EC;
643 
644     FuncOffsetTable[*FName] = *Offset;
645   }
646   return sampleprof_error::success;
647 }
648 
649 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
650   const uint8_t *Start = Data;
651   if (UseAllFuncs) {
652     while (Data < End) {
653       if (std::error_code EC = readFuncProfile(Data))
654         return EC;
655     }
656     assert(Data == End && "More data is read than expected");
657     return sampleprof_error::success;
658   }
659 
660   if (Remapper) {
661     for (auto Name : FuncsToUse) {
662       Remapper->insert(Name);
663     }
664   }
665 
666   if (useMD5()) {
667     for (auto Name : FuncsToUse) {
668       auto GUID = std::to_string(MD5Hash(Name));
669       auto iter = FuncOffsetTable.find(StringRef(GUID));
670       if (iter == FuncOffsetTable.end())
671         continue;
672       const uint8_t *FuncProfileAddr = Start + iter->second;
673       assert(FuncProfileAddr < End && "out of LBRProfile section");
674       if (std::error_code EC = readFuncProfile(FuncProfileAddr))
675         return EC;
676     }
677   } else {
678     for (auto NameOffset : FuncOffsetTable) {
679       auto FuncName = NameOffset.first;
680       if (!FuncsToUse.count(FuncName) &&
681           (!Remapper || !Remapper->exist(FuncName)))
682         continue;
683       const uint8_t *FuncProfileAddr = Start + NameOffset.second;
684       assert(FuncProfileAddr < End && "out of LBRProfile section");
685       if (std::error_code EC = readFuncProfile(FuncProfileAddr))
686         return EC;
687     }
688   }
689 
690   Data = End;
691   return sampleprof_error::success;
692 }
693 
694 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
695   if (!ProfSymList)
696     ProfSymList = std::make_unique<ProfileSymbolList>();
697 
698   if (std::error_code EC = ProfSymList->read(Data, End - Data))
699     return EC;
700 
701   Data = End;
702   return sampleprof_error::success;
703 }
704 
705 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
706     const uint8_t *SecStart, const uint64_t SecSize,
707     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
708   Data = SecStart;
709   End = SecStart + SecSize;
710   auto DecompressSize = readNumber<uint64_t>();
711   if (std::error_code EC = DecompressSize.getError())
712     return EC;
713   DecompressBufSize = *DecompressSize;
714 
715   auto CompressSize = readNumber<uint64_t>();
716   if (std::error_code EC = CompressSize.getError())
717     return EC;
718 
719   if (!llvm::zlib::isAvailable())
720     return sampleprof_error::zlib_unavailable;
721 
722   StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
723                               *CompressSize);
724   char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
725   size_t UCSize = DecompressBufSize;
726   llvm::Error E =
727       zlib::uncompress(CompressedStrings, Buffer, UCSize);
728   if (E)
729     return sampleprof_error::uncompress_failed;
730   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
731   return sampleprof_error::success;
732 }
733 
734 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
735   const uint8_t *BufStart =
736       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
737 
738   for (auto &Entry : SecHdrTable) {
739     // Skip empty section.
740     if (!Entry.Size)
741       continue;
742 
743     // Skip sections without context when SkipFlatProf is true.
744     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
745       continue;
746 
747     const uint8_t *SecStart = BufStart + Entry.Offset;
748     uint64_t SecSize = Entry.Size;
749 
750     // If the section is compressed, decompress it into a buffer
751     // DecompressBuf before reading the actual data. The pointee of
752     // 'Data' will be changed to buffer hold by DecompressBuf
753     // temporarily when reading the actual data.
754     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
755     if (isCompressed) {
756       const uint8_t *DecompressBuf;
757       uint64_t DecompressBufSize;
758       if (std::error_code EC = decompressSection(
759               SecStart, SecSize, DecompressBuf, DecompressBufSize))
760         return EC;
761       SecStart = DecompressBuf;
762       SecSize = DecompressBufSize;
763     }
764 
765     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
766       return EC;
767     if (Data != SecStart + SecSize)
768       return sampleprof_error::malformed;
769 
770     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
771     if (isCompressed) {
772       Data = BufStart + Entry.Offset;
773       End = BufStart + Buffer->getBufferSize();
774     }
775   }
776 
777   return sampleprof_error::success;
778 }
779 
780 std::error_code SampleProfileReaderCompactBinary::readImpl() {
781   std::vector<uint64_t> OffsetsToUse;
782   if (UseAllFuncs) {
783     for (auto FuncEntry : FuncOffsetTable) {
784       OffsetsToUse.push_back(FuncEntry.second);
785     }
786   }
787   else {
788     for (auto Name : FuncsToUse) {
789       auto GUID = std::to_string(MD5Hash(Name));
790       auto iter = FuncOffsetTable.find(StringRef(GUID));
791       if (iter == FuncOffsetTable.end())
792         continue;
793       OffsetsToUse.push_back(iter->second);
794     }
795   }
796 
797   for (auto Offset : OffsetsToUse) {
798     const uint8_t *SavedData = Data;
799     if (std::error_code EC = readFuncProfile(
800             reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
801             Offset))
802       return EC;
803     Data = SavedData;
804   }
805   return sampleprof_error::success;
806 }
807 
808 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
809   if (Magic == SPMagic())
810     return sampleprof_error::success;
811   return sampleprof_error::bad_magic;
812 }
813 
814 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
815   if (Magic == SPMagic(SPF_Ext_Binary))
816     return sampleprof_error::success;
817   return sampleprof_error::bad_magic;
818 }
819 
820 std::error_code
821 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
822   if (Magic == SPMagic(SPF_Compact_Binary))
823     return sampleprof_error::success;
824   return sampleprof_error::bad_magic;
825 }
826 
827 std::error_code SampleProfileReaderBinary::readNameTable() {
828   auto Size = readNumber<uint32_t>();
829   if (std::error_code EC = Size.getError())
830     return EC;
831   NameTable.reserve(*Size + NameTable.size());
832   for (uint32_t I = 0; I < *Size; ++I) {
833     auto Name(readString());
834     if (std::error_code EC = Name.getError())
835       return EC;
836     NameTable.push_back(*Name);
837   }
838 
839   return sampleprof_error::success;
840 }
841 
842 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
843   auto Size = readNumber<uint64_t>();
844   if (std::error_code EC = Size.getError())
845     return EC;
846   MD5StringBuf = std::make_unique<std::vector<std::string>>();
847   MD5StringBuf->reserve(*Size);
848   if (FixedLengthMD5) {
849     // Preallocate and initialize NameTable so we can check whether a name
850     // index has been read before by checking whether the element in the
851     // NameTable is empty, meanwhile readStringIndex can do the boundary
852     // check using the size of NameTable.
853     NameTable.resize(*Size + NameTable.size());
854 
855     MD5NameMemStart = Data;
856     Data = Data + (*Size) * sizeof(uint64_t);
857     return sampleprof_error::success;
858   }
859   NameTable.reserve(*Size);
860   for (uint32_t I = 0; I < *Size; ++I) {
861     auto FID = readNumber<uint64_t>();
862     if (std::error_code EC = FID.getError())
863       return EC;
864     MD5StringBuf->push_back(std::to_string(*FID));
865     // NameTable is a vector of StringRef. Here it is pushing back a
866     // StringRef initialized with the last string in MD5stringBuf.
867     NameTable.push_back(MD5StringBuf->back());
868   }
869   return sampleprof_error::success;
870 }
871 
872 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
873   if (IsMD5)
874     return readMD5NameTable();
875   return SampleProfileReaderBinary::readNameTable();
876 }
877 
878 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
879   if (!ProfileIsProbeBased)
880     return sampleprof_error::success;
881   for (unsigned I = 0; I < Profiles.size(); ++I) {
882     auto FName(readStringFromTable());
883     if (std::error_code EC = FName.getError())
884       return EC;
885 
886     auto Checksum = readNumber<uint64_t>();
887     if (std::error_code EC = Checksum.getError())
888       return EC;
889 
890     Profiles[*FName].setFunctionHash(*Checksum);
891   }
892   return sampleprof_error::success;
893 }
894 
895 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
896   auto Size = readNumber<uint64_t>();
897   if (std::error_code EC = Size.getError())
898     return EC;
899   NameTable.reserve(*Size);
900   for (uint32_t I = 0; I < *Size; ++I) {
901     auto FID = readNumber<uint64_t>();
902     if (std::error_code EC = FID.getError())
903       return EC;
904     NameTable.push_back(std::to_string(*FID));
905   }
906   return sampleprof_error::success;
907 }
908 
909 std::error_code
910 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
911   SecHdrTableEntry Entry;
912   auto Type = readUnencodedNumber<uint64_t>();
913   if (std::error_code EC = Type.getError())
914     return EC;
915   Entry.Type = static_cast<SecType>(*Type);
916 
917   auto Flags = readUnencodedNumber<uint64_t>();
918   if (std::error_code EC = Flags.getError())
919     return EC;
920   Entry.Flags = *Flags;
921 
922   auto Offset = readUnencodedNumber<uint64_t>();
923   if (std::error_code EC = Offset.getError())
924     return EC;
925   Entry.Offset = *Offset;
926 
927   auto Size = readUnencodedNumber<uint64_t>();
928   if (std::error_code EC = Size.getError())
929     return EC;
930   Entry.Size = *Size;
931 
932   Entry.LayoutIndex = Idx;
933   SecHdrTable.push_back(std::move(Entry));
934   return sampleprof_error::success;
935 }
936 
937 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
938   auto EntryNum = readUnencodedNumber<uint64_t>();
939   if (std::error_code EC = EntryNum.getError())
940     return EC;
941 
942   for (uint32_t i = 0; i < (*EntryNum); i++)
943     if (std::error_code EC = readSecHdrTableEntry(i))
944       return EC;
945 
946   return sampleprof_error::success;
947 }
948 
949 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
950   const uint8_t *BufStart =
951       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
952   Data = BufStart;
953   End = BufStart + Buffer->getBufferSize();
954 
955   if (std::error_code EC = readMagicIdent())
956     return EC;
957 
958   if (std::error_code EC = readSecHdrTable())
959     return EC;
960 
961   return sampleprof_error::success;
962 }
963 
964 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
965   uint64_t Size = 0;
966   for (auto &Entry : SecHdrTable) {
967     if (Entry.Type == Type)
968       Size += Entry.Size;
969   }
970   return Size;
971 }
972 
973 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
974   // Sections in SecHdrTable is not necessarily in the same order as
975   // sections in the profile because section like FuncOffsetTable needs
976   // to be written after section LBRProfile but needs to be read before
977   // section LBRProfile, so we cannot simply use the last entry in
978   // SecHdrTable to calculate the file size.
979   uint64_t FileSize = 0;
980   for (auto &Entry : SecHdrTable) {
981     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
982   }
983   return FileSize;
984 }
985 
986 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
987   std::string Flags;
988   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
989     Flags.append("{compressed,");
990   else
991     Flags.append("{");
992 
993   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
994     Flags.append("flat,");
995 
996   switch (Entry.Type) {
997   case SecNameTable:
998     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
999       Flags.append("fixlenmd5,");
1000     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1001       Flags.append("md5,");
1002     break;
1003   case SecProfSummary:
1004     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1005       Flags.append("partial,");
1006     break;
1007   default:
1008     break;
1009   }
1010   char &last = Flags.back();
1011   if (last == ',')
1012     last = '}';
1013   else
1014     Flags.append("}");
1015   return Flags;
1016 }
1017 
1018 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1019   uint64_t TotalSecsSize = 0;
1020   for (auto &Entry : SecHdrTable) {
1021     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1022        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1023        << "\n";
1024     ;
1025     TotalSecsSize += Entry.Size;
1026   }
1027   uint64_t HeaderSize = SecHdrTable.front().Offset;
1028   assert(HeaderSize + TotalSecsSize == getFileSize() &&
1029          "Size of 'header + sections' doesn't match the total size of profile");
1030 
1031   OS << "Header Size: " << HeaderSize << "\n";
1032   OS << "Total Sections Size: " << TotalSecsSize << "\n";
1033   OS << "File Size: " << getFileSize() << "\n";
1034   return true;
1035 }
1036 
1037 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1038   // Read and check the magic identifier.
1039   auto Magic = readNumber<uint64_t>();
1040   if (std::error_code EC = Magic.getError())
1041     return EC;
1042   else if (std::error_code EC = verifySPMagic(*Magic))
1043     return EC;
1044 
1045   // Read the version number.
1046   auto Version = readNumber<uint64_t>();
1047   if (std::error_code EC = Version.getError())
1048     return EC;
1049   else if (*Version != SPVersion())
1050     return sampleprof_error::unsupported_version;
1051 
1052   return sampleprof_error::success;
1053 }
1054 
1055 std::error_code SampleProfileReaderBinary::readHeader() {
1056   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1057   End = Data + Buffer->getBufferSize();
1058 
1059   if (std::error_code EC = readMagicIdent())
1060     return EC;
1061 
1062   if (std::error_code EC = readSummary())
1063     return EC;
1064 
1065   if (std::error_code EC = readNameTable())
1066     return EC;
1067   return sampleprof_error::success;
1068 }
1069 
1070 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1071   SampleProfileReaderBinary::readHeader();
1072   if (std::error_code EC = readFuncOffsetTable())
1073     return EC;
1074   return sampleprof_error::success;
1075 }
1076 
1077 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1078   auto TableOffset = readUnencodedNumber<uint64_t>();
1079   if (std::error_code EC = TableOffset.getError())
1080     return EC;
1081 
1082   const uint8_t *SavedData = Data;
1083   const uint8_t *TableStart =
1084       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1085       *TableOffset;
1086   Data = TableStart;
1087 
1088   auto Size = readNumber<uint64_t>();
1089   if (std::error_code EC = Size.getError())
1090     return EC;
1091 
1092   FuncOffsetTable.reserve(*Size);
1093   for (uint32_t I = 0; I < *Size; ++I) {
1094     auto FName(readStringFromTable());
1095     if (std::error_code EC = FName.getError())
1096       return EC;
1097 
1098     auto Offset = readNumber<uint64_t>();
1099     if (std::error_code EC = Offset.getError())
1100       return EC;
1101 
1102     FuncOffsetTable[*FName] = *Offset;
1103   }
1104   End = TableStart;
1105   Data = SavedData;
1106   return sampleprof_error::success;
1107 }
1108 
1109 void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) {
1110   UseAllFuncs = false;
1111   FuncsToUse.clear();
1112   for (auto &F : M)
1113     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1114 }
1115 
1116 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1117     std::vector<ProfileSummaryEntry> &Entries) {
1118   auto Cutoff = readNumber<uint64_t>();
1119   if (std::error_code EC = Cutoff.getError())
1120     return EC;
1121 
1122   auto MinBlockCount = readNumber<uint64_t>();
1123   if (std::error_code EC = MinBlockCount.getError())
1124     return EC;
1125 
1126   auto NumBlocks = readNumber<uint64_t>();
1127   if (std::error_code EC = NumBlocks.getError())
1128     return EC;
1129 
1130   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1131   return sampleprof_error::success;
1132 }
1133 
1134 std::error_code SampleProfileReaderBinary::readSummary() {
1135   auto TotalCount = readNumber<uint64_t>();
1136   if (std::error_code EC = TotalCount.getError())
1137     return EC;
1138 
1139   auto MaxBlockCount = readNumber<uint64_t>();
1140   if (std::error_code EC = MaxBlockCount.getError())
1141     return EC;
1142 
1143   auto MaxFunctionCount = readNumber<uint64_t>();
1144   if (std::error_code EC = MaxFunctionCount.getError())
1145     return EC;
1146 
1147   auto NumBlocks = readNumber<uint64_t>();
1148   if (std::error_code EC = NumBlocks.getError())
1149     return EC;
1150 
1151   auto NumFunctions = readNumber<uint64_t>();
1152   if (std::error_code EC = NumFunctions.getError())
1153     return EC;
1154 
1155   auto NumSummaryEntries = readNumber<uint64_t>();
1156   if (std::error_code EC = NumSummaryEntries.getError())
1157     return EC;
1158 
1159   std::vector<ProfileSummaryEntry> Entries;
1160   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1161     std::error_code EC = readSummaryEntry(Entries);
1162     if (EC != sampleprof_error::success)
1163       return EC;
1164   }
1165   Summary = std::make_unique<ProfileSummary>(
1166       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1167       *MaxFunctionCount, *NumBlocks, *NumFunctions);
1168 
1169   return sampleprof_error::success;
1170 }
1171 
1172 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1173   const uint8_t *Data =
1174       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1175   uint64_t Magic = decodeULEB128(Data);
1176   return Magic == SPMagic();
1177 }
1178 
1179 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1180   const uint8_t *Data =
1181       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1182   uint64_t Magic = decodeULEB128(Data);
1183   return Magic == SPMagic(SPF_Ext_Binary);
1184 }
1185 
1186 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1187   const uint8_t *Data =
1188       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1189   uint64_t Magic = decodeULEB128(Data);
1190   return Magic == SPMagic(SPF_Compact_Binary);
1191 }
1192 
1193 std::error_code SampleProfileReaderGCC::skipNextWord() {
1194   uint32_t dummy;
1195   if (!GcovBuffer.readInt(dummy))
1196     return sampleprof_error::truncated;
1197   return sampleprof_error::success;
1198 }
1199 
1200 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1201   if (sizeof(T) <= sizeof(uint32_t)) {
1202     uint32_t Val;
1203     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1204       return static_cast<T>(Val);
1205   } else if (sizeof(T) <= sizeof(uint64_t)) {
1206     uint64_t Val;
1207     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1208       return static_cast<T>(Val);
1209   }
1210 
1211   std::error_code EC = sampleprof_error::malformed;
1212   reportError(0, EC.message());
1213   return EC;
1214 }
1215 
1216 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1217   StringRef Str;
1218   if (!GcovBuffer.readString(Str))
1219     return sampleprof_error::truncated;
1220   return Str;
1221 }
1222 
1223 std::error_code SampleProfileReaderGCC::readHeader() {
1224   // Read the magic identifier.
1225   if (!GcovBuffer.readGCDAFormat())
1226     return sampleprof_error::unrecognized_format;
1227 
1228   // Read the version number. Note - the GCC reader does not validate this
1229   // version, but the profile creator generates v704.
1230   GCOV::GCOVVersion version;
1231   if (!GcovBuffer.readGCOVVersion(version))
1232     return sampleprof_error::unrecognized_format;
1233 
1234   if (version != GCOV::V407)
1235     return sampleprof_error::unsupported_version;
1236 
1237   // Skip the empty integer.
1238   if (std::error_code EC = skipNextWord())
1239     return EC;
1240 
1241   return sampleprof_error::success;
1242 }
1243 
1244 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1245   uint32_t Tag;
1246   if (!GcovBuffer.readInt(Tag))
1247     return sampleprof_error::truncated;
1248 
1249   if (Tag != Expected)
1250     return sampleprof_error::malformed;
1251 
1252   if (std::error_code EC = skipNextWord())
1253     return EC;
1254 
1255   return sampleprof_error::success;
1256 }
1257 
1258 std::error_code SampleProfileReaderGCC::readNameTable() {
1259   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1260     return EC;
1261 
1262   uint32_t Size;
1263   if (!GcovBuffer.readInt(Size))
1264     return sampleprof_error::truncated;
1265 
1266   for (uint32_t I = 0; I < Size; ++I) {
1267     StringRef Str;
1268     if (!GcovBuffer.readString(Str))
1269       return sampleprof_error::truncated;
1270     Names.push_back(std::string(Str));
1271   }
1272 
1273   return sampleprof_error::success;
1274 }
1275 
1276 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1277   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1278     return EC;
1279 
1280   uint32_t NumFunctions;
1281   if (!GcovBuffer.readInt(NumFunctions))
1282     return sampleprof_error::truncated;
1283 
1284   InlineCallStack Stack;
1285   for (uint32_t I = 0; I < NumFunctions; ++I)
1286     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1287       return EC;
1288 
1289   computeSummary();
1290   return sampleprof_error::success;
1291 }
1292 
1293 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1294     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1295   uint64_t HeadCount = 0;
1296   if (InlineStack.size() == 0)
1297     if (!GcovBuffer.readInt64(HeadCount))
1298       return sampleprof_error::truncated;
1299 
1300   uint32_t NameIdx;
1301   if (!GcovBuffer.readInt(NameIdx))
1302     return sampleprof_error::truncated;
1303 
1304   StringRef Name(Names[NameIdx]);
1305 
1306   uint32_t NumPosCounts;
1307   if (!GcovBuffer.readInt(NumPosCounts))
1308     return sampleprof_error::truncated;
1309 
1310   uint32_t NumCallsites;
1311   if (!GcovBuffer.readInt(NumCallsites))
1312     return sampleprof_error::truncated;
1313 
1314   FunctionSamples *FProfile = nullptr;
1315   if (InlineStack.size() == 0) {
1316     // If this is a top function that we have already processed, do not
1317     // update its profile again.  This happens in the presence of
1318     // function aliases.  Since these aliases share the same function
1319     // body, there will be identical replicated profiles for the
1320     // original function.  In this case, we simply not bother updating
1321     // the profile of the original function.
1322     FProfile = &Profiles[Name];
1323     FProfile->addHeadSamples(HeadCount);
1324     if (FProfile->getTotalSamples() > 0)
1325       Update = false;
1326   } else {
1327     // Otherwise, we are reading an inlined instance. The top of the
1328     // inline stack contains the profile of the caller. Insert this
1329     // callee in the caller's CallsiteMap.
1330     FunctionSamples *CallerProfile = InlineStack.front();
1331     uint32_t LineOffset = Offset >> 16;
1332     uint32_t Discriminator = Offset & 0xffff;
1333     FProfile = &CallerProfile->functionSamplesAt(
1334         LineLocation(LineOffset, Discriminator))[std::string(Name)];
1335   }
1336   FProfile->setName(Name);
1337 
1338   for (uint32_t I = 0; I < NumPosCounts; ++I) {
1339     uint32_t Offset;
1340     if (!GcovBuffer.readInt(Offset))
1341       return sampleprof_error::truncated;
1342 
1343     uint32_t NumTargets;
1344     if (!GcovBuffer.readInt(NumTargets))
1345       return sampleprof_error::truncated;
1346 
1347     uint64_t Count;
1348     if (!GcovBuffer.readInt64(Count))
1349       return sampleprof_error::truncated;
1350 
1351     // The line location is encoded in the offset as:
1352     //   high 16 bits: line offset to the start of the function.
1353     //   low 16 bits: discriminator.
1354     uint32_t LineOffset = Offset >> 16;
1355     uint32_t Discriminator = Offset & 0xffff;
1356 
1357     InlineCallStack NewStack;
1358     NewStack.push_back(FProfile);
1359     llvm::append_range(NewStack, InlineStack);
1360     if (Update) {
1361       // Walk up the inline stack, adding the samples on this line to
1362       // the total sample count of the callers in the chain.
1363       for (auto CallerProfile : NewStack)
1364         CallerProfile->addTotalSamples(Count);
1365 
1366       // Update the body samples for the current profile.
1367       FProfile->addBodySamples(LineOffset, Discriminator, Count);
1368     }
1369 
1370     // Process the list of functions called at an indirect call site.
1371     // These are all the targets that a function pointer (or virtual
1372     // function) resolved at runtime.
1373     for (uint32_t J = 0; J < NumTargets; J++) {
1374       uint32_t HistVal;
1375       if (!GcovBuffer.readInt(HistVal))
1376         return sampleprof_error::truncated;
1377 
1378       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1379         return sampleprof_error::malformed;
1380 
1381       uint64_t TargetIdx;
1382       if (!GcovBuffer.readInt64(TargetIdx))
1383         return sampleprof_error::truncated;
1384       StringRef TargetName(Names[TargetIdx]);
1385 
1386       uint64_t TargetCount;
1387       if (!GcovBuffer.readInt64(TargetCount))
1388         return sampleprof_error::truncated;
1389 
1390       if (Update)
1391         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1392                                          TargetName, TargetCount);
1393     }
1394   }
1395 
1396   // Process all the inlined callers into the current function. These
1397   // are all the callsites that were inlined into this function.
1398   for (uint32_t I = 0; I < NumCallsites; I++) {
1399     // The offset is encoded as:
1400     //   high 16 bits: line offset to the start of the function.
1401     //   low 16 bits: discriminator.
1402     uint32_t Offset;
1403     if (!GcovBuffer.readInt(Offset))
1404       return sampleprof_error::truncated;
1405     InlineCallStack NewStack;
1406     NewStack.push_back(FProfile);
1407     llvm::append_range(NewStack, InlineStack);
1408     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1409       return EC;
1410   }
1411 
1412   return sampleprof_error::success;
1413 }
1414 
1415 /// Read a GCC AutoFDO profile.
1416 ///
1417 /// This format is generated by the Linux Perf conversion tool at
1418 /// https://github.com/google/autofdo.
1419 std::error_code SampleProfileReaderGCC::readImpl() {
1420   // Read the string table.
1421   if (std::error_code EC = readNameTable())
1422     return EC;
1423 
1424   // Read the source profile.
1425   if (std::error_code EC = readFunctionProfiles())
1426     return EC;
1427 
1428   return sampleprof_error::success;
1429 }
1430 
1431 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1432   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1433   return Magic == "adcg*704";
1434 }
1435 
1436 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1437   // If the reader uses MD5 to represent string, we can't remap it because
1438   // we don't know what the original function names were.
1439   if (Reader.useMD5()) {
1440     Ctx.diagnose(DiagnosticInfoSampleProfile(
1441         Reader.getBuffer()->getBufferIdentifier(),
1442         "Profile data remapping cannot be applied to profile data "
1443         "in compact format (original mangled names are not available).",
1444         DS_Warning));
1445     return;
1446   }
1447 
1448   // CSSPGO-TODO: Remapper is not yet supported.
1449   // We will need to remap the entire context string.
1450   assert(Remappings && "should be initialized while creating remapper");
1451   for (auto &Sample : Reader.getProfiles()) {
1452     DenseSet<StringRef> NamesInSample;
1453     Sample.second.findAllNames(NamesInSample);
1454     for (auto &Name : NamesInSample)
1455       if (auto Key = Remappings->insert(Name))
1456         NameMap.insert({Key, Name});
1457   }
1458 
1459   RemappingApplied = true;
1460 }
1461 
1462 Optional<StringRef>
1463 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1464   if (auto Key = Remappings->lookup(Fname))
1465     return NameMap.lookup(Key);
1466   return None;
1467 }
1468 
1469 /// Prepare a memory buffer for the contents of \p Filename.
1470 ///
1471 /// \returns an error code indicating the status of the buffer.
1472 static ErrorOr<std::unique_ptr<MemoryBuffer>>
1473 setupMemoryBuffer(const Twine &Filename) {
1474   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
1475   if (std::error_code EC = BufferOrErr.getError())
1476     return EC;
1477   auto Buffer = std::move(BufferOrErr.get());
1478 
1479   // Sanity check the file.
1480   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1481     return sampleprof_error::too_large;
1482 
1483   return std::move(Buffer);
1484 }
1485 
1486 /// Create a sample profile reader based on the format of the input file.
1487 ///
1488 /// \param Filename The file to open.
1489 ///
1490 /// \param C The LLVM context to use to emit diagnostics.
1491 ///
1492 /// \param RemapFilename The file used for profile remapping.
1493 ///
1494 /// \returns an error code indicating the status of the created reader.
1495 ErrorOr<std::unique_ptr<SampleProfileReader>>
1496 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1497                             const std::string RemapFilename) {
1498   auto BufferOrError = setupMemoryBuffer(Filename);
1499   if (std::error_code EC = BufferOrError.getError())
1500     return EC;
1501   return create(BufferOrError.get(), C, RemapFilename);
1502 }
1503 
1504 /// Create a sample profile remapper from the given input, to remap the
1505 /// function names in the given profile data.
1506 ///
1507 /// \param Filename The file to open.
1508 ///
1509 /// \param Reader The profile reader the remapper is going to be applied to.
1510 ///
1511 /// \param C The LLVM context to use to emit diagnostics.
1512 ///
1513 /// \returns an error code indicating the status of the created reader.
1514 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1515 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1516                                            SampleProfileReader &Reader,
1517                                            LLVMContext &C) {
1518   auto BufferOrError = setupMemoryBuffer(Filename);
1519   if (std::error_code EC = BufferOrError.getError())
1520     return EC;
1521   return create(BufferOrError.get(), Reader, C);
1522 }
1523 
1524 /// Create a sample profile remapper from the given input, to remap the
1525 /// function names in the given profile data.
1526 ///
1527 /// \param B The memory buffer to create the reader from (assumes ownership).
1528 ///
1529 /// \param C The LLVM context to use to emit diagnostics.
1530 ///
1531 /// \param Reader The profile reader the remapper is going to be applied to.
1532 ///
1533 /// \returns an error code indicating the status of the created reader.
1534 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1535 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1536                                            SampleProfileReader &Reader,
1537                                            LLVMContext &C) {
1538   auto Remappings = std::make_unique<SymbolRemappingReader>();
1539   if (Error E = Remappings->read(*B.get())) {
1540     handleAllErrors(
1541         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1542           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1543                                                  ParseError.getLineNum(),
1544                                                  ParseError.getMessage()));
1545         });
1546     return sampleprof_error::malformed;
1547   }
1548 
1549   return std::make_unique<SampleProfileReaderItaniumRemapper>(
1550       std::move(B), std::move(Remappings), Reader);
1551 }
1552 
1553 /// Create a sample profile reader based on the format of the input data.
1554 ///
1555 /// \param B The memory buffer to create the reader from (assumes ownership).
1556 ///
1557 /// \param C The LLVM context to use to emit diagnostics.
1558 ///
1559 /// \param RemapFilename The file used for profile remapping.
1560 ///
1561 /// \returns an error code indicating the status of the created reader.
1562 ErrorOr<std::unique_ptr<SampleProfileReader>>
1563 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1564                             const std::string RemapFilename) {
1565   std::unique_ptr<SampleProfileReader> Reader;
1566   if (SampleProfileReaderRawBinary::hasFormat(*B))
1567     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1568   else if (SampleProfileReaderExtBinary::hasFormat(*B))
1569     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1570   else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1571     Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1572   else if (SampleProfileReaderGCC::hasFormat(*B))
1573     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1574   else if (SampleProfileReaderText::hasFormat(*B))
1575     Reader.reset(new SampleProfileReaderText(std::move(B), C));
1576   else
1577     return sampleprof_error::unrecognized_format;
1578 
1579   if (!RemapFilename.empty()) {
1580     auto ReaderOrErr =
1581         SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1582     if (std::error_code EC = ReaderOrErr.getError()) {
1583       std::string Msg = "Could not create remapper: " + EC.message();
1584       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1585       return EC;
1586     }
1587     Reader->Remapper = std::move(ReaderOrErr.get());
1588   }
1589 
1590   FunctionSamples::Format = Reader->getFormat();
1591   if (std::error_code EC = Reader->readHeader()) {
1592     return EC;
1593   }
1594 
1595   return std::move(Reader);
1596 }
1597 
1598 // For text and GCC file formats, we compute the summary after reading the
1599 // profile. Binary format has the profile summary in its header.
1600 void SampleProfileReader::computeSummary() {
1601   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1602   for (const auto &I : Profiles) {
1603     const FunctionSamples &Profile = I.second;
1604     Builder.addRecord(Profile);
1605   }
1606   Summary = Builder.getSummary();
1607 }
1608