xref: /llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp (revision 12e9c7aaa66b7624b5d7666ce2794d912bf9e4b7)
1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/ProfileSummary.h"
28 #include "llvm/ProfileData/ProfileCommon.h"
29 #include "llvm/ProfileData/SampleProf.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Compression.h"
32 #include "llvm/Support/ErrorOr.h"
33 #include "llvm/Support/JSON.h"
34 #include "llvm/Support/LEB128.h"
35 #include "llvm/Support/LineIterator.h"
36 #include "llvm/Support/MD5.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/VirtualFileSystem.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <algorithm>
41 #include <cstddef>
42 #include <cstdint>
43 #include <limits>
44 #include <memory>
45 #include <system_error>
46 #include <vector>
47 
48 using namespace llvm;
49 using namespace sampleprof;
50 
51 #define DEBUG_TYPE "samplepgo-reader"
52 
// This internal option specifies whether the profile uses flow-sensitive (FS)
// discriminators.
// It only applies to text and binary format profiles: their readImpl()
// implementations copy this value into ProfileIsFS.
// For ext-binary format profiles, the flag is set in the summary.
static cl::opt<bool> ProfileIsFSDisciminator(
    "profile-isfs", cl::Hidden, cl::init(false),
    cl::desc("Profile uses flow sensitive discriminators"));
59 
60 /// Dump the function profile for \p FName.
61 ///
62 /// \param FContext Name + context of the function to print.
63 /// \param OS Stream to emit the output to.
64 void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
65                                               raw_ostream &OS) {
66   OS << "Function: " << FS.getContext().toString() << ": " << FS;
67 }
68 
69 /// Dump all the function profiles found on stream \p OS.
70 void SampleProfileReader::dump(raw_ostream &OS) {
71   std::vector<NameFunctionSamples> V;
72   sortFuncProfiles(Profiles, V);
73   for (const auto &I : V)
74     dumpFunctionProfile(*I.second, OS);
75 }
76 
77 static void dumpFunctionProfileJson(const FunctionSamples &S,
78                                     json::OStream &JOS, bool TopLevel = false) {
79   auto DumpBody = [&](const BodySampleMap &BodySamples) {
80     for (const auto &I : BodySamples) {
81       const LineLocation &Loc = I.first;
82       const SampleRecord &Sample = I.second;
83       JOS.object([&] {
84         JOS.attribute("line", Loc.LineOffset);
85         if (Loc.Discriminator)
86           JOS.attribute("discriminator", Loc.Discriminator);
87         JOS.attribute("samples", Sample.getSamples());
88 
89         auto CallTargets = Sample.getSortedCallTargets();
90         if (!CallTargets.empty()) {
91           JOS.attributeArray("calls", [&] {
92             for (const auto &J : CallTargets) {
93               JOS.object([&] {
94                 JOS.attribute("function", J.first);
95                 JOS.attribute("samples", J.second);
96               });
97             }
98           });
99         }
100       });
101     }
102   };
103 
104   auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) {
105     for (const auto &I : CallsiteSamples)
106       for (const auto &FS : I.second) {
107         const LineLocation &Loc = I.first;
108         const FunctionSamples &CalleeSamples = FS.second;
109         JOS.object([&] {
110           JOS.attribute("line", Loc.LineOffset);
111           if (Loc.Discriminator)
112             JOS.attribute("discriminator", Loc.Discriminator);
113           JOS.attributeArray(
114               "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); });
115         });
116       }
117   };
118 
119   JOS.object([&] {
120     JOS.attribute("name", S.getName());
121     JOS.attribute("total", S.getTotalSamples());
122     if (TopLevel)
123       JOS.attribute("head", S.getHeadSamples());
124 
125     const auto &BodySamples = S.getBodySamples();
126     if (!BodySamples.empty())
127       JOS.attributeArray("body", [&] { DumpBody(BodySamples); });
128 
129     const auto &CallsiteSamples = S.getCallsiteSamples();
130     if (!CallsiteSamples.empty())
131       JOS.attributeArray("callsites",
132                          [&] { DumpCallsiteSamples(CallsiteSamples); });
133   });
134 }
135 
136 /// Dump all the function profiles found on stream \p OS in the JSON format.
137 void SampleProfileReader::dumpJson(raw_ostream &OS) {
138   std::vector<NameFunctionSamples> V;
139   sortFuncProfiles(Profiles, V);
140   json::OStream JOS(OS, 2);
141   JOS.arrayBegin();
142   for (const auto &F : V)
143     dumpFunctionProfileJson(*F.second, JOS, true);
144   JOS.arrayEnd();
145 
146   // Emit a newline character at the end as json::OStream doesn't emit one.
147   OS << "\n";
148 }
149 
150 /// Parse \p Input as function head.
151 ///
152 /// Parse one line of \p Input, and update function name in \p FName,
153 /// function's total sample count in \p NumSamples, function's entry
154 /// count in \p NumHeadSamples.
155 ///
156 /// \returns true if parsing is successful.
157 static bool ParseHead(const StringRef &Input, StringRef &FName,
158                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
159   if (Input[0] == ' ')
160     return false;
161   size_t n2 = Input.rfind(':');
162   size_t n1 = Input.rfind(':', n2 - 1);
163   FName = Input.substr(0, n1);
164   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
165     return false;
166   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
167     return false;
168   return true;
169 }
170 
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLegalOffset = 0xffff;
  return L <= MaxLegalOffset;
}
173 
174 /// Parse \p Input that contains metadata.
175 /// Possible metadata:
176 /// - CFG Checksum information:
177 ///     !CFGChecksum: 12345
178 /// - CFG Checksum information:
179 ///     !Attributes: 1
180 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
181 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
182                           uint32_t &Attributes) {
183   if (Input.startswith("!CFGChecksum:")) {
184     StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
185     return !CFGInfo.getAsInteger(10, FunctionHash);
186   }
187 
188   if (Input.startswith("!Attributes:")) {
189     StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
190     return !Attrib.getAsInteger(10, Attributes);
191   }
192 
193   return false;
194 }
195 
/// Kind of an indented (non-header) line of a text profile, as classified by
/// ParseLine.
enum class LineType {
  // Inlined callsite record: the text after the location does not start with
  // a digit and has the form 'callee:NUM'.
  CallSiteProfile,
  // Body sample record: the text after the location starts with a digit.
  BodyProfile,
  // Metadata record: the first non-space character is '!'.
  Metadata,
};
201 
202 /// Parse \p Input as line sample.
203 ///
204 /// \param Input input line.
205 /// \param LineTy Type of this line.
206 /// \param Depth the depth of the inline stack.
207 /// \param NumSamples total samples of the line/inlined callsite.
208 /// \param LineOffset line offset to the start of the function.
209 /// \param Discriminator discriminator of the line.
210 /// \param TargetCountMap map from indirect call target to count.
211 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
212 ///
213 /// returns true if parsing is successful.
214 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
215                       uint64_t &NumSamples, uint32_t &LineOffset,
216                       uint32_t &Discriminator, StringRef &CalleeName,
217                       DenseMap<StringRef, uint64_t> &TargetCountMap,
218                       uint64_t &FunctionHash, uint32_t &Attributes) {
219   for (Depth = 0; Input[Depth] == ' '; Depth++)
220     ;
221   if (Depth == 0)
222     return false;
223 
224   if (Input[Depth] == '!') {
225     LineTy = LineType::Metadata;
226     return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
227   }
228 
229   size_t n1 = Input.find(':');
230   StringRef Loc = Input.substr(Depth, n1 - Depth);
231   size_t n2 = Loc.find('.');
232   if (n2 == StringRef::npos) {
233     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
234       return false;
235     Discriminator = 0;
236   } else {
237     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
238       return false;
239     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
240       return false;
241   }
242 
243   StringRef Rest = Input.substr(n1 + 2);
244   if (isDigit(Rest[0])) {
245     LineTy = LineType::BodyProfile;
246     size_t n3 = Rest.find(' ');
247     if (n3 == StringRef::npos) {
248       if (Rest.getAsInteger(10, NumSamples))
249         return false;
250     } else {
251       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
252         return false;
253     }
254     // Find call targets and their sample counts.
255     // Note: In some cases, there are symbols in the profile which are not
256     // mangled. To accommodate such cases, use colon + integer pairs as the
257     // anchor points.
258     // An example:
259     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
260     // ":1000" and ":437" are used as anchor points so the string above will
261     // be interpreted as
262     // target: _M_construct<char *>
263     // count: 1000
264     // target: string_view<std::allocator<char> >
265     // count: 437
266     while (n3 != StringRef::npos) {
267       n3 += Rest.substr(n3).find_first_not_of(' ');
268       Rest = Rest.substr(n3);
269       n3 = Rest.find_first_of(':');
270       if (n3 == StringRef::npos || n3 == 0)
271         return false;
272 
273       StringRef Target;
274       uint64_t count, n4;
275       while (true) {
276         // Get the segment after the current colon.
277         StringRef AfterColon = Rest.substr(n3 + 1);
278         // Get the target symbol before the current colon.
279         Target = Rest.substr(0, n3);
280         // Check if the word after the current colon is an integer.
281         n4 = AfterColon.find_first_of(' ');
282         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
283         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
284         if (!WordAfterColon.getAsInteger(10, count))
285           break;
286 
287         // Try to find the next colon.
288         uint64_t n5 = AfterColon.find_first_of(':');
289         if (n5 == StringRef::npos)
290           return false;
291         n3 += n5 + 1;
292       }
293 
294       // An anchor point is found. Save the {target, count} pair
295       TargetCountMap[Target] = count;
296       if (n4 == Rest.size())
297         break;
298       // Change n3 to the next blank space after colon + integer pair.
299       n3 = n4;
300     }
301   } else {
302     LineTy = LineType::CallSiteProfile;
303     size_t n3 = Rest.find_last_of(':');
304     CalleeName = Rest.substr(0, n3);
305     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
306       return false;
307   }
308   return true;
309 }
310 
311 /// Load samples from a text file.
312 ///
313 /// See the documentation at the top of the file for an explanation of
314 /// the expected format.
315 ///
316 /// \returns true if the file was loaded successfully, false otherwise.
317 std::error_code SampleProfileReaderText::readImpl() {
318   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
319   sampleprof_error Result = sampleprof_error::success;
320 
321   InlineCallStack InlineStack;
322   uint32_t TopLevelProbeProfileCount = 0;
323 
324   // DepthMetadata tracks whether we have processed metadata for the current
325   // top-level or nested function profile.
326   uint32_t DepthMetadata = 0;
327 
328   ProfileIsFS = ProfileIsFSDisciminator;
329   FunctionSamples::ProfileIsFS = ProfileIsFS;
330   for (; !LineIt.is_at_eof(); ++LineIt) {
331     size_t pos = LineIt->find_first_not_of(' ');
332     if (pos == LineIt->npos || (*LineIt)[pos] == '#')
333       continue;
334     // Read the header of each function.
335     //
336     // Note that for function identifiers we are actually expecting
337     // mangled names, but we may not always get them. This happens when
338     // the compiler decides not to emit the function (e.g., it was inlined
339     // and removed). In this case, the binary will not have the linkage
340     // name for the function, so the profiler will emit the function's
341     // unmangled name, which may contain characters like ':' and '>' in its
342     // name (member functions, templates, etc).
343     //
344     // The only requirement we place on the identifier, then, is that it
345     // should not begin with a number.
346     if ((*LineIt)[0] != ' ') {
347       uint64_t NumSamples, NumHeadSamples;
348       StringRef FName;
349       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
350         reportError(LineIt.line_number(),
351                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
352         return sampleprof_error::malformed;
353       }
354       DepthMetadata = 0;
355       SampleContext FContext(FName, CSNameTable);
356       if (FContext.hasContext())
357         ++CSProfileCount;
358       FunctionSamples &FProfile = Profiles.Create(FContext);
359       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
360       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
361       InlineStack.clear();
362       InlineStack.push_back(&FProfile);
363     } else {
364       uint64_t NumSamples;
365       StringRef FName;
366       DenseMap<StringRef, uint64_t> TargetCountMap;
367       uint32_t Depth, LineOffset, Discriminator;
368       LineType LineTy;
369       uint64_t FunctionHash = 0;
370       uint32_t Attributes = 0;
371       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
372                      Discriminator, FName, TargetCountMap, FunctionHash,
373                      Attributes)) {
374         reportError(LineIt.line_number(),
375                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
376                         *LineIt);
377         return sampleprof_error::malformed;
378       }
379       if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
380         // Metadata must be put at the end of a function profile.
381         reportError(LineIt.line_number(),
382                     "Found non-metadata after metadata: " + *LineIt);
383         return sampleprof_error::malformed;
384       }
385 
386       // Here we handle FS discriminators.
387       Discriminator &= getDiscriminatorMask();
388 
389       while (InlineStack.size() > Depth) {
390         InlineStack.pop_back();
391       }
392       switch (LineTy) {
393       case LineType::CallSiteProfile: {
394         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
395             LineLocation(LineOffset, Discriminator))[std::string(FName)];
396         FSamples.setName(FName);
397         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
398         InlineStack.push_back(&FSamples);
399         DepthMetadata = 0;
400         break;
401       }
402       case LineType::BodyProfile: {
403         while (InlineStack.size() > Depth) {
404           InlineStack.pop_back();
405         }
406         FunctionSamples &FProfile = *InlineStack.back();
407         for (const auto &name_count : TargetCountMap) {
408           MergeResult(Result, FProfile.addCalledTargetSamples(
409                                   LineOffset, Discriminator, name_count.first,
410                                   name_count.second));
411         }
412         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
413                                                     NumSamples));
414         break;
415       }
416       case LineType::Metadata: {
417         FunctionSamples &FProfile = *InlineStack.back();
418         if (FunctionHash) {
419           FProfile.setFunctionHash(FunctionHash);
420           if (Depth == 1)
421             ++TopLevelProbeProfileCount;
422         }
423         FProfile.getContext().setAllAttributes(Attributes);
424         if (Attributes & (uint32_t)ContextShouldBeInlined)
425           ProfileIsPreInlined = true;
426         DepthMetadata = Depth;
427         break;
428       }
429       }
430     }
431   }
432 
433   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
434          "Cannot have both context-sensitive and regular profile");
435   ProfileIsCS = (CSProfileCount > 0);
436   assert((TopLevelProbeProfileCount == 0 ||
437           TopLevelProbeProfileCount == Profiles.size()) &&
438          "Cannot have both probe-based profiles and regular profiles");
439   ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
440   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
441   FunctionSamples::ProfileIsCS = ProfileIsCS;
442   FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
443 
444   if (Result == sampleprof_error::success)
445     computeSummary();
446 
447   return Result;
448 }
449 
450 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
451   bool result = false;
452 
453   // Check that the first non-comment line is a valid function header.
454   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
455   if (!LineIt.is_at_eof()) {
456     if ((*LineIt)[0] != ' ') {
457       uint64_t NumSamples, NumHeadSamples;
458       StringRef FName;
459       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
460     }
461   }
462 
463   return result;
464 }
465 
466 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
467   unsigned NumBytesRead = 0;
468   std::error_code EC;
469   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
470 
471   if (Val > std::numeric_limits<T>::max())
472     EC = sampleprof_error::malformed;
473   else if (Data + NumBytesRead > End)
474     EC = sampleprof_error::truncated;
475   else
476     EC = sampleprof_error::success;
477 
478   if (EC) {
479     reportError(0, EC.message());
480     return EC;
481   }
482 
483   Data += NumBytesRead;
484   return static_cast<T>(Val);
485 }
486 
487 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
488   std::error_code EC;
489   StringRef Str(reinterpret_cast<const char *>(Data));
490   if (Data + Str.size() + 1 > End) {
491     EC = sampleprof_error::truncated;
492     reportError(0, EC.message());
493     return EC;
494   }
495 
496   Data += Str.size() + 1;
497   return Str;
498 }
499 
500 template <typename T>
501 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
502   std::error_code EC;
503 
504   if (Data + sizeof(T) > End) {
505     EC = sampleprof_error::truncated;
506     reportError(0, EC.message());
507     return EC;
508   }
509 
510   using namespace support;
511   T Val = endian::readNext<T, little, unaligned>(Data);
512   return Val;
513 }
514 
515 template <typename T>
516 inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
517   std::error_code EC;
518   auto Idx = readNumber<size_t>();
519   if (std::error_code EC = Idx.getError())
520     return EC;
521   if (*Idx >= Table.size())
522     return sampleprof_error::truncated_name_table;
523   return *Idx;
524 }
525 
526 ErrorOr<StringRef>
527 SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
528   auto Idx = readStringIndex(NameTable);
529   if (std::error_code EC = Idx.getError())
530     return EC;
531 
532   // Lazy loading, if the string has not been materialized from memory storing
533   // MD5 values, then it is default initialized with the null pointer. This can
534   // only happen when using fixed length MD5, that bounds check is performed
535   // while parsing the name table to ensure MD5NameMemStart points to an array
536   // with enough MD5 entries.
537   StringRef &SR = NameTable[*Idx];
538   if (!SR.data()) {
539     assert(MD5NameMemStart);
540     using namespace support;
541     uint64_t FID = endian::read<uint64_t, little, unaligned>(
542        MD5NameMemStart + (*Idx) * sizeof(uint64_t));
543     SR = MD5StringBuf.emplace_back(std::to_string(FID));
544   }
545   if (RetIdx)
546     *RetIdx = *Idx;
547   return SR;
548 }
549 
550 ErrorOr<SampleContextFrames>
551 SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) {
552   auto ContextIdx = readNumber<size_t>();
553   if (std::error_code EC = ContextIdx.getError())
554     return EC;
555   if (*ContextIdx >= CSNameTable.size())
556     return sampleprof_error::truncated_name_table;
557   if (RetIdx)
558     *RetIdx = *ContextIdx;
559   return CSNameTable[*ContextIdx];
560 }
561 
562 ErrorOr<std::pair<SampleContext, hash_code>>
563 SampleProfileReaderBinary::readSampleContextFromTable() {
564   SampleContext Context;
565   size_t Idx;
566   if (ProfileIsCS) {
567     auto FContext(readContextFromTable(&Idx));
568     if (std::error_code EC = FContext.getError())
569       return EC;
570     Context = SampleContext(*FContext);
571   } else {
572     auto FName(readStringFromTable(&Idx));
573     if (std::error_code EC = FName.getError())
574       return EC;
575     Context = SampleContext(*FName);
576   }
577   hash_code Hash = MD5SampleContextStart[Idx];
578   // Lazy computing of hash value, write back to the table to cache it. Only
579   // compute the context's hash value if it is being referenced for the first
580   // time.
581   if (Hash == hash_code(0)) {
582     assert(MD5SampleContextStart == MD5SampleContextTable.data());
583     Hash = Context.getHashCode();
584     MD5SampleContextTable[Idx] = Hash;
585   }
586   return std::make_pair(Context, Hash);
587 }
588 
589 std::error_code
590 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
591   auto NumSamples = readNumber<uint64_t>();
592   if (std::error_code EC = NumSamples.getError())
593     return EC;
594   FProfile.addTotalSamples(*NumSamples);
595 
596   // Read the samples in the body.
597   auto NumRecords = readNumber<uint32_t>();
598   if (std::error_code EC = NumRecords.getError())
599     return EC;
600 
601   for (uint32_t I = 0; I < *NumRecords; ++I) {
602     auto LineOffset = readNumber<uint64_t>();
603     if (std::error_code EC = LineOffset.getError())
604       return EC;
605 
606     if (!isOffsetLegal(*LineOffset)) {
607       return std::error_code();
608     }
609 
610     auto Discriminator = readNumber<uint64_t>();
611     if (std::error_code EC = Discriminator.getError())
612       return EC;
613 
614     auto NumSamples = readNumber<uint64_t>();
615     if (std::error_code EC = NumSamples.getError())
616       return EC;
617 
618     auto NumCalls = readNumber<uint32_t>();
619     if (std::error_code EC = NumCalls.getError())
620       return EC;
621 
622     // Here we handle FS discriminators:
623     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
624 
625     for (uint32_t J = 0; J < *NumCalls; ++J) {
626       auto CalledFunction(readStringFromTable());
627       if (std::error_code EC = CalledFunction.getError())
628         return EC;
629 
630       auto CalledFunctionSamples = readNumber<uint64_t>();
631       if (std::error_code EC = CalledFunctionSamples.getError())
632         return EC;
633 
634       FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
635                                       *CalledFunction, *CalledFunctionSamples);
636     }
637 
638     FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
639   }
640 
641   // Read all the samples for inlined function calls.
642   auto NumCallsites = readNumber<uint32_t>();
643   if (std::error_code EC = NumCallsites.getError())
644     return EC;
645 
646   for (uint32_t J = 0; J < *NumCallsites; ++J) {
647     auto LineOffset = readNumber<uint64_t>();
648     if (std::error_code EC = LineOffset.getError())
649       return EC;
650 
651     auto Discriminator = readNumber<uint64_t>();
652     if (std::error_code EC = Discriminator.getError())
653       return EC;
654 
655     auto FName(readStringFromTable());
656     if (std::error_code EC = FName.getError())
657       return EC;
658 
659     // Here we handle FS discriminators:
660     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
661 
662     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
663         LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)];
664     CalleeProfile.setName(*FName);
665     if (std::error_code EC = readProfile(CalleeProfile))
666       return EC;
667   }
668 
669   return sampleprof_error::success;
670 }
671 
672 std::error_code
673 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
674   Data = Start;
675   auto NumHeadSamples = readNumber<uint64_t>();
676   if (std::error_code EC = NumHeadSamples.getError())
677     return EC;
678 
679   auto FContextHash(readSampleContextFromTable());
680   if (std::error_code EC = FContextHash.getError())
681     return EC;
682 
683   auto &[FContext, Hash] = *FContextHash;
684   // Use the cached hash value for insertion instead of recalculating it.
685   auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples());
686   FunctionSamples &FProfile = Res.first->second;
687   FProfile.setContext(FContext);
688   FProfile.addHeadSamples(*NumHeadSamples);
689 
690   if (FContext.hasContext())
691     CSProfileCount++;
692 
693   if (std::error_code EC = readProfile(FProfile))
694     return EC;
695   return sampleprof_error::success;
696 }
697 
698 std::error_code SampleProfileReaderBinary::readImpl() {
699   ProfileIsFS = ProfileIsFSDisciminator;
700   FunctionSamples::ProfileIsFS = ProfileIsFS;
701   while (Data < End) {
702     if (std::error_code EC = readFuncProfile(Data))
703       return EC;
704   }
705 
706   return sampleprof_error::success;
707 }
708 
/// Read the section described by \p Entry, located at [\p Start,
/// \p Start + \p Size).
///
/// Data and End are set to the section's bounds, then the reader matching
/// Entry.Type is invoked. Section flags found on \p Entry are applied to the
/// reader state (and mirrored into the corresponding FunctionSamples statics)
/// where the cases below do so. Section types not handled here are passed to
/// readCustomSection.
///
/// \returns the first error reported by a section reader, or
/// sampleprof_error::success.
std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
    const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
  Data = Start;
  End = Start + Size;
  switch (Entry.Type) {
  case SecProfSummary:
    if (std::error_code EC = readSummary())
      return EC;
    // The summary section's flags describe properties of the whole profile.
    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
      Summary->setPartialProfile(true);
    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
      FunctionSamples::ProfileIsCS = ProfileIsCS = true;
    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
      FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
      FunctionSamples::ProfileIsFS = ProfileIsFS = true;
    break;
  case SecNameTable: {
    bool FixedLengthMD5 =
        hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
    bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
    // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire
    // profile uses MD5 for function name matching in IPO passes.
    ProfileIsMD5 = ProfileIsMD5 || UseMD5;
    FunctionSamples::HasUniqSuffix =
        hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
    if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5))
      return EC;
    break;
  }
  case SecCSNameTable: {
    if (std::error_code EC = readCSNameTableSec())
      return EC;
    break;
  }
  case SecLBRProfile:
    if (std::error_code EC = readFuncProfiles())
      return EC;
    break;
  case SecFuncOffsetTable:
    // If module is absent, we are using LLVM tools, and need to read all
    // profiles, so skip reading the function offset table.
    if (!M) {
      // Mark the section as fully consumed without parsing it.
      Data = End;
    } else {
      assert((!ProfileIsCS ||
              hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) &&
             "func offset table should always be sorted in CS profile");
      if (std::error_code EC = readFuncOffsetTable())
        return EC;
    }
    break;
  case SecFuncMetadata: {
    ProfileIsProbeBased =
        hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
    FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
    bool HasAttribute =
        hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
    if (std::error_code EC = readFuncMetadata(HasAttribute))
      return EC;
    break;
  }
  case SecProfileSymbolList:
    if (std::error_code EC = readProfileSymbolList())
      return EC;
    break;
  default:
    // Any other section type is handed to readCustomSection.
    if (std::error_code EC = readCustomSection(Entry))
      return EC;
    break;
  }
  return sampleprof_error::success;
}
782 
783 bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
784   // If profile is CS, the function offset section is expected to consist of
785   // sequences of contexts in pre-order layout
786   // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that when a matched
787   // context in the module is found, the profiles of all its callees are
788   // recursively loaded. A list is needed since the order of profiles matters.
789   if (ProfileIsCS)
790     return true;
791 
792   // If the profile is MD5, use the map container to lookup functions in
793   // the module. A remapper has no use on MD5 names.
794   if (useMD5())
795     return false;
796 
797   // Profile is not MD5 and if a remapper is present, the remapped name of
798   // every function needed to be matched against the module, so use the list
799   // container since each entry is accessed.
800   if (Remapper)
801     return true;
802 
803   // Otherwise use the map container for faster lookup.
804   // TODO: If the cardinality of the function offset section is much smaller
805   // than the number of functions in the module, using the list container can
806   // be always faster, but we need to figure out the constant factor to
807   // determine the cutoff.
808   return false;
809 }
810 
811 
812 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
813   if (!M)
814     return false;
815   FuncsToUse.clear();
816   for (auto &F : *M)
817     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
818   return true;
819 }
820 
821 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
822   // If there are more than one function offset section, the profile associated
823   // with the previous section has to be done reading before next one is read.
824   FuncOffsetTable.clear();
825   FuncOffsetList.clear();
826 
827   auto Size = readNumber<uint64_t>();
828   if (std::error_code EC = Size.getError())
829     return EC;
830 
831   bool UseFuncOffsetList = useFuncOffsetList();
832   if (UseFuncOffsetList)
833     FuncOffsetList.reserve(*Size);
834   else
835     FuncOffsetTable.reserve(*Size);
836 
837   for (uint64_t I = 0; I < *Size; ++I) {
838     auto FContextHash(readSampleContextFromTable());
839     if (std::error_code EC = FContextHash.getError())
840       return EC;
841 
842     auto &[FContext, Hash] = *FContextHash;
843     auto Offset = readNumber<uint64_t>();
844     if (std::error_code EC = Offset.getError())
845       return EC;
846 
847     if (UseFuncOffsetList)
848       FuncOffsetList.emplace_back(FContext, *Offset);
849     else
850       // Because Porfiles replace existing value with new value if collision
851       // happens, we also use the latest offset so that they are consistent.
852       FuncOffsetTable[Hash] = *Offset;
853  }
854 
855  return sampleprof_error::success;
856 }
857 
858 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
859   // Collect functions used by current module if the Reader has been
860   // given a module.
861   // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
862   // which will query FunctionSamples::HasUniqSuffix, so it has to be
863   // called after FunctionSamples::HasUniqSuffix is set, i.e. after
864   // NameTable section is read.
865   bool LoadFuncsToBeUsed = collectFuncsFromModule();
866 
867   // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all
868   // profiles.
869   const uint8_t *Start = Data;
870   if (!LoadFuncsToBeUsed) {
871     while (Data < End) {
872       if (std::error_code EC = readFuncProfile(Data))
873         return EC;
874     }
875     assert(Data == End && "More data is read than expected");
876   } else {
877     // Load function profiles on demand.
878     if (Remapper) {
879       for (auto Name : FuncsToUse) {
880         Remapper->insert(Name);
881       }
882     }
883 
884     if (ProfileIsCS) {
885       assert(useFuncOffsetList());
886       DenseSet<uint64_t> FuncGuidsToUse;
887       if (useMD5()) {
888         for (auto Name : FuncsToUse)
889           FuncGuidsToUse.insert(Function::getGUID(Name));
890       }
891 
892       // For each function in current module, load all context profiles for
893       // the function as well as their callee contexts which can help profile
894       // guided importing for ThinLTO. This can be achieved by walking
895       // through an ordered context container, where contexts are laid out
896       // as if they were walked in preorder of a context trie. While
897       // traversing the trie, a link to the highest common ancestor node is
898       // kept so that all of its decendants will be loaded.
899       const SampleContext *CommonContext = nullptr;
900       for (const auto &NameOffset : FuncOffsetList) {
901         const auto &FContext = NameOffset.first;
902         auto FName = FContext.getName();
903         // For function in the current module, keep its farthest ancestor
904         // context. This can be used to load itself and its child and
905         // sibling contexts.
906         if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) ||
907             (!useMD5() && (FuncsToUse.count(FName) ||
908                            (Remapper && Remapper->exist(FName))))) {
909           if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
910             CommonContext = &FContext;
911         }
912 
913         if (CommonContext == &FContext ||
914             (CommonContext && CommonContext->IsPrefixOf(FContext))) {
915           // Load profile for the current context which originated from
916           // the common ancestor.
917           const uint8_t *FuncProfileAddr = Start + NameOffset.second;
918           if (std::error_code EC = readFuncProfile(FuncProfileAddr))
919             return EC;
920         }
921       }
922     } else if (useMD5()) {
923       assert(!useFuncOffsetList());
924       for (auto Name : FuncsToUse) {
925         auto GUID = MD5Hash(Name);
926         auto iter = FuncOffsetTable.find(GUID);
927         if (iter == FuncOffsetTable.end())
928           continue;
929         const uint8_t *FuncProfileAddr = Start + iter->second;
930         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
931           return EC;
932       }
933     } else if (Remapper) {
934       assert(useFuncOffsetList());
935       for (auto NameOffset : FuncOffsetList) {
936         SampleContext FContext(NameOffset.first);
937         auto FuncName = FContext.getName();
938         if (!FuncsToUse.count(FuncName) && !Remapper->exist(FuncName))
939           continue;
940         const uint8_t *FuncProfileAddr = Start + NameOffset.second;
941         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
942           return EC;
943       }
944     } else {
945       assert(!useFuncOffsetList());
946       for (auto Name : FuncsToUse) {
947         auto iter = FuncOffsetTable.find(MD5Hash(Name));
948         if (iter == FuncOffsetTable.end())
949           continue;
950         const uint8_t *FuncProfileAddr = Start + iter->second;
951         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
952           return EC;
953       }
954     }
955     Data = End;
956   }
957   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
958          "Cannot have both context-sensitive and regular profile");
959   assert((!CSProfileCount || ProfileIsCS) &&
960          "Section flag should be consistent with actual profile");
961   return sampleprof_error::success;
962 }
963 
964 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
965   if (!ProfSymList)
966     ProfSymList = std::make_unique<ProfileSymbolList>();
967 
968   if (std::error_code EC = ProfSymList->read(Data, End - Data))
969     return EC;
970 
971   Data = End;
972   return sampleprof_error::success;
973 }
974 
975 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
976     const uint8_t *SecStart, const uint64_t SecSize,
977     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
978   Data = SecStart;
979   End = SecStart + SecSize;
980   auto DecompressSize = readNumber<uint64_t>();
981   if (std::error_code EC = DecompressSize.getError())
982     return EC;
983   DecompressBufSize = *DecompressSize;
984 
985   auto CompressSize = readNumber<uint64_t>();
986   if (std::error_code EC = CompressSize.getError())
987     return EC;
988 
989   if (!llvm::compression::zlib::isAvailable())
990     return sampleprof_error::zlib_unavailable;
991 
992   uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize);
993   size_t UCSize = DecompressBufSize;
994   llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize),
995                                                 Buffer, UCSize);
996   if (E)
997     return sampleprof_error::uncompress_failed;
998   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
999   return sampleprof_error::success;
1000 }
1001 
1002 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
1003   const uint8_t *BufStart =
1004       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1005 
1006   for (auto &Entry : SecHdrTable) {
1007     // Skip empty section.
1008     if (!Entry.Size)
1009       continue;
1010 
1011     // Skip sections without context when SkipFlatProf is true.
1012     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1013       continue;
1014 
1015     const uint8_t *SecStart = BufStart + Entry.Offset;
1016     uint64_t SecSize = Entry.Size;
1017 
1018     // If the section is compressed, decompress it into a buffer
1019     // DecompressBuf before reading the actual data. The pointee of
1020     // 'Data' will be changed to buffer hold by DecompressBuf
1021     // temporarily when reading the actual data.
1022     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
1023     if (isCompressed) {
1024       const uint8_t *DecompressBuf;
1025       uint64_t DecompressBufSize;
1026       if (std::error_code EC = decompressSection(
1027               SecStart, SecSize, DecompressBuf, DecompressBufSize))
1028         return EC;
1029       SecStart = DecompressBuf;
1030       SecSize = DecompressBufSize;
1031     }
1032 
1033     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
1034       return EC;
1035     if (Data != SecStart + SecSize)
1036       return sampleprof_error::malformed;
1037 
1038     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
1039     if (isCompressed) {
1040       Data = BufStart + Entry.Offset;
1041       End = BufStart + Buffer->getBufferSize();
1042     }
1043   }
1044 
1045   return sampleprof_error::success;
1046 }
1047 
1048 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
1049   if (Magic == SPMagic())
1050     return sampleprof_error::success;
1051   return sampleprof_error::bad_magic;
1052 }
1053 
1054 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
1055   if (Magic == SPMagic(SPF_Ext_Binary))
1056     return sampleprof_error::success;
1057   return sampleprof_error::bad_magic;
1058 }
1059 
1060 std::error_code SampleProfileReaderBinary::readNameTable() {
1061   auto Size = readNumber<size_t>();
1062   if (std::error_code EC = Size.getError())
1063     return EC;
1064 
1065   // Normally if useMD5 is true, the name table should have MD5 values, not
1066   // strings, however in the case that ExtBinary profile has multiple name
1067   // tables mixing string and MD5, all of them have to be normalized to use MD5,
1068   // because optimization passes can only handle either type.
1069   bool UseMD5 = useMD5();
1070   if (UseMD5)
1071     MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
1072 
1073   NameTable.clear();
1074   NameTable.reserve(*Size);
1075   if (!ProfileIsCS) {
1076     MD5SampleContextTable.clear();
1077     if (UseMD5)
1078       MD5SampleContextTable.reserve(*Size);
1079     else
1080       // If we are using strings, delay MD5 computation since only a portion of
1081       // names are used by top level functions. Use 0 to indicate MD5 value is
1082       // to be calculated as no known string has a MD5 value of 0.
1083       MD5SampleContextTable.resize(*Size);
1084   }
1085   for (size_t I = 0; I < *Size; ++I) {
1086     auto Name(readString());
1087     if (std::error_code EC = Name.getError())
1088       return EC;
1089     if (UseMD5) {
1090       uint64_t FID = MD5Hash(*Name);
1091       if (!ProfileIsCS)
1092         MD5SampleContextTable.emplace_back(FID);
1093       NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(FID)));
1094     } else
1095       NameTable.push_back(*Name);
1096   }
1097   if (!ProfileIsCS)
1098     MD5SampleContextStart = MD5SampleContextTable.data();
1099   return sampleprof_error::success;
1100 }
1101 
1102 std::error_code
1103 SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
1104                                                    bool FixedLengthMD5) {
1105   if (FixedLengthMD5) {
1106     if (!IsMD5)
1107       errs() << "If FixedLengthMD5 is true, UseMD5 has to be true";
1108     auto Size = readNumber<size_t>();
1109     if (std::error_code EC = Size.getError())
1110       return EC;
1111 
1112     assert(Data + (*Size) * sizeof(uint64_t) == End &&
1113            "Fixed length MD5 name table does not contain specified number of "
1114            "entries");
1115     if (Data + (*Size) * sizeof(uint64_t) > End)
1116       return sampleprof_error::truncated;
1117 
1118     // Preallocate and initialize NameTable so we can check whether a name
1119     // index has been read before by checking whether the element in the
1120     // NameTable is empty, meanwhile readStringIndex can do the boundary
1121     // check using the size of NameTable.
1122     MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
1123     NameTable.clear();
1124     NameTable.resize(*Size);
1125     MD5NameMemStart = Data;
1126     if (!ProfileIsCS)
1127       MD5SampleContextStart = reinterpret_cast<const hash_code *>(Data);
1128     Data = Data + (*Size) * sizeof(uint64_t);
1129     return sampleprof_error::success;
1130   }
1131 
1132   if (IsMD5) {
1133     assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here");
1134     auto Size = readNumber<size_t>();
1135     if (std::error_code EC = Size.getError())
1136       return EC;
1137 
1138     MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
1139     NameTable.clear();
1140     NameTable.reserve(*Size);
1141     if (!ProfileIsCS) {
1142       MD5SampleContextTable.clear();
1143       MD5SampleContextTable.reserve(*Size);
1144     }
1145     for (size_t I = 0; I < *Size; ++I) {
1146       auto FID = readNumber<uint64_t>();
1147       if (std::error_code EC = FID.getError())
1148         return EC;
1149       if (!ProfileIsCS)
1150         MD5SampleContextTable.emplace_back(*FID);
1151       NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID)));
1152     }
1153     if (!ProfileIsCS)
1154       MD5SampleContextStart = MD5SampleContextTable.data();
1155     return sampleprof_error::success;
1156   }
1157 
1158   return SampleProfileReaderBinary::readNameTable();
1159 }
1160 
1161 // Read in the CS name table section, which basically contains a list of context
1162 // vectors. Each element of a context vector, aka a frame, refers to the
1163 // underlying raw function names that are stored in the name table, as well as
1164 // a callsite identifier that only makes sense for non-leaf frames.
1165 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1166   auto Size = readNumber<size_t>();
1167   if (std::error_code EC = Size.getError())
1168     return EC;
1169 
1170   CSNameTable.clear();
1171   CSNameTable.reserve(*Size);
1172   if (ProfileIsCS) {
1173     // Delay MD5 computation of CS context until they are needed. Use 0 to
1174     // indicate MD5 value is to be calculated as no known string has a MD5
1175     // value of 0.
1176     MD5SampleContextTable.clear();
1177     MD5SampleContextTable.resize(*Size);
1178     MD5SampleContextStart = MD5SampleContextTable.data();
1179   }
1180   for (size_t I = 0; I < *Size; ++I) {
1181     CSNameTable.emplace_back(SampleContextFrameVector());
1182     auto ContextSize = readNumber<uint32_t>();
1183     if (std::error_code EC = ContextSize.getError())
1184       return EC;
1185     for (uint32_t J = 0; J < *ContextSize; ++J) {
1186       auto FName(readStringFromTable());
1187       if (std::error_code EC = FName.getError())
1188         return EC;
1189       auto LineOffset = readNumber<uint64_t>();
1190       if (std::error_code EC = LineOffset.getError())
1191         return EC;
1192 
1193       if (!isOffsetLegal(*LineOffset))
1194         return std::error_code();
1195 
1196       auto Discriminator = readNumber<uint64_t>();
1197       if (std::error_code EC = Discriminator.getError())
1198         return EC;
1199 
1200       CSNameTable.back().emplace_back(
1201           FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
1202     }
1203   }
1204 
1205   return sampleprof_error::success;
1206 }
1207 
1208 std::error_code
1209 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
1210                                                    FunctionSamples *FProfile) {
1211   if (Data < End) {
1212     if (ProfileIsProbeBased) {
1213       auto Checksum = readNumber<uint64_t>();
1214       if (std::error_code EC = Checksum.getError())
1215         return EC;
1216       if (FProfile)
1217         FProfile->setFunctionHash(*Checksum);
1218     }
1219 
1220     if (ProfileHasAttribute) {
1221       auto Attributes = readNumber<uint32_t>();
1222       if (std::error_code EC = Attributes.getError())
1223         return EC;
1224       if (FProfile)
1225         FProfile->getContext().setAllAttributes(*Attributes);
1226     }
1227 
1228     if (!ProfileIsCS) {
1229       // Read all the attributes for inlined function calls.
1230       auto NumCallsites = readNumber<uint32_t>();
1231       if (std::error_code EC = NumCallsites.getError())
1232         return EC;
1233 
1234       for (uint32_t J = 0; J < *NumCallsites; ++J) {
1235         auto LineOffset = readNumber<uint64_t>();
1236         if (std::error_code EC = LineOffset.getError())
1237           return EC;
1238 
1239         auto Discriminator = readNumber<uint64_t>();
1240         if (std::error_code EC = Discriminator.getError())
1241           return EC;
1242 
1243         auto FContextHash(readSampleContextFromTable());
1244         if (std::error_code EC = FContextHash.getError())
1245           return EC;
1246 
1247         auto &[FContext, Hash] = *FContextHash;
1248         FunctionSamples *CalleeProfile = nullptr;
1249         if (FProfile) {
1250           CalleeProfile = const_cast<FunctionSamples *>(
1251               &FProfile->functionSamplesAt(LineLocation(
1252                   *LineOffset,
1253                   *Discriminator))[std::string(FContext.getName())]);
1254         }
1255         if (std::error_code EC =
1256                 readFuncMetadata(ProfileHasAttribute, CalleeProfile))
1257           return EC;
1258       }
1259     }
1260   }
1261 
1262   return sampleprof_error::success;
1263 }
1264 
1265 std::error_code
1266 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
1267   while (Data < End) {
1268     auto FContextHash(readSampleContextFromTable());
1269     if (std::error_code EC = FContextHash.getError())
1270       return EC;
1271     auto &[FContext, Hash] = *FContextHash;
1272     FunctionSamples *FProfile = nullptr;
1273     auto It = Profiles.find(FContext);
1274     if (It != Profiles.end())
1275       FProfile = &It->second;
1276 
1277     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
1278       return EC;
1279   }
1280 
1281   assert(Data == End && "More data is read than expected");
1282   return sampleprof_error::success;
1283 }
1284 
1285 std::error_code
1286 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) {
1287   SecHdrTableEntry Entry;
1288   auto Type = readUnencodedNumber<uint64_t>();
1289   if (std::error_code EC = Type.getError())
1290     return EC;
1291   Entry.Type = static_cast<SecType>(*Type);
1292 
1293   auto Flags = readUnencodedNumber<uint64_t>();
1294   if (std::error_code EC = Flags.getError())
1295     return EC;
1296   Entry.Flags = *Flags;
1297 
1298   auto Offset = readUnencodedNumber<uint64_t>();
1299   if (std::error_code EC = Offset.getError())
1300     return EC;
1301   Entry.Offset = *Offset;
1302 
1303   auto Size = readUnencodedNumber<uint64_t>();
1304   if (std::error_code EC = Size.getError())
1305     return EC;
1306   Entry.Size = *Size;
1307 
1308   Entry.LayoutIndex = Idx;
1309   SecHdrTable.push_back(std::move(Entry));
1310   return sampleprof_error::success;
1311 }
1312 
1313 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1314   auto EntryNum = readUnencodedNumber<uint64_t>();
1315   if (std::error_code EC = EntryNum.getError())
1316     return EC;
1317 
1318   for (uint64_t i = 0; i < (*EntryNum); i++)
1319     if (std::error_code EC = readSecHdrTableEntry(i))
1320       return EC;
1321 
1322   return sampleprof_error::success;
1323 }
1324 
1325 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1326   const uint8_t *BufStart =
1327       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1328   Data = BufStart;
1329   End = BufStart + Buffer->getBufferSize();
1330 
1331   if (std::error_code EC = readMagicIdent())
1332     return EC;
1333 
1334   if (std::error_code EC = readSecHdrTable())
1335     return EC;
1336 
1337   return sampleprof_error::success;
1338 }
1339 
1340 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1341   uint64_t Size = 0;
1342   for (auto &Entry : SecHdrTable) {
1343     if (Entry.Type == Type)
1344       Size += Entry.Size;
1345   }
1346   return Size;
1347 }
1348 
1349 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1350   // Sections in SecHdrTable is not necessarily in the same order as
1351   // sections in the profile because section like FuncOffsetTable needs
1352   // to be written after section LBRProfile but needs to be read before
1353   // section LBRProfile, so we cannot simply use the last entry in
1354   // SecHdrTable to calculate the file size.
1355   uint64_t FileSize = 0;
1356   for (auto &Entry : SecHdrTable) {
1357     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1358   }
1359   return FileSize;
1360 }
1361 
1362 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1363   std::string Flags;
1364   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1365     Flags.append("{compressed,");
1366   else
1367     Flags.append("{");
1368 
1369   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1370     Flags.append("flat,");
1371 
1372   switch (Entry.Type) {
1373   case SecNameTable:
1374     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1375       Flags.append("fixlenmd5,");
1376     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1377       Flags.append("md5,");
1378     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1379       Flags.append("uniq,");
1380     break;
1381   case SecProfSummary:
1382     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1383       Flags.append("partial,");
1384     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1385       Flags.append("context,");
1386     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
1387       Flags.append("preInlined,");
1388     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
1389       Flags.append("fs-discriminator,");
1390     break;
1391   case SecFuncOffsetTable:
1392     if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
1393       Flags.append("ordered,");
1394     break;
1395   case SecFuncMetadata:
1396     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
1397       Flags.append("probe,");
1398     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
1399       Flags.append("attr,");
1400     break;
1401   default:
1402     break;
1403   }
1404   char &last = Flags.back();
1405   if (last == ',')
1406     last = '}';
1407   else
1408     Flags.append("}");
1409   return Flags;
1410 }
1411 
1412 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1413   uint64_t TotalSecsSize = 0;
1414   for (auto &Entry : SecHdrTable) {
1415     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1416        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1417        << "\n";
1418     ;
1419     TotalSecsSize += Entry.Size;
1420   }
1421   uint64_t HeaderSize = SecHdrTable.front().Offset;
1422   assert(HeaderSize + TotalSecsSize == getFileSize() &&
1423          "Size of 'header + sections' doesn't match the total size of profile");
1424 
1425   OS << "Header Size: " << HeaderSize << "\n";
1426   OS << "Total Sections Size: " << TotalSecsSize << "\n";
1427   OS << "File Size: " << getFileSize() << "\n";
1428   return true;
1429 }
1430 
1431 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1432   // Read and check the magic identifier.
1433   auto Magic = readNumber<uint64_t>();
1434   if (std::error_code EC = Magic.getError())
1435     return EC;
1436   else if (std::error_code EC = verifySPMagic(*Magic))
1437     return EC;
1438 
1439   // Read the version number.
1440   auto Version = readNumber<uint64_t>();
1441   if (std::error_code EC = Version.getError())
1442     return EC;
1443   else if (*Version != SPVersion())
1444     return sampleprof_error::unsupported_version;
1445 
1446   return sampleprof_error::success;
1447 }
1448 
1449 std::error_code SampleProfileReaderBinary::readHeader() {
1450   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1451   End = Data + Buffer->getBufferSize();
1452 
1453   if (std::error_code EC = readMagicIdent())
1454     return EC;
1455 
1456   if (std::error_code EC = readSummary())
1457     return EC;
1458 
1459   if (std::error_code EC = readNameTable())
1460     return EC;
1461   return sampleprof_error::success;
1462 }
1463 
1464 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1465     std::vector<ProfileSummaryEntry> &Entries) {
1466   auto Cutoff = readNumber<uint64_t>();
1467   if (std::error_code EC = Cutoff.getError())
1468     return EC;
1469 
1470   auto MinBlockCount = readNumber<uint64_t>();
1471   if (std::error_code EC = MinBlockCount.getError())
1472     return EC;
1473 
1474   auto NumBlocks = readNumber<uint64_t>();
1475   if (std::error_code EC = NumBlocks.getError())
1476     return EC;
1477 
1478   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1479   return sampleprof_error::success;
1480 }
1481 
1482 std::error_code SampleProfileReaderBinary::readSummary() {
1483   auto TotalCount = readNumber<uint64_t>();
1484   if (std::error_code EC = TotalCount.getError())
1485     return EC;
1486 
1487   auto MaxBlockCount = readNumber<uint64_t>();
1488   if (std::error_code EC = MaxBlockCount.getError())
1489     return EC;
1490 
1491   auto MaxFunctionCount = readNumber<uint64_t>();
1492   if (std::error_code EC = MaxFunctionCount.getError())
1493     return EC;
1494 
1495   auto NumBlocks = readNumber<uint64_t>();
1496   if (std::error_code EC = NumBlocks.getError())
1497     return EC;
1498 
1499   auto NumFunctions = readNumber<uint64_t>();
1500   if (std::error_code EC = NumFunctions.getError())
1501     return EC;
1502 
1503   auto NumSummaryEntries = readNumber<uint64_t>();
1504   if (std::error_code EC = NumSummaryEntries.getError())
1505     return EC;
1506 
1507   std::vector<ProfileSummaryEntry> Entries;
1508   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1509     std::error_code EC = readSummaryEntry(Entries);
1510     if (EC != sampleprof_error::success)
1511       return EC;
1512   }
1513   Summary = std::make_unique<ProfileSummary>(
1514       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1515       *MaxFunctionCount, *NumBlocks, *NumFunctions);
1516 
1517   return sampleprof_error::success;
1518 }
1519 
1520 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1521   const uint8_t *Data =
1522       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1523   uint64_t Magic = decodeULEB128(Data);
1524   return Magic == SPMagic();
1525 }
1526 
1527 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1528   const uint8_t *Data =
1529       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1530   uint64_t Magic = decodeULEB128(Data);
1531   return Magic == SPMagic(SPF_Ext_Binary);
1532 }
1533 
1534 std::error_code SampleProfileReaderGCC::skipNextWord() {
1535   uint32_t dummy;
1536   if (!GcovBuffer.readInt(dummy))
1537     return sampleprof_error::truncated;
1538   return sampleprof_error::success;
1539 }
1540 
1541 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1542   if (sizeof(T) <= sizeof(uint32_t)) {
1543     uint32_t Val;
1544     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1545       return static_cast<T>(Val);
1546   } else if (sizeof(T) <= sizeof(uint64_t)) {
1547     uint64_t Val;
1548     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1549       return static_cast<T>(Val);
1550   }
1551 
1552   std::error_code EC = sampleprof_error::malformed;
1553   reportError(0, EC.message());
1554   return EC;
1555 }
1556 
1557 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1558   StringRef Str;
1559   if (!GcovBuffer.readString(Str))
1560     return sampleprof_error::truncated;
1561   return Str;
1562 }
1563 
1564 std::error_code SampleProfileReaderGCC::readHeader() {
1565   // Read the magic identifier.
1566   if (!GcovBuffer.readGCDAFormat())
1567     return sampleprof_error::unrecognized_format;
1568 
1569   // Read the version number. Note - the GCC reader does not validate this
1570   // version, but the profile creator generates v704.
1571   GCOV::GCOVVersion version;
1572   if (!GcovBuffer.readGCOVVersion(version))
1573     return sampleprof_error::unrecognized_format;
1574 
1575   if (version != GCOV::V407)
1576     return sampleprof_error::unsupported_version;
1577 
1578   // Skip the empty integer.
1579   if (std::error_code EC = skipNextWord())
1580     return EC;
1581 
1582   return sampleprof_error::success;
1583 }
1584 
1585 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1586   uint32_t Tag;
1587   if (!GcovBuffer.readInt(Tag))
1588     return sampleprof_error::truncated;
1589 
1590   if (Tag != Expected)
1591     return sampleprof_error::malformed;
1592 
1593   if (std::error_code EC = skipNextWord())
1594     return EC;
1595 
1596   return sampleprof_error::success;
1597 }
1598 
1599 std::error_code SampleProfileReaderGCC::readNameTable() {
1600   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1601     return EC;
1602 
1603   uint32_t Size;
1604   if (!GcovBuffer.readInt(Size))
1605     return sampleprof_error::truncated;
1606 
1607   for (uint32_t I = 0; I < Size; ++I) {
1608     StringRef Str;
1609     if (!GcovBuffer.readString(Str))
1610       return sampleprof_error::truncated;
1611     Names.push_back(std::string(Str));
1612   }
1613 
1614   return sampleprof_error::success;
1615 }
1616 
1617 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1618   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1619     return EC;
1620 
1621   uint32_t NumFunctions;
1622   if (!GcovBuffer.readInt(NumFunctions))
1623     return sampleprof_error::truncated;
1624 
1625   InlineCallStack Stack;
1626   for (uint32_t I = 0; I < NumFunctions; ++I)
1627     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1628       return EC;
1629 
1630   computeSummary();
1631   return sampleprof_error::success;
1632 }
1633 
1634 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1635     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1636   uint64_t HeadCount = 0;
1637   if (InlineStack.size() == 0)
1638     if (!GcovBuffer.readInt64(HeadCount))
1639       return sampleprof_error::truncated;
1640 
1641   uint32_t NameIdx;
1642   if (!GcovBuffer.readInt(NameIdx))
1643     return sampleprof_error::truncated;
1644 
1645   StringRef Name(Names[NameIdx]);
1646 
1647   uint32_t NumPosCounts;
1648   if (!GcovBuffer.readInt(NumPosCounts))
1649     return sampleprof_error::truncated;
1650 
1651   uint32_t NumCallsites;
1652   if (!GcovBuffer.readInt(NumCallsites))
1653     return sampleprof_error::truncated;
1654 
1655   FunctionSamples *FProfile = nullptr;
1656   if (InlineStack.size() == 0) {
1657     // If this is a top function that we have already processed, do not
1658     // update its profile again.  This happens in the presence of
1659     // function aliases.  Since these aliases share the same function
1660     // body, there will be identical replicated profiles for the
1661     // original function.  In this case, we simply not bother updating
1662     // the profile of the original function.
1663     FProfile = &Profiles[Name];
1664     FProfile->addHeadSamples(HeadCount);
1665     if (FProfile->getTotalSamples() > 0)
1666       Update = false;
1667   } else {
1668     // Otherwise, we are reading an inlined instance. The top of the
1669     // inline stack contains the profile of the caller. Insert this
1670     // callee in the caller's CallsiteMap.
1671     FunctionSamples *CallerProfile = InlineStack.front();
1672     uint32_t LineOffset = Offset >> 16;
1673     uint32_t Discriminator = Offset & 0xffff;
1674     FProfile = &CallerProfile->functionSamplesAt(
1675         LineLocation(LineOffset, Discriminator))[std::string(Name)];
1676   }
1677   FProfile->setName(Name);
1678 
1679   for (uint32_t I = 0; I < NumPosCounts; ++I) {
1680     uint32_t Offset;
1681     if (!GcovBuffer.readInt(Offset))
1682       return sampleprof_error::truncated;
1683 
1684     uint32_t NumTargets;
1685     if (!GcovBuffer.readInt(NumTargets))
1686       return sampleprof_error::truncated;
1687 
1688     uint64_t Count;
1689     if (!GcovBuffer.readInt64(Count))
1690       return sampleprof_error::truncated;
1691 
1692     // The line location is encoded in the offset as:
1693     //   high 16 bits: line offset to the start of the function.
1694     //   low 16 bits: discriminator.
1695     uint32_t LineOffset = Offset >> 16;
1696     uint32_t Discriminator = Offset & 0xffff;
1697 
1698     InlineCallStack NewStack;
1699     NewStack.push_back(FProfile);
1700     llvm::append_range(NewStack, InlineStack);
1701     if (Update) {
1702       // Walk up the inline stack, adding the samples on this line to
1703       // the total sample count of the callers in the chain.
1704       for (auto *CallerProfile : NewStack)
1705         CallerProfile->addTotalSamples(Count);
1706 
1707       // Update the body samples for the current profile.
1708       FProfile->addBodySamples(LineOffset, Discriminator, Count);
1709     }
1710 
1711     // Process the list of functions called at an indirect call site.
1712     // These are all the targets that a function pointer (or virtual
1713     // function) resolved at runtime.
1714     for (uint32_t J = 0; J < NumTargets; J++) {
1715       uint32_t HistVal;
1716       if (!GcovBuffer.readInt(HistVal))
1717         return sampleprof_error::truncated;
1718 
1719       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1720         return sampleprof_error::malformed;
1721 
1722       uint64_t TargetIdx;
1723       if (!GcovBuffer.readInt64(TargetIdx))
1724         return sampleprof_error::truncated;
1725       StringRef TargetName(Names[TargetIdx]);
1726 
1727       uint64_t TargetCount;
1728       if (!GcovBuffer.readInt64(TargetCount))
1729         return sampleprof_error::truncated;
1730 
1731       if (Update)
1732         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1733                                          TargetName, TargetCount);
1734     }
1735   }
1736 
1737   // Process all the inlined callers into the current function. These
1738   // are all the callsites that were inlined into this function.
1739   for (uint32_t I = 0; I < NumCallsites; I++) {
1740     // The offset is encoded as:
1741     //   high 16 bits: line offset to the start of the function.
1742     //   low 16 bits: discriminator.
1743     uint32_t Offset;
1744     if (!GcovBuffer.readInt(Offset))
1745       return sampleprof_error::truncated;
1746     InlineCallStack NewStack;
1747     NewStack.push_back(FProfile);
1748     llvm::append_range(NewStack, InlineStack);
1749     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1750       return EC;
1751   }
1752 
1753   return sampleprof_error::success;
1754 }
1755 
1756 /// Read a GCC AutoFDO profile.
1757 ///
1758 /// This format is generated by the Linux Perf conversion tool at
1759 /// https://github.com/google/autofdo.
1760 std::error_code SampleProfileReaderGCC::readImpl() {
1761   assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
1762   // Read the string table.
1763   if (std::error_code EC = readNameTable())
1764     return EC;
1765 
1766   // Read the source profile.
1767   if (std::error_code EC = readFunctionProfiles())
1768     return EC;
1769 
1770   return sampleprof_error::success;
1771 }
1772 
1773 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1774   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1775   return Magic == "adcg*704";
1776 }
1777 
1778 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1779   // If the reader uses MD5 to represent string, we can't remap it because
1780   // we don't know what the original function names were.
1781   if (Reader.useMD5()) {
1782     Ctx.diagnose(DiagnosticInfoSampleProfile(
1783         Reader.getBuffer()->getBufferIdentifier(),
1784         "Profile data remapping cannot be applied to profile data "
1785         "using MD5 names (original mangled names are not available).",
1786         DS_Warning));
1787     return;
1788   }
1789 
1790   // CSSPGO-TODO: Remapper is not yet supported.
1791   // We will need to remap the entire context string.
1792   assert(Remappings && "should be initialized while creating remapper");
1793   for (auto &Sample : Reader.getProfiles()) {
1794     DenseSet<StringRef> NamesInSample;
1795     Sample.second.findAllNames(NamesInSample);
1796     for (auto &Name : NamesInSample)
1797       if (auto Key = Remappings->insert(Name))
1798         NameMap.insert({Key, Name});
1799   }
1800 
1801   RemappingApplied = true;
1802 }
1803 
1804 std::optional<StringRef>
1805 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1806   if (auto Key = Remappings->lookup(Fname))
1807     return NameMap.lookup(Key);
1808   return std::nullopt;
1809 }
1810 
1811 /// Prepare a memory buffer for the contents of \p Filename.
1812 ///
1813 /// \returns an error code indicating the status of the buffer.
1814 static ErrorOr<std::unique_ptr<MemoryBuffer>>
1815 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
1816   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
1817                                            : FS.getBufferForFile(Filename);
1818   if (std::error_code EC = BufferOrErr.getError())
1819     return EC;
1820   auto Buffer = std::move(BufferOrErr.get());
1821 
1822   return std::move(Buffer);
1823 }
1824 
1825 /// Create a sample profile reader based on the format of the input file.
1826 ///
1827 /// \param Filename The file to open.
1828 ///
1829 /// \param C The LLVM context to use to emit diagnostics.
1830 ///
1831 /// \param P The FSDiscriminatorPass.
1832 ///
1833 /// \param RemapFilename The file used for profile remapping.
1834 ///
1835 /// \returns an error code indicating the status of the created reader.
1836 ErrorOr<std::unique_ptr<SampleProfileReader>>
1837 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1838                             vfs::FileSystem &FS, FSDiscriminatorPass P,
1839                             const std::string RemapFilename) {
1840   auto BufferOrError = setupMemoryBuffer(Filename, FS);
1841   if (std::error_code EC = BufferOrError.getError())
1842     return EC;
1843   return create(BufferOrError.get(), C, FS, P, RemapFilename);
1844 }
1845 
1846 /// Create a sample profile remapper from the given input, to remap the
1847 /// function names in the given profile data.
1848 ///
1849 /// \param Filename The file to open.
1850 ///
1851 /// \param Reader The profile reader the remapper is going to be applied to.
1852 ///
1853 /// \param C The LLVM context to use to emit diagnostics.
1854 ///
1855 /// \returns an error code indicating the status of the created reader.
1856 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1857 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1858                                            vfs::FileSystem &FS,
1859                                            SampleProfileReader &Reader,
1860                                            LLVMContext &C) {
1861   auto BufferOrError = setupMemoryBuffer(Filename, FS);
1862   if (std::error_code EC = BufferOrError.getError())
1863     return EC;
1864   return create(BufferOrError.get(), Reader, C);
1865 }
1866 
1867 /// Create a sample profile remapper from the given input, to remap the
1868 /// function names in the given profile data.
1869 ///
1870 /// \param B The memory buffer to create the reader from (assumes ownership).
1871 ///
1872 /// \param C The LLVM context to use to emit diagnostics.
1873 ///
1874 /// \param Reader The profile reader the remapper is going to be applied to.
1875 ///
1876 /// \returns an error code indicating the status of the created reader.
1877 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1878 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1879                                            SampleProfileReader &Reader,
1880                                            LLVMContext &C) {
1881   auto Remappings = std::make_unique<SymbolRemappingReader>();
1882   if (Error E = Remappings->read(*B)) {
1883     handleAllErrors(
1884         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1885           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1886                                                  ParseError.getLineNum(),
1887                                                  ParseError.getMessage()));
1888         });
1889     return sampleprof_error::malformed;
1890   }
1891 
1892   return std::make_unique<SampleProfileReaderItaniumRemapper>(
1893       std::move(B), std::move(Remappings), Reader);
1894 }
1895 
1896 /// Create a sample profile reader based on the format of the input data.
1897 ///
1898 /// \param B The memory buffer to create the reader from (assumes ownership).
1899 ///
1900 /// \param C The LLVM context to use to emit diagnostics.
1901 ///
1902 /// \param P The FSDiscriminatorPass.
1903 ///
1904 /// \param RemapFilename The file used for profile remapping.
1905 ///
1906 /// \returns an error code indicating the status of the created reader.
1907 ErrorOr<std::unique_ptr<SampleProfileReader>>
1908 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1909                             vfs::FileSystem &FS, FSDiscriminatorPass P,
1910                             const std::string RemapFilename) {
1911   std::unique_ptr<SampleProfileReader> Reader;
1912   if (SampleProfileReaderRawBinary::hasFormat(*B))
1913     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1914   else if (SampleProfileReaderExtBinary::hasFormat(*B))
1915     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1916   else if (SampleProfileReaderGCC::hasFormat(*B))
1917     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1918   else if (SampleProfileReaderText::hasFormat(*B))
1919     Reader.reset(new SampleProfileReaderText(std::move(B), C));
1920   else
1921     return sampleprof_error::unrecognized_format;
1922 
1923   if (!RemapFilename.empty()) {
1924     auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
1925         RemapFilename, FS, *Reader, C);
1926     if (std::error_code EC = ReaderOrErr.getError()) {
1927       std::string Msg = "Could not create remapper: " + EC.message();
1928       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1929       return EC;
1930     }
1931     Reader->Remapper = std::move(ReaderOrErr.get());
1932   }
1933 
1934   if (std::error_code EC = Reader->readHeader()) {
1935     return EC;
1936   }
1937 
1938   Reader->setDiscriminatorMaskedBitFrom(P);
1939 
1940   return std::move(Reader);
1941 }
1942 
1943 // For text and GCC file formats, we compute the summary after reading the
1944 // profile. Binary format has the profile summary in its header.
1945 void SampleProfileReader::computeSummary() {
1946   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1947   Summary = Builder.computeSummaryForProfiles(Profiles);
1948 }
1949