xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/ProfileData/SampleProfReader.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/ProfileSummary.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/Compression.h"
30 #include "llvm/Support/ErrorOr.h"
31 #include "llvm/Support/LEB128.h"
32 #include "llvm/Support/LineIterator.h"
33 #include "llvm/Support/MD5.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cstddef>
38 #include <cstdint>
39 #include <limits>
40 #include <memory>
41 #include <set>
42 #include <system_error>
43 #include <vector>
44 
45 using namespace llvm;
46 using namespace sampleprof;
47 
48 /// Dump the function profile for \p FName.
49 ///
50 /// \param FName Name of the function to print.
51 /// \param OS Stream to emit the output to.
dumpFunctionProfile(StringRef FName,raw_ostream & OS)52 void SampleProfileReader::dumpFunctionProfile(StringRef FName,
53                                               raw_ostream &OS) {
54   OS << "Function: " << FName << ": " << Profiles[FName];
55 }
56 
57 /// Dump all the function profiles found on stream \p OS.
dump(raw_ostream & OS)58 void SampleProfileReader::dump(raw_ostream &OS) {
59   for (const auto &I : Profiles)
60     dumpFunctionProfile(I.getKey(), OS);
61 }
62 
63 /// Parse \p Input as function head.
64 ///
65 /// Parse one line of \p Input, and update function name in \p FName,
66 /// function's total sample count in \p NumSamples, function's entry
67 /// count in \p NumHeadSamples.
68 ///
69 /// \returns true if parsing is successful.
ParseHead(const StringRef & Input,StringRef & FName,uint64_t & NumSamples,uint64_t & NumHeadSamples)70 static bool ParseHead(const StringRef &Input, StringRef &FName,
71                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
72   if (Input[0] == ' ')
73     return false;
74   size_t n2 = Input.rfind(':');
75   size_t n1 = Input.rfind(':', n2 - 1);
76   FName = Input.substr(0, n1);
77   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
78     return false;
79   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
80     return false;
81   return true;
82 }
83 
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLegalOffset = 0xffff;
  return L <= MaxLegalOffset;
}
86 
87 /// Parse \p Input that contains metadata.
88 /// Possible metadata:
89 /// - CFG Checksum information:
90 ///     !CFGChecksum: 12345
91 /// - CFG Checksum information:
92 ///     !Attributes: 1
93 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
parseMetadata(const StringRef & Input,uint64_t & FunctionHash,uint32_t & Attributes)94 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
95                           uint32_t &Attributes) {
96   if (Input.startswith("!CFGChecksum:")) {
97     StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
98     return !CFGInfo.getAsInteger(10, FunctionHash);
99   }
100 
101   if (Input.startswith("!Attributes:")) {
102     StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
103     return !Attrib.getAsInteger(10, Attributes);
104   }
105 
106   return false;
107 }
108 
/// Classification of an indented (non-header) line of a text profile, as
/// reported by ParseLine.
enum class LineType {
  /// Line describing an inlined callsite: callee name plus sample count.
  CallSiteProfile,
  /// Line describing body samples, optionally followed by call targets.
  BodyProfile,
  /// Line starting with '!' that carries function metadata.
  Metadata,
};
114 
115 /// Parse \p Input as line sample.
116 ///
117 /// \param Input input line.
118 /// \param LineTy Type of this line.
119 /// \param Depth the depth of the inline stack.
120 /// \param NumSamples total samples of the line/inlined callsite.
121 /// \param LineOffset line offset to the start of the function.
122 /// \param Discriminator discriminator of the line.
123 /// \param TargetCountMap map from indirect call target to count.
124 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
125 ///
126 /// returns true if parsing is successful.
ParseLine(const StringRef & Input,LineType & LineTy,uint32_t & Depth,uint64_t & NumSamples,uint32_t & LineOffset,uint32_t & Discriminator,StringRef & CalleeName,DenseMap<StringRef,uint64_t> & TargetCountMap,uint64_t & FunctionHash,uint32_t & Attributes)127 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
128                       uint64_t &NumSamples, uint32_t &LineOffset,
129                       uint32_t &Discriminator, StringRef &CalleeName,
130                       DenseMap<StringRef, uint64_t> &TargetCountMap,
131                       uint64_t &FunctionHash, uint32_t &Attributes) {
132   for (Depth = 0; Input[Depth] == ' '; Depth++)
133     ;
134   if (Depth == 0)
135     return false;
136 
137   if (Depth == 1 && Input[Depth] == '!') {
138     LineTy = LineType::Metadata;
139     return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
140   }
141 
142   size_t n1 = Input.find(':');
143   StringRef Loc = Input.substr(Depth, n1 - Depth);
144   size_t n2 = Loc.find('.');
145   if (n2 == StringRef::npos) {
146     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
147       return false;
148     Discriminator = 0;
149   } else {
150     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
151       return false;
152     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
153       return false;
154   }
155 
156   StringRef Rest = Input.substr(n1 + 2);
157   if (isDigit(Rest[0])) {
158     LineTy = LineType::BodyProfile;
159     size_t n3 = Rest.find(' ');
160     if (n3 == StringRef::npos) {
161       if (Rest.getAsInteger(10, NumSamples))
162         return false;
163     } else {
164       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
165         return false;
166     }
167     // Find call targets and their sample counts.
168     // Note: In some cases, there are symbols in the profile which are not
169     // mangled. To accommodate such cases, use colon + integer pairs as the
170     // anchor points.
171     // An example:
172     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
173     // ":1000" and ":437" are used as anchor points so the string above will
174     // be interpreted as
175     // target: _M_construct<char *>
176     // count: 1000
177     // target: string_view<std::allocator<char> >
178     // count: 437
179     while (n3 != StringRef::npos) {
180       n3 += Rest.substr(n3).find_first_not_of(' ');
181       Rest = Rest.substr(n3);
182       n3 = Rest.find_first_of(':');
183       if (n3 == StringRef::npos || n3 == 0)
184         return false;
185 
186       StringRef Target;
187       uint64_t count, n4;
188       while (true) {
189         // Get the segment after the current colon.
190         StringRef AfterColon = Rest.substr(n3 + 1);
191         // Get the target symbol before the current colon.
192         Target = Rest.substr(0, n3);
193         // Check if the word after the current colon is an integer.
194         n4 = AfterColon.find_first_of(' ');
195         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
196         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
197         if (!WordAfterColon.getAsInteger(10, count))
198           break;
199 
200         // Try to find the next colon.
201         uint64_t n5 = AfterColon.find_first_of(':');
202         if (n5 == StringRef::npos)
203           return false;
204         n3 += n5 + 1;
205       }
206 
207       // An anchor point is found. Save the {target, count} pair
208       TargetCountMap[Target] = count;
209       if (n4 == Rest.size())
210         break;
211       // Change n3 to the next blank space after colon + integer pair.
212       n3 = n4;
213     }
214   } else {
215     LineTy = LineType::CallSiteProfile;
216     size_t n3 = Rest.find_last_of(':');
217     CalleeName = Rest.substr(0, n3);
218     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
219       return false;
220   }
221   return true;
222 }
223 
224 /// Load samples from a text file.
225 ///
226 /// See the documentation at the top of the file for an explanation of
227 /// the expected format.
228 ///
229 /// \returns true if the file was loaded successfully, false otherwise.
readImpl()230 std::error_code SampleProfileReaderText::readImpl() {
231   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
232   sampleprof_error Result = sampleprof_error::success;
233 
234   InlineCallStack InlineStack;
235   uint32_t ProbeProfileCount = 0;
236 
237   // SeenMetadata tracks whether we have processed metadata for the current
238   // top-level function profile.
239   bool SeenMetadata = false;
240 
241   for (; !LineIt.is_at_eof(); ++LineIt) {
242     if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
243       continue;
244     // Read the header of each function.
245     //
246     // Note that for function identifiers we are actually expecting
247     // mangled names, but we may not always get them. This happens when
248     // the compiler decides not to emit the function (e.g., it was inlined
249     // and removed). In this case, the binary will not have the linkage
250     // name for the function, so the profiler will emit the function's
251     // unmangled name, which may contain characters like ':' and '>' in its
252     // name (member functions, templates, etc).
253     //
254     // The only requirement we place on the identifier, then, is that it
255     // should not begin with a number.
256     if ((*LineIt)[0] != ' ') {
257       uint64_t NumSamples, NumHeadSamples;
258       StringRef FName;
259       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
260         reportError(LineIt.line_number(),
261                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
262         return sampleprof_error::malformed;
263       }
264       SeenMetadata = false;
265       SampleContext FContext(FName);
266       if (FContext.hasContext())
267         ++CSProfileCount;
268       Profiles[FContext] = FunctionSamples();
269       FunctionSamples &FProfile = Profiles[FContext];
270       FProfile.setName(FContext.getNameWithoutContext());
271       FProfile.setContext(FContext);
272       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
273       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
274       InlineStack.clear();
275       InlineStack.push_back(&FProfile);
276     } else {
277       uint64_t NumSamples;
278       StringRef FName;
279       DenseMap<StringRef, uint64_t> TargetCountMap;
280       uint32_t Depth, LineOffset, Discriminator;
281       LineType LineTy;
282       uint64_t FunctionHash = 0;
283       uint32_t Attributes = 0;
284       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
285                      Discriminator, FName, TargetCountMap, FunctionHash,
286                      Attributes)) {
287         reportError(LineIt.line_number(),
288                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
289                         *LineIt);
290         return sampleprof_error::malformed;
291       }
292       if (SeenMetadata && LineTy != LineType::Metadata) {
293         // Metadata must be put at the end of a function profile.
294         reportError(LineIt.line_number(),
295                     "Found non-metadata after metadata: " + *LineIt);
296         return sampleprof_error::malformed;
297       }
298       while (InlineStack.size() > Depth) {
299         InlineStack.pop_back();
300       }
301       switch (LineTy) {
302       case LineType::CallSiteProfile: {
303         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
304             LineLocation(LineOffset, Discriminator))[std::string(FName)];
305         FSamples.setName(FName);
306         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
307         InlineStack.push_back(&FSamples);
308         break;
309       }
310       case LineType::BodyProfile: {
311         while (InlineStack.size() > Depth) {
312           InlineStack.pop_back();
313         }
314         FunctionSamples &FProfile = *InlineStack.back();
315         for (const auto &name_count : TargetCountMap) {
316           MergeResult(Result, FProfile.addCalledTargetSamples(
317                                   LineOffset, Discriminator, name_count.first,
318                                   name_count.second));
319         }
320         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
321                                                     NumSamples));
322         break;
323       }
324       case LineType::Metadata: {
325         FunctionSamples &FProfile = *InlineStack.back();
326         if (FunctionHash) {
327           FProfile.setFunctionHash(FunctionHash);
328           ++ProbeProfileCount;
329         }
330         if (Attributes)
331           FProfile.getContext().setAllAttributes(Attributes);
332         SeenMetadata = true;
333         break;
334       }
335       }
336     }
337   }
338 
339   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
340          "Cannot have both context-sensitive and regular profile");
341   ProfileIsCS = (CSProfileCount > 0);
342   assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
343          "Cannot have both probe-based profiles and regular profiles");
344   ProfileIsProbeBased = (ProbeProfileCount > 0);
345   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
346   FunctionSamples::ProfileIsCS = ProfileIsCS;
347 
348   if (Result == sampleprof_error::success)
349     computeSummary();
350 
351   return Result;
352 }
353 
hasFormat(const MemoryBuffer & Buffer)354 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
355   bool result = false;
356 
357   // Check that the first non-comment line is a valid function header.
358   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
359   if (!LineIt.is_at_eof()) {
360     if ((*LineIt)[0] != ' ') {
361       uint64_t NumSamples, NumHeadSamples;
362       StringRef FName;
363       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
364     }
365   }
366 
367   return result;
368 }
369 
readNumber()370 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
371   unsigned NumBytesRead = 0;
372   std::error_code EC;
373   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
374 
375   if (Val > std::numeric_limits<T>::max())
376     EC = sampleprof_error::malformed;
377   else if (Data + NumBytesRead > End)
378     EC = sampleprof_error::truncated;
379   else
380     EC = sampleprof_error::success;
381 
382   if (EC) {
383     reportError(0, EC.message());
384     return EC;
385   }
386 
387   Data += NumBytesRead;
388   return static_cast<T>(Val);
389 }
390 
readString()391 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
392   std::error_code EC;
393   StringRef Str(reinterpret_cast<const char *>(Data));
394   if (Data + Str.size() + 1 > End) {
395     EC = sampleprof_error::truncated;
396     reportError(0, EC.message());
397     return EC;
398   }
399 
400   Data += Str.size() + 1;
401   return Str;
402 }
403 
404 template <typename T>
readUnencodedNumber()405 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
406   std::error_code EC;
407 
408   if (Data + sizeof(T) > End) {
409     EC = sampleprof_error::truncated;
410     reportError(0, EC.message());
411     return EC;
412   }
413 
414   using namespace support;
415   T Val = endian::readNext<T, little, unaligned>(Data);
416   return Val;
417 }
418 
419 template <typename T>
readStringIndex(T & Table)420 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
421   std::error_code EC;
422   auto Idx = readNumber<uint32_t>();
423   if (std::error_code EC = Idx.getError())
424     return EC;
425   if (*Idx >= Table.size())
426     return sampleprof_error::truncated_name_table;
427   return *Idx;
428 }
429 
readStringFromTable()430 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
431   auto Idx = readStringIndex(NameTable);
432   if (std::error_code EC = Idx.getError())
433     return EC;
434 
435   return NameTable[*Idx];
436 }
437 
readStringFromTable()438 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
439   if (!FixedLengthMD5)
440     return SampleProfileReaderBinary::readStringFromTable();
441 
442   // read NameTable index.
443   auto Idx = readStringIndex(NameTable);
444   if (std::error_code EC = Idx.getError())
445     return EC;
446 
447   // Check whether the name to be accessed has been accessed before,
448   // if not, read it from memory directly.
449   StringRef &SR = NameTable[*Idx];
450   if (SR.empty()) {
451     const uint8_t *SavedData = Data;
452     Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
453     auto FID = readUnencodedNumber<uint64_t>();
454     if (std::error_code EC = FID.getError())
455       return EC;
456     // Save the string converted from uint64_t in MD5StringBuf. All the
457     // references to the name are all StringRefs refering to the string
458     // in MD5StringBuf.
459     MD5StringBuf->push_back(std::to_string(*FID));
460     SR = MD5StringBuf->back();
461     Data = SavedData;
462   }
463   return SR;
464 }
465 
readStringFromTable()466 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
467   auto Idx = readStringIndex(NameTable);
468   if (std::error_code EC = Idx.getError())
469     return EC;
470 
471   return StringRef(NameTable[*Idx]);
472 }
473 
474 std::error_code
readProfile(FunctionSamples & FProfile)475 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
476   auto NumSamples = readNumber<uint64_t>();
477   if (std::error_code EC = NumSamples.getError())
478     return EC;
479   FProfile.addTotalSamples(*NumSamples);
480 
481   // Read the samples in the body.
482   auto NumRecords = readNumber<uint32_t>();
483   if (std::error_code EC = NumRecords.getError())
484     return EC;
485 
486   for (uint32_t I = 0; I < *NumRecords; ++I) {
487     auto LineOffset = readNumber<uint64_t>();
488     if (std::error_code EC = LineOffset.getError())
489       return EC;
490 
491     if (!isOffsetLegal(*LineOffset)) {
492       return std::error_code();
493     }
494 
495     auto Discriminator = readNumber<uint64_t>();
496     if (std::error_code EC = Discriminator.getError())
497       return EC;
498 
499     auto NumSamples = readNumber<uint64_t>();
500     if (std::error_code EC = NumSamples.getError())
501       return EC;
502 
503     auto NumCalls = readNumber<uint32_t>();
504     if (std::error_code EC = NumCalls.getError())
505       return EC;
506 
507     for (uint32_t J = 0; J < *NumCalls; ++J) {
508       auto CalledFunction(readStringFromTable());
509       if (std::error_code EC = CalledFunction.getError())
510         return EC;
511 
512       auto CalledFunctionSamples = readNumber<uint64_t>();
513       if (std::error_code EC = CalledFunctionSamples.getError())
514         return EC;
515 
516       FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
517                                       *CalledFunction, *CalledFunctionSamples);
518     }
519 
520     FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
521   }
522 
523   // Read all the samples for inlined function calls.
524   auto NumCallsites = readNumber<uint32_t>();
525   if (std::error_code EC = NumCallsites.getError())
526     return EC;
527 
528   for (uint32_t J = 0; J < *NumCallsites; ++J) {
529     auto LineOffset = readNumber<uint64_t>();
530     if (std::error_code EC = LineOffset.getError())
531       return EC;
532 
533     auto Discriminator = readNumber<uint64_t>();
534     if (std::error_code EC = Discriminator.getError())
535       return EC;
536 
537     auto FName(readStringFromTable());
538     if (std::error_code EC = FName.getError())
539       return EC;
540 
541     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
542         LineLocation(*LineOffset, *Discriminator))[std::string(*FName)];
543     CalleeProfile.setName(*FName);
544     if (std::error_code EC = readProfile(CalleeProfile))
545       return EC;
546   }
547 
548   return sampleprof_error::success;
549 }
550 
551 std::error_code
readFuncProfile(const uint8_t * Start)552 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
553   Data = Start;
554   auto NumHeadSamples = readNumber<uint64_t>();
555   if (std::error_code EC = NumHeadSamples.getError())
556     return EC;
557 
558   auto FName(readStringFromTable());
559   if (std::error_code EC = FName.getError())
560     return EC;
561 
562   SampleContext FContext(*FName);
563   Profiles[FContext] = FunctionSamples();
564   FunctionSamples &FProfile = Profiles[FContext];
565   FProfile.setName(FContext.getNameWithoutContext());
566   FProfile.setContext(FContext);
567   FProfile.addHeadSamples(*NumHeadSamples);
568 
569   if (FContext.hasContext())
570     CSProfileCount++;
571 
572   if (std::error_code EC = readProfile(FProfile))
573     return EC;
574   return sampleprof_error::success;
575 }
576 
readImpl()577 std::error_code SampleProfileReaderBinary::readImpl() {
578   while (!at_eof()) {
579     if (std::error_code EC = readFuncProfile(Data))
580       return EC;
581   }
582 
583   return sampleprof_error::success;
584 }
585 
readOneSection(const uint8_t * Start,uint64_t Size,const SecHdrTableEntry & Entry)586 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
587     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
588   Data = Start;
589   End = Start + Size;
590   switch (Entry.Type) {
591   case SecProfSummary:
592     if (std::error_code EC = readSummary())
593       return EC;
594     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
595       Summary->setPartialProfile(true);
596     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
597       FunctionSamples::ProfileIsCS = ProfileIsCS = true;
598     break;
599   case SecNameTable: {
600     FixedLengthMD5 =
601         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
602     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
603     assert((!FixedLengthMD5 || UseMD5) &&
604            "If FixedLengthMD5 is true, UseMD5 has to be true");
605     FunctionSamples::HasUniqSuffix =
606         hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
607     if (std::error_code EC = readNameTableSec(UseMD5))
608       return EC;
609     break;
610   }
611   case SecLBRProfile:
612     if (std::error_code EC = readFuncProfiles())
613       return EC;
614     break;
615   case SecFuncOffsetTable:
616     if (std::error_code EC = readFuncOffsetTable())
617       return EC;
618     break;
619   case SecFuncMetadata: {
620     ProfileIsProbeBased =
621         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
622     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
623     bool HasAttribute =
624         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
625     if (std::error_code EC = readFuncMetadata(HasAttribute))
626       return EC;
627     break;
628   }
629   case SecProfileSymbolList:
630     if (std::error_code EC = readProfileSymbolList())
631       return EC;
632     break;
633   default:
634     if (std::error_code EC = readCustomSection(Entry))
635       return EC;
636     break;
637   }
638   return sampleprof_error::success;
639 }
640 
collectFuncsFromModule()641 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
642   if (!M)
643     return false;
644   FuncsToUse.clear();
645   for (auto &F : *M)
646     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
647   return true;
648 }
649 
readFuncOffsetTable()650 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
651   // If there are more than one FuncOffsetTable, the profile read associated
652   // with previous FuncOffsetTable has to be done before next FuncOffsetTable
653   // is read.
654   FuncOffsetTable.clear();
655 
656   auto Size = readNumber<uint64_t>();
657   if (std::error_code EC = Size.getError())
658     return EC;
659 
660   FuncOffsetTable.reserve(*Size);
661   for (uint32_t I = 0; I < *Size; ++I) {
662     auto FName(readStringFromTable());
663     if (std::error_code EC = FName.getError())
664       return EC;
665 
666     auto Offset = readNumber<uint64_t>();
667     if (std::error_code EC = Offset.getError())
668       return EC;
669 
670     FuncOffsetTable[*FName] = *Offset;
671   }
672   return sampleprof_error::success;
673 }
674 
readFuncProfiles()675 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
676   // Collect functions used by current module if the Reader has been
677   // given a module.
678   // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
679   // which will query FunctionSamples::HasUniqSuffix, so it has to be
680   // called after FunctionSamples::HasUniqSuffix is set, i.e. after
681   // NameTable section is read.
682   bool LoadFuncsToBeUsed = collectFuncsFromModule();
683 
684   // When LoadFuncsToBeUsed is false, load all the function profiles.
685   const uint8_t *Start = Data;
686   if (!LoadFuncsToBeUsed) {
687     while (Data < End) {
688       if (std::error_code EC = readFuncProfile(Data))
689         return EC;
690     }
691     assert(Data == End && "More data is read than expected");
692   } else {
693     // Load function profiles on demand.
694     if (Remapper) {
695       for (auto Name : FuncsToUse) {
696         Remapper->insert(Name);
697       }
698     }
699 
700     if (useMD5()) {
701       for (auto Name : FuncsToUse) {
702         auto GUID = std::to_string(MD5Hash(Name));
703         auto iter = FuncOffsetTable.find(StringRef(GUID));
704         if (iter == FuncOffsetTable.end())
705           continue;
706         const uint8_t *FuncProfileAddr = Start + iter->second;
707         assert(FuncProfileAddr < End && "out of LBRProfile section");
708         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
709           return EC;
710       }
711     } else if (FunctionSamples::ProfileIsCS) {
712       // Compute the ordered set of names, so we can
713       // get all context profiles under a subtree by
714       // iterating through the ordered names.
715       struct Comparer {
716         // Ignore the closing ']' when ordering context
717         bool operator()(const StringRef &L, const StringRef &R) const {
718           return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1);
719         }
720       };
721       std::set<StringRef, Comparer> OrderedNames;
722       for (auto Name : FuncOffsetTable) {
723         OrderedNames.insert(Name.first);
724       }
725 
726       // For each function in current module, load all
727       // context profiles for the function.
728       for (auto NameOffset : FuncOffsetTable) {
729         StringRef ContextName = NameOffset.first;
730         SampleContext FContext(ContextName);
731         auto FuncName = FContext.getNameWithoutContext();
732         if (!FuncsToUse.count(FuncName) &&
733             (!Remapper || !Remapper->exist(FuncName)))
734           continue;
735 
736         // For each context profile we need, try to load
737         // all context profile in the subtree. This can
738         // help profile guided importing for ThinLTO.
739         auto It = OrderedNames.find(ContextName);
740         while (It != OrderedNames.end() &&
741                It->startswith(ContextName.substr(0, ContextName.size() - 1))) {
742           const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It];
743           assert(FuncProfileAddr < End && "out of LBRProfile section");
744           if (std::error_code EC = readFuncProfile(FuncProfileAddr))
745             return EC;
746           // Remove loaded context profile so we won't
747           // load it repeatedly.
748           It = OrderedNames.erase(It);
749         }
750       }
751     } else {
752       for (auto NameOffset : FuncOffsetTable) {
753         SampleContext FContext(NameOffset.first);
754         auto FuncName = FContext.getNameWithoutContext();
755         if (!FuncsToUse.count(FuncName) &&
756             (!Remapper || !Remapper->exist(FuncName)))
757           continue;
758         const uint8_t *FuncProfileAddr = Start + NameOffset.second;
759         assert(FuncProfileAddr < End && "out of LBRProfile section");
760         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
761           return EC;
762       }
763     }
764     Data = End;
765   }
766   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
767          "Cannot have both context-sensitive and regular profile");
768   assert(ProfileIsCS == (CSProfileCount > 0) &&
769          "Section flag should be consistent with actual profile");
770   return sampleprof_error::success;
771 }
772 
readProfileSymbolList()773 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
774   if (!ProfSymList)
775     ProfSymList = std::make_unique<ProfileSymbolList>();
776 
777   if (std::error_code EC = ProfSymList->read(Data, End - Data))
778     return EC;
779 
780   Data = End;
781   return sampleprof_error::success;
782 }
783 
decompressSection(const uint8_t * SecStart,const uint64_t SecSize,const uint8_t * & DecompressBuf,uint64_t & DecompressBufSize)784 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
785     const uint8_t *SecStart, const uint64_t SecSize,
786     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
787   Data = SecStart;
788   End = SecStart + SecSize;
789   auto DecompressSize = readNumber<uint64_t>();
790   if (std::error_code EC = DecompressSize.getError())
791     return EC;
792   DecompressBufSize = *DecompressSize;
793 
794   auto CompressSize = readNumber<uint64_t>();
795   if (std::error_code EC = CompressSize.getError())
796     return EC;
797 
798   if (!llvm::zlib::isAvailable())
799     return sampleprof_error::zlib_unavailable;
800 
801   StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
802                               *CompressSize);
803   char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
804   size_t UCSize = DecompressBufSize;
805   llvm::Error E =
806       zlib::uncompress(CompressedStrings, Buffer, UCSize);
807   if (E)
808     return sampleprof_error::uncompress_failed;
809   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
810   return sampleprof_error::success;
811 }
812 
readImpl()813 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
814   const uint8_t *BufStart =
815       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
816 
817   for (auto &Entry : SecHdrTable) {
818     // Skip empty section.
819     if (!Entry.Size)
820       continue;
821 
822     // Skip sections without context when SkipFlatProf is true.
823     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
824       continue;
825 
826     const uint8_t *SecStart = BufStart + Entry.Offset;
827     uint64_t SecSize = Entry.Size;
828 
829     // If the section is compressed, decompress it into a buffer
830     // DecompressBuf before reading the actual data. The pointee of
831     // 'Data' will be changed to buffer hold by DecompressBuf
832     // temporarily when reading the actual data.
833     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
834     if (isCompressed) {
835       const uint8_t *DecompressBuf;
836       uint64_t DecompressBufSize;
837       if (std::error_code EC = decompressSection(
838               SecStart, SecSize, DecompressBuf, DecompressBufSize))
839         return EC;
840       SecStart = DecompressBuf;
841       SecSize = DecompressBufSize;
842     }
843 
844     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
845       return EC;
846     if (Data != SecStart + SecSize)
847       return sampleprof_error::malformed;
848 
849     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
850     if (isCompressed) {
851       Data = BufStart + Entry.Offset;
852       End = BufStart + Buffer->getBufferSize();
853     }
854   }
855 
856   return sampleprof_error::success;
857 }
858 
readImpl()859 std::error_code SampleProfileReaderCompactBinary::readImpl() {
860   // Collect functions used by current module if the Reader has been
861   // given a module.
862   bool LoadFuncsToBeUsed = collectFuncsFromModule();
863 
864   std::vector<uint64_t> OffsetsToUse;
865   if (!LoadFuncsToBeUsed) {
866     // load all the function profiles.
867     for (auto FuncEntry : FuncOffsetTable) {
868       OffsetsToUse.push_back(FuncEntry.second);
869     }
870   } else {
871     // load function profiles on demand.
872     for (auto Name : FuncsToUse) {
873       auto GUID = std::to_string(MD5Hash(Name));
874       auto iter = FuncOffsetTable.find(StringRef(GUID));
875       if (iter == FuncOffsetTable.end())
876         continue;
877       OffsetsToUse.push_back(iter->second);
878     }
879   }
880 
881   for (auto Offset : OffsetsToUse) {
882     const uint8_t *SavedData = Data;
883     if (std::error_code EC = readFuncProfile(
884             reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
885             Offset))
886       return EC;
887     Data = SavedData;
888   }
889   return sampleprof_error::success;
890 }
891 
verifySPMagic(uint64_t Magic)892 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
893   if (Magic == SPMagic())
894     return sampleprof_error::success;
895   return sampleprof_error::bad_magic;
896 }
897 
verifySPMagic(uint64_t Magic)898 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
899   if (Magic == SPMagic(SPF_Ext_Binary))
900     return sampleprof_error::success;
901   return sampleprof_error::bad_magic;
902 }
903 
904 std::error_code
verifySPMagic(uint64_t Magic)905 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
906   if (Magic == SPMagic(SPF_Compact_Binary))
907     return sampleprof_error::success;
908   return sampleprof_error::bad_magic;
909 }
910 
readNameTable()911 std::error_code SampleProfileReaderBinary::readNameTable() {
912   auto Size = readNumber<uint32_t>();
913   if (std::error_code EC = Size.getError())
914     return EC;
915   NameTable.reserve(*Size + NameTable.size());
916   for (uint32_t I = 0; I < *Size; ++I) {
917     auto Name(readString());
918     if (std::error_code EC = Name.getError())
919       return EC;
920     NameTable.push_back(*Name);
921   }
922 
923   return sampleprof_error::success;
924 }
925 
readMD5NameTable()926 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
927   auto Size = readNumber<uint64_t>();
928   if (std::error_code EC = Size.getError())
929     return EC;
930   MD5StringBuf = std::make_unique<std::vector<std::string>>();
931   MD5StringBuf->reserve(*Size);
932   if (FixedLengthMD5) {
933     // Preallocate and initialize NameTable so we can check whether a name
934     // index has been read before by checking whether the element in the
935     // NameTable is empty, meanwhile readStringIndex can do the boundary
936     // check using the size of NameTable.
937     NameTable.resize(*Size + NameTable.size());
938 
939     MD5NameMemStart = Data;
940     Data = Data + (*Size) * sizeof(uint64_t);
941     return sampleprof_error::success;
942   }
943   NameTable.reserve(*Size);
944   for (uint32_t I = 0; I < *Size; ++I) {
945     auto FID = readNumber<uint64_t>();
946     if (std::error_code EC = FID.getError())
947       return EC;
948     MD5StringBuf->push_back(std::to_string(*FID));
949     // NameTable is a vector of StringRef. Here it is pushing back a
950     // StringRef initialized with the last string in MD5stringBuf.
951     NameTable.push_back(MD5StringBuf->back());
952   }
953   return sampleprof_error::success;
954 }
955 
readNameTableSec(bool IsMD5)956 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
957   if (IsMD5)
958     return readMD5NameTable();
959   return SampleProfileReaderBinary::readNameTable();
960 }
961 
962 std::error_code
readFuncMetadata(bool ProfileHasAttribute)963 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
964   while (Data < End) {
965     auto FName(readStringFromTable());
966     if (std::error_code EC = FName.getError())
967       return EC;
968 
969     SampleContext FContext(*FName);
970     bool ProfileInMap = Profiles.count(FContext);
971 
972     if (ProfileIsProbeBased) {
973       auto Checksum = readNumber<uint64_t>();
974       if (std::error_code EC = Checksum.getError())
975         return EC;
976       if (ProfileInMap)
977         Profiles[FContext].setFunctionHash(*Checksum);
978     }
979 
980     if (ProfileHasAttribute) {
981       auto Attributes = readNumber<uint32_t>();
982       if (std::error_code EC = Attributes.getError())
983         return EC;
984       if (ProfileInMap)
985         Profiles[FContext].getContext().setAllAttributes(*Attributes);
986     }
987   }
988 
989   assert(Data == End && "More data is read than expected");
990   return sampleprof_error::success;
991 }
992 
readNameTable()993 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
994   auto Size = readNumber<uint64_t>();
995   if (std::error_code EC = Size.getError())
996     return EC;
997   NameTable.reserve(*Size);
998   for (uint32_t I = 0; I < *Size; ++I) {
999     auto FID = readNumber<uint64_t>();
1000     if (std::error_code EC = FID.getError())
1001       return EC;
1002     NameTable.push_back(std::to_string(*FID));
1003   }
1004   return sampleprof_error::success;
1005 }
1006 
1007 std::error_code
readSecHdrTableEntry(uint32_t Idx)1008 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
1009   SecHdrTableEntry Entry;
1010   auto Type = readUnencodedNumber<uint64_t>();
1011   if (std::error_code EC = Type.getError())
1012     return EC;
1013   Entry.Type = static_cast<SecType>(*Type);
1014 
1015   auto Flags = readUnencodedNumber<uint64_t>();
1016   if (std::error_code EC = Flags.getError())
1017     return EC;
1018   Entry.Flags = *Flags;
1019 
1020   auto Offset = readUnencodedNumber<uint64_t>();
1021   if (std::error_code EC = Offset.getError())
1022     return EC;
1023   Entry.Offset = *Offset;
1024 
1025   auto Size = readUnencodedNumber<uint64_t>();
1026   if (std::error_code EC = Size.getError())
1027     return EC;
1028   Entry.Size = *Size;
1029 
1030   Entry.LayoutIndex = Idx;
1031   SecHdrTable.push_back(std::move(Entry));
1032   return sampleprof_error::success;
1033 }
1034 
readSecHdrTable()1035 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1036   auto EntryNum = readUnencodedNumber<uint64_t>();
1037   if (std::error_code EC = EntryNum.getError())
1038     return EC;
1039 
1040   for (uint32_t i = 0; i < (*EntryNum); i++)
1041     if (std::error_code EC = readSecHdrTableEntry(i))
1042       return EC;
1043 
1044   return sampleprof_error::success;
1045 }
1046 
readHeader()1047 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1048   const uint8_t *BufStart =
1049       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1050   Data = BufStart;
1051   End = BufStart + Buffer->getBufferSize();
1052 
1053   if (std::error_code EC = readMagicIdent())
1054     return EC;
1055 
1056   if (std::error_code EC = readSecHdrTable())
1057     return EC;
1058 
1059   return sampleprof_error::success;
1060 }
1061 
getSectionSize(SecType Type)1062 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1063   uint64_t Size = 0;
1064   for (auto &Entry : SecHdrTable) {
1065     if (Entry.Type == Type)
1066       Size += Entry.Size;
1067   }
1068   return Size;
1069 }
1070 
getFileSize()1071 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1072   // Sections in SecHdrTable is not necessarily in the same order as
1073   // sections in the profile because section like FuncOffsetTable needs
1074   // to be written after section LBRProfile but needs to be read before
1075   // section LBRProfile, so we cannot simply use the last entry in
1076   // SecHdrTable to calculate the file size.
1077   uint64_t FileSize = 0;
1078   for (auto &Entry : SecHdrTable) {
1079     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1080   }
1081   return FileSize;
1082 }
1083 
getSecFlagsStr(const SecHdrTableEntry & Entry)1084 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1085   std::string Flags;
1086   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1087     Flags.append("{compressed,");
1088   else
1089     Flags.append("{");
1090 
1091   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1092     Flags.append("flat,");
1093 
1094   switch (Entry.Type) {
1095   case SecNameTable:
1096     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1097       Flags.append("fixlenmd5,");
1098     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1099       Flags.append("md5,");
1100     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1101       Flags.append("uniq,");
1102     break;
1103   case SecProfSummary:
1104     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1105       Flags.append("partial,");
1106     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1107       Flags.append("context,");
1108     break;
1109   default:
1110     break;
1111   }
1112   char &last = Flags.back();
1113   if (last == ',')
1114     last = '}';
1115   else
1116     Flags.append("}");
1117   return Flags;
1118 }
1119 
dumpSectionInfo(raw_ostream & OS)1120 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1121   uint64_t TotalSecsSize = 0;
1122   for (auto &Entry : SecHdrTable) {
1123     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1124        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1125        << "\n";
1126     ;
1127     TotalSecsSize += Entry.Size;
1128   }
1129   uint64_t HeaderSize = SecHdrTable.front().Offset;
1130   assert(HeaderSize + TotalSecsSize == getFileSize() &&
1131          "Size of 'header + sections' doesn't match the total size of profile");
1132 
1133   OS << "Header Size: " << HeaderSize << "\n";
1134   OS << "Total Sections Size: " << TotalSecsSize << "\n";
1135   OS << "File Size: " << getFileSize() << "\n";
1136   return true;
1137 }
1138 
readMagicIdent()1139 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1140   // Read and check the magic identifier.
1141   auto Magic = readNumber<uint64_t>();
1142   if (std::error_code EC = Magic.getError())
1143     return EC;
1144   else if (std::error_code EC = verifySPMagic(*Magic))
1145     return EC;
1146 
1147   // Read the version number.
1148   auto Version = readNumber<uint64_t>();
1149   if (std::error_code EC = Version.getError())
1150     return EC;
1151   else if (*Version != SPVersion())
1152     return sampleprof_error::unsupported_version;
1153 
1154   return sampleprof_error::success;
1155 }
1156 
readHeader()1157 std::error_code SampleProfileReaderBinary::readHeader() {
1158   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1159   End = Data + Buffer->getBufferSize();
1160 
1161   if (std::error_code EC = readMagicIdent())
1162     return EC;
1163 
1164   if (std::error_code EC = readSummary())
1165     return EC;
1166 
1167   if (std::error_code EC = readNameTable())
1168     return EC;
1169   return sampleprof_error::success;
1170 }
1171 
readHeader()1172 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1173   SampleProfileReaderBinary::readHeader();
1174   if (std::error_code EC = readFuncOffsetTable())
1175     return EC;
1176   return sampleprof_error::success;
1177 }
1178 
readFuncOffsetTable()1179 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1180   auto TableOffset = readUnencodedNumber<uint64_t>();
1181   if (std::error_code EC = TableOffset.getError())
1182     return EC;
1183 
1184   const uint8_t *SavedData = Data;
1185   const uint8_t *TableStart =
1186       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1187       *TableOffset;
1188   Data = TableStart;
1189 
1190   auto Size = readNumber<uint64_t>();
1191   if (std::error_code EC = Size.getError())
1192     return EC;
1193 
1194   FuncOffsetTable.reserve(*Size);
1195   for (uint32_t I = 0; I < *Size; ++I) {
1196     auto FName(readStringFromTable());
1197     if (std::error_code EC = FName.getError())
1198       return EC;
1199 
1200     auto Offset = readNumber<uint64_t>();
1201     if (std::error_code EC = Offset.getError())
1202       return EC;
1203 
1204     FuncOffsetTable[*FName] = *Offset;
1205   }
1206   End = TableStart;
1207   Data = SavedData;
1208   return sampleprof_error::success;
1209 }
1210 
collectFuncsFromModule()1211 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1212   if (!M)
1213     return false;
1214   FuncsToUse.clear();
1215   for (auto &F : *M)
1216     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1217   return true;
1218 }
1219 
readSummaryEntry(std::vector<ProfileSummaryEntry> & Entries)1220 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1221     std::vector<ProfileSummaryEntry> &Entries) {
1222   auto Cutoff = readNumber<uint64_t>();
1223   if (std::error_code EC = Cutoff.getError())
1224     return EC;
1225 
1226   auto MinBlockCount = readNumber<uint64_t>();
1227   if (std::error_code EC = MinBlockCount.getError())
1228     return EC;
1229 
1230   auto NumBlocks = readNumber<uint64_t>();
1231   if (std::error_code EC = NumBlocks.getError())
1232     return EC;
1233 
1234   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1235   return sampleprof_error::success;
1236 }
1237 
readSummary()1238 std::error_code SampleProfileReaderBinary::readSummary() {
1239   auto TotalCount = readNumber<uint64_t>();
1240   if (std::error_code EC = TotalCount.getError())
1241     return EC;
1242 
1243   auto MaxBlockCount = readNumber<uint64_t>();
1244   if (std::error_code EC = MaxBlockCount.getError())
1245     return EC;
1246 
1247   auto MaxFunctionCount = readNumber<uint64_t>();
1248   if (std::error_code EC = MaxFunctionCount.getError())
1249     return EC;
1250 
1251   auto NumBlocks = readNumber<uint64_t>();
1252   if (std::error_code EC = NumBlocks.getError())
1253     return EC;
1254 
1255   auto NumFunctions = readNumber<uint64_t>();
1256   if (std::error_code EC = NumFunctions.getError())
1257     return EC;
1258 
1259   auto NumSummaryEntries = readNumber<uint64_t>();
1260   if (std::error_code EC = NumSummaryEntries.getError())
1261     return EC;
1262 
1263   std::vector<ProfileSummaryEntry> Entries;
1264   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1265     std::error_code EC = readSummaryEntry(Entries);
1266     if (EC != sampleprof_error::success)
1267       return EC;
1268   }
1269   Summary = std::make_unique<ProfileSummary>(
1270       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1271       *MaxFunctionCount, *NumBlocks, *NumFunctions);
1272 
1273   return sampleprof_error::success;
1274 }
1275 
hasFormat(const MemoryBuffer & Buffer)1276 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1277   const uint8_t *Data =
1278       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1279   uint64_t Magic = decodeULEB128(Data);
1280   return Magic == SPMagic();
1281 }
1282 
hasFormat(const MemoryBuffer & Buffer)1283 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1284   const uint8_t *Data =
1285       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1286   uint64_t Magic = decodeULEB128(Data);
1287   return Magic == SPMagic(SPF_Ext_Binary);
1288 }
1289 
hasFormat(const MemoryBuffer & Buffer)1290 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1291   const uint8_t *Data =
1292       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1293   uint64_t Magic = decodeULEB128(Data);
1294   return Magic == SPMagic(SPF_Compact_Binary);
1295 }
1296 
skipNextWord()1297 std::error_code SampleProfileReaderGCC::skipNextWord() {
1298   uint32_t dummy;
1299   if (!GcovBuffer.readInt(dummy))
1300     return sampleprof_error::truncated;
1301   return sampleprof_error::success;
1302 }
1303 
readNumber()1304 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1305   if (sizeof(T) <= sizeof(uint32_t)) {
1306     uint32_t Val;
1307     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1308       return static_cast<T>(Val);
1309   } else if (sizeof(T) <= sizeof(uint64_t)) {
1310     uint64_t Val;
1311     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1312       return static_cast<T>(Val);
1313   }
1314 
1315   std::error_code EC = sampleprof_error::malformed;
1316   reportError(0, EC.message());
1317   return EC;
1318 }
1319 
readString()1320 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1321   StringRef Str;
1322   if (!GcovBuffer.readString(Str))
1323     return sampleprof_error::truncated;
1324   return Str;
1325 }
1326 
readHeader()1327 std::error_code SampleProfileReaderGCC::readHeader() {
1328   // Read the magic identifier.
1329   if (!GcovBuffer.readGCDAFormat())
1330     return sampleprof_error::unrecognized_format;
1331 
1332   // Read the version number. Note - the GCC reader does not validate this
1333   // version, but the profile creator generates v704.
1334   GCOV::GCOVVersion version;
1335   if (!GcovBuffer.readGCOVVersion(version))
1336     return sampleprof_error::unrecognized_format;
1337 
1338   if (version != GCOV::V407)
1339     return sampleprof_error::unsupported_version;
1340 
1341   // Skip the empty integer.
1342   if (std::error_code EC = skipNextWord())
1343     return EC;
1344 
1345   return sampleprof_error::success;
1346 }
1347 
readSectionTag(uint32_t Expected)1348 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1349   uint32_t Tag;
1350   if (!GcovBuffer.readInt(Tag))
1351     return sampleprof_error::truncated;
1352 
1353   if (Tag != Expected)
1354     return sampleprof_error::malformed;
1355 
1356   if (std::error_code EC = skipNextWord())
1357     return EC;
1358 
1359   return sampleprof_error::success;
1360 }
1361 
readNameTable()1362 std::error_code SampleProfileReaderGCC::readNameTable() {
1363   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1364     return EC;
1365 
1366   uint32_t Size;
1367   if (!GcovBuffer.readInt(Size))
1368     return sampleprof_error::truncated;
1369 
1370   for (uint32_t I = 0; I < Size; ++I) {
1371     StringRef Str;
1372     if (!GcovBuffer.readString(Str))
1373       return sampleprof_error::truncated;
1374     Names.push_back(std::string(Str));
1375   }
1376 
1377   return sampleprof_error::success;
1378 }
1379 
readFunctionProfiles()1380 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1381   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1382     return EC;
1383 
1384   uint32_t NumFunctions;
1385   if (!GcovBuffer.readInt(NumFunctions))
1386     return sampleprof_error::truncated;
1387 
1388   InlineCallStack Stack;
1389   for (uint32_t I = 0; I < NumFunctions; ++I)
1390     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1391       return EC;
1392 
1393   computeSummary();
1394   return sampleprof_error::success;
1395 }
1396 
readOneFunctionProfile(const InlineCallStack & InlineStack,bool Update,uint32_t Offset)1397 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1398     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1399   uint64_t HeadCount = 0;
1400   if (InlineStack.size() == 0)
1401     if (!GcovBuffer.readInt64(HeadCount))
1402       return sampleprof_error::truncated;
1403 
1404   uint32_t NameIdx;
1405   if (!GcovBuffer.readInt(NameIdx))
1406     return sampleprof_error::truncated;
1407 
1408   StringRef Name(Names[NameIdx]);
1409 
1410   uint32_t NumPosCounts;
1411   if (!GcovBuffer.readInt(NumPosCounts))
1412     return sampleprof_error::truncated;
1413 
1414   uint32_t NumCallsites;
1415   if (!GcovBuffer.readInt(NumCallsites))
1416     return sampleprof_error::truncated;
1417 
1418   FunctionSamples *FProfile = nullptr;
1419   if (InlineStack.size() == 0) {
1420     // If this is a top function that we have already processed, do not
1421     // update its profile again.  This happens in the presence of
1422     // function aliases.  Since these aliases share the same function
1423     // body, there will be identical replicated profiles for the
1424     // original function.  In this case, we simply not bother updating
1425     // the profile of the original function.
1426     FProfile = &Profiles[Name];
1427     FProfile->addHeadSamples(HeadCount);
1428     if (FProfile->getTotalSamples() > 0)
1429       Update = false;
1430   } else {
1431     // Otherwise, we are reading an inlined instance. The top of the
1432     // inline stack contains the profile of the caller. Insert this
1433     // callee in the caller's CallsiteMap.
1434     FunctionSamples *CallerProfile = InlineStack.front();
1435     uint32_t LineOffset = Offset >> 16;
1436     uint32_t Discriminator = Offset & 0xffff;
1437     FProfile = &CallerProfile->functionSamplesAt(
1438         LineLocation(LineOffset, Discriminator))[std::string(Name)];
1439   }
1440   FProfile->setName(Name);
1441 
1442   for (uint32_t I = 0; I < NumPosCounts; ++I) {
1443     uint32_t Offset;
1444     if (!GcovBuffer.readInt(Offset))
1445       return sampleprof_error::truncated;
1446 
1447     uint32_t NumTargets;
1448     if (!GcovBuffer.readInt(NumTargets))
1449       return sampleprof_error::truncated;
1450 
1451     uint64_t Count;
1452     if (!GcovBuffer.readInt64(Count))
1453       return sampleprof_error::truncated;
1454 
1455     // The line location is encoded in the offset as:
1456     //   high 16 bits: line offset to the start of the function.
1457     //   low 16 bits: discriminator.
1458     uint32_t LineOffset = Offset >> 16;
1459     uint32_t Discriminator = Offset & 0xffff;
1460 
1461     InlineCallStack NewStack;
1462     NewStack.push_back(FProfile);
1463     llvm::append_range(NewStack, InlineStack);
1464     if (Update) {
1465       // Walk up the inline stack, adding the samples on this line to
1466       // the total sample count of the callers in the chain.
1467       for (auto CallerProfile : NewStack)
1468         CallerProfile->addTotalSamples(Count);
1469 
1470       // Update the body samples for the current profile.
1471       FProfile->addBodySamples(LineOffset, Discriminator, Count);
1472     }
1473 
1474     // Process the list of functions called at an indirect call site.
1475     // These are all the targets that a function pointer (or virtual
1476     // function) resolved at runtime.
1477     for (uint32_t J = 0; J < NumTargets; J++) {
1478       uint32_t HistVal;
1479       if (!GcovBuffer.readInt(HistVal))
1480         return sampleprof_error::truncated;
1481 
1482       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1483         return sampleprof_error::malformed;
1484 
1485       uint64_t TargetIdx;
1486       if (!GcovBuffer.readInt64(TargetIdx))
1487         return sampleprof_error::truncated;
1488       StringRef TargetName(Names[TargetIdx]);
1489 
1490       uint64_t TargetCount;
1491       if (!GcovBuffer.readInt64(TargetCount))
1492         return sampleprof_error::truncated;
1493 
1494       if (Update)
1495         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1496                                          TargetName, TargetCount);
1497     }
1498   }
1499 
1500   // Process all the inlined callers into the current function. These
1501   // are all the callsites that were inlined into this function.
1502   for (uint32_t I = 0; I < NumCallsites; I++) {
1503     // The offset is encoded as:
1504     //   high 16 bits: line offset to the start of the function.
1505     //   low 16 bits: discriminator.
1506     uint32_t Offset;
1507     if (!GcovBuffer.readInt(Offset))
1508       return sampleprof_error::truncated;
1509     InlineCallStack NewStack;
1510     NewStack.push_back(FProfile);
1511     llvm::append_range(NewStack, InlineStack);
1512     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1513       return EC;
1514   }
1515 
1516   return sampleprof_error::success;
1517 }
1518 
1519 /// Read a GCC AutoFDO profile.
1520 ///
1521 /// This format is generated by the Linux Perf conversion tool at
1522 /// https://github.com/google/autofdo.
readImpl()1523 std::error_code SampleProfileReaderGCC::readImpl() {
1524   // Read the string table.
1525   if (std::error_code EC = readNameTable())
1526     return EC;
1527 
1528   // Read the source profile.
1529   if (std::error_code EC = readFunctionProfiles())
1530     return EC;
1531 
1532   return sampleprof_error::success;
1533 }
1534 
hasFormat(const MemoryBuffer & Buffer)1535 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1536   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1537   return Magic == "adcg*704";
1538 }
1539 
applyRemapping(LLVMContext & Ctx)1540 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1541   // If the reader uses MD5 to represent string, we can't remap it because
1542   // we don't know what the original function names were.
1543   if (Reader.useMD5()) {
1544     Ctx.diagnose(DiagnosticInfoSampleProfile(
1545         Reader.getBuffer()->getBufferIdentifier(),
1546         "Profile data remapping cannot be applied to profile data "
1547         "in compact format (original mangled names are not available).",
1548         DS_Warning));
1549     return;
1550   }
1551 
1552   // CSSPGO-TODO: Remapper is not yet supported.
1553   // We will need to remap the entire context string.
1554   assert(Remappings && "should be initialized while creating remapper");
1555   for (auto &Sample : Reader.getProfiles()) {
1556     DenseSet<StringRef> NamesInSample;
1557     Sample.second.findAllNames(NamesInSample);
1558     for (auto &Name : NamesInSample)
1559       if (auto Key = Remappings->insert(Name))
1560         NameMap.insert({Key, Name});
1561   }
1562 
1563   RemappingApplied = true;
1564 }
1565 
1566 Optional<StringRef>
lookUpNameInProfile(StringRef Fname)1567 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1568   if (auto Key = Remappings->lookup(Fname))
1569     return NameMap.lookup(Key);
1570   return None;
1571 }
1572 
1573 /// Prepare a memory buffer for the contents of \p Filename.
1574 ///
1575 /// \returns an error code indicating the status of the buffer.
1576 static ErrorOr<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename)1577 setupMemoryBuffer(const Twine &Filename) {
1578   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
1579   if (std::error_code EC = BufferOrErr.getError())
1580     return EC;
1581   auto Buffer = std::move(BufferOrErr.get());
1582 
1583   // Sanity check the file.
1584   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1585     return sampleprof_error::too_large;
1586 
1587   return std::move(Buffer);
1588 }
1589 
1590 /// Create a sample profile reader based on the format of the input file.
1591 ///
1592 /// \param Filename The file to open.
1593 ///
1594 /// \param C The LLVM context to use to emit diagnostics.
1595 ///
1596 /// \param RemapFilename The file used for profile remapping.
1597 ///
1598 /// \returns an error code indicating the status of the created reader.
1599 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(const std::string Filename,LLVMContext & C,const std::string RemapFilename)1600 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1601                             const std::string RemapFilename) {
1602   auto BufferOrError = setupMemoryBuffer(Filename);
1603   if (std::error_code EC = BufferOrError.getError())
1604     return EC;
1605   return create(BufferOrError.get(), C, RemapFilename);
1606 }
1607 
1608 /// Create a sample profile remapper from the given input, to remap the
1609 /// function names in the given profile data.
1610 ///
1611 /// \param Filename The file to open.
1612 ///
1613 /// \param Reader The profile reader the remapper is going to be applied to.
1614 ///
1615 /// \param C The LLVM context to use to emit diagnostics.
1616 ///
1617 /// \returns an error code indicating the status of the created reader.
1618 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(const std::string Filename,SampleProfileReader & Reader,LLVMContext & C)1619 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1620                                            SampleProfileReader &Reader,
1621                                            LLVMContext &C) {
1622   auto BufferOrError = setupMemoryBuffer(Filename);
1623   if (std::error_code EC = BufferOrError.getError())
1624     return EC;
1625   return create(BufferOrError.get(), Reader, C);
1626 }
1627 
1628 /// Create a sample profile remapper from the given input, to remap the
1629 /// function names in the given profile data.
1630 ///
1631 /// \param B The memory buffer to create the reader from (assumes ownership).
1632 ///
1633 /// \param C The LLVM context to use to emit diagnostics.
1634 ///
1635 /// \param Reader The profile reader the remapper is going to be applied to.
1636 ///
1637 /// \returns an error code indicating the status of the created reader.
1638 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(std::unique_ptr<MemoryBuffer> & B,SampleProfileReader & Reader,LLVMContext & C)1639 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1640                                            SampleProfileReader &Reader,
1641                                            LLVMContext &C) {
1642   auto Remappings = std::make_unique<SymbolRemappingReader>();
1643   if (Error E = Remappings->read(*B.get())) {
1644     handleAllErrors(
1645         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1646           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1647                                                  ParseError.getLineNum(),
1648                                                  ParseError.getMessage()));
1649         });
1650     return sampleprof_error::malformed;
1651   }
1652 
1653   return std::make_unique<SampleProfileReaderItaniumRemapper>(
1654       std::move(B), std::move(Remappings), Reader);
1655 }
1656 
1657 /// Create a sample profile reader based on the format of the input data.
1658 ///
1659 /// \param B The memory buffer to create the reader from (assumes ownership).
1660 ///
1661 /// \param C The LLVM context to use to emit diagnostics.
1662 ///
1663 /// \param RemapFilename The file used for profile remapping.
1664 ///
1665 /// \returns an error code indicating the status of the created reader.
1666 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(std::unique_ptr<MemoryBuffer> & B,LLVMContext & C,const std::string RemapFilename)1667 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1668                             const std::string RemapFilename) {
1669   std::unique_ptr<SampleProfileReader> Reader;
1670   if (SampleProfileReaderRawBinary::hasFormat(*B))
1671     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1672   else if (SampleProfileReaderExtBinary::hasFormat(*B))
1673     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1674   else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1675     Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1676   else if (SampleProfileReaderGCC::hasFormat(*B))
1677     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1678   else if (SampleProfileReaderText::hasFormat(*B))
1679     Reader.reset(new SampleProfileReaderText(std::move(B), C));
1680   else
1681     return sampleprof_error::unrecognized_format;
1682 
1683   if (!RemapFilename.empty()) {
1684     auto ReaderOrErr =
1685         SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1686     if (std::error_code EC = ReaderOrErr.getError()) {
1687       std::string Msg = "Could not create remapper: " + EC.message();
1688       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1689       return EC;
1690     }
1691     Reader->Remapper = std::move(ReaderOrErr.get());
1692   }
1693 
1694   FunctionSamples::Format = Reader->getFormat();
1695   if (std::error_code EC = Reader->readHeader()) {
1696     return EC;
1697   }
1698 
1699   return std::move(Reader);
1700 }
1701 
1702 // For text and GCC file formats, we compute the summary after reading the
1703 // profile. Binary format has the profile summary in its header.
computeSummary()1704 void SampleProfileReader::computeSummary() {
1705   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1706   Summary = Builder.computeSummaryForProfiles(Profiles);
1707 }
1708