1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/ProfileSummary.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/Compression.h"
30 #include "llvm/Support/ErrorOr.h"
31 #include "llvm/Support/LEB128.h"
32 #include "llvm/Support/LineIterator.h"
33 #include "llvm/Support/MD5.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cstddef>
38 #include <cstdint>
39 #include <limits>
40 #include <memory>
41 #include <set>
42 #include <system_error>
43 #include <vector>
44
45 using namespace llvm;
46 using namespace sampleprof;
47
48 /// Dump the function profile for \p FName.
49 ///
50 /// \param FName Name of the function to print.
51 /// \param OS Stream to emit the output to.
dumpFunctionProfile(StringRef FName,raw_ostream & OS)52 void SampleProfileReader::dumpFunctionProfile(StringRef FName,
53 raw_ostream &OS) {
54 OS << "Function: " << FName << ": " << Profiles[FName];
55 }
56
57 /// Dump all the function profiles found on stream \p OS.
dump(raw_ostream & OS)58 void SampleProfileReader::dump(raw_ostream &OS) {
59 for (const auto &I : Profiles)
60 dumpFunctionProfile(I.getKey(), OS);
61 }
62
63 /// Parse \p Input as function head.
64 ///
65 /// Parse one line of \p Input, and update function name in \p FName,
66 /// function's total sample count in \p NumSamples, function's entry
67 /// count in \p NumHeadSamples.
68 ///
69 /// \returns true if parsing is successful.
ParseHead(const StringRef & Input,StringRef & FName,uint64_t & NumSamples,uint64_t & NumHeadSamples)70 static bool ParseHead(const StringRef &Input, StringRef &FName,
71 uint64_t &NumSamples, uint64_t &NumHeadSamples) {
72 if (Input[0] == ' ')
73 return false;
74 size_t n2 = Input.rfind(':');
75 size_t n1 = Input.rfind(':', n2 - 1);
76 FName = Input.substr(0, n1);
77 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
78 return false;
79 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
80 return false;
81 return true;
82 }
83
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  // Any bit set above the low 16 makes the offset illegal.
  return (L >> 16) == 0;
}
86
87 /// Parse \p Input that contains metadata.
88 /// Possible metadata:
89 /// - CFG Checksum information:
90 /// !CFGChecksum: 12345
91 /// - CFG Checksum information:
92 /// !Attributes: 1
93 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
parseMetadata(const StringRef & Input,uint64_t & FunctionHash,uint32_t & Attributes)94 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
95 uint32_t &Attributes) {
96 if (Input.startswith("!CFGChecksum:")) {
97 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
98 return !CFGInfo.getAsInteger(10, FunctionHash);
99 }
100
101 if (Input.startswith("!Attributes:")) {
102 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
103 return !Attrib.getAsInteger(10, Attributes);
104 }
105
106 return false;
107 }
108
/// Kind of an indented (non-header) line of a text profile, as classified by
/// ParseLine.
enum class LineType {
  /// Call site line: the text after "LOC: " does not start with a digit and
  /// is read as 'callee_name:NUM'.
  CallSiteProfile,
  /// Body sample line: the text after "LOC: " starts with a digit, optionally
  /// followed by 'target:NUM' pairs.
  BodyProfile,
  /// Metadata line: a single leading space followed by '!'.
  Metadata,
};
114
115 /// Parse \p Input as line sample.
116 ///
117 /// \param Input input line.
118 /// \param LineTy Type of this line.
119 /// \param Depth the depth of the inline stack.
120 /// \param NumSamples total samples of the line/inlined callsite.
121 /// \param LineOffset line offset to the start of the function.
122 /// \param Discriminator discriminator of the line.
123 /// \param TargetCountMap map from indirect call target to count.
124 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
125 ///
126 /// returns true if parsing is successful.
ParseLine(const StringRef & Input,LineType & LineTy,uint32_t & Depth,uint64_t & NumSamples,uint32_t & LineOffset,uint32_t & Discriminator,StringRef & CalleeName,DenseMap<StringRef,uint64_t> & TargetCountMap,uint64_t & FunctionHash,uint32_t & Attributes)127 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
128 uint64_t &NumSamples, uint32_t &LineOffset,
129 uint32_t &Discriminator, StringRef &CalleeName,
130 DenseMap<StringRef, uint64_t> &TargetCountMap,
131 uint64_t &FunctionHash, uint32_t &Attributes) {
132 for (Depth = 0; Input[Depth] == ' '; Depth++)
133 ;
134 if (Depth == 0)
135 return false;
136
137 if (Depth == 1 && Input[Depth] == '!') {
138 LineTy = LineType::Metadata;
139 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
140 }
141
142 size_t n1 = Input.find(':');
143 StringRef Loc = Input.substr(Depth, n1 - Depth);
144 size_t n2 = Loc.find('.');
145 if (n2 == StringRef::npos) {
146 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
147 return false;
148 Discriminator = 0;
149 } else {
150 if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
151 return false;
152 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
153 return false;
154 }
155
156 StringRef Rest = Input.substr(n1 + 2);
157 if (isDigit(Rest[0])) {
158 LineTy = LineType::BodyProfile;
159 size_t n3 = Rest.find(' ');
160 if (n3 == StringRef::npos) {
161 if (Rest.getAsInteger(10, NumSamples))
162 return false;
163 } else {
164 if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
165 return false;
166 }
167 // Find call targets and their sample counts.
168 // Note: In some cases, there are symbols in the profile which are not
169 // mangled. To accommodate such cases, use colon + integer pairs as the
170 // anchor points.
171 // An example:
172 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
173 // ":1000" and ":437" are used as anchor points so the string above will
174 // be interpreted as
175 // target: _M_construct<char *>
176 // count: 1000
177 // target: string_view<std::allocator<char> >
178 // count: 437
179 while (n3 != StringRef::npos) {
180 n3 += Rest.substr(n3).find_first_not_of(' ');
181 Rest = Rest.substr(n3);
182 n3 = Rest.find_first_of(':');
183 if (n3 == StringRef::npos || n3 == 0)
184 return false;
185
186 StringRef Target;
187 uint64_t count, n4;
188 while (true) {
189 // Get the segment after the current colon.
190 StringRef AfterColon = Rest.substr(n3 + 1);
191 // Get the target symbol before the current colon.
192 Target = Rest.substr(0, n3);
193 // Check if the word after the current colon is an integer.
194 n4 = AfterColon.find_first_of(' ');
195 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
196 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
197 if (!WordAfterColon.getAsInteger(10, count))
198 break;
199
200 // Try to find the next colon.
201 uint64_t n5 = AfterColon.find_first_of(':');
202 if (n5 == StringRef::npos)
203 return false;
204 n3 += n5 + 1;
205 }
206
207 // An anchor point is found. Save the {target, count} pair
208 TargetCountMap[Target] = count;
209 if (n4 == Rest.size())
210 break;
211 // Change n3 to the next blank space after colon + integer pair.
212 n3 = n4;
213 }
214 } else {
215 LineTy = LineType::CallSiteProfile;
216 size_t n3 = Rest.find_last_of(':');
217 CalleeName = Rest.substr(0, n3);
218 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
219 return false;
220 }
221 return true;
222 }
223
224 /// Load samples from a text file.
225 ///
226 /// See the documentation at the top of the file for an explanation of
227 /// the expected format.
228 ///
229 /// \returns true if the file was loaded successfully, false otherwise.
readImpl()230 std::error_code SampleProfileReaderText::readImpl() {
231 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
232 sampleprof_error Result = sampleprof_error::success;
233
234 InlineCallStack InlineStack;
235 uint32_t ProbeProfileCount = 0;
236
237 // SeenMetadata tracks whether we have processed metadata for the current
238 // top-level function profile.
239 bool SeenMetadata = false;
240
241 for (; !LineIt.is_at_eof(); ++LineIt) {
242 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
243 continue;
244 // Read the header of each function.
245 //
246 // Note that for function identifiers we are actually expecting
247 // mangled names, but we may not always get them. This happens when
248 // the compiler decides not to emit the function (e.g., it was inlined
249 // and removed). In this case, the binary will not have the linkage
250 // name for the function, so the profiler will emit the function's
251 // unmangled name, which may contain characters like ':' and '>' in its
252 // name (member functions, templates, etc).
253 //
254 // The only requirement we place on the identifier, then, is that it
255 // should not begin with a number.
256 if ((*LineIt)[0] != ' ') {
257 uint64_t NumSamples, NumHeadSamples;
258 StringRef FName;
259 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
260 reportError(LineIt.line_number(),
261 "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
262 return sampleprof_error::malformed;
263 }
264 SeenMetadata = false;
265 SampleContext FContext(FName);
266 if (FContext.hasContext())
267 ++CSProfileCount;
268 Profiles[FContext] = FunctionSamples();
269 FunctionSamples &FProfile = Profiles[FContext];
270 FProfile.setName(FContext.getNameWithoutContext());
271 FProfile.setContext(FContext);
272 MergeResult(Result, FProfile.addTotalSamples(NumSamples));
273 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
274 InlineStack.clear();
275 InlineStack.push_back(&FProfile);
276 } else {
277 uint64_t NumSamples;
278 StringRef FName;
279 DenseMap<StringRef, uint64_t> TargetCountMap;
280 uint32_t Depth, LineOffset, Discriminator;
281 LineType LineTy;
282 uint64_t FunctionHash = 0;
283 uint32_t Attributes = 0;
284 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
285 Discriminator, FName, TargetCountMap, FunctionHash,
286 Attributes)) {
287 reportError(LineIt.line_number(),
288 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
289 *LineIt);
290 return sampleprof_error::malformed;
291 }
292 if (SeenMetadata && LineTy != LineType::Metadata) {
293 // Metadata must be put at the end of a function profile.
294 reportError(LineIt.line_number(),
295 "Found non-metadata after metadata: " + *LineIt);
296 return sampleprof_error::malformed;
297 }
298 while (InlineStack.size() > Depth) {
299 InlineStack.pop_back();
300 }
301 switch (LineTy) {
302 case LineType::CallSiteProfile: {
303 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
304 LineLocation(LineOffset, Discriminator))[std::string(FName)];
305 FSamples.setName(FName);
306 MergeResult(Result, FSamples.addTotalSamples(NumSamples));
307 InlineStack.push_back(&FSamples);
308 break;
309 }
310 case LineType::BodyProfile: {
311 while (InlineStack.size() > Depth) {
312 InlineStack.pop_back();
313 }
314 FunctionSamples &FProfile = *InlineStack.back();
315 for (const auto &name_count : TargetCountMap) {
316 MergeResult(Result, FProfile.addCalledTargetSamples(
317 LineOffset, Discriminator, name_count.first,
318 name_count.second));
319 }
320 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
321 NumSamples));
322 break;
323 }
324 case LineType::Metadata: {
325 FunctionSamples &FProfile = *InlineStack.back();
326 if (FunctionHash) {
327 FProfile.setFunctionHash(FunctionHash);
328 ++ProbeProfileCount;
329 }
330 if (Attributes)
331 FProfile.getContext().setAllAttributes(Attributes);
332 SeenMetadata = true;
333 break;
334 }
335 }
336 }
337 }
338
339 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
340 "Cannot have both context-sensitive and regular profile");
341 ProfileIsCS = (CSProfileCount > 0);
342 assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
343 "Cannot have both probe-based profiles and regular profiles");
344 ProfileIsProbeBased = (ProbeProfileCount > 0);
345 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
346 FunctionSamples::ProfileIsCS = ProfileIsCS;
347
348 if (Result == sampleprof_error::success)
349 computeSummary();
350
351 return Result;
352 }
353
hasFormat(const MemoryBuffer & Buffer)354 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
355 bool result = false;
356
357 // Check that the first non-comment line is a valid function header.
358 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
359 if (!LineIt.is_at_eof()) {
360 if ((*LineIt)[0] != ' ') {
361 uint64_t NumSamples, NumHeadSamples;
362 StringRef FName;
363 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
364 }
365 }
366
367 return result;
368 }
369
readNumber()370 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
371 unsigned NumBytesRead = 0;
372 std::error_code EC;
373 uint64_t Val = decodeULEB128(Data, &NumBytesRead);
374
375 if (Val > std::numeric_limits<T>::max())
376 EC = sampleprof_error::malformed;
377 else if (Data + NumBytesRead > End)
378 EC = sampleprof_error::truncated;
379 else
380 EC = sampleprof_error::success;
381
382 if (EC) {
383 reportError(0, EC.message());
384 return EC;
385 }
386
387 Data += NumBytesRead;
388 return static_cast<T>(Val);
389 }
390
readString()391 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
392 std::error_code EC;
393 StringRef Str(reinterpret_cast<const char *>(Data));
394 if (Data + Str.size() + 1 > End) {
395 EC = sampleprof_error::truncated;
396 reportError(0, EC.message());
397 return EC;
398 }
399
400 Data += Str.size() + 1;
401 return Str;
402 }
403
404 template <typename T>
readUnencodedNumber()405 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
406 std::error_code EC;
407
408 if (Data + sizeof(T) > End) {
409 EC = sampleprof_error::truncated;
410 reportError(0, EC.message());
411 return EC;
412 }
413
414 using namespace support;
415 T Val = endian::readNext<T, little, unaligned>(Data);
416 return Val;
417 }
418
419 template <typename T>
readStringIndex(T & Table)420 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
421 std::error_code EC;
422 auto Idx = readNumber<uint32_t>();
423 if (std::error_code EC = Idx.getError())
424 return EC;
425 if (*Idx >= Table.size())
426 return sampleprof_error::truncated_name_table;
427 return *Idx;
428 }
429
readStringFromTable()430 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
431 auto Idx = readStringIndex(NameTable);
432 if (std::error_code EC = Idx.getError())
433 return EC;
434
435 return NameTable[*Idx];
436 }
437
readStringFromTable()438 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
439 if (!FixedLengthMD5)
440 return SampleProfileReaderBinary::readStringFromTable();
441
442 // read NameTable index.
443 auto Idx = readStringIndex(NameTable);
444 if (std::error_code EC = Idx.getError())
445 return EC;
446
447 // Check whether the name to be accessed has been accessed before,
448 // if not, read it from memory directly.
449 StringRef &SR = NameTable[*Idx];
450 if (SR.empty()) {
451 const uint8_t *SavedData = Data;
452 Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
453 auto FID = readUnencodedNumber<uint64_t>();
454 if (std::error_code EC = FID.getError())
455 return EC;
456 // Save the string converted from uint64_t in MD5StringBuf. All the
457 // references to the name are all StringRefs refering to the string
458 // in MD5StringBuf.
459 MD5StringBuf->push_back(std::to_string(*FID));
460 SR = MD5StringBuf->back();
461 Data = SavedData;
462 }
463 return SR;
464 }
465
readStringFromTable()466 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
467 auto Idx = readStringIndex(NameTable);
468 if (std::error_code EC = Idx.getError())
469 return EC;
470
471 return StringRef(NameTable[*Idx]);
472 }
473
474 std::error_code
readProfile(FunctionSamples & FProfile)475 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
476 auto NumSamples = readNumber<uint64_t>();
477 if (std::error_code EC = NumSamples.getError())
478 return EC;
479 FProfile.addTotalSamples(*NumSamples);
480
481 // Read the samples in the body.
482 auto NumRecords = readNumber<uint32_t>();
483 if (std::error_code EC = NumRecords.getError())
484 return EC;
485
486 for (uint32_t I = 0; I < *NumRecords; ++I) {
487 auto LineOffset = readNumber<uint64_t>();
488 if (std::error_code EC = LineOffset.getError())
489 return EC;
490
491 if (!isOffsetLegal(*LineOffset)) {
492 return std::error_code();
493 }
494
495 auto Discriminator = readNumber<uint64_t>();
496 if (std::error_code EC = Discriminator.getError())
497 return EC;
498
499 auto NumSamples = readNumber<uint64_t>();
500 if (std::error_code EC = NumSamples.getError())
501 return EC;
502
503 auto NumCalls = readNumber<uint32_t>();
504 if (std::error_code EC = NumCalls.getError())
505 return EC;
506
507 for (uint32_t J = 0; J < *NumCalls; ++J) {
508 auto CalledFunction(readStringFromTable());
509 if (std::error_code EC = CalledFunction.getError())
510 return EC;
511
512 auto CalledFunctionSamples = readNumber<uint64_t>();
513 if (std::error_code EC = CalledFunctionSamples.getError())
514 return EC;
515
516 FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
517 *CalledFunction, *CalledFunctionSamples);
518 }
519
520 FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
521 }
522
523 // Read all the samples for inlined function calls.
524 auto NumCallsites = readNumber<uint32_t>();
525 if (std::error_code EC = NumCallsites.getError())
526 return EC;
527
528 for (uint32_t J = 0; J < *NumCallsites; ++J) {
529 auto LineOffset = readNumber<uint64_t>();
530 if (std::error_code EC = LineOffset.getError())
531 return EC;
532
533 auto Discriminator = readNumber<uint64_t>();
534 if (std::error_code EC = Discriminator.getError())
535 return EC;
536
537 auto FName(readStringFromTable());
538 if (std::error_code EC = FName.getError())
539 return EC;
540
541 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
542 LineLocation(*LineOffset, *Discriminator))[std::string(*FName)];
543 CalleeProfile.setName(*FName);
544 if (std::error_code EC = readProfile(CalleeProfile))
545 return EC;
546 }
547
548 return sampleprof_error::success;
549 }
550
551 std::error_code
readFuncProfile(const uint8_t * Start)552 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
553 Data = Start;
554 auto NumHeadSamples = readNumber<uint64_t>();
555 if (std::error_code EC = NumHeadSamples.getError())
556 return EC;
557
558 auto FName(readStringFromTable());
559 if (std::error_code EC = FName.getError())
560 return EC;
561
562 SampleContext FContext(*FName);
563 Profiles[FContext] = FunctionSamples();
564 FunctionSamples &FProfile = Profiles[FContext];
565 FProfile.setName(FContext.getNameWithoutContext());
566 FProfile.setContext(FContext);
567 FProfile.addHeadSamples(*NumHeadSamples);
568
569 if (FContext.hasContext())
570 CSProfileCount++;
571
572 if (std::error_code EC = readProfile(FProfile))
573 return EC;
574 return sampleprof_error::success;
575 }
576
readImpl()577 std::error_code SampleProfileReaderBinary::readImpl() {
578 while (!at_eof()) {
579 if (std::error_code EC = readFuncProfile(Data))
580 return EC;
581 }
582
583 return sampleprof_error::success;
584 }
585
readOneSection(const uint8_t * Start,uint64_t Size,const SecHdrTableEntry & Entry)586 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
587 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
588 Data = Start;
589 End = Start + Size;
590 switch (Entry.Type) {
591 case SecProfSummary:
592 if (std::error_code EC = readSummary())
593 return EC;
594 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
595 Summary->setPartialProfile(true);
596 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
597 FunctionSamples::ProfileIsCS = ProfileIsCS = true;
598 break;
599 case SecNameTable: {
600 FixedLengthMD5 =
601 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
602 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
603 assert((!FixedLengthMD5 || UseMD5) &&
604 "If FixedLengthMD5 is true, UseMD5 has to be true");
605 FunctionSamples::HasUniqSuffix =
606 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
607 if (std::error_code EC = readNameTableSec(UseMD5))
608 return EC;
609 break;
610 }
611 case SecLBRProfile:
612 if (std::error_code EC = readFuncProfiles())
613 return EC;
614 break;
615 case SecFuncOffsetTable:
616 if (std::error_code EC = readFuncOffsetTable())
617 return EC;
618 break;
619 case SecFuncMetadata: {
620 ProfileIsProbeBased =
621 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
622 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
623 bool HasAttribute =
624 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
625 if (std::error_code EC = readFuncMetadata(HasAttribute))
626 return EC;
627 break;
628 }
629 case SecProfileSymbolList:
630 if (std::error_code EC = readProfileSymbolList())
631 return EC;
632 break;
633 default:
634 if (std::error_code EC = readCustomSection(Entry))
635 return EC;
636 break;
637 }
638 return sampleprof_error::success;
639 }
640
collectFuncsFromModule()641 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
642 if (!M)
643 return false;
644 FuncsToUse.clear();
645 for (auto &F : *M)
646 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
647 return true;
648 }
649
readFuncOffsetTable()650 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
651 // If there are more than one FuncOffsetTable, the profile read associated
652 // with previous FuncOffsetTable has to be done before next FuncOffsetTable
653 // is read.
654 FuncOffsetTable.clear();
655
656 auto Size = readNumber<uint64_t>();
657 if (std::error_code EC = Size.getError())
658 return EC;
659
660 FuncOffsetTable.reserve(*Size);
661 for (uint32_t I = 0; I < *Size; ++I) {
662 auto FName(readStringFromTable());
663 if (std::error_code EC = FName.getError())
664 return EC;
665
666 auto Offset = readNumber<uint64_t>();
667 if (std::error_code EC = Offset.getError())
668 return EC;
669
670 FuncOffsetTable[*FName] = *Offset;
671 }
672 return sampleprof_error::success;
673 }
674
readFuncProfiles()675 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
676 // Collect functions used by current module if the Reader has been
677 // given a module.
678 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
679 // which will query FunctionSamples::HasUniqSuffix, so it has to be
680 // called after FunctionSamples::HasUniqSuffix is set, i.e. after
681 // NameTable section is read.
682 bool LoadFuncsToBeUsed = collectFuncsFromModule();
683
684 // When LoadFuncsToBeUsed is false, load all the function profiles.
685 const uint8_t *Start = Data;
686 if (!LoadFuncsToBeUsed) {
687 while (Data < End) {
688 if (std::error_code EC = readFuncProfile(Data))
689 return EC;
690 }
691 assert(Data == End && "More data is read than expected");
692 } else {
693 // Load function profiles on demand.
694 if (Remapper) {
695 for (auto Name : FuncsToUse) {
696 Remapper->insert(Name);
697 }
698 }
699
700 if (useMD5()) {
701 for (auto Name : FuncsToUse) {
702 auto GUID = std::to_string(MD5Hash(Name));
703 auto iter = FuncOffsetTable.find(StringRef(GUID));
704 if (iter == FuncOffsetTable.end())
705 continue;
706 const uint8_t *FuncProfileAddr = Start + iter->second;
707 assert(FuncProfileAddr < End && "out of LBRProfile section");
708 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
709 return EC;
710 }
711 } else if (FunctionSamples::ProfileIsCS) {
712 // Compute the ordered set of names, so we can
713 // get all context profiles under a subtree by
714 // iterating through the ordered names.
715 struct Comparer {
716 // Ignore the closing ']' when ordering context
717 bool operator()(const StringRef &L, const StringRef &R) const {
718 return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1);
719 }
720 };
721 std::set<StringRef, Comparer> OrderedNames;
722 for (auto Name : FuncOffsetTable) {
723 OrderedNames.insert(Name.first);
724 }
725
726 // For each function in current module, load all
727 // context profiles for the function.
728 for (auto NameOffset : FuncOffsetTable) {
729 StringRef ContextName = NameOffset.first;
730 SampleContext FContext(ContextName);
731 auto FuncName = FContext.getNameWithoutContext();
732 if (!FuncsToUse.count(FuncName) &&
733 (!Remapper || !Remapper->exist(FuncName)))
734 continue;
735
736 // For each context profile we need, try to load
737 // all context profile in the subtree. This can
738 // help profile guided importing for ThinLTO.
739 auto It = OrderedNames.find(ContextName);
740 while (It != OrderedNames.end() &&
741 It->startswith(ContextName.substr(0, ContextName.size() - 1))) {
742 const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It];
743 assert(FuncProfileAddr < End && "out of LBRProfile section");
744 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
745 return EC;
746 // Remove loaded context profile so we won't
747 // load it repeatedly.
748 It = OrderedNames.erase(It);
749 }
750 }
751 } else {
752 for (auto NameOffset : FuncOffsetTable) {
753 SampleContext FContext(NameOffset.first);
754 auto FuncName = FContext.getNameWithoutContext();
755 if (!FuncsToUse.count(FuncName) &&
756 (!Remapper || !Remapper->exist(FuncName)))
757 continue;
758 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
759 assert(FuncProfileAddr < End && "out of LBRProfile section");
760 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
761 return EC;
762 }
763 }
764 Data = End;
765 }
766 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
767 "Cannot have both context-sensitive and regular profile");
768 assert(ProfileIsCS == (CSProfileCount > 0) &&
769 "Section flag should be consistent with actual profile");
770 return sampleprof_error::success;
771 }
772
readProfileSymbolList()773 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
774 if (!ProfSymList)
775 ProfSymList = std::make_unique<ProfileSymbolList>();
776
777 if (std::error_code EC = ProfSymList->read(Data, End - Data))
778 return EC;
779
780 Data = End;
781 return sampleprof_error::success;
782 }
783
decompressSection(const uint8_t * SecStart,const uint64_t SecSize,const uint8_t * & DecompressBuf,uint64_t & DecompressBufSize)784 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
785 const uint8_t *SecStart, const uint64_t SecSize,
786 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
787 Data = SecStart;
788 End = SecStart + SecSize;
789 auto DecompressSize = readNumber<uint64_t>();
790 if (std::error_code EC = DecompressSize.getError())
791 return EC;
792 DecompressBufSize = *DecompressSize;
793
794 auto CompressSize = readNumber<uint64_t>();
795 if (std::error_code EC = CompressSize.getError())
796 return EC;
797
798 if (!llvm::zlib::isAvailable())
799 return sampleprof_error::zlib_unavailable;
800
801 StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
802 *CompressSize);
803 char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
804 size_t UCSize = DecompressBufSize;
805 llvm::Error E =
806 zlib::uncompress(CompressedStrings, Buffer, UCSize);
807 if (E)
808 return sampleprof_error::uncompress_failed;
809 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
810 return sampleprof_error::success;
811 }
812
readImpl()813 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
814 const uint8_t *BufStart =
815 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
816
817 for (auto &Entry : SecHdrTable) {
818 // Skip empty section.
819 if (!Entry.Size)
820 continue;
821
822 // Skip sections without context when SkipFlatProf is true.
823 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
824 continue;
825
826 const uint8_t *SecStart = BufStart + Entry.Offset;
827 uint64_t SecSize = Entry.Size;
828
829 // If the section is compressed, decompress it into a buffer
830 // DecompressBuf before reading the actual data. The pointee of
831 // 'Data' will be changed to buffer hold by DecompressBuf
832 // temporarily when reading the actual data.
833 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
834 if (isCompressed) {
835 const uint8_t *DecompressBuf;
836 uint64_t DecompressBufSize;
837 if (std::error_code EC = decompressSection(
838 SecStart, SecSize, DecompressBuf, DecompressBufSize))
839 return EC;
840 SecStart = DecompressBuf;
841 SecSize = DecompressBufSize;
842 }
843
844 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
845 return EC;
846 if (Data != SecStart + SecSize)
847 return sampleprof_error::malformed;
848
849 // Change the pointee of 'Data' from DecompressBuf to original Buffer.
850 if (isCompressed) {
851 Data = BufStart + Entry.Offset;
852 End = BufStart + Buffer->getBufferSize();
853 }
854 }
855
856 return sampleprof_error::success;
857 }
858
readImpl()859 std::error_code SampleProfileReaderCompactBinary::readImpl() {
860 // Collect functions used by current module if the Reader has been
861 // given a module.
862 bool LoadFuncsToBeUsed = collectFuncsFromModule();
863
864 std::vector<uint64_t> OffsetsToUse;
865 if (!LoadFuncsToBeUsed) {
866 // load all the function profiles.
867 for (auto FuncEntry : FuncOffsetTable) {
868 OffsetsToUse.push_back(FuncEntry.second);
869 }
870 } else {
871 // load function profiles on demand.
872 for (auto Name : FuncsToUse) {
873 auto GUID = std::to_string(MD5Hash(Name));
874 auto iter = FuncOffsetTable.find(StringRef(GUID));
875 if (iter == FuncOffsetTable.end())
876 continue;
877 OffsetsToUse.push_back(iter->second);
878 }
879 }
880
881 for (auto Offset : OffsetsToUse) {
882 const uint8_t *SavedData = Data;
883 if (std::error_code EC = readFuncProfile(
884 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
885 Offset))
886 return EC;
887 Data = SavedData;
888 }
889 return sampleprof_error::success;
890 }
891
verifySPMagic(uint64_t Magic)892 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
893 if (Magic == SPMagic())
894 return sampleprof_error::success;
895 return sampleprof_error::bad_magic;
896 }
897
verifySPMagic(uint64_t Magic)898 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
899 if (Magic == SPMagic(SPF_Ext_Binary))
900 return sampleprof_error::success;
901 return sampleprof_error::bad_magic;
902 }
903
904 std::error_code
verifySPMagic(uint64_t Magic)905 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
906 if (Magic == SPMagic(SPF_Compact_Binary))
907 return sampleprof_error::success;
908 return sampleprof_error::bad_magic;
909 }
910
readNameTable()911 std::error_code SampleProfileReaderBinary::readNameTable() {
912 auto Size = readNumber<uint32_t>();
913 if (std::error_code EC = Size.getError())
914 return EC;
915 NameTable.reserve(*Size + NameTable.size());
916 for (uint32_t I = 0; I < *Size; ++I) {
917 auto Name(readString());
918 if (std::error_code EC = Name.getError())
919 return EC;
920 NameTable.push_back(*Name);
921 }
922
923 return sampleprof_error::success;
924 }
925
readMD5NameTable()926 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
927 auto Size = readNumber<uint64_t>();
928 if (std::error_code EC = Size.getError())
929 return EC;
930 MD5StringBuf = std::make_unique<std::vector<std::string>>();
931 MD5StringBuf->reserve(*Size);
932 if (FixedLengthMD5) {
933 // Preallocate and initialize NameTable so we can check whether a name
934 // index has been read before by checking whether the element in the
935 // NameTable is empty, meanwhile readStringIndex can do the boundary
936 // check using the size of NameTable.
937 NameTable.resize(*Size + NameTable.size());
938
939 MD5NameMemStart = Data;
940 Data = Data + (*Size) * sizeof(uint64_t);
941 return sampleprof_error::success;
942 }
943 NameTable.reserve(*Size);
944 for (uint32_t I = 0; I < *Size; ++I) {
945 auto FID = readNumber<uint64_t>();
946 if (std::error_code EC = FID.getError())
947 return EC;
948 MD5StringBuf->push_back(std::to_string(*FID));
949 // NameTable is a vector of StringRef. Here it is pushing back a
950 // StringRef initialized with the last string in MD5stringBuf.
951 NameTable.push_back(MD5StringBuf->back());
952 }
953 return sampleprof_error::success;
954 }
955
readNameTableSec(bool IsMD5)956 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
957 if (IsMD5)
958 return readMD5NameTable();
959 return SampleProfileReaderBinary::readNameTable();
960 }
961
962 std::error_code
readFuncMetadata(bool ProfileHasAttribute)963 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
964 while (Data < End) {
965 auto FName(readStringFromTable());
966 if (std::error_code EC = FName.getError())
967 return EC;
968
969 SampleContext FContext(*FName);
970 bool ProfileInMap = Profiles.count(FContext);
971
972 if (ProfileIsProbeBased) {
973 auto Checksum = readNumber<uint64_t>();
974 if (std::error_code EC = Checksum.getError())
975 return EC;
976 if (ProfileInMap)
977 Profiles[FContext].setFunctionHash(*Checksum);
978 }
979
980 if (ProfileHasAttribute) {
981 auto Attributes = readNumber<uint32_t>();
982 if (std::error_code EC = Attributes.getError())
983 return EC;
984 if (ProfileInMap)
985 Profiles[FContext].getContext().setAllAttributes(*Attributes);
986 }
987 }
988
989 assert(Data == End && "More data is read than expected");
990 return sampleprof_error::success;
991 }
992
readNameTable()993 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
994 auto Size = readNumber<uint64_t>();
995 if (std::error_code EC = Size.getError())
996 return EC;
997 NameTable.reserve(*Size);
998 for (uint32_t I = 0; I < *Size; ++I) {
999 auto FID = readNumber<uint64_t>();
1000 if (std::error_code EC = FID.getError())
1001 return EC;
1002 NameTable.push_back(std::to_string(*FID));
1003 }
1004 return sampleprof_error::success;
1005 }
1006
1007 std::error_code
readSecHdrTableEntry(uint32_t Idx)1008 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
1009 SecHdrTableEntry Entry;
1010 auto Type = readUnencodedNumber<uint64_t>();
1011 if (std::error_code EC = Type.getError())
1012 return EC;
1013 Entry.Type = static_cast<SecType>(*Type);
1014
1015 auto Flags = readUnencodedNumber<uint64_t>();
1016 if (std::error_code EC = Flags.getError())
1017 return EC;
1018 Entry.Flags = *Flags;
1019
1020 auto Offset = readUnencodedNumber<uint64_t>();
1021 if (std::error_code EC = Offset.getError())
1022 return EC;
1023 Entry.Offset = *Offset;
1024
1025 auto Size = readUnencodedNumber<uint64_t>();
1026 if (std::error_code EC = Size.getError())
1027 return EC;
1028 Entry.Size = *Size;
1029
1030 Entry.LayoutIndex = Idx;
1031 SecHdrTable.push_back(std::move(Entry));
1032 return sampleprof_error::success;
1033 }
1034
readSecHdrTable()1035 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1036 auto EntryNum = readUnencodedNumber<uint64_t>();
1037 if (std::error_code EC = EntryNum.getError())
1038 return EC;
1039
1040 for (uint32_t i = 0; i < (*EntryNum); i++)
1041 if (std::error_code EC = readSecHdrTableEntry(i))
1042 return EC;
1043
1044 return sampleprof_error::success;
1045 }
1046
readHeader()1047 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1048 const uint8_t *BufStart =
1049 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1050 Data = BufStart;
1051 End = BufStart + Buffer->getBufferSize();
1052
1053 if (std::error_code EC = readMagicIdent())
1054 return EC;
1055
1056 if (std::error_code EC = readSecHdrTable())
1057 return EC;
1058
1059 return sampleprof_error::success;
1060 }
1061
getSectionSize(SecType Type)1062 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1063 uint64_t Size = 0;
1064 for (auto &Entry : SecHdrTable) {
1065 if (Entry.Type == Type)
1066 Size += Entry.Size;
1067 }
1068 return Size;
1069 }
1070
getFileSize()1071 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1072 // Sections in SecHdrTable is not necessarily in the same order as
1073 // sections in the profile because section like FuncOffsetTable needs
1074 // to be written after section LBRProfile but needs to be read before
1075 // section LBRProfile, so we cannot simply use the last entry in
1076 // SecHdrTable to calculate the file size.
1077 uint64_t FileSize = 0;
1078 for (auto &Entry : SecHdrTable) {
1079 FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1080 }
1081 return FileSize;
1082 }
1083
getSecFlagsStr(const SecHdrTableEntry & Entry)1084 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1085 std::string Flags;
1086 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1087 Flags.append("{compressed,");
1088 else
1089 Flags.append("{");
1090
1091 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1092 Flags.append("flat,");
1093
1094 switch (Entry.Type) {
1095 case SecNameTable:
1096 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1097 Flags.append("fixlenmd5,");
1098 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1099 Flags.append("md5,");
1100 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1101 Flags.append("uniq,");
1102 break;
1103 case SecProfSummary:
1104 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1105 Flags.append("partial,");
1106 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1107 Flags.append("context,");
1108 break;
1109 default:
1110 break;
1111 }
1112 char &last = Flags.back();
1113 if (last == ',')
1114 last = '}';
1115 else
1116 Flags.append("}");
1117 return Flags;
1118 }
1119
dumpSectionInfo(raw_ostream & OS)1120 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1121 uint64_t TotalSecsSize = 0;
1122 for (auto &Entry : SecHdrTable) {
1123 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1124 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1125 << "\n";
1126 ;
1127 TotalSecsSize += Entry.Size;
1128 }
1129 uint64_t HeaderSize = SecHdrTable.front().Offset;
1130 assert(HeaderSize + TotalSecsSize == getFileSize() &&
1131 "Size of 'header + sections' doesn't match the total size of profile");
1132
1133 OS << "Header Size: " << HeaderSize << "\n";
1134 OS << "Total Sections Size: " << TotalSecsSize << "\n";
1135 OS << "File Size: " << getFileSize() << "\n";
1136 return true;
1137 }
1138
readMagicIdent()1139 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1140 // Read and check the magic identifier.
1141 auto Magic = readNumber<uint64_t>();
1142 if (std::error_code EC = Magic.getError())
1143 return EC;
1144 else if (std::error_code EC = verifySPMagic(*Magic))
1145 return EC;
1146
1147 // Read the version number.
1148 auto Version = readNumber<uint64_t>();
1149 if (std::error_code EC = Version.getError())
1150 return EC;
1151 else if (*Version != SPVersion())
1152 return sampleprof_error::unsupported_version;
1153
1154 return sampleprof_error::success;
1155 }
1156
readHeader()1157 std::error_code SampleProfileReaderBinary::readHeader() {
1158 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1159 End = Data + Buffer->getBufferSize();
1160
1161 if (std::error_code EC = readMagicIdent())
1162 return EC;
1163
1164 if (std::error_code EC = readSummary())
1165 return EC;
1166
1167 if (std::error_code EC = readNameTable())
1168 return EC;
1169 return sampleprof_error::success;
1170 }
1171
readHeader()1172 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1173 SampleProfileReaderBinary::readHeader();
1174 if (std::error_code EC = readFuncOffsetTable())
1175 return EC;
1176 return sampleprof_error::success;
1177 }
1178
readFuncOffsetTable()1179 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1180 auto TableOffset = readUnencodedNumber<uint64_t>();
1181 if (std::error_code EC = TableOffset.getError())
1182 return EC;
1183
1184 const uint8_t *SavedData = Data;
1185 const uint8_t *TableStart =
1186 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1187 *TableOffset;
1188 Data = TableStart;
1189
1190 auto Size = readNumber<uint64_t>();
1191 if (std::error_code EC = Size.getError())
1192 return EC;
1193
1194 FuncOffsetTable.reserve(*Size);
1195 for (uint32_t I = 0; I < *Size; ++I) {
1196 auto FName(readStringFromTable());
1197 if (std::error_code EC = FName.getError())
1198 return EC;
1199
1200 auto Offset = readNumber<uint64_t>();
1201 if (std::error_code EC = Offset.getError())
1202 return EC;
1203
1204 FuncOffsetTable[*FName] = *Offset;
1205 }
1206 End = TableStart;
1207 Data = SavedData;
1208 return sampleprof_error::success;
1209 }
1210
collectFuncsFromModule()1211 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1212 if (!M)
1213 return false;
1214 FuncsToUse.clear();
1215 for (auto &F : *M)
1216 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1217 return true;
1218 }
1219
readSummaryEntry(std::vector<ProfileSummaryEntry> & Entries)1220 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1221 std::vector<ProfileSummaryEntry> &Entries) {
1222 auto Cutoff = readNumber<uint64_t>();
1223 if (std::error_code EC = Cutoff.getError())
1224 return EC;
1225
1226 auto MinBlockCount = readNumber<uint64_t>();
1227 if (std::error_code EC = MinBlockCount.getError())
1228 return EC;
1229
1230 auto NumBlocks = readNumber<uint64_t>();
1231 if (std::error_code EC = NumBlocks.getError())
1232 return EC;
1233
1234 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1235 return sampleprof_error::success;
1236 }
1237
readSummary()1238 std::error_code SampleProfileReaderBinary::readSummary() {
1239 auto TotalCount = readNumber<uint64_t>();
1240 if (std::error_code EC = TotalCount.getError())
1241 return EC;
1242
1243 auto MaxBlockCount = readNumber<uint64_t>();
1244 if (std::error_code EC = MaxBlockCount.getError())
1245 return EC;
1246
1247 auto MaxFunctionCount = readNumber<uint64_t>();
1248 if (std::error_code EC = MaxFunctionCount.getError())
1249 return EC;
1250
1251 auto NumBlocks = readNumber<uint64_t>();
1252 if (std::error_code EC = NumBlocks.getError())
1253 return EC;
1254
1255 auto NumFunctions = readNumber<uint64_t>();
1256 if (std::error_code EC = NumFunctions.getError())
1257 return EC;
1258
1259 auto NumSummaryEntries = readNumber<uint64_t>();
1260 if (std::error_code EC = NumSummaryEntries.getError())
1261 return EC;
1262
1263 std::vector<ProfileSummaryEntry> Entries;
1264 for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1265 std::error_code EC = readSummaryEntry(Entries);
1266 if (EC != sampleprof_error::success)
1267 return EC;
1268 }
1269 Summary = std::make_unique<ProfileSummary>(
1270 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1271 *MaxFunctionCount, *NumBlocks, *NumFunctions);
1272
1273 return sampleprof_error::success;
1274 }
1275
hasFormat(const MemoryBuffer & Buffer)1276 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1277 const uint8_t *Data =
1278 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1279 uint64_t Magic = decodeULEB128(Data);
1280 return Magic == SPMagic();
1281 }
1282
hasFormat(const MemoryBuffer & Buffer)1283 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1284 const uint8_t *Data =
1285 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1286 uint64_t Magic = decodeULEB128(Data);
1287 return Magic == SPMagic(SPF_Ext_Binary);
1288 }
1289
hasFormat(const MemoryBuffer & Buffer)1290 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1291 const uint8_t *Data =
1292 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1293 uint64_t Magic = decodeULEB128(Data);
1294 return Magic == SPMagic(SPF_Compact_Binary);
1295 }
1296
skipNextWord()1297 std::error_code SampleProfileReaderGCC::skipNextWord() {
1298 uint32_t dummy;
1299 if (!GcovBuffer.readInt(dummy))
1300 return sampleprof_error::truncated;
1301 return sampleprof_error::success;
1302 }
1303
readNumber()1304 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1305 if (sizeof(T) <= sizeof(uint32_t)) {
1306 uint32_t Val;
1307 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1308 return static_cast<T>(Val);
1309 } else if (sizeof(T) <= sizeof(uint64_t)) {
1310 uint64_t Val;
1311 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1312 return static_cast<T>(Val);
1313 }
1314
1315 std::error_code EC = sampleprof_error::malformed;
1316 reportError(0, EC.message());
1317 return EC;
1318 }
1319
readString()1320 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1321 StringRef Str;
1322 if (!GcovBuffer.readString(Str))
1323 return sampleprof_error::truncated;
1324 return Str;
1325 }
1326
readHeader()1327 std::error_code SampleProfileReaderGCC::readHeader() {
1328 // Read the magic identifier.
1329 if (!GcovBuffer.readGCDAFormat())
1330 return sampleprof_error::unrecognized_format;
1331
1332 // Read the version number. Note - the GCC reader does not validate this
1333 // version, but the profile creator generates v704.
1334 GCOV::GCOVVersion version;
1335 if (!GcovBuffer.readGCOVVersion(version))
1336 return sampleprof_error::unrecognized_format;
1337
1338 if (version != GCOV::V407)
1339 return sampleprof_error::unsupported_version;
1340
1341 // Skip the empty integer.
1342 if (std::error_code EC = skipNextWord())
1343 return EC;
1344
1345 return sampleprof_error::success;
1346 }
1347
readSectionTag(uint32_t Expected)1348 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1349 uint32_t Tag;
1350 if (!GcovBuffer.readInt(Tag))
1351 return sampleprof_error::truncated;
1352
1353 if (Tag != Expected)
1354 return sampleprof_error::malformed;
1355
1356 if (std::error_code EC = skipNextWord())
1357 return EC;
1358
1359 return sampleprof_error::success;
1360 }
1361
readNameTable()1362 std::error_code SampleProfileReaderGCC::readNameTable() {
1363 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1364 return EC;
1365
1366 uint32_t Size;
1367 if (!GcovBuffer.readInt(Size))
1368 return sampleprof_error::truncated;
1369
1370 for (uint32_t I = 0; I < Size; ++I) {
1371 StringRef Str;
1372 if (!GcovBuffer.readString(Str))
1373 return sampleprof_error::truncated;
1374 Names.push_back(std::string(Str));
1375 }
1376
1377 return sampleprof_error::success;
1378 }
1379
readFunctionProfiles()1380 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1381 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1382 return EC;
1383
1384 uint32_t NumFunctions;
1385 if (!GcovBuffer.readInt(NumFunctions))
1386 return sampleprof_error::truncated;
1387
1388 InlineCallStack Stack;
1389 for (uint32_t I = 0; I < NumFunctions; ++I)
1390 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1391 return EC;
1392
1393 computeSummary();
1394 return sampleprof_error::success;
1395 }
1396
readOneFunctionProfile(const InlineCallStack & InlineStack,bool Update,uint32_t Offset)1397 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1398 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1399 uint64_t HeadCount = 0;
1400 if (InlineStack.size() == 0)
1401 if (!GcovBuffer.readInt64(HeadCount))
1402 return sampleprof_error::truncated;
1403
1404 uint32_t NameIdx;
1405 if (!GcovBuffer.readInt(NameIdx))
1406 return sampleprof_error::truncated;
1407
1408 StringRef Name(Names[NameIdx]);
1409
1410 uint32_t NumPosCounts;
1411 if (!GcovBuffer.readInt(NumPosCounts))
1412 return sampleprof_error::truncated;
1413
1414 uint32_t NumCallsites;
1415 if (!GcovBuffer.readInt(NumCallsites))
1416 return sampleprof_error::truncated;
1417
1418 FunctionSamples *FProfile = nullptr;
1419 if (InlineStack.size() == 0) {
1420 // If this is a top function that we have already processed, do not
1421 // update its profile again. This happens in the presence of
1422 // function aliases. Since these aliases share the same function
1423 // body, there will be identical replicated profiles for the
1424 // original function. In this case, we simply not bother updating
1425 // the profile of the original function.
1426 FProfile = &Profiles[Name];
1427 FProfile->addHeadSamples(HeadCount);
1428 if (FProfile->getTotalSamples() > 0)
1429 Update = false;
1430 } else {
1431 // Otherwise, we are reading an inlined instance. The top of the
1432 // inline stack contains the profile of the caller. Insert this
1433 // callee in the caller's CallsiteMap.
1434 FunctionSamples *CallerProfile = InlineStack.front();
1435 uint32_t LineOffset = Offset >> 16;
1436 uint32_t Discriminator = Offset & 0xffff;
1437 FProfile = &CallerProfile->functionSamplesAt(
1438 LineLocation(LineOffset, Discriminator))[std::string(Name)];
1439 }
1440 FProfile->setName(Name);
1441
1442 for (uint32_t I = 0; I < NumPosCounts; ++I) {
1443 uint32_t Offset;
1444 if (!GcovBuffer.readInt(Offset))
1445 return sampleprof_error::truncated;
1446
1447 uint32_t NumTargets;
1448 if (!GcovBuffer.readInt(NumTargets))
1449 return sampleprof_error::truncated;
1450
1451 uint64_t Count;
1452 if (!GcovBuffer.readInt64(Count))
1453 return sampleprof_error::truncated;
1454
1455 // The line location is encoded in the offset as:
1456 // high 16 bits: line offset to the start of the function.
1457 // low 16 bits: discriminator.
1458 uint32_t LineOffset = Offset >> 16;
1459 uint32_t Discriminator = Offset & 0xffff;
1460
1461 InlineCallStack NewStack;
1462 NewStack.push_back(FProfile);
1463 llvm::append_range(NewStack, InlineStack);
1464 if (Update) {
1465 // Walk up the inline stack, adding the samples on this line to
1466 // the total sample count of the callers in the chain.
1467 for (auto CallerProfile : NewStack)
1468 CallerProfile->addTotalSamples(Count);
1469
1470 // Update the body samples for the current profile.
1471 FProfile->addBodySamples(LineOffset, Discriminator, Count);
1472 }
1473
1474 // Process the list of functions called at an indirect call site.
1475 // These are all the targets that a function pointer (or virtual
1476 // function) resolved at runtime.
1477 for (uint32_t J = 0; J < NumTargets; J++) {
1478 uint32_t HistVal;
1479 if (!GcovBuffer.readInt(HistVal))
1480 return sampleprof_error::truncated;
1481
1482 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1483 return sampleprof_error::malformed;
1484
1485 uint64_t TargetIdx;
1486 if (!GcovBuffer.readInt64(TargetIdx))
1487 return sampleprof_error::truncated;
1488 StringRef TargetName(Names[TargetIdx]);
1489
1490 uint64_t TargetCount;
1491 if (!GcovBuffer.readInt64(TargetCount))
1492 return sampleprof_error::truncated;
1493
1494 if (Update)
1495 FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1496 TargetName, TargetCount);
1497 }
1498 }
1499
1500 // Process all the inlined callers into the current function. These
1501 // are all the callsites that were inlined into this function.
1502 for (uint32_t I = 0; I < NumCallsites; I++) {
1503 // The offset is encoded as:
1504 // high 16 bits: line offset to the start of the function.
1505 // low 16 bits: discriminator.
1506 uint32_t Offset;
1507 if (!GcovBuffer.readInt(Offset))
1508 return sampleprof_error::truncated;
1509 InlineCallStack NewStack;
1510 NewStack.push_back(FProfile);
1511 llvm::append_range(NewStack, InlineStack);
1512 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1513 return EC;
1514 }
1515
1516 return sampleprof_error::success;
1517 }
1518
1519 /// Read a GCC AutoFDO profile.
1520 ///
1521 /// This format is generated by the Linux Perf conversion tool at
1522 /// https://github.com/google/autofdo.
readImpl()1523 std::error_code SampleProfileReaderGCC::readImpl() {
1524 // Read the string table.
1525 if (std::error_code EC = readNameTable())
1526 return EC;
1527
1528 // Read the source profile.
1529 if (std::error_code EC = readFunctionProfiles())
1530 return EC;
1531
1532 return sampleprof_error::success;
1533 }
1534
hasFormat(const MemoryBuffer & Buffer)1535 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1536 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1537 return Magic == "adcg*704";
1538 }
1539
applyRemapping(LLVMContext & Ctx)1540 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1541 // If the reader uses MD5 to represent string, we can't remap it because
1542 // we don't know what the original function names were.
1543 if (Reader.useMD5()) {
1544 Ctx.diagnose(DiagnosticInfoSampleProfile(
1545 Reader.getBuffer()->getBufferIdentifier(),
1546 "Profile data remapping cannot be applied to profile data "
1547 "in compact format (original mangled names are not available).",
1548 DS_Warning));
1549 return;
1550 }
1551
1552 // CSSPGO-TODO: Remapper is not yet supported.
1553 // We will need to remap the entire context string.
1554 assert(Remappings && "should be initialized while creating remapper");
1555 for (auto &Sample : Reader.getProfiles()) {
1556 DenseSet<StringRef> NamesInSample;
1557 Sample.second.findAllNames(NamesInSample);
1558 for (auto &Name : NamesInSample)
1559 if (auto Key = Remappings->insert(Name))
1560 NameMap.insert({Key, Name});
1561 }
1562
1563 RemappingApplied = true;
1564 }
1565
1566 Optional<StringRef>
lookUpNameInProfile(StringRef Fname)1567 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1568 if (auto Key = Remappings->lookup(Fname))
1569 return NameMap.lookup(Key);
1570 return None;
1571 }
1572
1573 /// Prepare a memory buffer for the contents of \p Filename.
1574 ///
1575 /// \returns an error code indicating the status of the buffer.
1576 static ErrorOr<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename)1577 setupMemoryBuffer(const Twine &Filename) {
1578 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
1579 if (std::error_code EC = BufferOrErr.getError())
1580 return EC;
1581 auto Buffer = std::move(BufferOrErr.get());
1582
1583 // Sanity check the file.
1584 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1585 return sampleprof_error::too_large;
1586
1587 return std::move(Buffer);
1588 }
1589
1590 /// Create a sample profile reader based on the format of the input file.
1591 ///
1592 /// \param Filename The file to open.
1593 ///
1594 /// \param C The LLVM context to use to emit diagnostics.
1595 ///
1596 /// \param RemapFilename The file used for profile remapping.
1597 ///
1598 /// \returns an error code indicating the status of the created reader.
1599 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(const std::string Filename,LLVMContext & C,const std::string RemapFilename)1600 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1601 const std::string RemapFilename) {
1602 auto BufferOrError = setupMemoryBuffer(Filename);
1603 if (std::error_code EC = BufferOrError.getError())
1604 return EC;
1605 return create(BufferOrError.get(), C, RemapFilename);
1606 }
1607
1608 /// Create a sample profile remapper from the given input, to remap the
1609 /// function names in the given profile data.
1610 ///
1611 /// \param Filename The file to open.
1612 ///
1613 /// \param Reader The profile reader the remapper is going to be applied to.
1614 ///
1615 /// \param C The LLVM context to use to emit diagnostics.
1616 ///
1617 /// \returns an error code indicating the status of the created reader.
1618 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(const std::string Filename,SampleProfileReader & Reader,LLVMContext & C)1619 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1620 SampleProfileReader &Reader,
1621 LLVMContext &C) {
1622 auto BufferOrError = setupMemoryBuffer(Filename);
1623 if (std::error_code EC = BufferOrError.getError())
1624 return EC;
1625 return create(BufferOrError.get(), Reader, C);
1626 }
1627
1628 /// Create a sample profile remapper from the given input, to remap the
1629 /// function names in the given profile data.
1630 ///
1631 /// \param B The memory buffer to create the reader from (assumes ownership).
1632 ///
1633 /// \param C The LLVM context to use to emit diagnostics.
1634 ///
1635 /// \param Reader The profile reader the remapper is going to be applied to.
1636 ///
1637 /// \returns an error code indicating the status of the created reader.
1638 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(std::unique_ptr<MemoryBuffer> & B,SampleProfileReader & Reader,LLVMContext & C)1639 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1640 SampleProfileReader &Reader,
1641 LLVMContext &C) {
1642 auto Remappings = std::make_unique<SymbolRemappingReader>();
1643 if (Error E = Remappings->read(*B.get())) {
1644 handleAllErrors(
1645 std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1646 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1647 ParseError.getLineNum(),
1648 ParseError.getMessage()));
1649 });
1650 return sampleprof_error::malformed;
1651 }
1652
1653 return std::make_unique<SampleProfileReaderItaniumRemapper>(
1654 std::move(B), std::move(Remappings), Reader);
1655 }
1656
1657 /// Create a sample profile reader based on the format of the input data.
1658 ///
1659 /// \param B The memory buffer to create the reader from (assumes ownership).
1660 ///
1661 /// \param C The LLVM context to use to emit diagnostics.
1662 ///
1663 /// \param RemapFilename The file used for profile remapping.
1664 ///
1665 /// \returns an error code indicating the status of the created reader.
1666 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(std::unique_ptr<MemoryBuffer> & B,LLVMContext & C,const std::string RemapFilename)1667 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1668 const std::string RemapFilename) {
1669 std::unique_ptr<SampleProfileReader> Reader;
1670 if (SampleProfileReaderRawBinary::hasFormat(*B))
1671 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1672 else if (SampleProfileReaderExtBinary::hasFormat(*B))
1673 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1674 else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1675 Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1676 else if (SampleProfileReaderGCC::hasFormat(*B))
1677 Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1678 else if (SampleProfileReaderText::hasFormat(*B))
1679 Reader.reset(new SampleProfileReaderText(std::move(B), C));
1680 else
1681 return sampleprof_error::unrecognized_format;
1682
1683 if (!RemapFilename.empty()) {
1684 auto ReaderOrErr =
1685 SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1686 if (std::error_code EC = ReaderOrErr.getError()) {
1687 std::string Msg = "Could not create remapper: " + EC.message();
1688 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1689 return EC;
1690 }
1691 Reader->Remapper = std::move(ReaderOrErr.get());
1692 }
1693
1694 FunctionSamples::Format = Reader->getFormat();
1695 if (std::error_code EC = Reader->readHeader()) {
1696 return EC;
1697 }
1698
1699 return std::move(Reader);
1700 }
1701
1702 // For text and GCC file formats, we compute the summary after reading the
1703 // profile. Binary format has the profile summary in its header.
computeSummary()1704 void SampleProfileReader::computeSummary() {
1705 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1706 Summary = Builder.computeSummaryForProfiles(Profiles);
1707 }
1708