1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the class that reads LLVM sample profiles. It 10 // supports three file formats: text, binary and gcov. 11 // 12 // The textual representation is useful for debugging and testing purposes. The 13 // binary representation is more compact, resulting in smaller file sizes. 14 // 15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation 16 // tool (https://github.com/google/autofdo) 17 // 18 // All three encodings can be used interchangeably as an input sample profile. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "llvm/ProfileData/SampleProfReader.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/IR/Module.h" 27 #include "llvm/IR/ProfileSummary.h" 28 #include "llvm/ProfileData/ProfileCommon.h" 29 #include "llvm/ProfileData/SampleProf.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Compression.h" 32 #include "llvm/Support/ErrorOr.h" 33 #include "llvm/Support/JSON.h" 34 #include "llvm/Support/LEB128.h" 35 #include "llvm/Support/LineIterator.h" 36 #include "llvm/Support/MD5.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/VirtualFileSystem.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <algorithm> 41 #include <cstddef> 42 #include <cstdint> 43 #include <limits> 44 #include <memory> 45 #include <system_error> 46 #include <vector> 47 48 using namespace llvm; 49 using namespace sampleprof; 50 51 #define DEBUG_TYPE "samplepgo-reader" 52 53 // This internal option specifies if the profile uses FS discriminators. 54 // It only applies to text, and binary format profiles. 55 // For ext-binary format profiles, the flag is set in the summary. 56 static cl::opt<bool> ProfileIsFSDisciminator( 57 "profile-isfs", cl::Hidden, cl::init(false), 58 cl::desc("Profile uses flow sensitive discriminators")); 59 60 /// Dump the function profile for \p FName. 61 /// 62 /// \param FContext Name + context of the function to print. 63 /// \param OS Stream to emit the output to. 64 void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS, 65 raw_ostream &OS) { 66 OS << "Function: " << FS.getContext().toString() << ": " << FS; 67 } 68 69 /// Dump all the function profiles found on stream \p OS. 70 void SampleProfileReader::dump(raw_ostream &OS) { 71 std::vector<NameFunctionSamples> V; 72 sortFuncProfiles(Profiles, V); 73 for (const auto &I : V) 74 dumpFunctionProfile(*I.second, OS); 75 } 76 77 static void dumpFunctionProfileJson(const FunctionSamples &S, 78 json::OStream &JOS, bool TopLevel = false) { 79 auto DumpBody = [&](const BodySampleMap &BodySamples) { 80 for (const auto &I : BodySamples) { 81 const LineLocation &Loc = I.first; 82 const SampleRecord &Sample = I.second; 83 JOS.object([&] { 84 JOS.attribute("line", Loc.LineOffset); 85 if (Loc.Discriminator) 86 JOS.attribute("discriminator", Loc.Discriminator); 87 JOS.attribute("samples", Sample.getSamples()); 88 89 auto CallTargets = Sample.getSortedCallTargets(); 90 if (!CallTargets.empty()) { 91 JOS.attributeArray("calls", [&] { 92 for (const auto &J : CallTargets) { 93 JOS.object([&] { 94 JOS.attribute("function", J.first); 95 JOS.attribute("samples", J.second); 96 }); 97 } 98 }); 99 } 100 }); 101 } 102 }; 103 104 auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) { 105 for (const auto &I : CallsiteSamples) 106 for (const auto &FS : I.second) { 107 const LineLocation &Loc = I.first; 108 const FunctionSamples &CalleeSamples = FS.second; 109 JOS.object([&] { 110 JOS.attribute("line", Loc.LineOffset); 111 if (Loc.Discriminator) 112 JOS.attribute("discriminator", Loc.Discriminator); 113 JOS.attributeArray( 114 "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); }); 115 }); 116 } 117 }; 118 119 JOS.object([&] { 120 JOS.attribute("name", S.getName()); 121 JOS.attribute("total", S.getTotalSamples()); 122 if (TopLevel) 123 JOS.attribute("head", S.getHeadSamples()); 124 125 const auto &BodySamples = S.getBodySamples(); 126 if (!BodySamples.empty()) 127 JOS.attributeArray("body", [&] { DumpBody(BodySamples); }); 128 129 const auto &CallsiteSamples = S.getCallsiteSamples(); 130 if (!CallsiteSamples.empty()) 131 JOS.attributeArray("callsites", 132 [&] { DumpCallsiteSamples(CallsiteSamples); }); 133 }); 134 } 135 136 /// Dump all the function profiles found on stream \p OS in the JSON format. 137 void SampleProfileReader::dumpJson(raw_ostream &OS) { 138 std::vector<NameFunctionSamples> V; 139 sortFuncProfiles(Profiles, V); 140 json::OStream JOS(OS, 2); 141 JOS.arrayBegin(); 142 for (const auto &F : V) 143 dumpFunctionProfileJson(*F.second, JOS, true); 144 JOS.arrayEnd(); 145 146 // Emit a newline character at the end as json::OStream doesn't emit one. 147 OS << "\n"; 148 } 149 150 /// Parse \p Input as function head. 151 /// 152 /// Parse one line of \p Input, and update function name in \p FName, 153 /// function's total sample count in \p NumSamples, function's entry 154 /// count in \p NumHeadSamples. 155 /// 156 /// \returns true if parsing is successful. 157 static bool ParseHead(const StringRef &Input, StringRef &FName, 158 uint64_t &NumSamples, uint64_t &NumHeadSamples) { 159 if (Input[0] == ' ') 160 return false; 161 size_t n2 = Input.rfind(':'); 162 size_t n1 = Input.rfind(':', n2 - 1); 163 FName = Input.substr(0, n1); 164 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) 165 return false; 166 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) 167 return false; 168 return true; 169 } 170 171 /// Returns true if line offset \p L is legal (only has 16 bits). 172 static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; } 173 174 /// Parse \p Input that contains metadata. 175 /// Possible metadata: 176 /// - CFG Checksum information: 177 /// !CFGChecksum: 12345 178 /// - CFG Checksum information: 179 /// !Attributes: 1 180 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash. 181 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash, 182 uint32_t &Attributes) { 183 if (Input.startswith("!CFGChecksum:")) { 184 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim(); 185 return !CFGInfo.getAsInteger(10, FunctionHash); 186 } 187 188 if (Input.startswith("!Attributes:")) { 189 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim(); 190 return !Attrib.getAsInteger(10, Attributes); 191 } 192 193 return false; 194 } 195 196 enum class LineType { 197 CallSiteProfile, 198 BodyProfile, 199 Metadata, 200 }; 201 202 /// Parse \p Input as line sample. 203 /// 204 /// \param Input input line. 205 /// \param LineTy Type of this line. 206 /// \param Depth the depth of the inline stack. 207 /// \param NumSamples total samples of the line/inlined callsite. 208 /// \param LineOffset line offset to the start of the function. 209 /// \param Discriminator discriminator of the line. 210 /// \param TargetCountMap map from indirect call target to count. 211 /// \param FunctionHash the function's CFG hash, used by pseudo probe. 212 /// 213 /// returns true if parsing is successful. 214 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, 215 uint64_t &NumSamples, uint32_t &LineOffset, 216 uint32_t &Discriminator, StringRef &CalleeName, 217 DenseMap<StringRef, uint64_t> &TargetCountMap, 218 uint64_t &FunctionHash, uint32_t &Attributes) { 219 for (Depth = 0; Input[Depth] == ' '; Depth++) 220 ; 221 if (Depth == 0) 222 return false; 223 224 if (Input[Depth] == '!') { 225 LineTy = LineType::Metadata; 226 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes); 227 } 228 229 size_t n1 = Input.find(':'); 230 StringRef Loc = Input.substr(Depth, n1 - Depth); 231 size_t n2 = Loc.find('.'); 232 if (n2 == StringRef::npos) { 233 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) 234 return false; 235 Discriminator = 0; 236 } else { 237 if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) 238 return false; 239 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) 240 return false; 241 } 242 243 StringRef Rest = Input.substr(n1 + 2); 244 if (isDigit(Rest[0])) { 245 LineTy = LineType::BodyProfile; 246 size_t n3 = Rest.find(' '); 247 if (n3 == StringRef::npos) { 248 if (Rest.getAsInteger(10, NumSamples)) 249 return false; 250 } else { 251 if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) 252 return false; 253 } 254 // Find call targets and their sample counts. 255 // Note: In some cases, there are symbols in the profile which are not 256 // mangled. To accommodate such cases, use colon + integer pairs as the 257 // anchor points. 258 // An example: 259 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437 260 // ":1000" and ":437" are used as anchor points so the string above will 261 // be interpreted as 262 // target: _M_construct<char *> 263 // count: 1000 264 // target: string_view<std::allocator<char> > 265 // count: 437 266 while (n3 != StringRef::npos) { 267 n3 += Rest.substr(n3).find_first_not_of(' '); 268 Rest = Rest.substr(n3); 269 n3 = Rest.find_first_of(':'); 270 if (n3 == StringRef::npos || n3 == 0) 271 return false; 272 273 StringRef Target; 274 uint64_t count, n4; 275 while (true) { 276 // Get the segment after the current colon. 277 StringRef AfterColon = Rest.substr(n3 + 1); 278 // Get the target symbol before the current colon. 279 Target = Rest.substr(0, n3); 280 // Check if the word after the current colon is an integer. 281 n4 = AfterColon.find_first_of(' '); 282 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size(); 283 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1); 284 if (!WordAfterColon.getAsInteger(10, count)) 285 break; 286 287 // Try to find the next colon. 288 uint64_t n5 = AfterColon.find_first_of(':'); 289 if (n5 == StringRef::npos) 290 return false; 291 n3 += n5 + 1; 292 } 293 294 // An anchor point is found. Save the {target, count} pair 295 TargetCountMap[Target] = count; 296 if (n4 == Rest.size()) 297 break; 298 // Change n3 to the next blank space after colon + integer pair. 299 n3 = n4; 300 } 301 } else { 302 LineTy = LineType::CallSiteProfile; 303 size_t n3 = Rest.find_last_of(':'); 304 CalleeName = Rest.substr(0, n3); 305 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) 306 return false; 307 } 308 return true; 309 } 310 311 /// Load samples from a text file. 312 /// 313 /// See the documentation at the top of the file for an explanation of 314 /// the expected format. 315 /// 316 /// \returns true if the file was loaded successfully, false otherwise. 317 std::error_code SampleProfileReaderText::readImpl() { 318 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); 319 sampleprof_error Result = sampleprof_error::success; 320 321 InlineCallStack InlineStack; 322 uint32_t TopLevelProbeProfileCount = 0; 323 324 // DepthMetadata tracks whether we have processed metadata for the current 325 // top-level or nested function profile. 326 uint32_t DepthMetadata = 0; 327 328 ProfileIsFS = ProfileIsFSDisciminator; 329 FunctionSamples::ProfileIsFS = ProfileIsFS; 330 for (; !LineIt.is_at_eof(); ++LineIt) { 331 size_t pos = LineIt->find_first_not_of(' '); 332 if (pos == LineIt->npos || (*LineIt)[pos] == '#') 333 continue; 334 // Read the header of each function. 335 // 336 // Note that for function identifiers we are actually expecting 337 // mangled names, but we may not always get them. This happens when 338 // the compiler decides not to emit the function (e.g., it was inlined 339 // and removed). In this case, the binary will not have the linkage 340 // name for the function, so the profiler will emit the function's 341 // unmangled name, which may contain characters like ':' and '>' in its 342 // name (member functions, templates, etc). 343 // 344 // The only requirement we place on the identifier, then, is that it 345 // should not begin with a number. 346 if ((*LineIt)[0] != ' ') { 347 uint64_t NumSamples, NumHeadSamples; 348 StringRef FName; 349 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { 350 reportError(LineIt.line_number(), 351 "Expected 'mangled_name:NUM:NUM', found " + *LineIt); 352 return sampleprof_error::malformed; 353 } 354 DepthMetadata = 0; 355 SampleContext FContext(FName, CSNameTable); 356 if (FContext.hasContext()) 357 ++CSProfileCount; 358 FunctionSamples &FProfile = Profiles.Create(FContext); 359 MergeResult(Result, FProfile.addTotalSamples(NumSamples)); 360 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); 361 InlineStack.clear(); 362 InlineStack.push_back(&FProfile); 363 } else { 364 uint64_t NumSamples; 365 StringRef FName; 366 DenseMap<StringRef, uint64_t> TargetCountMap; 367 uint32_t Depth, LineOffset, Discriminator; 368 LineType LineTy; 369 uint64_t FunctionHash = 0; 370 uint32_t Attributes = 0; 371 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset, 372 Discriminator, FName, TargetCountMap, FunctionHash, 373 Attributes)) { 374 reportError(LineIt.line_number(), 375 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + 376 *LineIt); 377 return sampleprof_error::malformed; 378 } 379 if (LineTy != LineType::Metadata && Depth == DepthMetadata) { 380 // Metadata must be put at the end of a function profile. 381 reportError(LineIt.line_number(), 382 "Found non-metadata after metadata: " + *LineIt); 383 return sampleprof_error::malformed; 384 } 385 386 // Here we handle FS discriminators. 387 Discriminator &= getDiscriminatorMask(); 388 389 while (InlineStack.size() > Depth) { 390 InlineStack.pop_back(); 391 } 392 switch (LineTy) { 393 case LineType::CallSiteProfile: { 394 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( 395 LineLocation(LineOffset, Discriminator))[std::string(FName)]; 396 FSamples.setName(FName); 397 MergeResult(Result, FSamples.addTotalSamples(NumSamples)); 398 InlineStack.push_back(&FSamples); 399 DepthMetadata = 0; 400 break; 401 } 402 case LineType::BodyProfile: { 403 while (InlineStack.size() > Depth) { 404 InlineStack.pop_back(); 405 } 406 FunctionSamples &FProfile = *InlineStack.back(); 407 for (const auto &name_count : TargetCountMap) { 408 MergeResult(Result, FProfile.addCalledTargetSamples( 409 LineOffset, Discriminator, name_count.first, 410 name_count.second)); 411 } 412 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, 413 NumSamples)); 414 break; 415 } 416 case LineType::Metadata: { 417 FunctionSamples &FProfile = *InlineStack.back(); 418 if (FunctionHash) { 419 FProfile.setFunctionHash(FunctionHash); 420 if (Depth == 1) 421 ++TopLevelProbeProfileCount; 422 } 423 FProfile.getContext().setAllAttributes(Attributes); 424 if (Attributes & (uint32_t)ContextShouldBeInlined) 425 ProfileIsPreInlined = true; 426 DepthMetadata = Depth; 427 break; 428 } 429 } 430 } 431 } 432 433 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 434 "Cannot have both context-sensitive and regular profile"); 435 ProfileIsCS = (CSProfileCount > 0); 436 assert((TopLevelProbeProfileCount == 0 || 437 TopLevelProbeProfileCount == Profiles.size()) && 438 "Cannot have both probe-based profiles and regular profiles"); 439 ProfileIsProbeBased = (TopLevelProbeProfileCount > 0); 440 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 441 FunctionSamples::ProfileIsCS = ProfileIsCS; 442 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined; 443 444 if (Result == sampleprof_error::success) 445 computeSummary(); 446 447 return Result; 448 } 449 450 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { 451 bool result = false; 452 453 // Check that the first non-comment line is a valid function header. 454 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); 455 if (!LineIt.is_at_eof()) { 456 if ((*LineIt)[0] != ' ') { 457 uint64_t NumSamples, NumHeadSamples; 458 StringRef FName; 459 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); 460 } 461 } 462 463 return result; 464 } 465 466 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { 467 unsigned NumBytesRead = 0; 468 std::error_code EC; 469 uint64_t Val = decodeULEB128(Data, &NumBytesRead); 470 471 if (Val > std::numeric_limits<T>::max()) 472 EC = sampleprof_error::malformed; 473 else if (Data + NumBytesRead > End) 474 EC = sampleprof_error::truncated; 475 else 476 EC = sampleprof_error::success; 477 478 if (EC) { 479 reportError(0, EC.message()); 480 return EC; 481 } 482 483 Data += NumBytesRead; 484 return static_cast<T>(Val); 485 } 486 487 ErrorOr<StringRef> SampleProfileReaderBinary::readString() { 488 std::error_code EC; 489 StringRef Str(reinterpret_cast<const char *>(Data)); 490 if (Data + Str.size() + 1 > End) { 491 EC = sampleprof_error::truncated; 492 reportError(0, EC.message()); 493 return EC; 494 } 495 496 Data += Str.size() + 1; 497 return Str; 498 } 499 500 template <typename T> 501 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() { 502 std::error_code EC; 503 504 if (Data + sizeof(T) > End) { 505 EC = sampleprof_error::truncated; 506 reportError(0, EC.message()); 507 return EC; 508 } 509 510 using namespace support; 511 T Val = endian::readNext<T, little, unaligned>(Data); 512 return Val; 513 } 514 515 template <typename T> 516 inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) { 517 std::error_code EC; 518 auto Idx = readNumber<size_t>(); 519 if (std::error_code EC = Idx.getError()) 520 return EC; 521 if (*Idx >= Table.size()) 522 return sampleprof_error::truncated_name_table; 523 return *Idx; 524 } 525 526 ErrorOr<StringRef> 527 SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) { 528 auto Idx = readStringIndex(NameTable); 529 if (std::error_code EC = Idx.getError()) 530 return EC; 531 532 // Lazy loading, if the string has not been materialized from memory storing 533 // MD5 values, then it is default initialized with the null pointer. This can 534 // only happen when using fixed length MD5, that bounds check is performed 535 // while parsing the name table to ensure MD5NameMemStart points to an array 536 // with enough MD5 entries. 537 StringRef &SR = NameTable[*Idx]; 538 if (!SR.data()) { 539 assert(MD5NameMemStart); 540 using namespace support; 541 uint64_t FID = endian::read<uint64_t, little, unaligned>( 542 MD5NameMemStart + (*Idx) * sizeof(uint64_t)); 543 SR = MD5StringBuf.emplace_back(std::to_string(FID)); 544 } 545 if (RetIdx) 546 *RetIdx = *Idx; 547 return SR; 548 } 549 550 ErrorOr<SampleContextFrames> 551 SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) { 552 auto ContextIdx = readNumber<size_t>(); 553 if (std::error_code EC = ContextIdx.getError()) 554 return EC; 555 if (*ContextIdx >= CSNameTable.size()) 556 return sampleprof_error::truncated_name_table; 557 if (RetIdx) 558 *RetIdx = *ContextIdx; 559 return CSNameTable[*ContextIdx]; 560 } 561 562 ErrorOr<std::pair<SampleContext, hash_code>> 563 SampleProfileReaderBinary::readSampleContextFromTable() { 564 SampleContext Context; 565 size_t Idx; 566 if (ProfileIsCS) { 567 auto FContext(readContextFromTable(&Idx)); 568 if (std::error_code EC = FContext.getError()) 569 return EC; 570 Context = SampleContext(*FContext); 571 } else { 572 auto FName(readStringFromTable(&Idx)); 573 if (std::error_code EC = FName.getError()) 574 return EC; 575 Context = SampleContext(*FName); 576 } 577 hash_code Hash = MD5SampleContextStart[Idx]; 578 // Lazy computing of hash value, write back to the table to cache it. Only 579 // compute the context's hash value if it is being referenced for the first 580 // time. 581 if (Hash == hash_code(0)) { 582 assert(MD5SampleContextStart == MD5SampleContextTable.data()); 583 Hash = Context.getHashCode(); 584 MD5SampleContextTable[Idx] = Hash; 585 } 586 return std::make_pair(Context, Hash); 587 } 588 589 std::error_code 590 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { 591 auto NumSamples = readNumber<uint64_t>(); 592 if (std::error_code EC = NumSamples.getError()) 593 return EC; 594 FProfile.addTotalSamples(*NumSamples); 595 596 // Read the samples in the body. 597 auto NumRecords = readNumber<uint32_t>(); 598 if (std::error_code EC = NumRecords.getError()) 599 return EC; 600 601 for (uint32_t I = 0; I < *NumRecords; ++I) { 602 auto LineOffset = readNumber<uint64_t>(); 603 if (std::error_code EC = LineOffset.getError()) 604 return EC; 605 606 if (!isOffsetLegal(*LineOffset)) { 607 return std::error_code(); 608 } 609 610 auto Discriminator = readNumber<uint64_t>(); 611 if (std::error_code EC = Discriminator.getError()) 612 return EC; 613 614 auto NumSamples = readNumber<uint64_t>(); 615 if (std::error_code EC = NumSamples.getError()) 616 return EC; 617 618 auto NumCalls = readNumber<uint32_t>(); 619 if (std::error_code EC = NumCalls.getError()) 620 return EC; 621 622 // Here we handle FS discriminators: 623 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); 624 625 for (uint32_t J = 0; J < *NumCalls; ++J) { 626 auto CalledFunction(readStringFromTable()); 627 if (std::error_code EC = CalledFunction.getError()) 628 return EC; 629 630 auto CalledFunctionSamples = readNumber<uint64_t>(); 631 if (std::error_code EC = CalledFunctionSamples.getError()) 632 return EC; 633 634 FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, 635 *CalledFunction, *CalledFunctionSamples); 636 } 637 638 FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); 639 } 640 641 // Read all the samples for inlined function calls. 642 auto NumCallsites = readNumber<uint32_t>(); 643 if (std::error_code EC = NumCallsites.getError()) 644 return EC; 645 646 for (uint32_t J = 0; J < *NumCallsites; ++J) { 647 auto LineOffset = readNumber<uint64_t>(); 648 if (std::error_code EC = LineOffset.getError()) 649 return EC; 650 651 auto Discriminator = readNumber<uint64_t>(); 652 if (std::error_code EC = Discriminator.getError()) 653 return EC; 654 655 auto FName(readStringFromTable()); 656 if (std::error_code EC = FName.getError()) 657 return EC; 658 659 // Here we handle FS discriminators: 660 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); 661 662 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( 663 LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)]; 664 CalleeProfile.setName(*FName); 665 if (std::error_code EC = readProfile(CalleeProfile)) 666 return EC; 667 } 668 669 return sampleprof_error::success; 670 } 671 672 std::error_code 673 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { 674 Data = Start; 675 auto NumHeadSamples = readNumber<uint64_t>(); 676 if (std::error_code EC = NumHeadSamples.getError()) 677 return EC; 678 679 auto FContextHash(readSampleContextFromTable()); 680 if (std::error_code EC = FContextHash.getError()) 681 return EC; 682 683 auto &[FContext, Hash] = *FContextHash; 684 // Use the cached hash value for insertion instead of recalculating it. 685 auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples()); 686 FunctionSamples &FProfile = Res.first->second; 687 FProfile.setContext(FContext); 688 FProfile.addHeadSamples(*NumHeadSamples); 689 690 if (FContext.hasContext()) 691 CSProfileCount++; 692 693 if (std::error_code EC = readProfile(FProfile)) 694 return EC; 695 return sampleprof_error::success; 696 } 697 698 std::error_code SampleProfileReaderBinary::readImpl() { 699 ProfileIsFS = ProfileIsFSDisciminator; 700 FunctionSamples::ProfileIsFS = ProfileIsFS; 701 while (Data < End) { 702 if (std::error_code EC = readFuncProfile(Data)) 703 return EC; 704 } 705 706 return sampleprof_error::success; 707 } 708 709 std::error_code SampleProfileReaderExtBinaryBase::readOneSection( 710 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { 711 Data = Start; 712 End = Start + Size; 713 switch (Entry.Type) { 714 case SecProfSummary: 715 if (std::error_code EC = readSummary()) 716 return EC; 717 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 718 Summary->setPartialProfile(true); 719 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 720 FunctionSamples::ProfileIsCS = ProfileIsCS = true; 721 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined)) 722 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true; 723 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) 724 FunctionSamples::ProfileIsFS = ProfileIsFS = true; 725 break; 726 case SecNameTable: { 727 bool FixedLengthMD5 = 728 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); 729 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); 730 // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire 731 // profile uses MD5 for function name matching in IPO passes. 732 ProfileIsMD5 = ProfileIsMD5 || UseMD5; 733 FunctionSamples::HasUniqSuffix = 734 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix); 735 if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5)) 736 return EC; 737 break; 738 } 739 case SecCSNameTable: { 740 if (std::error_code EC = readCSNameTableSec()) 741 return EC; 742 break; 743 } 744 case SecLBRProfile: 745 if (std::error_code EC = readFuncProfiles()) 746 return EC; 747 break; 748 case SecFuncOffsetTable: 749 // If module is absent, we are using LLVM tools, and need to read all 750 // profiles, so skip reading the function offset table. 751 if (!M) { 752 Data = End; 753 } else { 754 assert((!ProfileIsCS || 755 hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) && 756 "func offset table should always be sorted in CS profile"); 757 if (std::error_code EC = readFuncOffsetTable()) 758 return EC; 759 } 760 break; 761 case SecFuncMetadata: { 762 ProfileIsProbeBased = 763 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); 764 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 765 bool HasAttribute = 766 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); 767 if (std::error_code EC = readFuncMetadata(HasAttribute)) 768 return EC; 769 break; 770 } 771 case SecProfileSymbolList: 772 if (std::error_code EC = readProfileSymbolList()) 773 return EC; 774 break; 775 default: 776 if (std::error_code EC = readCustomSection(Entry)) 777 return EC; 778 break; 779 } 780 return sampleprof_error::success; 781 } 782 783 bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const { 784 // If profile is CS, the function offset section is expected to consist of 785 // sequences of contexts in pre-order layout 786 // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that when a matched 787 // context in the module is found, the profiles of all its callees are 788 // recursively loaded. A list is needed since the order of profiles matters. 789 if (ProfileIsCS) 790 return true; 791 792 // If the profile is MD5, use the map container to lookup functions in 793 // the module. A remapper has no use on MD5 names. 794 if (useMD5()) 795 return false; 796 797 // Profile is not MD5 and if a remapper is present, the remapped name of 798 // every function needed to be matched against the module, so use the list 799 // container since each entry is accessed. 800 if (Remapper) 801 return true; 802 803 // Otherwise use the map container for faster lookup. 804 // TODO: If the cardinality of the function offset section is much smaller 805 // than the number of functions in the module, using the list container can 806 // be always faster, but we need to figure out the constant factor to 807 // determine the cutoff. 808 return false; 809 } 810 811 812 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() { 813 if (!M) 814 return false; 815 FuncsToUse.clear(); 816 for (auto &F : *M) 817 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); 818 return true; 819 } 820 821 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() { 822 // If there are more than one function offset section, the profile associated 823 // with the previous section has to be done reading before next one is read. 824 FuncOffsetTable.clear(); 825 FuncOffsetList.clear(); 826 827 auto Size = readNumber<uint64_t>(); 828 if (std::error_code EC = Size.getError()) 829 return EC; 830 831 bool UseFuncOffsetList = useFuncOffsetList(); 832 if (UseFuncOffsetList) 833 FuncOffsetList.reserve(*Size); 834 else 835 FuncOffsetTable.reserve(*Size); 836 837 for (uint64_t I = 0; I < *Size; ++I) { 838 auto FContextHash(readSampleContextFromTable()); 839 if (std::error_code EC = FContextHash.getError()) 840 return EC; 841 842 auto &[FContext, Hash] = *FContextHash; 843 auto Offset = readNumber<uint64_t>(); 844 if (std::error_code EC = Offset.getError()) 845 return EC; 846 847 if (UseFuncOffsetList) 848 FuncOffsetList.emplace_back(FContext, *Offset); 849 else 850 // Because Porfiles replace existing value with new value if collision 851 // happens, we also use the latest offset so that they are consistent. 852 FuncOffsetTable[Hash] = *Offset; 853 } 854 855 return sampleprof_error::success; 856 } 857 858 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { 859 // Collect functions used by current module if the Reader has been 860 // given a module. 861 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName 862 // which will query FunctionSamples::HasUniqSuffix, so it has to be 863 // called after FunctionSamples::HasUniqSuffix is set, i.e. after 864 // NameTable section is read. 865 bool LoadFuncsToBeUsed = collectFuncsFromModule(); 866 867 // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all 868 // profiles. 869 const uint8_t *Start = Data; 870 if (!LoadFuncsToBeUsed) { 871 while (Data < End) { 872 if (std::error_code EC = readFuncProfile(Data)) 873 return EC; 874 } 875 assert(Data == End && "More data is read than expected"); 876 } else { 877 // Load function profiles on demand. 878 if (Remapper) { 879 for (auto Name : FuncsToUse) { 880 Remapper->insert(Name); 881 } 882 } 883 884 if (ProfileIsCS) { 885 assert(useFuncOffsetList()); 886 DenseSet<uint64_t> FuncGuidsToUse; 887 if (useMD5()) { 888 for (auto Name : FuncsToUse) 889 FuncGuidsToUse.insert(Function::getGUID(Name)); 890 } 891 892 // For each function in current module, load all context profiles for 893 // the function as well as their callee contexts which can help profile 894 // guided importing for ThinLTO. This can be achieved by walking 895 // through an ordered context container, where contexts are laid out 896 // as if they were walked in preorder of a context trie. While 897 // traversing the trie, a link to the highest common ancestor node is 898 // kept so that all of its decendants will be loaded. 899 const SampleContext *CommonContext = nullptr; 900 for (const auto &NameOffset : FuncOffsetList) { 901 const auto &FContext = NameOffset.first; 902 auto FName = FContext.getName(); 903 // For function in the current module, keep its farthest ancestor 904 // context. This can be used to load itself and its child and 905 // sibling contexts. 906 if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) || 907 (!useMD5() && (FuncsToUse.count(FName) || 908 (Remapper && Remapper->exist(FName))))) { 909 if (!CommonContext || !CommonContext->IsPrefixOf(FContext)) 910 CommonContext = &FContext; 911 } 912 913 if (CommonContext == &FContext || 914 (CommonContext && CommonContext->IsPrefixOf(FContext))) { 915 // Load profile for the current context which originated from 916 // the common ancestor. 917 const uint8_t *FuncProfileAddr = Start + NameOffset.second; 918 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 919 return EC; 920 } 921 } 922 } else if (useMD5()) { 923 assert(!useFuncOffsetList()); 924 for (auto Name : FuncsToUse) { 925 auto GUID = MD5Hash(Name); 926 auto iter = FuncOffsetTable.find(GUID); 927 if (iter == FuncOffsetTable.end()) 928 continue; 929 const uint8_t *FuncProfileAddr = Start + iter->second; 930 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 931 return EC; 932 } 933 } else if (Remapper) { 934 assert(useFuncOffsetList()); 935 for (auto NameOffset : FuncOffsetList) { 936 SampleContext FContext(NameOffset.first); 937 auto FuncName = FContext.getName(); 938 if (!FuncsToUse.count(FuncName) && !Remapper->exist(FuncName)) 939 continue; 940 const uint8_t *FuncProfileAddr = Start + NameOffset.second; 941 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 942 return EC; 943 } 944 } else { 945 assert(!useFuncOffsetList()); 946 for (auto Name : FuncsToUse) { 947 auto iter = FuncOffsetTable.find(MD5Hash(Name)); 948 if (iter == FuncOffsetTable.end()) 949 continue; 950 const uint8_t *FuncProfileAddr = Start + iter->second; 951 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 952 return EC; 953 } 954 } 955 Data = End; 956 } 957 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 958 "Cannot have both context-sensitive and regular profile"); 959 assert((!CSProfileCount || ProfileIsCS) && 960 "Section flag should be consistent with actual profile"); 961 return sampleprof_error::success; 962 } 963 964 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() { 965 if (!ProfSymList) 966 ProfSymList = std::make_unique<ProfileSymbolList>(); 967 968 if (std::error_code EC = ProfSymList->read(Data, End - Data)) 969 return EC; 970 971 Data = End; 972 return sampleprof_error::success; 973 } 974 975 std::error_code SampleProfileReaderExtBinaryBase::decompressSection( 976 const uint8_t *SecStart, const uint64_t SecSize, 977 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { 978 Data = SecStart; 979 End = SecStart + SecSize; 980 auto DecompressSize = readNumber<uint64_t>(); 981 if (std::error_code EC = DecompressSize.getError()) 982 return EC; 983 DecompressBufSize = *DecompressSize; 984 985 auto CompressSize = readNumber<uint64_t>(); 986 if (std::error_code EC = CompressSize.getError()) 987 return EC; 988 989 if (!llvm::compression::zlib::isAvailable()) 990 return sampleprof_error::zlib_unavailable; 991 992 uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize); 993 size_t UCSize = DecompressBufSize; 994 llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize), 995 Buffer, UCSize); 996 if (E) 997 return sampleprof_error::uncompress_failed; 998 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer); 999 return sampleprof_error::success; 1000 } 1001 1002 std::error_code SampleProfileReaderExtBinaryBase::readImpl() { 1003 const uint8_t *BufStart = 1004 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1005 1006 for (auto &Entry : SecHdrTable) { 1007 // Skip empty section. 1008 if (!Entry.Size) 1009 continue; 1010 1011 // Skip sections without context when SkipFlatProf is true. 1012 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 1013 continue; 1014 1015 const uint8_t *SecStart = BufStart + Entry.Offset; 1016 uint64_t SecSize = Entry.Size; 1017 1018 // If the section is compressed, decompress it into a buffer 1019 // DecompressBuf before reading the actual data. The pointee of 1020 // 'Data' will be changed to buffer hold by DecompressBuf 1021 // temporarily when reading the actual data. 1022 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress); 1023 if (isCompressed) { 1024 const uint8_t *DecompressBuf; 1025 uint64_t DecompressBufSize; 1026 if (std::error_code EC = decompressSection( 1027 SecStart, SecSize, DecompressBuf, DecompressBufSize)) 1028 return EC; 1029 SecStart = DecompressBuf; 1030 SecSize = DecompressBufSize; 1031 } 1032 1033 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry)) 1034 return EC; 1035 if (Data != SecStart + SecSize) 1036 return sampleprof_error::malformed; 1037 1038 // Change the pointee of 'Data' from DecompressBuf to original Buffer. 1039 if (isCompressed) { 1040 Data = BufStart + Entry.Offset; 1041 End = BufStart + Buffer->getBufferSize(); 1042 } 1043 } 1044 1045 return sampleprof_error::success; 1046 } 1047 1048 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { 1049 if (Magic == SPMagic()) 1050 return sampleprof_error::success; 1051 return sampleprof_error::bad_magic; 1052 } 1053 1054 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { 1055 if (Magic == SPMagic(SPF_Ext_Binary)) 1056 return sampleprof_error::success; 1057 return sampleprof_error::bad_magic; 1058 } 1059 1060 std::error_code SampleProfileReaderBinary::readNameTable() { 1061 auto Size = readNumber<size_t>(); 1062 if (std::error_code EC = Size.getError()) 1063 return EC; 1064 1065 // Normally if useMD5 is true, the name table should have MD5 values, not 1066 // strings, however in the case that ExtBinary profile has multiple name 1067 // tables mixing string and MD5, all of them have to be normalized to use MD5, 1068 // because optimization passes can only handle either type. 1069 bool UseMD5 = useMD5(); 1070 if (UseMD5) 1071 MD5StringBuf.reserve(MD5StringBuf.size() + *Size); 1072 1073 NameTable.clear(); 1074 NameTable.reserve(*Size); 1075 if (!ProfileIsCS) { 1076 MD5SampleContextTable.clear(); 1077 if (UseMD5) 1078 MD5SampleContextTable.reserve(*Size); 1079 else 1080 // If we are using strings, delay MD5 computation since only a portion of 1081 // names are used by top level functions. Use 0 to indicate MD5 value is 1082 // to be calculated as no known string has a MD5 value of 0. 1083 MD5SampleContextTable.resize(*Size); 1084 } 1085 for (size_t I = 0; I < *Size; ++I) { 1086 auto Name(readString()); 1087 if (std::error_code EC = Name.getError()) 1088 return EC; 1089 if (UseMD5) { 1090 uint64_t FID = MD5Hash(*Name); 1091 if (!ProfileIsCS) 1092 MD5SampleContextTable.emplace_back(FID); 1093 NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(FID))); 1094 } else 1095 NameTable.push_back(*Name); 1096 } 1097 if (!ProfileIsCS) 1098 MD5SampleContextStart = MD5SampleContextTable.data(); 1099 return sampleprof_error::success; 1100 } 1101 1102 std::error_code 1103 SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5, 1104 bool FixedLengthMD5) { 1105 if (FixedLengthMD5) { 1106 if (!IsMD5) 1107 errs() << "If FixedLengthMD5 is true, UseMD5 has to be true"; 1108 auto Size = readNumber<size_t>(); 1109 if (std::error_code EC = Size.getError()) 1110 return EC; 1111 1112 assert(Data + (*Size) * sizeof(uint64_t) == End && 1113 "Fixed length MD5 name table does not contain specified number of " 1114 "entries"); 1115 if (Data + (*Size) * sizeof(uint64_t) > End) 1116 return sampleprof_error::truncated; 1117 1118 // Preallocate and initialize NameTable so we can check whether a name 1119 // index has been read before by checking whether the element in the 1120 // NameTable is empty, meanwhile readStringIndex can do the boundary 1121 // check using the size of NameTable. 1122 MD5StringBuf.reserve(MD5StringBuf.size() + *Size); 1123 NameTable.clear(); 1124 NameTable.resize(*Size); 1125 MD5NameMemStart = Data; 1126 if (!ProfileIsCS) 1127 MD5SampleContextStart = reinterpret_cast<const hash_code *>(Data); 1128 Data = Data + (*Size) * sizeof(uint64_t); 1129 return sampleprof_error::success; 1130 } 1131 1132 if (IsMD5) { 1133 assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here"); 1134 auto Size = readNumber<size_t>(); 1135 if (std::error_code EC = Size.getError()) 1136 return EC; 1137 1138 MD5StringBuf.reserve(MD5StringBuf.size() + *Size); 1139 NameTable.clear(); 1140 NameTable.reserve(*Size); 1141 if (!ProfileIsCS) { 1142 MD5SampleContextTable.clear(); 1143 MD5SampleContextTable.reserve(*Size); 1144 } 1145 for (size_t I = 0; I < *Size; ++I) { 1146 auto FID = readNumber<uint64_t>(); 1147 if (std::error_code EC = FID.getError()) 1148 return EC; 1149 if (!ProfileIsCS) 1150 MD5SampleContextTable.emplace_back(*FID); 1151 NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID))); 1152 } 1153 if (!ProfileIsCS) 1154 MD5SampleContextStart = MD5SampleContextTable.data(); 1155 return sampleprof_error::success; 1156 } 1157 1158 return SampleProfileReaderBinary::readNameTable(); 1159 } 1160 1161 // Read in the CS name table section, which basically contains a list of context 1162 // vectors. Each element of a context vector, aka a frame, refers to the 1163 // underlying raw function names that are stored in the name table, as well as 1164 // a callsite identifier that only makes sense for non-leaf frames. 1165 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() { 1166 auto Size = readNumber<size_t>(); 1167 if (std::error_code EC = Size.getError()) 1168 return EC; 1169 1170 CSNameTable.clear(); 1171 CSNameTable.reserve(*Size); 1172 if (ProfileIsCS) { 1173 // Delay MD5 computation of CS context until they are needed. Use 0 to 1174 // indicate MD5 value is to be calculated as no known string has a MD5 1175 // value of 0. 1176 MD5SampleContextTable.clear(); 1177 MD5SampleContextTable.resize(*Size); 1178 MD5SampleContextStart = MD5SampleContextTable.data(); 1179 } 1180 for (size_t I = 0; I < *Size; ++I) { 1181 CSNameTable.emplace_back(SampleContextFrameVector()); 1182 auto ContextSize = readNumber<uint32_t>(); 1183 if (std::error_code EC = ContextSize.getError()) 1184 return EC; 1185 for (uint32_t J = 0; J < *ContextSize; ++J) { 1186 auto FName(readStringFromTable()); 1187 if (std::error_code EC = FName.getError()) 1188 return EC; 1189 auto LineOffset = readNumber<uint64_t>(); 1190 if (std::error_code EC = LineOffset.getError()) 1191 return EC; 1192 1193 if (!isOffsetLegal(*LineOffset)) 1194 return std::error_code(); 1195 1196 auto Discriminator = readNumber<uint64_t>(); 1197 if (std::error_code EC = Discriminator.getError()) 1198 return EC; 1199 1200 CSNameTable.back().emplace_back( 1201 FName.get(), LineLocation(LineOffset.get(), Discriminator.get())); 1202 } 1203 } 1204 1205 return sampleprof_error::success; 1206 } 1207 1208 std::error_code 1209 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, 1210 FunctionSamples *FProfile) { 1211 if (Data < End) { 1212 if (ProfileIsProbeBased) { 1213 auto Checksum = readNumber<uint64_t>(); 1214 if (std::error_code EC = Checksum.getError()) 1215 return EC; 1216 if (FProfile) 1217 FProfile->setFunctionHash(*Checksum); 1218 } 1219 1220 if (ProfileHasAttribute) { 1221 auto Attributes = readNumber<uint32_t>(); 1222 if (std::error_code EC = Attributes.getError()) 1223 return EC; 1224 if (FProfile) 1225 FProfile->getContext().setAllAttributes(*Attributes); 1226 } 1227 1228 if (!ProfileIsCS) { 1229 // Read all the attributes for inlined function calls. 1230 auto NumCallsites = readNumber<uint32_t>(); 1231 if (std::error_code EC = NumCallsites.getError()) 1232 return EC; 1233 1234 for (uint32_t J = 0; J < *NumCallsites; ++J) { 1235 auto LineOffset = readNumber<uint64_t>(); 1236 if (std::error_code EC = LineOffset.getError()) 1237 return EC; 1238 1239 auto Discriminator = readNumber<uint64_t>(); 1240 if (std::error_code EC = Discriminator.getError()) 1241 return EC; 1242 1243 auto FContextHash(readSampleContextFromTable()); 1244 if (std::error_code EC = FContextHash.getError()) 1245 return EC; 1246 1247 auto &[FContext, Hash] = *FContextHash; 1248 FunctionSamples *CalleeProfile = nullptr; 1249 if (FProfile) { 1250 CalleeProfile = const_cast<FunctionSamples *>( 1251 &FProfile->functionSamplesAt(LineLocation( 1252 *LineOffset, 1253 *Discriminator))[std::string(FContext.getName())]); 1254 } 1255 if (std::error_code EC = 1256 readFuncMetadata(ProfileHasAttribute, CalleeProfile)) 1257 return EC; 1258 } 1259 } 1260 } 1261 1262 return sampleprof_error::success; 1263 } 1264 1265 std::error_code 1266 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { 1267 while (Data < End) { 1268 auto FContextHash(readSampleContextFromTable()); 1269 if (std::error_code EC = FContextHash.getError()) 1270 return EC; 1271 auto &[FContext, Hash] = *FContextHash; 1272 FunctionSamples *FProfile = nullptr; 1273 auto It = Profiles.find(FContext); 1274 if (It != Profiles.end()) 1275 FProfile = &It->second; 1276 1277 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) 1278 return EC; 1279 } 1280 1281 assert(Data == End && "More data is read than expected"); 1282 return sampleprof_error::success; 1283 } 1284 1285 std::error_code 1286 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) { 1287 SecHdrTableEntry Entry; 1288 auto Type = readUnencodedNumber<uint64_t>(); 1289 if (std::error_code EC = Type.getError()) 1290 return EC; 1291 Entry.Type = static_cast<SecType>(*Type); 1292 1293 auto Flags = readUnencodedNumber<uint64_t>(); 1294 if (std::error_code EC = Flags.getError()) 1295 return EC; 1296 Entry.Flags = *Flags; 1297 1298 auto Offset = readUnencodedNumber<uint64_t>(); 1299 if (std::error_code EC = Offset.getError()) 1300 return EC; 1301 Entry.Offset = *Offset; 1302 1303 auto Size = readUnencodedNumber<uint64_t>(); 1304 if (std::error_code EC = Size.getError()) 1305 return EC; 1306 Entry.Size = *Size; 1307 1308 Entry.LayoutIndex = Idx; 1309 SecHdrTable.push_back(std::move(Entry)); 1310 return sampleprof_error::success; 1311 } 1312 1313 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { 1314 auto EntryNum = readUnencodedNumber<uint64_t>(); 1315 if (std::error_code EC = EntryNum.getError()) 1316 return EC; 1317 1318 for (uint64_t i = 0; i < (*EntryNum); i++) 1319 if (std::error_code EC = readSecHdrTableEntry(i)) 1320 return EC; 1321 1322 return sampleprof_error::success; 1323 } 1324 1325 std::error_code SampleProfileReaderExtBinaryBase::readHeader() { 1326 const uint8_t *BufStart = 1327 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1328 Data = BufStart; 1329 End = BufStart + Buffer->getBufferSize(); 1330 1331 if (std::error_code EC = readMagicIdent()) 1332 return EC; 1333 1334 if (std::error_code EC = readSecHdrTable()) 1335 return EC; 1336 1337 return sampleprof_error::success; 1338 } 1339 1340 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { 1341 uint64_t Size = 0; 1342 for (auto &Entry : SecHdrTable) { 1343 if (Entry.Type == Type) 1344 Size += Entry.Size; 1345 } 1346 return Size; 1347 } 1348 1349 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { 1350 // Sections in SecHdrTable is not necessarily in the same order as 1351 // sections in the profile because section like FuncOffsetTable needs 1352 // to be written after section LBRProfile but needs to be read before 1353 // section LBRProfile, so we cannot simply use the last entry in 1354 // SecHdrTable to calculate the file size. 1355 uint64_t FileSize = 0; 1356 for (auto &Entry : SecHdrTable) { 1357 FileSize = std::max(Entry.Offset + Entry.Size, FileSize); 1358 } 1359 return FileSize; 1360 } 1361 1362 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { 1363 std::string Flags; 1364 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) 1365 Flags.append("{compressed,"); 1366 else 1367 Flags.append("{"); 1368 1369 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 1370 Flags.append("flat,"); 1371 1372 switch (Entry.Type) { 1373 case SecNameTable: 1374 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) 1375 Flags.append("fixlenmd5,"); 1376 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) 1377 Flags.append("md5,"); 1378 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix)) 1379 Flags.append("uniq,"); 1380 break; 1381 case SecProfSummary: 1382 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 1383 Flags.append("partial,"); 1384 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 1385 Flags.append("context,"); 1386 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined)) 1387 Flags.append("preInlined,"); 1388 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) 1389 Flags.append("fs-discriminator,"); 1390 break; 1391 case SecFuncOffsetTable: 1392 if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) 1393 Flags.append("ordered,"); 1394 break; 1395 case SecFuncMetadata: 1396 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased)) 1397 Flags.append("probe,"); 1398 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute)) 1399 Flags.append("attr,"); 1400 break; 1401 default: 1402 break; 1403 } 1404 char &last = Flags.back(); 1405 if (last == ',') 1406 last = '}'; 1407 else 1408 Flags.append("}"); 1409 return Flags; 1410 } 1411 1412 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { 1413 uint64_t TotalSecsSize = 0; 1414 for (auto &Entry : SecHdrTable) { 1415 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset 1416 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry) 1417 << "\n"; 1418 ; 1419 TotalSecsSize += Entry.Size; 1420 } 1421 uint64_t HeaderSize = SecHdrTable.front().Offset; 1422 assert(HeaderSize + TotalSecsSize == getFileSize() && 1423 "Size of 'header + sections' doesn't match the total size of profile"); 1424 1425 OS << "Header Size: " << HeaderSize << "\n"; 1426 OS << "Total Sections Size: " << TotalSecsSize << "\n"; 1427 OS << "File Size: " << getFileSize() << "\n"; 1428 return true; 1429 } 1430 1431 std::error_code SampleProfileReaderBinary::readMagicIdent() { 1432 // Read and check the magic identifier. 1433 auto Magic = readNumber<uint64_t>(); 1434 if (std::error_code EC = Magic.getError()) 1435 return EC; 1436 else if (std::error_code EC = verifySPMagic(*Magic)) 1437 return EC; 1438 1439 // Read the version number. 1440 auto Version = readNumber<uint64_t>(); 1441 if (std::error_code EC = Version.getError()) 1442 return EC; 1443 else if (*Version != SPVersion()) 1444 return sampleprof_error::unsupported_version; 1445 1446 return sampleprof_error::success; 1447 } 1448 1449 std::error_code SampleProfileReaderBinary::readHeader() { 1450 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1451 End = Data + Buffer->getBufferSize(); 1452 1453 if (std::error_code EC = readMagicIdent()) 1454 return EC; 1455 1456 if (std::error_code EC = readSummary()) 1457 return EC; 1458 1459 if (std::error_code EC = readNameTable()) 1460 return EC; 1461 return sampleprof_error::success; 1462 } 1463 1464 std::error_code SampleProfileReaderBinary::readSummaryEntry( 1465 std::vector<ProfileSummaryEntry> &Entries) { 1466 auto Cutoff = readNumber<uint64_t>(); 1467 if (std::error_code EC = Cutoff.getError()) 1468 return EC; 1469 1470 auto MinBlockCount = readNumber<uint64_t>(); 1471 if (std::error_code EC = MinBlockCount.getError()) 1472 return EC; 1473 1474 auto NumBlocks = readNumber<uint64_t>(); 1475 if (std::error_code EC = NumBlocks.getError()) 1476 return EC; 1477 1478 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks); 1479 return sampleprof_error::success; 1480 } 1481 1482 std::error_code SampleProfileReaderBinary::readSummary() { 1483 auto TotalCount = readNumber<uint64_t>(); 1484 if (std::error_code EC = TotalCount.getError()) 1485 return EC; 1486 1487 auto MaxBlockCount = readNumber<uint64_t>(); 1488 if (std::error_code EC = MaxBlockCount.getError()) 1489 return EC; 1490 1491 auto MaxFunctionCount = readNumber<uint64_t>(); 1492 if (std::error_code EC = MaxFunctionCount.getError()) 1493 return EC; 1494 1495 auto NumBlocks = readNumber<uint64_t>(); 1496 if (std::error_code EC = NumBlocks.getError()) 1497 return EC; 1498 1499 auto NumFunctions = readNumber<uint64_t>(); 1500 if (std::error_code EC = NumFunctions.getError()) 1501 return EC; 1502 1503 auto NumSummaryEntries = readNumber<uint64_t>(); 1504 if (std::error_code EC = NumSummaryEntries.getError()) 1505 return EC; 1506 1507 std::vector<ProfileSummaryEntry> Entries; 1508 for (unsigned i = 0; i < *NumSummaryEntries; i++) { 1509 std::error_code EC = readSummaryEntry(Entries); 1510 if (EC != sampleprof_error::success) 1511 return EC; 1512 } 1513 Summary = std::make_unique<ProfileSummary>( 1514 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, 1515 *MaxFunctionCount, *NumBlocks, *NumFunctions); 1516 1517 return sampleprof_error::success; 1518 } 1519 1520 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { 1521 const uint8_t *Data = 1522 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1523 uint64_t Magic = decodeULEB128(Data); 1524 return Magic == SPMagic(); 1525 } 1526 1527 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { 1528 const uint8_t *Data = 1529 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1530 uint64_t Magic = decodeULEB128(Data); 1531 return Magic == SPMagic(SPF_Ext_Binary); 1532 } 1533 1534 std::error_code SampleProfileReaderGCC::skipNextWord() { 1535 uint32_t dummy; 1536 if (!GcovBuffer.readInt(dummy)) 1537 return sampleprof_error::truncated; 1538 return sampleprof_error::success; 1539 } 1540 1541 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { 1542 if (sizeof(T) <= sizeof(uint32_t)) { 1543 uint32_t Val; 1544 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) 1545 return static_cast<T>(Val); 1546 } else if (sizeof(T) <= sizeof(uint64_t)) { 1547 uint64_t Val; 1548 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) 1549 return static_cast<T>(Val); 1550 } 1551 1552 std::error_code EC = sampleprof_error::malformed; 1553 reportError(0, EC.message()); 1554 return EC; 1555 } 1556 1557 ErrorOr<StringRef> SampleProfileReaderGCC::readString() { 1558 StringRef Str; 1559 if (!GcovBuffer.readString(Str)) 1560 return sampleprof_error::truncated; 1561 return Str; 1562 } 1563 1564 std::error_code SampleProfileReaderGCC::readHeader() { 1565 // Read the magic identifier. 1566 if (!GcovBuffer.readGCDAFormat()) 1567 return sampleprof_error::unrecognized_format; 1568 1569 // Read the version number. Note - the GCC reader does not validate this 1570 // version, but the profile creator generates v704. 1571 GCOV::GCOVVersion version; 1572 if (!GcovBuffer.readGCOVVersion(version)) 1573 return sampleprof_error::unrecognized_format; 1574 1575 if (version != GCOV::V407) 1576 return sampleprof_error::unsupported_version; 1577 1578 // Skip the empty integer. 1579 if (std::error_code EC = skipNextWord()) 1580 return EC; 1581 1582 return sampleprof_error::success; 1583 } 1584 1585 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { 1586 uint32_t Tag; 1587 if (!GcovBuffer.readInt(Tag)) 1588 return sampleprof_error::truncated; 1589 1590 if (Tag != Expected) 1591 return sampleprof_error::malformed; 1592 1593 if (std::error_code EC = skipNextWord()) 1594 return EC; 1595 1596 return sampleprof_error::success; 1597 } 1598 1599 std::error_code SampleProfileReaderGCC::readNameTable() { 1600 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) 1601 return EC; 1602 1603 uint32_t Size; 1604 if (!GcovBuffer.readInt(Size)) 1605 return sampleprof_error::truncated; 1606 1607 for (uint32_t I = 0; I < Size; ++I) { 1608 StringRef Str; 1609 if (!GcovBuffer.readString(Str)) 1610 return sampleprof_error::truncated; 1611 Names.push_back(std::string(Str)); 1612 } 1613 1614 return sampleprof_error::success; 1615 } 1616 1617 std::error_code SampleProfileReaderGCC::readFunctionProfiles() { 1618 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) 1619 return EC; 1620 1621 uint32_t NumFunctions; 1622 if (!GcovBuffer.readInt(NumFunctions)) 1623 return sampleprof_error::truncated; 1624 1625 InlineCallStack Stack; 1626 for (uint32_t I = 0; I < NumFunctions; ++I) 1627 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) 1628 return EC; 1629 1630 computeSummary(); 1631 return sampleprof_error::success; 1632 } 1633 1634 std::error_code SampleProfileReaderGCC::readOneFunctionProfile( 1635 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { 1636 uint64_t HeadCount = 0; 1637 if (InlineStack.size() == 0) 1638 if (!GcovBuffer.readInt64(HeadCount)) 1639 return sampleprof_error::truncated; 1640 1641 uint32_t NameIdx; 1642 if (!GcovBuffer.readInt(NameIdx)) 1643 return sampleprof_error::truncated; 1644 1645 StringRef Name(Names[NameIdx]); 1646 1647 uint32_t NumPosCounts; 1648 if (!GcovBuffer.readInt(NumPosCounts)) 1649 return sampleprof_error::truncated; 1650 1651 uint32_t NumCallsites; 1652 if (!GcovBuffer.readInt(NumCallsites)) 1653 return sampleprof_error::truncated; 1654 1655 FunctionSamples *FProfile = nullptr; 1656 if (InlineStack.size() == 0) { 1657 // If this is a top function that we have already processed, do not 1658 // update its profile again. This happens in the presence of 1659 // function aliases. Since these aliases share the same function 1660 // body, there will be identical replicated profiles for the 1661 // original function. In this case, we simply not bother updating 1662 // the profile of the original function. 1663 FProfile = &Profiles[Name]; 1664 FProfile->addHeadSamples(HeadCount); 1665 if (FProfile->getTotalSamples() > 0) 1666 Update = false; 1667 } else { 1668 // Otherwise, we are reading an inlined instance. The top of the 1669 // inline stack contains the profile of the caller. Insert this 1670 // callee in the caller's CallsiteMap. 1671 FunctionSamples *CallerProfile = InlineStack.front(); 1672 uint32_t LineOffset = Offset >> 16; 1673 uint32_t Discriminator = Offset & 0xffff; 1674 FProfile = &CallerProfile->functionSamplesAt( 1675 LineLocation(LineOffset, Discriminator))[std::string(Name)]; 1676 } 1677 FProfile->setName(Name); 1678 1679 for (uint32_t I = 0; I < NumPosCounts; ++I) { 1680 uint32_t Offset; 1681 if (!GcovBuffer.readInt(Offset)) 1682 return sampleprof_error::truncated; 1683 1684 uint32_t NumTargets; 1685 if (!GcovBuffer.readInt(NumTargets)) 1686 return sampleprof_error::truncated; 1687 1688 uint64_t Count; 1689 if (!GcovBuffer.readInt64(Count)) 1690 return sampleprof_error::truncated; 1691 1692 // The line location is encoded in the offset as: 1693 // high 16 bits: line offset to the start of the function. 1694 // low 16 bits: discriminator. 1695 uint32_t LineOffset = Offset >> 16; 1696 uint32_t Discriminator = Offset & 0xffff; 1697 1698 InlineCallStack NewStack; 1699 NewStack.push_back(FProfile); 1700 llvm::append_range(NewStack, InlineStack); 1701 if (Update) { 1702 // Walk up the inline stack, adding the samples on this line to 1703 // the total sample count of the callers in the chain. 1704 for (auto *CallerProfile : NewStack) 1705 CallerProfile->addTotalSamples(Count); 1706 1707 // Update the body samples for the current profile. 1708 FProfile->addBodySamples(LineOffset, Discriminator, Count); 1709 } 1710 1711 // Process the list of functions called at an indirect call site. 1712 // These are all the targets that a function pointer (or virtual 1713 // function) resolved at runtime. 1714 for (uint32_t J = 0; J < NumTargets; J++) { 1715 uint32_t HistVal; 1716 if (!GcovBuffer.readInt(HistVal)) 1717 return sampleprof_error::truncated; 1718 1719 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) 1720 return sampleprof_error::malformed; 1721 1722 uint64_t TargetIdx; 1723 if (!GcovBuffer.readInt64(TargetIdx)) 1724 return sampleprof_error::truncated; 1725 StringRef TargetName(Names[TargetIdx]); 1726 1727 uint64_t TargetCount; 1728 if (!GcovBuffer.readInt64(TargetCount)) 1729 return sampleprof_error::truncated; 1730 1731 if (Update) 1732 FProfile->addCalledTargetSamples(LineOffset, Discriminator, 1733 TargetName, TargetCount); 1734 } 1735 } 1736 1737 // Process all the inlined callers into the current function. These 1738 // are all the callsites that were inlined into this function. 1739 for (uint32_t I = 0; I < NumCallsites; I++) { 1740 // The offset is encoded as: 1741 // high 16 bits: line offset to the start of the function. 1742 // low 16 bits: discriminator. 1743 uint32_t Offset; 1744 if (!GcovBuffer.readInt(Offset)) 1745 return sampleprof_error::truncated; 1746 InlineCallStack NewStack; 1747 NewStack.push_back(FProfile); 1748 llvm::append_range(NewStack, InlineStack); 1749 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) 1750 return EC; 1751 } 1752 1753 return sampleprof_error::success; 1754 } 1755 1756 /// Read a GCC AutoFDO profile. 1757 /// 1758 /// This format is generated by the Linux Perf conversion tool at 1759 /// https://github.com/google/autofdo. 1760 std::error_code SampleProfileReaderGCC::readImpl() { 1761 assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator"); 1762 // Read the string table. 1763 if (std::error_code EC = readNameTable()) 1764 return EC; 1765 1766 // Read the source profile. 1767 if (std::error_code EC = readFunctionProfiles()) 1768 return EC; 1769 1770 return sampleprof_error::success; 1771 } 1772 1773 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { 1774 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); 1775 return Magic == "adcg*704"; 1776 } 1777 1778 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { 1779 // If the reader uses MD5 to represent string, we can't remap it because 1780 // we don't know what the original function names were. 1781 if (Reader.useMD5()) { 1782 Ctx.diagnose(DiagnosticInfoSampleProfile( 1783 Reader.getBuffer()->getBufferIdentifier(), 1784 "Profile data remapping cannot be applied to profile data " 1785 "using MD5 names (original mangled names are not available).", 1786 DS_Warning)); 1787 return; 1788 } 1789 1790 // CSSPGO-TODO: Remapper is not yet supported. 1791 // We will need to remap the entire context string. 1792 assert(Remappings && "should be initialized while creating remapper"); 1793 for (auto &Sample : Reader.getProfiles()) { 1794 DenseSet<StringRef> NamesInSample; 1795 Sample.second.findAllNames(NamesInSample); 1796 for (auto &Name : NamesInSample) 1797 if (auto Key = Remappings->insert(Name)) 1798 NameMap.insert({Key, Name}); 1799 } 1800 1801 RemappingApplied = true; 1802 } 1803 1804 std::optional<StringRef> 1805 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { 1806 if (auto Key = Remappings->lookup(Fname)) 1807 return NameMap.lookup(Key); 1808 return std::nullopt; 1809 } 1810 1811 /// Prepare a memory buffer for the contents of \p Filename. 1812 /// 1813 /// \returns an error code indicating the status of the buffer. 1814 static ErrorOr<std::unique_ptr<MemoryBuffer>> 1815 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { 1816 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() 1817 : FS.getBufferForFile(Filename); 1818 if (std::error_code EC = BufferOrErr.getError()) 1819 return EC; 1820 auto Buffer = std::move(BufferOrErr.get()); 1821 1822 return std::move(Buffer); 1823 } 1824 1825 /// Create a sample profile reader based on the format of the input file. 1826 /// 1827 /// \param Filename The file to open. 1828 /// 1829 /// \param C The LLVM context to use to emit diagnostics. 1830 /// 1831 /// \param P The FSDiscriminatorPass. 1832 /// 1833 /// \param RemapFilename The file used for profile remapping. 1834 /// 1835 /// \returns an error code indicating the status of the created reader. 1836 ErrorOr<std::unique_ptr<SampleProfileReader>> 1837 SampleProfileReader::create(const std::string Filename, LLVMContext &C, 1838 vfs::FileSystem &FS, FSDiscriminatorPass P, 1839 const std::string RemapFilename) { 1840 auto BufferOrError = setupMemoryBuffer(Filename, FS); 1841 if (std::error_code EC = BufferOrError.getError()) 1842 return EC; 1843 return create(BufferOrError.get(), C, FS, P, RemapFilename); 1844 } 1845 1846 /// Create a sample profile remapper from the given input, to remap the 1847 /// function names in the given profile data. 1848 /// 1849 /// \param Filename The file to open. 1850 /// 1851 /// \param Reader The profile reader the remapper is going to be applied to. 1852 /// 1853 /// \param C The LLVM context to use to emit diagnostics. 1854 /// 1855 /// \returns an error code indicating the status of the created reader. 1856 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1857 SampleProfileReaderItaniumRemapper::create(const std::string Filename, 1858 vfs::FileSystem &FS, 1859 SampleProfileReader &Reader, 1860 LLVMContext &C) { 1861 auto BufferOrError = setupMemoryBuffer(Filename, FS); 1862 if (std::error_code EC = BufferOrError.getError()) 1863 return EC; 1864 return create(BufferOrError.get(), Reader, C); 1865 } 1866 1867 /// Create a sample profile remapper from the given input, to remap the 1868 /// function names in the given profile data. 1869 /// 1870 /// \param B The memory buffer to create the reader from (assumes ownership). 1871 /// 1872 /// \param C The LLVM context to use to emit diagnostics. 1873 /// 1874 /// \param Reader The profile reader the remapper is going to be applied to. 1875 /// 1876 /// \returns an error code indicating the status of the created reader. 1877 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1878 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B, 1879 SampleProfileReader &Reader, 1880 LLVMContext &C) { 1881 auto Remappings = std::make_unique<SymbolRemappingReader>(); 1882 if (Error E = Remappings->read(*B)) { 1883 handleAllErrors( 1884 std::move(E), [&](const SymbolRemappingParseError &ParseError) { 1885 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), 1886 ParseError.getLineNum(), 1887 ParseError.getMessage())); 1888 }); 1889 return sampleprof_error::malformed; 1890 } 1891 1892 return std::make_unique<SampleProfileReaderItaniumRemapper>( 1893 std::move(B), std::move(Remappings), Reader); 1894 } 1895 1896 /// Create a sample profile reader based on the format of the input data. 1897 /// 1898 /// \param B The memory buffer to create the reader from (assumes ownership). 1899 /// 1900 /// \param C The LLVM context to use to emit diagnostics. 1901 /// 1902 /// \param P The FSDiscriminatorPass. 1903 /// 1904 /// \param RemapFilename The file used for profile remapping. 1905 /// 1906 /// \returns an error code indicating the status of the created reader. 1907 ErrorOr<std::unique_ptr<SampleProfileReader>> 1908 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, 1909 vfs::FileSystem &FS, FSDiscriminatorPass P, 1910 const std::string RemapFilename) { 1911 std::unique_ptr<SampleProfileReader> Reader; 1912 if (SampleProfileReaderRawBinary::hasFormat(*B)) 1913 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); 1914 else if (SampleProfileReaderExtBinary::hasFormat(*B)) 1915 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); 1916 else if (SampleProfileReaderGCC::hasFormat(*B)) 1917 Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); 1918 else if (SampleProfileReaderText::hasFormat(*B)) 1919 Reader.reset(new SampleProfileReaderText(std::move(B), C)); 1920 else 1921 return sampleprof_error::unrecognized_format; 1922 1923 if (!RemapFilename.empty()) { 1924 auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create( 1925 RemapFilename, FS, *Reader, C); 1926 if (std::error_code EC = ReaderOrErr.getError()) { 1927 std::string Msg = "Could not create remapper: " + EC.message(); 1928 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); 1929 return EC; 1930 } 1931 Reader->Remapper = std::move(ReaderOrErr.get()); 1932 } 1933 1934 if (std::error_code EC = Reader->readHeader()) { 1935 return EC; 1936 } 1937 1938 Reader->setDiscriminatorMaskedBitFrom(P); 1939 1940 return std::move(Reader); 1941 } 1942 1943 // For text and GCC file formats, we compute the summary after reading the 1944 // profile. Binary format has the profile summary in its header. 1945 void SampleProfileReader::computeSummary() { 1946 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1947 Summary = Builder.computeSummaryForProfiles(Profiles); 1948 } 1949