//===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the class that reads LLVM sample profiles. It
// supports three file formats: text, binary and gcov.
//
// The textual representation is useful for debugging and testing purposes. The
// binary representation is more compact, resulting in smaller file sizes.
//
// The gcov encoding is the one generated by GCC's AutoFDO profile creation
// tool (https://github.com/google/autofdo)
//
// All three encodings can be used interchangeably as an input sample profile.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <system_error>
#include <utility>
#include <vector>

using namespace llvm;
using namespace sampleprof;

#define DEBUG_TYPE "samplepgo-reader"

// This internal option
specifies if the profile uses FS discriminators. 54 // It only applies to text, and binary format profiles. 55 // For ext-binary format profiles, the flag is set in the summary. 56 static cl::opt<bool> ProfileIsFSDisciminator( 57 "profile-isfs", cl::Hidden, cl::init(false), 58 cl::desc("Profile uses flow sensitive discriminators")); 59 60 /// Dump the function profile for \p FName. 61 /// 62 /// \param FContext Name + context of the function to print. 63 /// \param OS Stream to emit the output to. 64 void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS, 65 raw_ostream &OS) { 66 OS << "Function: " << FS.getContext().toString() << ": " << FS; 67 } 68 69 /// Dump all the function profiles found on stream \p OS. 70 void SampleProfileReader::dump(raw_ostream &OS) { 71 std::vector<NameFunctionSamples> V; 72 sortFuncProfiles(Profiles, V); 73 for (const auto &I : V) 74 dumpFunctionProfile(*I.second, OS); 75 } 76 77 static void dumpFunctionProfileJson(const FunctionSamples &S, 78 json::OStream &JOS, bool TopLevel = false) { 79 auto DumpBody = [&](const BodySampleMap &BodySamples) { 80 for (const auto &I : BodySamples) { 81 const LineLocation &Loc = I.first; 82 const SampleRecord &Sample = I.second; 83 JOS.object([&] { 84 JOS.attribute("line", Loc.LineOffset); 85 if (Loc.Discriminator) 86 JOS.attribute("discriminator", Loc.Discriminator); 87 JOS.attribute("samples", Sample.getSamples()); 88 89 auto CallTargets = Sample.getSortedCallTargets(); 90 if (!CallTargets.empty()) { 91 JOS.attributeArray("calls", [&] { 92 for (const auto &J : CallTargets) { 93 JOS.object([&] { 94 JOS.attribute("function", J.first.str()); 95 JOS.attribute("samples", J.second); 96 }); 97 } 98 }); 99 } 100 }); 101 } 102 }; 103 104 auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) { 105 for (const auto &I : CallsiteSamples) 106 for (const auto &FS : I.second) { 107 const LineLocation &Loc = I.first; 108 const FunctionSamples &CalleeSamples = FS.second; 109 
JOS.object([&] { 110 JOS.attribute("line", Loc.LineOffset); 111 if (Loc.Discriminator) 112 JOS.attribute("discriminator", Loc.Discriminator); 113 JOS.attributeArray( 114 "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); }); 115 }); 116 } 117 }; 118 119 JOS.object([&] { 120 JOS.attribute("name", S.getFunction().str()); 121 JOS.attribute("total", S.getTotalSamples()); 122 if (TopLevel) 123 JOS.attribute("head", S.getHeadSamples()); 124 125 const auto &BodySamples = S.getBodySamples(); 126 if (!BodySamples.empty()) 127 JOS.attributeArray("body", [&] { DumpBody(BodySamples); }); 128 129 const auto &CallsiteSamples = S.getCallsiteSamples(); 130 if (!CallsiteSamples.empty()) 131 JOS.attributeArray("callsites", 132 [&] { DumpCallsiteSamples(CallsiteSamples); }); 133 }); 134 } 135 136 /// Dump all the function profiles found on stream \p OS in the JSON format. 137 void SampleProfileReader::dumpJson(raw_ostream &OS) { 138 std::vector<NameFunctionSamples> V; 139 sortFuncProfiles(Profiles, V); 140 json::OStream JOS(OS, 2); 141 JOS.arrayBegin(); 142 for (const auto &F : V) 143 dumpFunctionProfileJson(*F.second, JOS, true); 144 JOS.arrayEnd(); 145 146 // Emit a newline character at the end as json::OStream doesn't emit one. 147 OS << "\n"; 148 } 149 150 /// Parse \p Input as function head. 151 /// 152 /// Parse one line of \p Input, and update function name in \p FName, 153 /// function's total sample count in \p NumSamples, function's entry 154 /// count in \p NumHeadSamples. 155 /// 156 /// \returns true if parsing is successful. 
157 static bool ParseHead(const StringRef &Input, StringRef &FName, 158 uint64_t &NumSamples, uint64_t &NumHeadSamples) { 159 if (Input[0] == ' ') 160 return false; 161 size_t n2 = Input.rfind(':'); 162 size_t n1 = Input.rfind(':', n2 - 1); 163 FName = Input.substr(0, n1); 164 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) 165 return false; 166 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) 167 return false; 168 return true; 169 } 170 171 /// Returns true if line offset \p L is legal (only has 16 bits). 172 static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; } 173 174 /// Parse \p Input that contains metadata. 175 /// Possible metadata: 176 /// - CFG Checksum information: 177 /// !CFGChecksum: 12345 178 /// - CFG Checksum information: 179 /// !Attributes: 1 180 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash. 181 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash, 182 uint32_t &Attributes) { 183 if (Input.starts_with("!CFGChecksum:")) { 184 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim(); 185 return !CFGInfo.getAsInteger(10, FunctionHash); 186 } 187 188 if (Input.starts_with("!Attributes:")) { 189 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim(); 190 return !Attrib.getAsInteger(10, Attributes); 191 } 192 193 return false; 194 } 195 196 enum class LineType { 197 CallSiteProfile, 198 BodyProfile, 199 Metadata, 200 }; 201 202 /// Parse \p Input as line sample. 203 /// 204 /// \param Input input line. 205 /// \param LineTy Type of this line. 206 /// \param Depth the depth of the inline stack. 207 /// \param NumSamples total samples of the line/inlined callsite. 208 /// \param LineOffset line offset to the start of the function. 209 /// \param Discriminator discriminator of the line. 210 /// \param TargetCountMap map from indirect call target to count. 211 /// \param FunctionHash the function's CFG hash, used by pseudo probe. 
212 /// 213 /// returns true if parsing is successful. 214 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, 215 uint64_t &NumSamples, uint32_t &LineOffset, 216 uint32_t &Discriminator, StringRef &CalleeName, 217 DenseMap<StringRef, uint64_t> &TargetCountMap, 218 uint64_t &FunctionHash, uint32_t &Attributes, 219 bool &IsFlat) { 220 for (Depth = 0; Input[Depth] == ' '; Depth++) 221 ; 222 if (Depth == 0) 223 return false; 224 225 if (Input[Depth] == '!') { 226 LineTy = LineType::Metadata; 227 // This metadata is only for manual inspection only. We already created a 228 // FunctionSamples and put it in the profile map, so there is no point 229 // to skip profiles even they have no use for ThinLTO. 230 if (Input == StringRef(" !Flat")) { 231 IsFlat = true; 232 return true; 233 } 234 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes); 235 } 236 237 size_t n1 = Input.find(':'); 238 StringRef Loc = Input.substr(Depth, n1 - Depth); 239 size_t n2 = Loc.find('.'); 240 if (n2 == StringRef::npos) { 241 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) 242 return false; 243 Discriminator = 0; 244 } else { 245 if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) 246 return false; 247 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) 248 return false; 249 } 250 251 StringRef Rest = Input.substr(n1 + 2); 252 if (isDigit(Rest[0])) { 253 LineTy = LineType::BodyProfile; 254 size_t n3 = Rest.find(' '); 255 if (n3 == StringRef::npos) { 256 if (Rest.getAsInteger(10, NumSamples)) 257 return false; 258 } else { 259 if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) 260 return false; 261 } 262 // Find call targets and their sample counts. 263 // Note: In some cases, there are symbols in the profile which are not 264 // mangled. To accommodate such cases, use colon + integer pairs as the 265 // anchor points. 
266 // An example: 267 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437 268 // ":1000" and ":437" are used as anchor points so the string above will 269 // be interpreted as 270 // target: _M_construct<char *> 271 // count: 1000 272 // target: string_view<std::allocator<char> > 273 // count: 437 274 while (n3 != StringRef::npos) { 275 n3 += Rest.substr(n3).find_first_not_of(' '); 276 Rest = Rest.substr(n3); 277 n3 = Rest.find_first_of(':'); 278 if (n3 == StringRef::npos || n3 == 0) 279 return false; 280 281 StringRef Target; 282 uint64_t count, n4; 283 while (true) { 284 // Get the segment after the current colon. 285 StringRef AfterColon = Rest.substr(n3 + 1); 286 // Get the target symbol before the current colon. 287 Target = Rest.substr(0, n3); 288 // Check if the word after the current colon is an integer. 289 n4 = AfterColon.find_first_of(' '); 290 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size(); 291 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1); 292 if (!WordAfterColon.getAsInteger(10, count)) 293 break; 294 295 // Try to find the next colon. 296 uint64_t n5 = AfterColon.find_first_of(':'); 297 if (n5 == StringRef::npos) 298 return false; 299 n3 += n5 + 1; 300 } 301 302 // An anchor point is found. Save the {target, count} pair 303 TargetCountMap[Target] = count; 304 if (n4 == Rest.size()) 305 break; 306 // Change n3 to the next blank space after colon + integer pair. 307 n3 = n4; 308 } 309 } else { 310 LineTy = LineType::CallSiteProfile; 311 size_t n3 = Rest.find_last_of(':'); 312 CalleeName = Rest.substr(0, n3); 313 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) 314 return false; 315 } 316 return true; 317 } 318 319 /// Load samples from a text file. 320 /// 321 /// See the documentation at the top of the file for an explanation of 322 /// the expected format. 323 /// 324 /// \returns true if the file was loaded successfully, false otherwise. 
325 std::error_code SampleProfileReaderText::readImpl() { 326 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); 327 sampleprof_error Result = sampleprof_error::success; 328 329 InlineCallStack InlineStack; 330 uint32_t TopLevelProbeProfileCount = 0; 331 332 // DepthMetadata tracks whether we have processed metadata for the current 333 // top-level or nested function profile. 334 uint32_t DepthMetadata = 0; 335 336 std::vector<SampleContext *> FlatSamples; 337 338 ProfileIsFS = ProfileIsFSDisciminator; 339 FunctionSamples::ProfileIsFS = ProfileIsFS; 340 for (; !LineIt.is_at_eof(); ++LineIt) { 341 size_t pos = LineIt->find_first_not_of(' '); 342 if (pos == LineIt->npos || (*LineIt)[pos] == '#') 343 continue; 344 // Read the header of each function. 345 // 346 // Note that for function identifiers we are actually expecting 347 // mangled names, but we may not always get them. This happens when 348 // the compiler decides not to emit the function (e.g., it was inlined 349 // and removed). In this case, the binary will not have the linkage 350 // name for the function, so the profiler will emit the function's 351 // unmangled name, which may contain characters like ':' and '>' in its 352 // name (member functions, templates, etc). 353 // 354 // The only requirement we place on the identifier, then, is that it 355 // should not begin with a number. 
356 if ((*LineIt)[0] != ' ') { 357 uint64_t NumSamples, NumHeadSamples; 358 StringRef FName; 359 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { 360 reportError(LineIt.line_number(), 361 "Expected 'mangled_name:NUM:NUM', found " + *LineIt); 362 return sampleprof_error::malformed; 363 } 364 DepthMetadata = 0; 365 SampleContext FContext(FName, CSNameTable); 366 if (FContext.hasContext()) 367 ++CSProfileCount; 368 FunctionSamples &FProfile = Profiles.create(FContext); 369 mergeSampleProfErrors(Result, FProfile.addTotalSamples(NumSamples)); 370 mergeSampleProfErrors(Result, FProfile.addHeadSamples(NumHeadSamples)); 371 InlineStack.clear(); 372 InlineStack.push_back(&FProfile); 373 } else { 374 uint64_t NumSamples; 375 StringRef FName; 376 DenseMap<StringRef, uint64_t> TargetCountMap; 377 uint32_t Depth, LineOffset, Discriminator; 378 LineType LineTy; 379 uint64_t FunctionHash = 0; 380 uint32_t Attributes = 0; 381 bool IsFlat = false; 382 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset, 383 Discriminator, FName, TargetCountMap, FunctionHash, 384 Attributes, IsFlat)) { 385 reportError(LineIt.line_number(), 386 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + 387 *LineIt); 388 return sampleprof_error::malformed; 389 } 390 if (LineTy != LineType::Metadata && Depth == DepthMetadata) { 391 // Metadata must be put at the end of a function profile. 392 reportError(LineIt.line_number(), 393 "Found non-metadata after metadata: " + *LineIt); 394 return sampleprof_error::malformed; 395 } 396 397 // Here we handle FS discriminators. 
398 Discriminator &= getDiscriminatorMask(); 399 400 while (InlineStack.size() > Depth) { 401 InlineStack.pop_back(); 402 } 403 switch (LineTy) { 404 case LineType::CallSiteProfile: { 405 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( 406 LineLocation(LineOffset, Discriminator))[FunctionId(FName)]; 407 FSamples.setFunction(FunctionId(FName)); 408 mergeSampleProfErrors(Result, FSamples.addTotalSamples(NumSamples)); 409 InlineStack.push_back(&FSamples); 410 DepthMetadata = 0; 411 break; 412 } 413 case LineType::BodyProfile: { 414 while (InlineStack.size() > Depth) { 415 InlineStack.pop_back(); 416 } 417 FunctionSamples &FProfile = *InlineStack.back(); 418 for (const auto &name_count : TargetCountMap) { 419 mergeSampleProfErrors(Result, FProfile.addCalledTargetSamples( 420 LineOffset, Discriminator, 421 FunctionId(name_count.first), 422 name_count.second)); 423 } 424 mergeSampleProfErrors( 425 Result, 426 FProfile.addBodySamples(LineOffset, Discriminator, NumSamples)); 427 break; 428 } 429 case LineType::Metadata: { 430 FunctionSamples &FProfile = *InlineStack.back(); 431 if (FunctionHash) { 432 FProfile.setFunctionHash(FunctionHash); 433 if (Depth == 1) 434 ++TopLevelProbeProfileCount; 435 } 436 FProfile.getContext().setAllAttributes(Attributes); 437 if (Attributes & (uint32_t)ContextShouldBeInlined) 438 ProfileIsPreInlined = true; 439 DepthMetadata = Depth; 440 if (IsFlat) { 441 if (Depth == 1) 442 FlatSamples.push_back(&FProfile.getContext()); 443 else 444 Ctx.diagnose(DiagnosticInfoSampleProfile( 445 Buffer->getBufferIdentifier(), LineIt.line_number(), 446 "!Flat may only be used at top level function.", DS_Warning)); 447 } 448 break; 449 } 450 } 451 } 452 } 453 454 // Honor the option to skip flat functions. Since they are already added to 455 // the profile map, remove them all here. 
456 if (SkipFlatProf) 457 for (SampleContext *FlatSample : FlatSamples) 458 Profiles.erase(*FlatSample); 459 460 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 461 "Cannot have both context-sensitive and regular profile"); 462 ProfileIsCS = (CSProfileCount > 0); 463 assert((TopLevelProbeProfileCount == 0 || 464 TopLevelProbeProfileCount == Profiles.size()) && 465 "Cannot have both probe-based profiles and regular profiles"); 466 ProfileIsProbeBased = (TopLevelProbeProfileCount > 0); 467 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 468 FunctionSamples::ProfileIsCS = ProfileIsCS; 469 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined; 470 471 if (Result == sampleprof_error::success) 472 computeSummary(); 473 474 return Result; 475 } 476 477 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { 478 bool result = false; 479 480 // Check that the first non-comment line is a valid function header. 481 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); 482 if (!LineIt.is_at_eof()) { 483 if ((*LineIt)[0] != ' ') { 484 uint64_t NumSamples, NumHeadSamples; 485 StringRef FName; 486 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); 487 } 488 } 489 490 return result; 491 } 492 493 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { 494 unsigned NumBytesRead = 0; 495 uint64_t Val = decodeULEB128(Data, &NumBytesRead); 496 497 if (Val > std::numeric_limits<T>::max()) { 498 std::error_code EC = sampleprof_error::malformed; 499 reportError(0, EC.message()); 500 return EC; 501 } else if (Data + NumBytesRead > End) { 502 std::error_code EC = sampleprof_error::truncated; 503 reportError(0, EC.message()); 504 return EC; 505 } 506 507 Data += NumBytesRead; 508 return static_cast<T>(Val); 509 } 510 511 ErrorOr<StringRef> SampleProfileReaderBinary::readString() { 512 StringRef Str(reinterpret_cast<const char *>(Data)); 513 if (Data + Str.size() + 1 > End) { 514 std::error_code EC = 
sampleprof_error::truncated; 515 reportError(0, EC.message()); 516 return EC; 517 } 518 519 Data += Str.size() + 1; 520 return Str; 521 } 522 523 template <typename T> 524 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() { 525 if (Data + sizeof(T) > End) { 526 std::error_code EC = sampleprof_error::truncated; 527 reportError(0, EC.message()); 528 return EC; 529 } 530 531 using namespace support; 532 T Val = endian::readNext<T, llvm::endianness::little>(Data); 533 return Val; 534 } 535 536 template <typename T> 537 inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) { 538 auto Idx = readNumber<size_t>(); 539 if (std::error_code EC = Idx.getError()) 540 return EC; 541 if (*Idx >= Table.size()) 542 return sampleprof_error::truncated_name_table; 543 return *Idx; 544 } 545 546 ErrorOr<FunctionId> 547 SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) { 548 auto Idx = readStringIndex(NameTable); 549 if (std::error_code EC = Idx.getError()) 550 return EC; 551 if (RetIdx) 552 *RetIdx = *Idx; 553 return NameTable[*Idx]; 554 } 555 556 ErrorOr<SampleContextFrames> 557 SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) { 558 auto ContextIdx = readNumber<size_t>(); 559 if (std::error_code EC = ContextIdx.getError()) 560 return EC; 561 if (*ContextIdx >= CSNameTable.size()) 562 return sampleprof_error::truncated_name_table; 563 if (RetIdx) 564 *RetIdx = *ContextIdx; 565 return CSNameTable[*ContextIdx]; 566 } 567 568 ErrorOr<std::pair<SampleContext, uint64_t>> 569 SampleProfileReaderBinary::readSampleContextFromTable() { 570 SampleContext Context; 571 size_t Idx; 572 if (ProfileIsCS) { 573 auto FContext(readContextFromTable(&Idx)); 574 if (std::error_code EC = FContext.getError()) 575 return EC; 576 Context = SampleContext(*FContext); 577 } else { 578 auto FName(readStringFromTable(&Idx)); 579 if (std::error_code EC = FName.getError()) 580 return EC; 581 Context = SampleContext(*FName); 582 } 583 // Since 
MD5SampleContextStart may point to the profile's file data, need to 584 // make sure it is reading the same value on big endian CPU. 585 uint64_t Hash = support::endian::read64le(MD5SampleContextStart + Idx); 586 // Lazy computing of hash value, write back to the table to cache it. Only 587 // compute the context's hash value if it is being referenced for the first 588 // time. 589 if (Hash == 0) { 590 assert(MD5SampleContextStart == MD5SampleContextTable.data()); 591 Hash = Context.getHashCode(); 592 support::endian::write64le(&MD5SampleContextTable[Idx], Hash); 593 } 594 return std::make_pair(Context, Hash); 595 } 596 597 std::error_code 598 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { 599 auto NumSamples = readNumber<uint64_t>(); 600 if (std::error_code EC = NumSamples.getError()) 601 return EC; 602 FProfile.addTotalSamples(*NumSamples); 603 604 // Read the samples in the body. 605 auto NumRecords = readNumber<uint32_t>(); 606 if (std::error_code EC = NumRecords.getError()) 607 return EC; 608 609 for (uint32_t I = 0; I < *NumRecords; ++I) { 610 auto LineOffset = readNumber<uint64_t>(); 611 if (std::error_code EC = LineOffset.getError()) 612 return EC; 613 614 if (!isOffsetLegal(*LineOffset)) { 615 return std::error_code(); 616 } 617 618 auto Discriminator = readNumber<uint64_t>(); 619 if (std::error_code EC = Discriminator.getError()) 620 return EC; 621 622 auto NumSamples = readNumber<uint64_t>(); 623 if (std::error_code EC = NumSamples.getError()) 624 return EC; 625 626 auto NumCalls = readNumber<uint32_t>(); 627 if (std::error_code EC = NumCalls.getError()) 628 return EC; 629 630 // Here we handle FS discriminators: 631 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); 632 633 for (uint32_t J = 0; J < *NumCalls; ++J) { 634 auto CalledFunction(readStringFromTable()); 635 if (std::error_code EC = CalledFunction.getError()) 636 return EC; 637 638 auto CalledFunctionSamples = readNumber<uint64_t>(); 639 if 
(std::error_code EC = CalledFunctionSamples.getError()) 640 return EC; 641 642 FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, 643 *CalledFunction, *CalledFunctionSamples); 644 } 645 646 FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); 647 } 648 649 // Read all the samples for inlined function calls. 650 auto NumCallsites = readNumber<uint32_t>(); 651 if (std::error_code EC = NumCallsites.getError()) 652 return EC; 653 654 for (uint32_t J = 0; J < *NumCallsites; ++J) { 655 auto LineOffset = readNumber<uint64_t>(); 656 if (std::error_code EC = LineOffset.getError()) 657 return EC; 658 659 auto Discriminator = readNumber<uint64_t>(); 660 if (std::error_code EC = Discriminator.getError()) 661 return EC; 662 663 auto FName(readStringFromTable()); 664 if (std::error_code EC = FName.getError()) 665 return EC; 666 667 // Here we handle FS discriminators: 668 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); 669 670 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( 671 LineLocation(*LineOffset, DiscriminatorVal))[*FName]; 672 CalleeProfile.setFunction(*FName); 673 if (std::error_code EC = readProfile(CalleeProfile)) 674 return EC; 675 } 676 677 return sampleprof_error::success; 678 } 679 680 std::error_code 681 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start, 682 SampleProfileMap &Profiles) { 683 Data = Start; 684 auto NumHeadSamples = readNumber<uint64_t>(); 685 if (std::error_code EC = NumHeadSamples.getError()) 686 return EC; 687 688 auto FContextHash(readSampleContextFromTable()); 689 if (std::error_code EC = FContextHash.getError()) 690 return EC; 691 692 auto &[FContext, Hash] = *FContextHash; 693 // Use the cached hash value for insertion instead of recalculating it. 
694 auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples()); 695 FunctionSamples &FProfile = Res.first->second; 696 FProfile.setContext(FContext); 697 FProfile.addHeadSamples(*NumHeadSamples); 698 699 if (FContext.hasContext()) 700 CSProfileCount++; 701 702 if (std::error_code EC = readProfile(FProfile)) 703 return EC; 704 return sampleprof_error::success; 705 } 706 707 std::error_code 708 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { 709 return readFuncProfile(Start, Profiles); 710 } 711 712 std::error_code SampleProfileReaderBinary::readImpl() { 713 ProfileIsFS = ProfileIsFSDisciminator; 714 FunctionSamples::ProfileIsFS = ProfileIsFS; 715 while (Data < End) { 716 if (std::error_code EC = readFuncProfile(Data)) 717 return EC; 718 } 719 720 return sampleprof_error::success; 721 } 722 723 std::error_code SampleProfileReaderExtBinaryBase::readOneSection( 724 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { 725 Data = Start; 726 End = Start + Size; 727 switch (Entry.Type) { 728 case SecProfSummary: 729 if (std::error_code EC = readSummary()) 730 return EC; 731 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 732 Summary->setPartialProfile(true); 733 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 734 FunctionSamples::ProfileIsCS = ProfileIsCS = true; 735 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined)) 736 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true; 737 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) 738 FunctionSamples::ProfileIsFS = ProfileIsFS = true; 739 break; 740 case SecNameTable: { 741 bool FixedLengthMD5 = 742 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); 743 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); 744 // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire 745 // profile uses MD5 for function name matching in IPO passes. 
746 ProfileIsMD5 = ProfileIsMD5 || UseMD5; 747 FunctionSamples::HasUniqSuffix = 748 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix); 749 if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5)) 750 return EC; 751 break; 752 } 753 case SecCSNameTable: { 754 if (std::error_code EC = readCSNameTableSec()) 755 return EC; 756 break; 757 } 758 case SecLBRProfile: 759 ProfileSecRange = std::make_pair(Data, End); 760 if (std::error_code EC = readFuncProfiles()) 761 return EC; 762 break; 763 case SecFuncOffsetTable: 764 // If module is absent, we are using LLVM tools, and need to read all 765 // profiles, so skip reading the function offset table. 766 if (!M) { 767 Data = End; 768 } else { 769 assert((!ProfileIsCS || 770 hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) && 771 "func offset table should always be sorted in CS profile"); 772 if (std::error_code EC = readFuncOffsetTable()) 773 return EC; 774 } 775 break; 776 case SecFuncMetadata: { 777 ProfileIsProbeBased = 778 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); 779 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 780 ProfileHasAttribute = 781 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); 782 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute)) 783 return EC; 784 break; 785 } 786 case SecProfileSymbolList: 787 if (std::error_code EC = readProfileSymbolList()) 788 return EC; 789 break; 790 default: 791 if (std::error_code EC = readCustomSection(Entry)) 792 return EC; 793 break; 794 } 795 return sampleprof_error::success; 796 } 797 798 bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const { 799 // If profile is CS, the function offset section is expected to consist of 800 // sequences of contexts in pre-order layout 801 // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that when a matched 802 // context in the module is found, the profiles of all its callees are 803 // recursively loaded. 
A list is needed since the order of profiles matters. 804 if (ProfileIsCS) 805 return true; 806 807 // If the profile is MD5, use the map container to lookup functions in 808 // the module. A remapper has no use on MD5 names. 809 if (useMD5()) 810 return false; 811 812 // Profile is not MD5 and if a remapper is present, the remapped name of 813 // every function needed to be matched against the module, so use the list 814 // container since each entry is accessed. 815 if (Remapper) 816 return true; 817 818 // Otherwise use the map container for faster lookup. 819 // TODO: If the cardinality of the function offset section is much smaller 820 // than the number of functions in the module, using the list container can 821 // be always faster, but we need to figure out the constant factor to 822 // determine the cutoff. 823 return false; 824 } 825 826 std::error_code 827 SampleProfileReaderExtBinaryBase::read(const DenseSet<StringRef> &FuncsToUse, 828 SampleProfileMap &Profiles) { 829 Data = ProfileSecRange.first; 830 End = ProfileSecRange.second; 831 if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles)) 832 return EC; 833 End = Data; 834 835 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, Profiles)) 836 return EC; 837 return sampleprof_error::success; 838 } 839 840 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() { 841 if (!M) 842 return false; 843 FuncsToUse.clear(); 844 for (auto &F : *M) 845 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); 846 return true; 847 } 848 849 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() { 850 // If there are more than one function offset section, the profile associated 851 // with the previous section has to be done reading before next one is read. 
852 FuncOffsetTable.clear(); 853 FuncOffsetList.clear(); 854 855 auto Size = readNumber<uint64_t>(); 856 if (std::error_code EC = Size.getError()) 857 return EC; 858 859 bool UseFuncOffsetList = useFuncOffsetList(); 860 if (UseFuncOffsetList) 861 FuncOffsetList.reserve(*Size); 862 else 863 FuncOffsetTable.reserve(*Size); 864 865 for (uint64_t I = 0; I < *Size; ++I) { 866 auto FContextHash(readSampleContextFromTable()); 867 if (std::error_code EC = FContextHash.getError()) 868 return EC; 869 870 auto &[FContext, Hash] = *FContextHash; 871 auto Offset = readNumber<uint64_t>(); 872 if (std::error_code EC = Offset.getError()) 873 return EC; 874 875 if (UseFuncOffsetList) 876 FuncOffsetList.emplace_back(FContext, *Offset); 877 else 878 // Because Porfiles replace existing value with new value if collision 879 // happens, we also use the latest offset so that they are consistent. 880 FuncOffsetTable[Hash] = *Offset; 881 } 882 883 return sampleprof_error::success; 884 } 885 886 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles( 887 const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) { 888 const uint8_t *Start = Data; 889 890 if (Remapper) { 891 for (auto Name : FuncsToUse) { 892 Remapper->insert(Name); 893 } 894 } 895 896 if (ProfileIsCS) { 897 assert(useFuncOffsetList()); 898 DenseSet<uint64_t> FuncGuidsToUse; 899 if (useMD5()) { 900 for (auto Name : FuncsToUse) 901 FuncGuidsToUse.insert(Function::getGUID(Name)); 902 } 903 904 // For each function in current module, load all context profiles for 905 // the function as well as their callee contexts which can help profile 906 // guided importing for ThinLTO. This can be achieved by walking 907 // through an ordered context container, where contexts are laid out 908 // as if they were walked in preorder of a context trie. While 909 // traversing the trie, a link to the highest common ancestor node is 910 // kept so that all of its decendants will be loaded. 
911 const SampleContext *CommonContext = nullptr; 912 for (const auto &NameOffset : FuncOffsetList) { 913 const auto &FContext = NameOffset.first; 914 FunctionId FName = FContext.getFunction(); 915 StringRef FNameString; 916 if (!useMD5()) 917 FNameString = FName.stringRef(); 918 919 // For function in the current module, keep its farthest ancestor 920 // context. This can be used to load itself and its child and 921 // sibling contexts. 922 if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) || 923 (!useMD5() && (FuncsToUse.count(FNameString) || 924 (Remapper && Remapper->exist(FNameString))))) { 925 if (!CommonContext || !CommonContext->isPrefixOf(FContext)) 926 CommonContext = &FContext; 927 } 928 929 if (CommonContext == &FContext || 930 (CommonContext && CommonContext->isPrefixOf(FContext))) { 931 // Load profile for the current context which originated from 932 // the common ancestor. 933 const uint8_t *FuncProfileAddr = Start + NameOffset.second; 934 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 935 return EC; 936 } 937 } 938 } else if (useMD5()) { 939 assert(!useFuncOffsetList()); 940 for (auto Name : FuncsToUse) { 941 auto GUID = MD5Hash(Name); 942 auto iter = FuncOffsetTable.find(GUID); 943 if (iter == FuncOffsetTable.end()) 944 continue; 945 const uint8_t *FuncProfileAddr = Start + iter->second; 946 if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles)) 947 return EC; 948 } 949 } else if (Remapper) { 950 assert(useFuncOffsetList()); 951 for (auto NameOffset : FuncOffsetList) { 952 SampleContext FContext(NameOffset.first); 953 auto FuncName = FContext.getFunction(); 954 StringRef FuncNameStr = FuncName.stringRef(); 955 if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr)) 956 continue; 957 const uint8_t *FuncProfileAddr = Start + NameOffset.second; 958 if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles)) 959 return EC; 960 } 961 } else { 962 assert(!useFuncOffsetList()); 963 for (auto Name 
: FuncsToUse) { 964 965 auto iter = FuncOffsetTable.find(MD5Hash(Name)); 966 if (iter == FuncOffsetTable.end()) 967 continue; 968 const uint8_t *FuncProfileAddr = Start + iter->second; 969 if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles)) 970 return EC; 971 } 972 } 973 974 return sampleprof_error::success; 975 } 976 977 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { 978 // Collect functions used by current module if the Reader has been 979 // given a module. 980 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName 981 // which will query FunctionSamples::HasUniqSuffix, so it has to be 982 // called after FunctionSamples::HasUniqSuffix is set, i.e. after 983 // NameTable section is read. 984 bool LoadFuncsToBeUsed = collectFuncsFromModule(); 985 986 // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all 987 // profiles. 988 if (!LoadFuncsToBeUsed) { 989 while (Data < End) { 990 if (std::error_code EC = readFuncProfile(Data)) 991 return EC; 992 } 993 assert(Data == End && "More data is read than expected"); 994 } else { 995 // Load function profiles on demand. 
996 if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles)) 997 return EC; 998 Data = End; 999 } 1000 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 1001 "Cannot have both context-sensitive and regular profile"); 1002 assert((!CSProfileCount || ProfileIsCS) && 1003 "Section flag should be consistent with actual profile"); 1004 return sampleprof_error::success; 1005 } 1006 1007 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() { 1008 if (!ProfSymList) 1009 ProfSymList = std::make_unique<ProfileSymbolList>(); 1010 1011 if (std::error_code EC = ProfSymList->read(Data, End - Data)) 1012 return EC; 1013 1014 Data = End; 1015 return sampleprof_error::success; 1016 } 1017 1018 std::error_code SampleProfileReaderExtBinaryBase::decompressSection( 1019 const uint8_t *SecStart, const uint64_t SecSize, 1020 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { 1021 Data = SecStart; 1022 End = SecStart + SecSize; 1023 auto DecompressSize = readNumber<uint64_t>(); 1024 if (std::error_code EC = DecompressSize.getError()) 1025 return EC; 1026 DecompressBufSize = *DecompressSize; 1027 1028 auto CompressSize = readNumber<uint64_t>(); 1029 if (std::error_code EC = CompressSize.getError()) 1030 return EC; 1031 1032 if (!llvm::compression::zlib::isAvailable()) 1033 return sampleprof_error::zlib_unavailable; 1034 1035 uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize); 1036 size_t UCSize = DecompressBufSize; 1037 llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize), 1038 Buffer, UCSize); 1039 if (E) 1040 return sampleprof_error::uncompress_failed; 1041 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer); 1042 return sampleprof_error::success; 1043 } 1044 1045 std::error_code SampleProfileReaderExtBinaryBase::readImpl() { 1046 const uint8_t *BufStart = 1047 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1048 1049 for (auto &Entry : SecHdrTable) { 1050 // Skip empty 
section. 1051 if (!Entry.Size) 1052 continue; 1053 1054 // Skip sections without inlined functions when SkipFlatProf is true. 1055 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 1056 continue; 1057 1058 const uint8_t *SecStart = BufStart + Entry.Offset; 1059 uint64_t SecSize = Entry.Size; 1060 1061 // If the section is compressed, decompress it into a buffer 1062 // DecompressBuf before reading the actual data. The pointee of 1063 // 'Data' will be changed to buffer hold by DecompressBuf 1064 // temporarily when reading the actual data. 1065 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress); 1066 if (isCompressed) { 1067 const uint8_t *DecompressBuf; 1068 uint64_t DecompressBufSize; 1069 if (std::error_code EC = decompressSection( 1070 SecStart, SecSize, DecompressBuf, DecompressBufSize)) 1071 return EC; 1072 SecStart = DecompressBuf; 1073 SecSize = DecompressBufSize; 1074 } 1075 1076 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry)) 1077 return EC; 1078 if (Data != SecStart + SecSize) 1079 return sampleprof_error::malformed; 1080 1081 // Change the pointee of 'Data' from DecompressBuf to original Buffer. 
1082 if (isCompressed) { 1083 Data = BufStart + Entry.Offset; 1084 End = BufStart + Buffer->getBufferSize(); 1085 } 1086 } 1087 1088 return sampleprof_error::success; 1089 } 1090 1091 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { 1092 if (Magic == SPMagic()) 1093 return sampleprof_error::success; 1094 return sampleprof_error::bad_magic; 1095 } 1096 1097 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { 1098 if (Magic == SPMagic(SPF_Ext_Binary)) 1099 return sampleprof_error::success; 1100 return sampleprof_error::bad_magic; 1101 } 1102 1103 std::error_code SampleProfileReaderBinary::readNameTable() { 1104 auto Size = readNumber<size_t>(); 1105 if (std::error_code EC = Size.getError()) 1106 return EC; 1107 1108 // Normally if useMD5 is true, the name table should have MD5 values, not 1109 // strings, however in the case that ExtBinary profile has multiple name 1110 // tables mixing string and MD5, all of them have to be normalized to use MD5, 1111 // because optimization passes can only handle either type. 1112 bool UseMD5 = useMD5(); 1113 1114 NameTable.clear(); 1115 NameTable.reserve(*Size); 1116 if (!ProfileIsCS) { 1117 MD5SampleContextTable.clear(); 1118 if (UseMD5) 1119 MD5SampleContextTable.reserve(*Size); 1120 else 1121 // If we are using strings, delay MD5 computation since only a portion of 1122 // names are used by top level functions. Use 0 to indicate MD5 value is 1123 // to be calculated as no known string has a MD5 value of 0. 
1124 MD5SampleContextTable.resize(*Size); 1125 } 1126 for (size_t I = 0; I < *Size; ++I) { 1127 auto Name(readString()); 1128 if (std::error_code EC = Name.getError()) 1129 return EC; 1130 if (UseMD5) { 1131 FunctionId FID(*Name); 1132 if (!ProfileIsCS) 1133 MD5SampleContextTable.emplace_back(FID.getHashCode()); 1134 NameTable.emplace_back(FID); 1135 } else 1136 NameTable.push_back(FunctionId(*Name)); 1137 } 1138 if (!ProfileIsCS) 1139 MD5SampleContextStart = MD5SampleContextTable.data(); 1140 return sampleprof_error::success; 1141 } 1142 1143 std::error_code 1144 SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5, 1145 bool FixedLengthMD5) { 1146 if (FixedLengthMD5) { 1147 if (!IsMD5) 1148 errs() << "If FixedLengthMD5 is true, UseMD5 has to be true"; 1149 auto Size = readNumber<size_t>(); 1150 if (std::error_code EC = Size.getError()) 1151 return EC; 1152 1153 assert(Data + (*Size) * sizeof(uint64_t) == End && 1154 "Fixed length MD5 name table does not contain specified number of " 1155 "entries"); 1156 if (Data + (*Size) * sizeof(uint64_t) > End) 1157 return sampleprof_error::truncated; 1158 1159 NameTable.clear(); 1160 NameTable.reserve(*Size); 1161 for (size_t I = 0; I < *Size; ++I) { 1162 using namespace support; 1163 uint64_t FID = endian::read<uint64_t, endianness::little, unaligned>( 1164 Data + I * sizeof(uint64_t)); 1165 NameTable.emplace_back(FunctionId(FID)); 1166 } 1167 if (!ProfileIsCS) 1168 MD5SampleContextStart = reinterpret_cast<const uint64_t *>(Data); 1169 Data = Data + (*Size) * sizeof(uint64_t); 1170 return sampleprof_error::success; 1171 } 1172 1173 if (IsMD5) { 1174 assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here"); 1175 auto Size = readNumber<size_t>(); 1176 if (std::error_code EC = Size.getError()) 1177 return EC; 1178 1179 NameTable.clear(); 1180 NameTable.reserve(*Size); 1181 if (!ProfileIsCS) 1182 MD5SampleContextTable.resize(*Size); 1183 for (size_t I = 0; I < *Size; ++I) { 1184 auto FID = 
readNumber<uint64_t>(); 1185 if (std::error_code EC = FID.getError()) 1186 return EC; 1187 if (!ProfileIsCS) 1188 support::endian::write64le(&MD5SampleContextTable[I], *FID); 1189 NameTable.emplace_back(FunctionId(*FID)); 1190 } 1191 if (!ProfileIsCS) 1192 MD5SampleContextStart = MD5SampleContextTable.data(); 1193 return sampleprof_error::success; 1194 } 1195 1196 return SampleProfileReaderBinary::readNameTable(); 1197 } 1198 1199 // Read in the CS name table section, which basically contains a list of context 1200 // vectors. Each element of a context vector, aka a frame, refers to the 1201 // underlying raw function names that are stored in the name table, as well as 1202 // a callsite identifier that only makes sense for non-leaf frames. 1203 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() { 1204 auto Size = readNumber<size_t>(); 1205 if (std::error_code EC = Size.getError()) 1206 return EC; 1207 1208 CSNameTable.clear(); 1209 CSNameTable.reserve(*Size); 1210 if (ProfileIsCS) { 1211 // Delay MD5 computation of CS context until they are needed. Use 0 to 1212 // indicate MD5 value is to be calculated as no known string has a MD5 1213 // value of 0. 
1214 MD5SampleContextTable.clear(); 1215 MD5SampleContextTable.resize(*Size); 1216 MD5SampleContextStart = MD5SampleContextTable.data(); 1217 } 1218 for (size_t I = 0; I < *Size; ++I) { 1219 CSNameTable.emplace_back(SampleContextFrameVector()); 1220 auto ContextSize = readNumber<uint32_t>(); 1221 if (std::error_code EC = ContextSize.getError()) 1222 return EC; 1223 for (uint32_t J = 0; J < *ContextSize; ++J) { 1224 auto FName(readStringFromTable()); 1225 if (std::error_code EC = FName.getError()) 1226 return EC; 1227 auto LineOffset = readNumber<uint64_t>(); 1228 if (std::error_code EC = LineOffset.getError()) 1229 return EC; 1230 1231 if (!isOffsetLegal(*LineOffset)) 1232 return std::error_code(); 1233 1234 auto Discriminator = readNumber<uint64_t>(); 1235 if (std::error_code EC = Discriminator.getError()) 1236 return EC; 1237 1238 CSNameTable.back().emplace_back( 1239 FName.get(), LineLocation(LineOffset.get(), Discriminator.get())); 1240 } 1241 } 1242 1243 return sampleprof_error::success; 1244 } 1245 1246 std::error_code 1247 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, 1248 FunctionSamples *FProfile) { 1249 if (Data < End) { 1250 if (ProfileIsProbeBased) { 1251 auto Checksum = readNumber<uint64_t>(); 1252 if (std::error_code EC = Checksum.getError()) 1253 return EC; 1254 if (FProfile) 1255 FProfile->setFunctionHash(*Checksum); 1256 } 1257 1258 if (ProfileHasAttribute) { 1259 auto Attributes = readNumber<uint32_t>(); 1260 if (std::error_code EC = Attributes.getError()) 1261 return EC; 1262 if (FProfile) 1263 FProfile->getContext().setAllAttributes(*Attributes); 1264 } 1265 1266 if (!ProfileIsCS) { 1267 // Read all the attributes for inlined function calls. 
1268 auto NumCallsites = readNumber<uint32_t>(); 1269 if (std::error_code EC = NumCallsites.getError()) 1270 return EC; 1271 1272 for (uint32_t J = 0; J < *NumCallsites; ++J) { 1273 auto LineOffset = readNumber<uint64_t>(); 1274 if (std::error_code EC = LineOffset.getError()) 1275 return EC; 1276 1277 auto Discriminator = readNumber<uint64_t>(); 1278 if (std::error_code EC = Discriminator.getError()) 1279 return EC; 1280 1281 auto FContextHash(readSampleContextFromTable()); 1282 if (std::error_code EC = FContextHash.getError()) 1283 return EC; 1284 1285 auto &[FContext, Hash] = *FContextHash; 1286 FunctionSamples *CalleeProfile = nullptr; 1287 if (FProfile) { 1288 CalleeProfile = const_cast<FunctionSamples *>( 1289 &FProfile->functionSamplesAt(LineLocation( 1290 *LineOffset, 1291 *Discriminator))[FContext.getFunction()]); 1292 } 1293 if (std::error_code EC = 1294 readFuncMetadata(ProfileHasAttribute, CalleeProfile)) 1295 return EC; 1296 } 1297 } 1298 } 1299 1300 return sampleprof_error::success; 1301 } 1302 1303 std::error_code 1304 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, 1305 SampleProfileMap &Profiles) { 1306 if (FuncMetadataIndex.empty()) 1307 return sampleprof_error::success; 1308 1309 for (auto &I : Profiles) { 1310 FunctionSamples *FProfile = &I.second; 1311 auto R = FuncMetadataIndex.find(FProfile->getContext().getHashCode()); 1312 if (R == FuncMetadataIndex.end()) 1313 continue; 1314 1315 Data = R->second.first; 1316 End = R->second.second; 1317 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) 1318 return EC; 1319 assert(Data == End && "More data is read than expected"); 1320 } 1321 return sampleprof_error::success; 1322 } 1323 1324 std::error_code 1325 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { 1326 while (Data < End) { 1327 auto FContextHash(readSampleContextFromTable()); 1328 if (std::error_code EC = FContextHash.getError()) 1329 return EC; 1330 auto 
&[FContext, Hash] = *FContextHash; 1331 FunctionSamples *FProfile = nullptr; 1332 auto It = Profiles.find(FContext); 1333 if (It != Profiles.end()) 1334 FProfile = &It->second; 1335 1336 const uint8_t *Start = Data; 1337 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) 1338 return EC; 1339 1340 FuncMetadataIndex[FContext.getHashCode()] = {Start, Data}; 1341 } 1342 1343 assert(Data == End && "More data is read than expected"); 1344 return sampleprof_error::success; 1345 } 1346 1347 std::error_code 1348 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) { 1349 SecHdrTableEntry Entry; 1350 auto Type = readUnencodedNumber<uint64_t>(); 1351 if (std::error_code EC = Type.getError()) 1352 return EC; 1353 Entry.Type = static_cast<SecType>(*Type); 1354 1355 auto Flags = readUnencodedNumber<uint64_t>(); 1356 if (std::error_code EC = Flags.getError()) 1357 return EC; 1358 Entry.Flags = *Flags; 1359 1360 auto Offset = readUnencodedNumber<uint64_t>(); 1361 if (std::error_code EC = Offset.getError()) 1362 return EC; 1363 Entry.Offset = *Offset; 1364 1365 auto Size = readUnencodedNumber<uint64_t>(); 1366 if (std::error_code EC = Size.getError()) 1367 return EC; 1368 Entry.Size = *Size; 1369 1370 Entry.LayoutIndex = Idx; 1371 SecHdrTable.push_back(std::move(Entry)); 1372 return sampleprof_error::success; 1373 } 1374 1375 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { 1376 auto EntryNum = readUnencodedNumber<uint64_t>(); 1377 if (std::error_code EC = EntryNum.getError()) 1378 return EC; 1379 1380 for (uint64_t i = 0; i < (*EntryNum); i++) 1381 if (std::error_code EC = readSecHdrTableEntry(i)) 1382 return EC; 1383 1384 return sampleprof_error::success; 1385 } 1386 1387 std::error_code SampleProfileReaderExtBinaryBase::readHeader() { 1388 const uint8_t *BufStart = 1389 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1390 Data = BufStart; 1391 End = BufStart + Buffer->getBufferSize(); 1392 1393 if 
(std::error_code EC = readMagicIdent()) 1394 return EC; 1395 1396 if (std::error_code EC = readSecHdrTable()) 1397 return EC; 1398 1399 return sampleprof_error::success; 1400 } 1401 1402 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { 1403 uint64_t Size = 0; 1404 for (auto &Entry : SecHdrTable) { 1405 if (Entry.Type == Type) 1406 Size += Entry.Size; 1407 } 1408 return Size; 1409 } 1410 1411 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { 1412 // Sections in SecHdrTable is not necessarily in the same order as 1413 // sections in the profile because section like FuncOffsetTable needs 1414 // to be written after section LBRProfile but needs to be read before 1415 // section LBRProfile, so we cannot simply use the last entry in 1416 // SecHdrTable to calculate the file size. 1417 uint64_t FileSize = 0; 1418 for (auto &Entry : SecHdrTable) { 1419 FileSize = std::max(Entry.Offset + Entry.Size, FileSize); 1420 } 1421 return FileSize; 1422 } 1423 1424 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { 1425 std::string Flags; 1426 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) 1427 Flags.append("{compressed,"); 1428 else 1429 Flags.append("{"); 1430 1431 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 1432 Flags.append("flat,"); 1433 1434 switch (Entry.Type) { 1435 case SecNameTable: 1436 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) 1437 Flags.append("fixlenmd5,"); 1438 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) 1439 Flags.append("md5,"); 1440 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix)) 1441 Flags.append("uniq,"); 1442 break; 1443 case SecProfSummary: 1444 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 1445 Flags.append("partial,"); 1446 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 1447 Flags.append("context,"); 1448 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined)) 1449 Flags.append("preInlined,"); 1450 if 
(hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) 1451 Flags.append("fs-discriminator,"); 1452 break; 1453 case SecFuncOffsetTable: 1454 if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) 1455 Flags.append("ordered,"); 1456 break; 1457 case SecFuncMetadata: 1458 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased)) 1459 Flags.append("probe,"); 1460 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute)) 1461 Flags.append("attr,"); 1462 break; 1463 default: 1464 break; 1465 } 1466 char &last = Flags.back(); 1467 if (last == ',') 1468 last = '}'; 1469 else 1470 Flags.append("}"); 1471 return Flags; 1472 } 1473 1474 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { 1475 uint64_t TotalSecsSize = 0; 1476 for (auto &Entry : SecHdrTable) { 1477 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset 1478 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry) 1479 << "\n"; 1480 ; 1481 TotalSecsSize += Entry.Size; 1482 } 1483 uint64_t HeaderSize = SecHdrTable.front().Offset; 1484 assert(HeaderSize + TotalSecsSize == getFileSize() && 1485 "Size of 'header + sections' doesn't match the total size of profile"); 1486 1487 OS << "Header Size: " << HeaderSize << "\n"; 1488 OS << "Total Sections Size: " << TotalSecsSize << "\n"; 1489 OS << "File Size: " << getFileSize() << "\n"; 1490 return true; 1491 } 1492 1493 std::error_code SampleProfileReaderBinary::readMagicIdent() { 1494 // Read and check the magic identifier. 1495 auto Magic = readNumber<uint64_t>(); 1496 if (std::error_code EC = Magic.getError()) 1497 return EC; 1498 else if (std::error_code EC = verifySPMagic(*Magic)) 1499 return EC; 1500 1501 // Read the version number. 
1502 auto Version = readNumber<uint64_t>(); 1503 if (std::error_code EC = Version.getError()) 1504 return EC; 1505 else if (*Version != SPVersion()) 1506 return sampleprof_error::unsupported_version; 1507 1508 return sampleprof_error::success; 1509 } 1510 1511 std::error_code SampleProfileReaderBinary::readHeader() { 1512 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1513 End = Data + Buffer->getBufferSize(); 1514 1515 if (std::error_code EC = readMagicIdent()) 1516 return EC; 1517 1518 if (std::error_code EC = readSummary()) 1519 return EC; 1520 1521 if (std::error_code EC = readNameTable()) 1522 return EC; 1523 return sampleprof_error::success; 1524 } 1525 1526 std::error_code SampleProfileReaderBinary::readSummaryEntry( 1527 std::vector<ProfileSummaryEntry> &Entries) { 1528 auto Cutoff = readNumber<uint64_t>(); 1529 if (std::error_code EC = Cutoff.getError()) 1530 return EC; 1531 1532 auto MinBlockCount = readNumber<uint64_t>(); 1533 if (std::error_code EC = MinBlockCount.getError()) 1534 return EC; 1535 1536 auto NumBlocks = readNumber<uint64_t>(); 1537 if (std::error_code EC = NumBlocks.getError()) 1538 return EC; 1539 1540 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks); 1541 return sampleprof_error::success; 1542 } 1543 1544 std::error_code SampleProfileReaderBinary::readSummary() { 1545 auto TotalCount = readNumber<uint64_t>(); 1546 if (std::error_code EC = TotalCount.getError()) 1547 return EC; 1548 1549 auto MaxBlockCount = readNumber<uint64_t>(); 1550 if (std::error_code EC = MaxBlockCount.getError()) 1551 return EC; 1552 1553 auto MaxFunctionCount = readNumber<uint64_t>(); 1554 if (std::error_code EC = MaxFunctionCount.getError()) 1555 return EC; 1556 1557 auto NumBlocks = readNumber<uint64_t>(); 1558 if (std::error_code EC = NumBlocks.getError()) 1559 return EC; 1560 1561 auto NumFunctions = readNumber<uint64_t>(); 1562 if (std::error_code EC = NumFunctions.getError()) 1563 return EC; 1564 1565 auto NumSummaryEntries = 
readNumber<uint64_t>(); 1566 if (std::error_code EC = NumSummaryEntries.getError()) 1567 return EC; 1568 1569 std::vector<ProfileSummaryEntry> Entries; 1570 for (unsigned i = 0; i < *NumSummaryEntries; i++) { 1571 std::error_code EC = readSummaryEntry(Entries); 1572 if (EC != sampleprof_error::success) 1573 return EC; 1574 } 1575 Summary = std::make_unique<ProfileSummary>( 1576 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, 1577 *MaxFunctionCount, *NumBlocks, *NumFunctions); 1578 1579 return sampleprof_error::success; 1580 } 1581 1582 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { 1583 const uint8_t *Data = 1584 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1585 uint64_t Magic = decodeULEB128(Data); 1586 return Magic == SPMagic(); 1587 } 1588 1589 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { 1590 const uint8_t *Data = 1591 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1592 uint64_t Magic = decodeULEB128(Data); 1593 return Magic == SPMagic(SPF_Ext_Binary); 1594 } 1595 1596 std::error_code SampleProfileReaderGCC::skipNextWord() { 1597 uint32_t dummy; 1598 if (!GcovBuffer.readInt(dummy)) 1599 return sampleprof_error::truncated; 1600 return sampleprof_error::success; 1601 } 1602 1603 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { 1604 if (sizeof(T) <= sizeof(uint32_t)) { 1605 uint32_t Val; 1606 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) 1607 return static_cast<T>(Val); 1608 } else if (sizeof(T) <= sizeof(uint64_t)) { 1609 uint64_t Val; 1610 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) 1611 return static_cast<T>(Val); 1612 } 1613 1614 std::error_code EC = sampleprof_error::malformed; 1615 reportError(0, EC.message()); 1616 return EC; 1617 } 1618 1619 ErrorOr<StringRef> SampleProfileReaderGCC::readString() { 1620 StringRef Str; 1621 if (!GcovBuffer.readString(Str)) 1622 return 
sampleprof_error::truncated; 1623 return Str; 1624 } 1625 1626 std::error_code SampleProfileReaderGCC::readHeader() { 1627 // Read the magic identifier. 1628 if (!GcovBuffer.readGCDAFormat()) 1629 return sampleprof_error::unrecognized_format; 1630 1631 // Read the version number. Note - the GCC reader does not validate this 1632 // version, but the profile creator generates v704. 1633 GCOV::GCOVVersion version; 1634 if (!GcovBuffer.readGCOVVersion(version)) 1635 return sampleprof_error::unrecognized_format; 1636 1637 if (version != GCOV::V407) 1638 return sampleprof_error::unsupported_version; 1639 1640 // Skip the empty integer. 1641 if (std::error_code EC = skipNextWord()) 1642 return EC; 1643 1644 return sampleprof_error::success; 1645 } 1646 1647 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { 1648 uint32_t Tag; 1649 if (!GcovBuffer.readInt(Tag)) 1650 return sampleprof_error::truncated; 1651 1652 if (Tag != Expected) 1653 return sampleprof_error::malformed; 1654 1655 if (std::error_code EC = skipNextWord()) 1656 return EC; 1657 1658 return sampleprof_error::success; 1659 } 1660 1661 std::error_code SampleProfileReaderGCC::readNameTable() { 1662 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) 1663 return EC; 1664 1665 uint32_t Size; 1666 if (!GcovBuffer.readInt(Size)) 1667 return sampleprof_error::truncated; 1668 1669 for (uint32_t I = 0; I < Size; ++I) { 1670 StringRef Str; 1671 if (!GcovBuffer.readString(Str)) 1672 return sampleprof_error::truncated; 1673 Names.push_back(std::string(Str)); 1674 } 1675 1676 return sampleprof_error::success; 1677 } 1678 1679 std::error_code SampleProfileReaderGCC::readFunctionProfiles() { 1680 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) 1681 return EC; 1682 1683 uint32_t NumFunctions; 1684 if (!GcovBuffer.readInt(NumFunctions)) 1685 return sampleprof_error::truncated; 1686 1687 InlineCallStack Stack; 1688 for (uint32_t I = 0; I < NumFunctions; ++I) 1689 if 
(std::error_code EC = readOneFunctionProfile(Stack, true, 0)) 1690 return EC; 1691 1692 computeSummary(); 1693 return sampleprof_error::success; 1694 } 1695 1696 std::error_code SampleProfileReaderGCC::readOneFunctionProfile( 1697 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { 1698 uint64_t HeadCount = 0; 1699 if (InlineStack.size() == 0) 1700 if (!GcovBuffer.readInt64(HeadCount)) 1701 return sampleprof_error::truncated; 1702 1703 uint32_t NameIdx; 1704 if (!GcovBuffer.readInt(NameIdx)) 1705 return sampleprof_error::truncated; 1706 1707 StringRef Name(Names[NameIdx]); 1708 1709 uint32_t NumPosCounts; 1710 if (!GcovBuffer.readInt(NumPosCounts)) 1711 return sampleprof_error::truncated; 1712 1713 uint32_t NumCallsites; 1714 if (!GcovBuffer.readInt(NumCallsites)) 1715 return sampleprof_error::truncated; 1716 1717 FunctionSamples *FProfile = nullptr; 1718 if (InlineStack.size() == 0) { 1719 // If this is a top function that we have already processed, do not 1720 // update its profile again. This happens in the presence of 1721 // function aliases. Since these aliases share the same function 1722 // body, there will be identical replicated profiles for the 1723 // original function. In this case, we simply not bother updating 1724 // the profile of the original function. 1725 FProfile = &Profiles[FunctionId(Name)]; 1726 FProfile->addHeadSamples(HeadCount); 1727 if (FProfile->getTotalSamples() > 0) 1728 Update = false; 1729 } else { 1730 // Otherwise, we are reading an inlined instance. The top of the 1731 // inline stack contains the profile of the caller. Insert this 1732 // callee in the caller's CallsiteMap. 
1733 FunctionSamples *CallerProfile = InlineStack.front(); 1734 uint32_t LineOffset = Offset >> 16; 1735 uint32_t Discriminator = Offset & 0xffff; 1736 FProfile = &CallerProfile->functionSamplesAt( 1737 LineLocation(LineOffset, Discriminator))[FunctionId(Name)]; 1738 } 1739 FProfile->setFunction(FunctionId(Name)); 1740 1741 for (uint32_t I = 0; I < NumPosCounts; ++I) { 1742 uint32_t Offset; 1743 if (!GcovBuffer.readInt(Offset)) 1744 return sampleprof_error::truncated; 1745 1746 uint32_t NumTargets; 1747 if (!GcovBuffer.readInt(NumTargets)) 1748 return sampleprof_error::truncated; 1749 1750 uint64_t Count; 1751 if (!GcovBuffer.readInt64(Count)) 1752 return sampleprof_error::truncated; 1753 1754 // The line location is encoded in the offset as: 1755 // high 16 bits: line offset to the start of the function. 1756 // low 16 bits: discriminator. 1757 uint32_t LineOffset = Offset >> 16; 1758 uint32_t Discriminator = Offset & 0xffff; 1759 1760 InlineCallStack NewStack; 1761 NewStack.push_back(FProfile); 1762 llvm::append_range(NewStack, InlineStack); 1763 if (Update) { 1764 // Walk up the inline stack, adding the samples on this line to 1765 // the total sample count of the callers in the chain. 1766 for (auto *CallerProfile : NewStack) 1767 CallerProfile->addTotalSamples(Count); 1768 1769 // Update the body samples for the current profile. 1770 FProfile->addBodySamples(LineOffset, Discriminator, Count); 1771 } 1772 1773 // Process the list of functions called at an indirect call site. 1774 // These are all the targets that a function pointer (or virtual 1775 // function) resolved at runtime. 
1776 for (uint32_t J = 0; J < NumTargets; J++) { 1777 uint32_t HistVal; 1778 if (!GcovBuffer.readInt(HistVal)) 1779 return sampleprof_error::truncated; 1780 1781 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) 1782 return sampleprof_error::malformed; 1783 1784 uint64_t TargetIdx; 1785 if (!GcovBuffer.readInt64(TargetIdx)) 1786 return sampleprof_error::truncated; 1787 StringRef TargetName(Names[TargetIdx]); 1788 1789 uint64_t TargetCount; 1790 if (!GcovBuffer.readInt64(TargetCount)) 1791 return sampleprof_error::truncated; 1792 1793 if (Update) 1794 FProfile->addCalledTargetSamples(LineOffset, Discriminator, 1795 FunctionId(TargetName), 1796 TargetCount); 1797 } 1798 } 1799 1800 // Process all the inlined callers into the current function. These 1801 // are all the callsites that were inlined into this function. 1802 for (uint32_t I = 0; I < NumCallsites; I++) { 1803 // The offset is encoded as: 1804 // high 16 bits: line offset to the start of the function. 1805 // low 16 bits: discriminator. 1806 uint32_t Offset; 1807 if (!GcovBuffer.readInt(Offset)) 1808 return sampleprof_error::truncated; 1809 InlineCallStack NewStack; 1810 NewStack.push_back(FProfile); 1811 llvm::append_range(NewStack, InlineStack); 1812 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) 1813 return EC; 1814 } 1815 1816 return sampleprof_error::success; 1817 } 1818 1819 /// Read a GCC AutoFDO profile. 1820 /// 1821 /// This format is generated by the Linux Perf conversion tool at 1822 /// https://github.com/google/autofdo. 1823 std::error_code SampleProfileReaderGCC::readImpl() { 1824 assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator"); 1825 // Read the string table. 1826 if (std::error_code EC = readNameTable()) 1827 return EC; 1828 1829 // Read the source profile. 
1830 if (std::error_code EC = readFunctionProfiles()) 1831 return EC; 1832 1833 return sampleprof_error::success; 1834 } 1835 1836 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { 1837 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); 1838 return Magic == "adcg*704"; 1839 } 1840 1841 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { 1842 // If the reader uses MD5 to represent string, we can't remap it because 1843 // we don't know what the original function names were. 1844 if (Reader.useMD5()) { 1845 Ctx.diagnose(DiagnosticInfoSampleProfile( 1846 Reader.getBuffer()->getBufferIdentifier(), 1847 "Profile data remapping cannot be applied to profile data " 1848 "using MD5 names (original mangled names are not available).", 1849 DS_Warning)); 1850 return; 1851 } 1852 1853 // CSSPGO-TODO: Remapper is not yet supported. 1854 // We will need to remap the entire context string. 1855 assert(Remappings && "should be initialized while creating remapper"); 1856 for (auto &Sample : Reader.getProfiles()) { 1857 DenseSet<FunctionId> NamesInSample; 1858 Sample.second.findAllNames(NamesInSample); 1859 for (auto &Name : NamesInSample) { 1860 StringRef NameStr = Name.stringRef(); 1861 if (auto Key = Remappings->insert(NameStr)) 1862 NameMap.insert({Key, NameStr}); 1863 } 1864 } 1865 1866 RemappingApplied = true; 1867 } 1868 1869 std::optional<StringRef> 1870 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { 1871 if (auto Key = Remappings->lookup(Fname)) { 1872 StringRef Result = NameMap.lookup(Key); 1873 if (!Result.empty()) 1874 return Result; 1875 } 1876 return std::nullopt; 1877 } 1878 1879 /// Prepare a memory buffer for the contents of \p Filename. 1880 /// 1881 /// \returns an error code indicating the status of the buffer. 
1882 static ErrorOr<std::unique_ptr<MemoryBuffer>> 1883 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { 1884 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() 1885 : FS.getBufferForFile(Filename); 1886 if (std::error_code EC = BufferOrErr.getError()) 1887 return EC; 1888 auto Buffer = std::move(BufferOrErr.get()); 1889 1890 return std::move(Buffer); 1891 } 1892 1893 /// Create a sample profile reader based on the format of the input file. 1894 /// 1895 /// \param Filename The file to open. 1896 /// 1897 /// \param C The LLVM context to use to emit diagnostics. 1898 /// 1899 /// \param P The FSDiscriminatorPass. 1900 /// 1901 /// \param RemapFilename The file used for profile remapping. 1902 /// 1903 /// \returns an error code indicating the status of the created reader. 1904 ErrorOr<std::unique_ptr<SampleProfileReader>> 1905 SampleProfileReader::create(StringRef Filename, LLVMContext &C, 1906 vfs::FileSystem &FS, FSDiscriminatorPass P, 1907 StringRef RemapFilename) { 1908 auto BufferOrError = setupMemoryBuffer(Filename, FS); 1909 if (std::error_code EC = BufferOrError.getError()) 1910 return EC; 1911 return create(BufferOrError.get(), C, FS, P, RemapFilename); 1912 } 1913 1914 /// Create a sample profile remapper from the given input, to remap the 1915 /// function names in the given profile data. 1916 /// 1917 /// \param Filename The file to open. 1918 /// 1919 /// \param Reader The profile reader the remapper is going to be applied to. 1920 /// 1921 /// \param C The LLVM context to use to emit diagnostics. 1922 /// 1923 /// \returns an error code indicating the status of the created reader. 
1924 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1925 SampleProfileReaderItaniumRemapper::create(StringRef Filename, 1926 vfs::FileSystem &FS, 1927 SampleProfileReader &Reader, 1928 LLVMContext &C) { 1929 auto BufferOrError = setupMemoryBuffer(Filename, FS); 1930 if (std::error_code EC = BufferOrError.getError()) 1931 return EC; 1932 return create(BufferOrError.get(), Reader, C); 1933 } 1934 1935 /// Create a sample profile remapper from the given input, to remap the 1936 /// function names in the given profile data. 1937 /// 1938 /// \param B The memory buffer to create the reader from (assumes ownership). 1939 /// 1940 /// \param C The LLVM context to use to emit diagnostics. 1941 /// 1942 /// \param Reader The profile reader the remapper is going to be applied to. 1943 /// 1944 /// \returns an error code indicating the status of the created reader. 1945 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1946 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B, 1947 SampleProfileReader &Reader, 1948 LLVMContext &C) { 1949 auto Remappings = std::make_unique<SymbolRemappingReader>(); 1950 if (Error E = Remappings->read(*B)) { 1951 handleAllErrors( 1952 std::move(E), [&](const SymbolRemappingParseError &ParseError) { 1953 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), 1954 ParseError.getLineNum(), 1955 ParseError.getMessage())); 1956 }); 1957 return sampleprof_error::malformed; 1958 } 1959 1960 return std::make_unique<SampleProfileReaderItaniumRemapper>( 1961 std::move(B), std::move(Remappings), Reader); 1962 } 1963 1964 /// Create a sample profile reader based on the format of the input data. 1965 /// 1966 /// \param B The memory buffer to create the reader from (assumes ownership). 1967 /// 1968 /// \param C The LLVM context to use to emit diagnostics. 1969 /// 1970 /// \param P The FSDiscriminatorPass. 1971 /// 1972 /// \param RemapFilename The file used for profile remapping. 
1973 /// 1974 /// \returns an error code indicating the status of the created reader. 1975 ErrorOr<std::unique_ptr<SampleProfileReader>> 1976 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, 1977 vfs::FileSystem &FS, FSDiscriminatorPass P, 1978 StringRef RemapFilename) { 1979 std::unique_ptr<SampleProfileReader> Reader; 1980 if (SampleProfileReaderRawBinary::hasFormat(*B)) 1981 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); 1982 else if (SampleProfileReaderExtBinary::hasFormat(*B)) 1983 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); 1984 else if (SampleProfileReaderGCC::hasFormat(*B)) 1985 Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); 1986 else if (SampleProfileReaderText::hasFormat(*B)) 1987 Reader.reset(new SampleProfileReaderText(std::move(B), C)); 1988 else 1989 return sampleprof_error::unrecognized_format; 1990 1991 if (!RemapFilename.empty()) { 1992 auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create( 1993 RemapFilename, FS, *Reader, C); 1994 if (std::error_code EC = ReaderOrErr.getError()) { 1995 std::string Msg = "Could not create remapper: " + EC.message(); 1996 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); 1997 return EC; 1998 } 1999 Reader->Remapper = std::move(ReaderOrErr.get()); 2000 } 2001 2002 if (std::error_code EC = Reader->readHeader()) { 2003 return EC; 2004 } 2005 2006 Reader->setDiscriminatorMaskedBitFrom(P); 2007 2008 return std::move(Reader); 2009 } 2010 2011 // For text and GCC file formats, we compute the summary after reading the 2012 // profile. Binary format has the profile summary in its header. 2013 void SampleProfileReader::computeSummary() { 2014 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 2015 Summary = Builder.computeSummaryForProfiles(Profiles); 2016 } 2017