1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/Endian.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include "llvm/Support/SymbolRemappingReader.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <cstddef> 32 #include <cstdint> 33 #include <limits> 34 #include <memory> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 using namespace llvm; 40 41 static Expected<std::unique_ptr<MemoryBuffer>> 42 setupMemoryBuffer(const Twine &Path) { 43 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 44 MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 45 if (std::error_code EC = BufferOrErr.getError()) 46 return errorCodeToError(EC); 47 return std::move(BufferOrErr.get()); 48 } 49 50 static Error initializeReader(InstrProfReader &Reader) { 51 return Reader.readHeader(); 52 } 53 54 Expected<std::unique_ptr<InstrProfReader>> 55 InstrProfReader::create(const Twine &Path) { 56 // Set up the buffer to read. 57 auto BufferOrError = setupMemoryBuffer(Path); 58 if (Error E = BufferOrError.takeError()) 59 return std::move(E); 60 return InstrProfReader::create(std::move(BufferOrError.get())); 61 } 62 63 Expected<std::unique_ptr<InstrProfReader>> 64 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 65 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 66 return make_error<InstrProfError>(instrprof_error::too_large); 67 68 if (Buffer->getBufferSize() == 0) 69 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 70 71 std::unique_ptr<InstrProfReader> Result; 72 // Create the reader. 73 if (IndexedInstrProfReader::hasFormat(*Buffer)) 74 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 75 else if (RawInstrProfReader64::hasFormat(*Buffer)) 76 Result.reset(new RawInstrProfReader64(std::move(Buffer))); 77 else if (RawInstrProfReader32::hasFormat(*Buffer)) 78 Result.reset(new RawInstrProfReader32(std::move(Buffer))); 79 else if (TextInstrProfReader::hasFormat(*Buffer)) 80 Result.reset(new TextInstrProfReader(std::move(Buffer))); 81 else 82 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 83 84 // Initialize the reader and return the result. 85 if (Error E = initializeReader(*Result)) 86 return std::move(E); 87 88 return std::move(Result); 89 } 90 91 Expected<std::unique_ptr<IndexedInstrProfReader>> 92 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 93 // Set up the buffer to read. 94 auto BufferOrError = setupMemoryBuffer(Path); 95 if (Error E = BufferOrError.takeError()) 96 return std::move(E); 97 98 // Set up the remapping buffer if requested. 99 std::unique_ptr<MemoryBuffer> RemappingBuffer; 100 std::string RemappingPathStr = RemappingPath.str(); 101 if (!RemappingPathStr.empty()) { 102 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 103 if (Error E = RemappingBufferOrError.takeError()) 104 return std::move(E); 105 RemappingBuffer = std::move(RemappingBufferOrError.get()); 106 } 107 108 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 109 std::move(RemappingBuffer)); 110 } 111 112 Expected<std::unique_ptr<IndexedInstrProfReader>> 113 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 114 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 115 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 116 return make_error<InstrProfError>(instrprof_error::too_large); 117 118 // Create the reader. 119 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 120 return make_error<InstrProfError>(instrprof_error::bad_magic); 121 auto Result = std::make_unique<IndexedInstrProfReader>( 122 std::move(Buffer), std::move(RemappingBuffer)); 123 124 // Initialize the reader and return the result. 125 if (Error E = initializeReader(*Result)) 126 return std::move(E); 127 128 return std::move(Result); 129 } 130 131 void InstrProfIterator::Increment() { 132 if (auto E = Reader->readNextRecord(Record)) { 133 // Handle errors in the reader. 134 InstrProfError::take(std::move(E)); 135 *this = InstrProfIterator(); 136 } 137 } 138 139 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 140 // Verify that this really looks like plain ASCII text by checking a 141 // 'reasonable' number of characters (up to profile magic size). 142 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 143 StringRef buffer = Buffer.getBufferStart(); 144 return count == 0 || 145 std::all_of(buffer.begin(), buffer.begin() + count, 146 [](char c) { return isPrint(c) || isSpace(c); }); 147 } 148 149 // Read the profile variant flag from the header: ":FE" means this is a FE 150 // generated profile. ":IR" means this is an IR level profile. Other strings 151 // with a leading ':' will be reported an error format. 152 Error TextInstrProfReader::readHeader() { 153 Symtab.reset(new InstrProfSymtab()); 154 bool IsIRInstr = false; 155 bool IsEntryFirst = false; 156 bool IsCS = false; 157 158 while (Line->startswith(":")) { 159 StringRef Str = Line->substr(1); 160 if (Str.equals_insensitive("ir")) 161 IsIRInstr = true; 162 else if (Str.equals_insensitive("fe")) 163 IsIRInstr = false; 164 else if (Str.equals_insensitive("csir")) { 165 IsIRInstr = true; 166 IsCS = true; 167 } else if (Str.equals_insensitive("entry_first")) 168 IsEntryFirst = true; 169 else if (Str.equals_insensitive("not_entry_first")) 170 IsEntryFirst = false; 171 else 172 return error(instrprof_error::bad_header); 173 ++Line; 174 } 175 IsIRLevelProfile = IsIRInstr; 176 InstrEntryBBEnabled = IsEntryFirst; 177 HasCSIRLevelProfile = IsCS; 178 return success(); 179 } 180 181 Error 182 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 183 184 #define CHECK_LINE_END(Line) \ 185 if (Line.is_at_end()) \ 186 return error(instrprof_error::truncated); 187 #define READ_NUM(Str, Dst) \ 188 if ((Str).getAsInteger(10, (Dst))) \ 189 return error(instrprof_error::malformed); 190 #define VP_READ_ADVANCE(Val) \ 191 CHECK_LINE_END(Line); \ 192 uint32_t Val; \ 193 READ_NUM((*Line), (Val)); \ 194 Line++; 195 196 if (Line.is_at_end()) 197 return success(); 198 199 uint32_t NumValueKinds; 200 if (Line->getAsInteger(10, NumValueKinds)) { 201 // No value profile data 202 return success(); 203 } 204 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 205 return error(instrprof_error::malformed, 206 "number of value kinds is invalid"); 207 Line++; 208 209 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 210 VP_READ_ADVANCE(ValueKind); 211 if (ValueKind > IPVK_Last) 212 return error(instrprof_error::malformed, "value kind is invalid"); 213 ; 214 VP_READ_ADVANCE(NumValueSites); 215 if (!NumValueSites) 216 continue; 217 218 Record.reserveSites(VK, NumValueSites); 219 for (uint32_t S = 0; S < NumValueSites; S++) { 220 VP_READ_ADVANCE(NumValueData); 221 222 std::vector<InstrProfValueData> CurrentValues; 223 for (uint32_t V = 0; V < NumValueData; V++) { 224 CHECK_LINE_END(Line); 225 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 226 uint64_t TakenCount, Value; 227 if (ValueKind == IPVK_IndirectCallTarget) { 228 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 229 Value = 0; 230 } else { 231 if (Error E = Symtab->addFuncName(VD.first)) 232 return E; 233 Value = IndexedInstrProf::ComputeHash(VD.first); 234 } 235 } else { 236 READ_NUM(VD.first, Value); 237 } 238 READ_NUM(VD.second, TakenCount); 239 CurrentValues.push_back({Value, TakenCount}); 240 Line++; 241 } 242 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 243 nullptr); 244 } 245 } 246 return success(); 247 248 #undef CHECK_LINE_END 249 #undef READ_NUM 250 #undef VP_READ_ADVANCE 251 } 252 253 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 254 // Skip empty lines and comments. 255 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 256 ++Line; 257 // If we hit EOF while looking for a name, we're done. 258 if (Line.is_at_end()) { 259 return error(instrprof_error::eof); 260 } 261 262 // Read the function name. 263 Record.Name = *Line++; 264 if (Error E = Symtab->addFuncName(Record.Name)) 265 return error(std::move(E)); 266 267 // Read the function hash. 268 if (Line.is_at_end()) 269 return error(instrprof_error::truncated); 270 if ((Line++)->getAsInteger(0, Record.Hash)) 271 return error(instrprof_error::malformed, 272 "function hash is not a valid integer"); 273 274 // Read the number of counters. 275 uint64_t NumCounters; 276 if (Line.is_at_end()) 277 return error(instrprof_error::truncated); 278 if ((Line++)->getAsInteger(10, NumCounters)) 279 return error(instrprof_error::malformed, 280 "number of counters is not a valid integer"); 281 if (NumCounters == 0) 282 return error(instrprof_error::malformed, "number of counters is zero"); 283 284 // Read each counter and fill our internal storage with the values. 285 Record.Clear(); 286 Record.Counts.reserve(NumCounters); 287 for (uint64_t I = 0; I < NumCounters; ++I) { 288 if (Line.is_at_end()) 289 return error(instrprof_error::truncated); 290 uint64_t Count; 291 if ((Line++)->getAsInteger(10, Count)) 292 return error(instrprof_error::malformed, "count is invalid"); 293 Record.Counts.push_back(Count); 294 } 295 296 // Check if value profile data exists and read it if so. 297 if (Error E = readValueProfileData(Record)) 298 return error(std::move(E)); 299 300 return success(); 301 } 302 303 template <class IntPtrT> 304 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 305 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 306 return false; 307 uint64_t Magic = 308 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 309 return RawInstrProf::getMagic<IntPtrT>() == Magic || 310 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 311 } 312 313 template <class IntPtrT> 314 Error RawInstrProfReader<IntPtrT>::readHeader() { 315 if (!hasFormat(*DataBuffer)) 316 return error(instrprof_error::bad_magic); 317 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 318 return error(instrprof_error::bad_header); 319 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 320 DataBuffer->getBufferStart()); 321 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 322 return readHeader(*Header); 323 } 324 325 template <class IntPtrT> 326 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 327 const char *End = DataBuffer->getBufferEnd(); 328 // Skip zero padding between profiles. 329 while (CurrentPos != End && *CurrentPos == 0) 330 ++CurrentPos; 331 // If there's nothing left, we're done. 332 if (CurrentPos == End) 333 return make_error<InstrProfError>(instrprof_error::eof); 334 // If there isn't enough space for another header, this is probably just 335 // garbage at the end of the file. 336 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 337 return make_error<InstrProfError>(instrprof_error::malformed, 338 "not enough space for another header"); 339 // The writer ensures each profile is padded to start at an aligned address. 340 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 341 return make_error<InstrProfError>(instrprof_error::malformed, 342 "insufficient padding"); 343 // The magic should have the same byte order as in the previous header. 344 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 345 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 346 return make_error<InstrProfError>(instrprof_error::bad_magic); 347 348 // There's another profile to read, so we need to process the header. 349 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 350 return readHeader(*Header); 351 } 352 353 template <class IntPtrT> 354 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 355 if (Error E = Symtab.create(StringRef(NamesStart, NamesSize))) 356 return error(std::move(E)); 357 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 358 const IntPtrT FPtr = swap(I->FunctionPointer); 359 if (!FPtr) 360 continue; 361 Symtab.mapAddress(FPtr, I->NameRef); 362 } 363 return success(); 364 } 365 366 template <class IntPtrT> 367 Error RawInstrProfReader<IntPtrT>::readHeader( 368 const RawInstrProf::Header &Header) { 369 Version = swap(Header.Version); 370 if (GET_VERSION(Version) != RawInstrProf::Version) 371 return error(instrprof_error::unsupported_version); 372 373 BinaryIdsSize = swap(Header.BinaryIdsSize); 374 if (BinaryIdsSize % sizeof(uint64_t)) 375 return error(instrprof_error::bad_header); 376 377 CountersDelta = swap(Header.CountersDelta); 378 NamesDelta = swap(Header.NamesDelta); 379 auto DataSize = swap(Header.DataSize); 380 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 381 auto CountersSize = swap(Header.CountersSize); 382 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 383 NamesSize = swap(Header.NamesSize); 384 ValueKindLast = swap(Header.ValueKindLast); 385 386 auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>); 387 auto PaddingSize = getNumPaddingBytes(NamesSize); 388 389 // Profile data starts after profile header and binary ids if exist. 390 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; 391 ptrdiff_t CountersOffset = 392 DataOffset + DataSizeInBytes + PaddingBytesBeforeCounters; 393 ptrdiff_t NamesOffset = CountersOffset + (sizeof(uint64_t) * CountersSize) + 394 PaddingBytesAfterCounters; 395 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 396 397 auto *Start = reinterpret_cast<const char *>(&Header); 398 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 399 return error(instrprof_error::bad_header); 400 401 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 402 Start + DataOffset); 403 DataEnd = Data + DataSize; 404 405 // Binary ids start just after the header. 406 BinaryIdsStart = 407 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 408 CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset); 409 NamesStart = Start + NamesOffset; 410 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 411 412 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 413 if (BinaryIdsStart + BinaryIdsSize > BufferEnd) 414 return error(instrprof_error::bad_header); 415 416 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 417 if (Error E = createSymtab(*NewSymtab.get())) 418 return E; 419 420 Symtab = std::move(NewSymtab); 421 return success(); 422 } 423 424 template <class IntPtrT> 425 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 426 Record.Name = getName(Data->NameRef); 427 return success(); 428 } 429 430 template <class IntPtrT> 431 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 432 Record.Hash = swap(Data->FuncHash); 433 return success(); 434 } 435 436 template <class IntPtrT> 437 Error RawInstrProfReader<IntPtrT>::readRawCounts( 438 InstrProfRecord &Record) { 439 uint32_t NumCounters = swap(Data->NumCounters); 440 if (NumCounters == 0) 441 return error(instrprof_error::malformed, "number of counters is zero"); 442 443 IntPtrT CounterPtr = Data->CounterPtr; 444 auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart); 445 ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart; 446 447 // Check bounds. Note that the counter pointer embedded in the data record 448 // may itself be corrupt. 449 if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters) 450 return error(instrprof_error::malformed, 451 "counter pointer is out of bounds"); 452 453 // We need to compute the in-buffer counter offset from the in-memory address 454 // distance. The initial CountersDelta is the in-memory address difference 455 // start(__llvm_prf_cnts)-start(__llvm_prf_data), so SrcData->CounterPtr - 456 // CountersDelta computes the offset into the in-buffer counter section. 457 // 458 // CountersDelta decreases as we advance to the next data record. 459 ptrdiff_t CounterOffset = getCounterOffset(CounterPtr); 460 CountersDelta -= sizeof(*Data); 461 if (CounterOffset < 0) 462 return error( 463 instrprof_error::malformed, 464 ("counter offset " + Twine(CounterOffset) + " is negative").str()); 465 466 if (CounterOffset > MaxNumCounters) 467 return error(instrprof_error::malformed, 468 ("counter offset " + Twine(CounterOffset) + 469 " is greater than the maximum number of counters " + 470 Twine((uint32_t)MaxNumCounters)) 471 .str()); 472 473 if (((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters) 474 return error(instrprof_error::malformed, 475 ("number of counters " + 476 Twine(((uint32_t)CounterOffset + NumCounters)) + 477 " is greater than the maximum number of counters " + 478 Twine((uint32_t)MaxNumCounters)) 479 .str()); 480 481 auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters); 482 483 if (ShouldSwapBytes) { 484 Record.Counts.clear(); 485 Record.Counts.reserve(RawCounts.size()); 486 for (uint64_t Count : RawCounts) 487 Record.Counts.push_back(swap(Count)); 488 } else 489 Record.Counts = RawCounts; 490 491 return success(); 492 } 493 494 template <class IntPtrT> 495 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 496 InstrProfRecord &Record) { 497 Record.clearValueData(); 498 CurValueDataSize = 0; 499 // Need to match the logic in value profile dumper code in compiler-rt: 500 uint32_t NumValueKinds = 0; 501 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 502 NumValueKinds += (Data->NumValueSites[I] != 0); 503 504 if (!NumValueKinds) 505 return success(); 506 507 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 508 ValueProfData::getValueProfData( 509 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 510 getDataEndianness()); 511 512 if (Error E = VDataPtrOrErr.takeError()) 513 return E; 514 515 // Note that besides deserialization, this also performs the conversion for 516 // indirect call targets. The function pointers from the raw profile are 517 // remapped into function name hashes. 518 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 519 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 520 return success(); 521 } 522 523 template <class IntPtrT> 524 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 525 if (atEnd()) 526 // At this point, ValueDataStart field points to the next header. 527 if (Error E = readNextHeader(getNextHeaderPos())) 528 return error(std::move(E)); 529 530 // Read name ad set it in Record. 531 if (Error E = readName(Record)) 532 return error(std::move(E)); 533 534 // Read FuncHash and set it in Record. 535 if (Error E = readFuncHash(Record)) 536 return error(std::move(E)); 537 538 // Read raw counts and set Record. 539 if (Error E = readRawCounts(Record)) 540 return error(std::move(E)); 541 542 // Read value data and set Record. 543 if (Error E = readValueProfilingData(Record)) 544 return error(std::move(E)); 545 546 // Iterate. 547 advanceData(); 548 return success(); 549 } 550 551 static size_t RoundUp(size_t size, size_t align) { 552 return (size + align - 1) & ~(align - 1); 553 } 554 555 template <class IntPtrT> 556 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 557 if (BinaryIdsSize == 0) 558 return success(); 559 560 OS << "Binary IDs: \n"; 561 const uint8_t *BI = BinaryIdsStart; 562 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 563 while (BI < BIEnd) { 564 size_t Remaining = BIEnd - BI; 565 566 // There should be enough left to read the binary ID size field. 567 if (Remaining < sizeof(uint64_t)) 568 return make_error<InstrProfError>( 569 instrprof_error::malformed, 570 "not enough data to read binary id length"); 571 572 uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI)); 573 574 // There should be enough left to read the binary ID size field, and the 575 // binary ID. 576 if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) 577 return make_error<InstrProfError>( 578 instrprof_error::malformed, "not enough data to read binary id data"); 579 580 // Increment by binary id length data type size. 581 BI += sizeof(BinaryIdLen); 582 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 583 return make_error<InstrProfError>( 584 instrprof_error::malformed, 585 "binary id that is read is bigger than buffer size"); 586 587 for (uint64_t I = 0; I < BinaryIdLen; I++) 588 OS << format("%02x", BI[I]); 589 OS << "\n"; 590 591 // Increment by binary id data length, rounded to the next 8 bytes. This 592 // accounts for the zero-padding after each build ID. 593 BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); 594 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 595 return make_error<InstrProfError>(instrprof_error::malformed); 596 } 597 598 return success(); 599 } 600 601 namespace llvm { 602 603 template class RawInstrProfReader<uint32_t>; 604 template class RawInstrProfReader<uint64_t>; 605 606 } // end namespace llvm 607 608 InstrProfLookupTrait::hash_value_type 609 InstrProfLookupTrait::ComputeHash(StringRef K) { 610 return IndexedInstrProf::ComputeHash(HashType, K); 611 } 612 613 using data_type = InstrProfLookupTrait::data_type; 614 using offset_type = InstrProfLookupTrait::offset_type; 615 616 bool InstrProfLookupTrait::readValueProfilingData( 617 const unsigned char *&D, const unsigned char *const End) { 618 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 619 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 620 621 if (VDataPtrOrErr.takeError()) 622 return false; 623 624 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 625 D += VDataPtrOrErr.get()->TotalSize; 626 627 return true; 628 } 629 630 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 631 offset_type N) { 632 using namespace support; 633 634 // Check if the data is corrupt. If so, don't try to read it. 635 if (N % sizeof(uint64_t)) 636 return data_type(); 637 638 DataBuffer.clear(); 639 std::vector<uint64_t> CounterBuffer; 640 641 const unsigned char *End = D + N; 642 while (D < End) { 643 // Read hash. 644 if (D + sizeof(uint64_t) >= End) 645 return data_type(); 646 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 647 648 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 649 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 650 // If format version is different then read the number of counters. 651 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 652 if (D + sizeof(uint64_t) > End) 653 return data_type(); 654 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 655 } 656 // Read counter values. 657 if (D + CountsSize * sizeof(uint64_t) > End) 658 return data_type(); 659 660 CounterBuffer.clear(); 661 CounterBuffer.reserve(CountsSize); 662 for (uint64_t J = 0; J < CountsSize; ++J) 663 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 664 665 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 666 667 // Read value profiling data. 668 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 669 !readValueProfilingData(D, End)) { 670 DataBuffer.clear(); 671 return data_type(); 672 } 673 } 674 return DataBuffer; 675 } 676 677 template <typename HashTableImpl> 678 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 679 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 680 auto Iter = HashTable->find(FuncName); 681 if (Iter == HashTable->end()) 682 return make_error<InstrProfError>(instrprof_error::unknown_function); 683 684 Data = (*Iter); 685 if (Data.empty()) 686 return make_error<InstrProfError>(instrprof_error::malformed, 687 "profile data is empty"); 688 689 return Error::success(); 690 } 691 692 template <typename HashTableImpl> 693 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 694 ArrayRef<NamedInstrProfRecord> &Data) { 695 if (atEnd()) 696 return make_error<InstrProfError>(instrprof_error::eof); 697 698 Data = *RecordIterator; 699 700 if (Data.empty()) 701 return make_error<InstrProfError>(instrprof_error::malformed, 702 "profile data is empty"); 703 704 return Error::success(); 705 } 706 707 template <typename HashTableImpl> 708 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 709 const unsigned char *Buckets, const unsigned char *const Payload, 710 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 711 uint64_t Version) { 712 FormatVersion = Version; 713 HashTable.reset(HashTableImpl::Create( 714 Buckets, Payload, Base, 715 typename HashTableImpl::InfoType(HashType, Version))); 716 RecordIterator = HashTable->data_begin(); 717 } 718 719 namespace { 720 /// A remapper that does not apply any remappings. 721 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 722 InstrProfReaderIndexBase &Underlying; 723 724 public: 725 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 726 : Underlying(Underlying) {} 727 728 Error getRecords(StringRef FuncName, 729 ArrayRef<NamedInstrProfRecord> &Data) override { 730 return Underlying.getRecords(FuncName, Data); 731 } 732 }; 733 } // namespace 734 735 /// A remapper that applies remappings based on a symbol remapping file. 736 template <typename HashTableImpl> 737 class llvm::InstrProfReaderItaniumRemapper 738 : public InstrProfReaderRemapper { 739 public: 740 InstrProfReaderItaniumRemapper( 741 std::unique_ptr<MemoryBuffer> RemapBuffer, 742 InstrProfReaderIndex<HashTableImpl> &Underlying) 743 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 744 } 745 746 /// Extract the original function name from a PGO function name. 747 static StringRef extractName(StringRef Name) { 748 // We can have multiple :-separated pieces; there can be pieces both 749 // before and after the mangled name. Find the first part that starts 750 // with '_Z'; we'll assume that's the mangled name we want. 751 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 752 while (true) { 753 Parts = Parts.second.split(':'); 754 if (Parts.first.startswith("_Z")) 755 return Parts.first; 756 if (Parts.second.empty()) 757 return Name; 758 } 759 } 760 761 /// Given a mangled name extracted from a PGO function name, and a new 762 /// form for that mangled name, reconstitute the name. 763 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 764 StringRef Replacement, 765 SmallVectorImpl<char> &Out) { 766 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 767 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 768 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 769 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 770 } 771 772 Error populateRemappings() override { 773 if (Error E = Remappings.read(*RemapBuffer)) 774 return E; 775 for (StringRef Name : Underlying.HashTable->keys()) { 776 StringRef RealName = extractName(Name); 777 if (auto Key = Remappings.insert(RealName)) { 778 // FIXME: We could theoretically map the same equivalence class to 779 // multiple names in the profile data. If that happens, we should 780 // return NamedInstrProfRecords from all of them. 781 MappedNames.insert({Key, RealName}); 782 } 783 } 784 return Error::success(); 785 } 786 787 Error getRecords(StringRef FuncName, 788 ArrayRef<NamedInstrProfRecord> &Data) override { 789 StringRef RealName = extractName(FuncName); 790 if (auto Key = Remappings.lookup(RealName)) { 791 StringRef Remapped = MappedNames.lookup(Key); 792 if (!Remapped.empty()) { 793 if (RealName.begin() == FuncName.begin() && 794 RealName.end() == FuncName.end()) 795 FuncName = Remapped; 796 else { 797 // Try rebuilding the name from the given remapping. 798 SmallString<256> Reconstituted; 799 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 800 Error E = Underlying.getRecords(Reconstituted, Data); 801 if (!E) 802 return E; 803 804 // If we failed because the name doesn't exist, fall back to asking 805 // about the original name. 806 if (Error Unhandled = handleErrors( 807 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 808 return Err->get() == instrprof_error::unknown_function 809 ? Error::success() 810 : Error(std::move(Err)); 811 })) 812 return Unhandled; 813 } 814 } 815 } 816 return Underlying.getRecords(FuncName, Data); 817 } 818 819 private: 820 /// The memory buffer containing the remapping configuration. Remappings 821 /// holds pointers into this buffer. 822 std::unique_ptr<MemoryBuffer> RemapBuffer; 823 824 /// The mangling remapper. 825 SymbolRemappingReader Remappings; 826 827 /// Mapping from mangled name keys to the name used for the key in the 828 /// profile data. 829 /// FIXME: Can we store a location within the on-disk hash table instead of 830 /// redoing lookup? 831 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 832 833 /// The real profile data reader. 834 InstrProfReaderIndex<HashTableImpl> &Underlying; 835 }; 836 837 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 838 using namespace support; 839 840 if (DataBuffer.getBufferSize() < 8) 841 return false; 842 uint64_t Magic = 843 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 844 // Verify that it's magical. 845 return Magic == IndexedInstrProf::Magic; 846 } 847 848 const unsigned char * 849 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 850 const unsigned char *Cur, bool UseCS) { 851 using namespace IndexedInstrProf; 852 using namespace support; 853 854 if (Version >= IndexedInstrProf::Version4) { 855 const IndexedInstrProf::Summary *SummaryInLE = 856 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 857 uint64_t NFields = 858 endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields); 859 uint64_t NEntries = 860 endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries); 861 uint32_t SummarySize = 862 IndexedInstrProf::Summary::getSize(NFields, NEntries); 863 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 864 IndexedInstrProf::allocSummary(SummarySize); 865 866 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 867 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 868 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 869 Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); 870 871 SummaryEntryVector DetailedSummary; 872 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 873 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 874 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 875 Ent.NumBlocks); 876 } 877 std::unique_ptr<llvm::ProfileSummary> &Summary = 878 UseCS ? this->CS_Summary : this->Summary; 879 880 // initialize InstrProfSummary using the SummaryData from disk. 881 Summary = std::make_unique<ProfileSummary>( 882 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 883 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 884 SummaryData->get(Summary::MaxBlockCount), 885 SummaryData->get(Summary::MaxInternalBlockCount), 886 SummaryData->get(Summary::MaxFunctionCount), 887 SummaryData->get(Summary::TotalNumBlocks), 888 SummaryData->get(Summary::TotalNumFunctions)); 889 return Cur + SummarySize; 890 } else { 891 // The older versions do not support a profile summary. This just computes 892 // an empty summary, which will not result in accurate hot/cold detection. 893 // We would need to call addRecord for all NamedInstrProfRecords to get the 894 // correct summary. However, this version is old (prior to early 2016) and 895 // has not been supporting an accurate summary for several years. 896 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 897 Summary = Builder.getSummary(); 898 return Cur; 899 } 900 } 901 902 Error IndexedInstrProfReader::readHeader() { 903 using namespace support; 904 905 const unsigned char *Start = 906 (const unsigned char *)DataBuffer->getBufferStart(); 907 const unsigned char *Cur = Start; 908 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 909 return error(instrprof_error::truncated); 910 911 auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur); 912 Cur += sizeof(IndexedInstrProf::Header); 913 914 // Check the magic number. 915 uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic); 916 if (Magic != IndexedInstrProf::Magic) 917 return error(instrprof_error::bad_magic); 918 919 // Read the version. 920 uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); 921 if (GET_VERSION(FormatVersion) > 922 IndexedInstrProf::ProfVersion::CurrentVersion) 923 return error(instrprof_error::unsupported_version); 924 925 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 926 /* UseCS */ false); 927 if (FormatVersion & VARIANT_MASK_CSIR_PROF) 928 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 929 /* UseCS */ true); 930 931 // Read the hash type and start offset. 932 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 933 endian::byte_swap<uint64_t, little>(Header->HashType)); 934 if (HashType > IndexedInstrProf::HashT::Last) 935 return error(instrprof_error::unsupported_hash_type); 936 937 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 938 939 // The rest of the file is an on disk hash table. 940 auto IndexPtr = 941 std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 942 Start + HashOffset, Cur, Start, HashType, FormatVersion); 943 944 // Load the remapping table now if requested. 945 if (RemappingBuffer) { 946 Remapper = std::make_unique< 947 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 948 std::move(RemappingBuffer), *IndexPtr); 949 if (Error E = Remapper->populateRemappings()) 950 return E; 951 } else { 952 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 953 } 954 Index = std::move(IndexPtr); 955 956 return success(); 957 } 958 959 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 960 if (Symtab.get()) 961 return *Symtab.get(); 962 963 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 964 if (Error E = Index->populateSymtab(*NewSymtab.get())) { 965 consumeError(error(InstrProfError::take(std::move(E)))); 966 } 967 968 Symtab = std::move(NewSymtab); 969 return *Symtab.get(); 970 } 971 972 Expected<InstrProfRecord> 973 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, 974 uint64_t FuncHash) { 975 ArrayRef<NamedInstrProfRecord> Data; 976 Error Err = Remapper->getRecords(FuncName, Data); 977 if (Err) 978 return std::move(Err); 979 // Found it. Look for counters with the right hash. 980 for (unsigned I = 0, E = Data.size(); I < E; ++I) { 981 // Check for a match and fill the vector if there is one. 982 if (Data[I].Hash == FuncHash) { 983 return std::move(Data[I]); 984 } 985 } 986 return error(instrprof_error::hash_mismatch); 987 } 988 989 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 990 uint64_t FuncHash, 991 std::vector<uint64_t> &Counts) { 992 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 993 if (Error E = Record.takeError()) 994 return error(std::move(E)); 995 996 Counts = Record.get().Counts; 997 return success(); 998 } 999 1000 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 1001 ArrayRef<NamedInstrProfRecord> Data; 1002 1003 Error E = Index->getRecords(Data); 1004 if (E) 1005 return error(std::move(E)); 1006 1007 Record = Data[RecordIndex++]; 1008 if (RecordIndex >= Data.size()) { 1009 Index->advanceToNextKey(); 1010 RecordIndex = 0; 1011 } 1012 return success(); 1013 } 1014 1015 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1016 uint64_t NumFuncs = 0; 1017 for (const auto &Func : *this) { 1018 if (isIRLevelProfile()) { 1019 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1020 if (FuncIsCS != IsCS) 1021 continue; 1022 } 1023 Func.accumulateCounts(Sum); 1024 ++NumFuncs; 1025 } 1026 Sum.NumEntries = NumFuncs; 1027 } 1028