1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/Endian.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include "llvm/Support/SymbolRemappingReader.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <cstddef> 32 #include <cstdint> 33 #include <limits> 34 #include <memory> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 using namespace llvm; 40 41 static Expected<std::unique_ptr<MemoryBuffer>> 42 setupMemoryBuffer(const Twine &Path) { 43 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 44 MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 45 if (std::error_code EC = BufferOrErr.getError()) 46 return errorCodeToError(EC); 47 return std::move(BufferOrErr.get()); 48 } 49 50 static Error initializeReader(InstrProfReader &Reader) { 51 return Reader.readHeader(); 52 } 53 54 Expected<std::unique_ptr<InstrProfReader>> 55 InstrProfReader::create(const Twine &Path, 56 const InstrProfCorrelator *Correlator) { 57 // Set up the buffer to read. 58 auto BufferOrError = setupMemoryBuffer(Path); 59 if (Error E = BufferOrError.takeError()) 60 return std::move(E); 61 return InstrProfReader::create(std::move(BufferOrError.get()), Correlator); 62 } 63 64 Expected<std::unique_ptr<InstrProfReader>> 65 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 66 const InstrProfCorrelator *Correlator) { 67 // Sanity check the buffer. 68 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 69 return make_error<InstrProfError>(instrprof_error::too_large); 70 71 if (Buffer->getBufferSize() == 0) 72 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 73 74 std::unique_ptr<InstrProfReader> Result; 75 // Create the reader. 76 if (IndexedInstrProfReader::hasFormat(*Buffer)) 77 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 78 else if (RawInstrProfReader64::hasFormat(*Buffer)) 79 Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator)); 80 else if (RawInstrProfReader32::hasFormat(*Buffer)) 81 Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator)); 82 else if (TextInstrProfReader::hasFormat(*Buffer)) 83 Result.reset(new TextInstrProfReader(std::move(Buffer))); 84 else 85 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 86 87 // Initialize the reader and return the result. 88 if (Error E = initializeReader(*Result)) 89 return std::move(E); 90 91 return std::move(Result); 92 } 93 94 Expected<std::unique_ptr<IndexedInstrProfReader>> 95 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 96 // Set up the buffer to read. 97 auto BufferOrError = setupMemoryBuffer(Path); 98 if (Error E = BufferOrError.takeError()) 99 return std::move(E); 100 101 // Set up the remapping buffer if requested. 102 std::unique_ptr<MemoryBuffer> RemappingBuffer; 103 std::string RemappingPathStr = RemappingPath.str(); 104 if (!RemappingPathStr.empty()) { 105 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 106 if (Error E = RemappingBufferOrError.takeError()) 107 return std::move(E); 108 RemappingBuffer = std::move(RemappingBufferOrError.get()); 109 } 110 111 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 112 std::move(RemappingBuffer)); 113 } 114 115 Expected<std::unique_ptr<IndexedInstrProfReader>> 116 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 117 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 118 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 119 return make_error<InstrProfError>(instrprof_error::too_large); 120 121 // Create the reader. 122 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 123 return make_error<InstrProfError>(instrprof_error::bad_magic); 124 auto Result = std::make_unique<IndexedInstrProfReader>( 125 std::move(Buffer), std::move(RemappingBuffer)); 126 127 // Initialize the reader and return the result. 128 if (Error E = initializeReader(*Result)) 129 return std::move(E); 130 131 return std::move(Result); 132 } 133 134 void InstrProfIterator::Increment() { 135 if (auto E = Reader->readNextRecord(Record)) { 136 // Handle errors in the reader. 137 InstrProfError::take(std::move(E)); 138 *this = InstrProfIterator(); 139 } 140 } 141 142 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 143 // Verify that this really looks like plain ASCII text by checking a 144 // 'reasonable' number of characters (up to profile magic size). 145 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 146 StringRef buffer = Buffer.getBufferStart(); 147 return count == 0 || 148 std::all_of(buffer.begin(), buffer.begin() + count, 149 [](char c) { return isPrint(c) || isSpace(c); }); 150 } 151 152 // Read the profile variant flag from the header: ":FE" means this is a FE 153 // generated profile. ":IR" means this is an IR level profile. Other strings 154 // with a leading ':' will be reported an error format. 155 Error TextInstrProfReader::readHeader() { 156 Symtab.reset(new InstrProfSymtab()); 157 bool IsIRInstr = false; 158 bool IsEntryFirst = false; 159 bool IsCS = false; 160 161 while (Line->startswith(":")) { 162 StringRef Str = Line->substr(1); 163 if (Str.equals_insensitive("ir")) 164 IsIRInstr = true; 165 else if (Str.equals_insensitive("fe")) 166 IsIRInstr = false; 167 else if (Str.equals_insensitive("csir")) { 168 IsIRInstr = true; 169 IsCS = true; 170 } else if (Str.equals_insensitive("entry_first")) 171 IsEntryFirst = true; 172 else if (Str.equals_insensitive("not_entry_first")) 173 IsEntryFirst = false; 174 else 175 return error(instrprof_error::bad_header); 176 ++Line; 177 } 178 IsIRLevelProfile = IsIRInstr; 179 InstrEntryBBEnabled = IsEntryFirst; 180 HasCSIRLevelProfile = IsCS; 181 return success(); 182 } 183 184 Error 185 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 186 187 #define CHECK_LINE_END(Line) \ 188 if (Line.is_at_end()) \ 189 return error(instrprof_error::truncated); 190 #define READ_NUM(Str, Dst) \ 191 if ((Str).getAsInteger(10, (Dst))) \ 192 return error(instrprof_error::malformed); 193 #define VP_READ_ADVANCE(Val) \ 194 CHECK_LINE_END(Line); \ 195 uint32_t Val; \ 196 READ_NUM((*Line), (Val)); \ 197 Line++; 198 199 if (Line.is_at_end()) 200 return success(); 201 202 uint32_t NumValueKinds; 203 if (Line->getAsInteger(10, NumValueKinds)) { 204 // No value profile data 205 return success(); 206 } 207 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 208 return error(instrprof_error::malformed, 209 "number of value kinds is invalid"); 210 Line++; 211 212 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 213 VP_READ_ADVANCE(ValueKind); 214 if (ValueKind > IPVK_Last) 215 return error(instrprof_error::malformed, "value kind is invalid"); 216 ; 217 VP_READ_ADVANCE(NumValueSites); 218 if (!NumValueSites) 219 continue; 220 221 Record.reserveSites(VK, NumValueSites); 222 for (uint32_t S = 0; S < NumValueSites; S++) { 223 VP_READ_ADVANCE(NumValueData); 224 225 std::vector<InstrProfValueData> CurrentValues; 226 for (uint32_t V = 0; V < NumValueData; V++) { 227 CHECK_LINE_END(Line); 228 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 229 uint64_t TakenCount, Value; 230 if (ValueKind == IPVK_IndirectCallTarget) { 231 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 232 Value = 0; 233 } else { 234 if (Error E = Symtab->addFuncName(VD.first)) 235 return E; 236 Value = IndexedInstrProf::ComputeHash(VD.first); 237 } 238 } else { 239 READ_NUM(VD.first, Value); 240 } 241 READ_NUM(VD.second, TakenCount); 242 CurrentValues.push_back({Value, TakenCount}); 243 Line++; 244 } 245 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 246 nullptr); 247 } 248 } 249 return success(); 250 251 #undef CHECK_LINE_END 252 #undef READ_NUM 253 #undef VP_READ_ADVANCE 254 } 255 256 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 257 // Skip empty lines and comments. 258 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 259 ++Line; 260 // If we hit EOF while looking for a name, we're done. 261 if (Line.is_at_end()) { 262 return error(instrprof_error::eof); 263 } 264 265 // Read the function name. 266 Record.Name = *Line++; 267 if (Error E = Symtab->addFuncName(Record.Name)) 268 return error(std::move(E)); 269 270 // Read the function hash. 271 if (Line.is_at_end()) 272 return error(instrprof_error::truncated); 273 if ((Line++)->getAsInteger(0, Record.Hash)) 274 return error(instrprof_error::malformed, 275 "function hash is not a valid integer"); 276 277 // Read the number of counters. 278 uint64_t NumCounters; 279 if (Line.is_at_end()) 280 return error(instrprof_error::truncated); 281 if ((Line++)->getAsInteger(10, NumCounters)) 282 return error(instrprof_error::malformed, 283 "number of counters is not a valid integer"); 284 if (NumCounters == 0) 285 return error(instrprof_error::malformed, "number of counters is zero"); 286 287 // Read each counter and fill our internal storage with the values. 288 Record.Clear(); 289 Record.Counts.reserve(NumCounters); 290 for (uint64_t I = 0; I < NumCounters; ++I) { 291 if (Line.is_at_end()) 292 return error(instrprof_error::truncated); 293 uint64_t Count; 294 if ((Line++)->getAsInteger(10, Count)) 295 return error(instrprof_error::malformed, "count is invalid"); 296 Record.Counts.push_back(Count); 297 } 298 299 // Check if value profile data exists and read it if so. 300 if (Error E = readValueProfileData(Record)) 301 return error(std::move(E)); 302 303 return success(); 304 } 305 306 template <class IntPtrT> 307 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 308 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 309 return false; 310 uint64_t Magic = 311 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 312 return RawInstrProf::getMagic<IntPtrT>() == Magic || 313 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 314 } 315 316 template <class IntPtrT> 317 Error RawInstrProfReader<IntPtrT>::readHeader() { 318 if (!hasFormat(*DataBuffer)) 319 return error(instrprof_error::bad_magic); 320 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 321 return error(instrprof_error::bad_header); 322 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 323 DataBuffer->getBufferStart()); 324 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 325 return readHeader(*Header); 326 } 327 328 template <class IntPtrT> 329 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 330 const char *End = DataBuffer->getBufferEnd(); 331 // Skip zero padding between profiles. 332 while (CurrentPos != End && *CurrentPos == 0) 333 ++CurrentPos; 334 // If there's nothing left, we're done. 335 if (CurrentPos == End) 336 return make_error<InstrProfError>(instrprof_error::eof); 337 // If there isn't enough space for another header, this is probably just 338 // garbage at the end of the file. 339 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 340 return make_error<InstrProfError>(instrprof_error::malformed, 341 "not enough space for another header"); 342 // The writer ensures each profile is padded to start at an aligned address. 343 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 344 return make_error<InstrProfError>(instrprof_error::malformed, 345 "insufficient padding"); 346 // The magic should have the same byte order as in the previous header. 347 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 348 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 349 return make_error<InstrProfError>(instrprof_error::bad_magic); 350 351 // There's another profile to read, so we need to process the header. 352 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 353 return readHeader(*Header); 354 } 355 356 template <class IntPtrT> 357 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 358 if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) 359 return error(std::move(E)); 360 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 361 const IntPtrT FPtr = swap(I->FunctionPointer); 362 if (!FPtr) 363 continue; 364 Symtab.mapAddress(FPtr, I->NameRef); 365 } 366 return success(); 367 } 368 369 template <class IntPtrT> 370 Error RawInstrProfReader<IntPtrT>::readHeader( 371 const RawInstrProf::Header &Header) { 372 Version = swap(Header.Version); 373 if (GET_VERSION(Version) != RawInstrProf::Version) 374 return error(instrprof_error::unsupported_version); 375 if (useDebugInfoCorrelate() && !Correlator) 376 return error(instrprof_error::missing_debug_info_for_correlation); 377 if (!useDebugInfoCorrelate() && Correlator) 378 return error(instrprof_error::unexpected_debug_info_for_correlation); 379 380 BinaryIdsSize = swap(Header.BinaryIdsSize); 381 if (BinaryIdsSize % sizeof(uint64_t)) 382 return error(instrprof_error::bad_header); 383 384 CountersDelta = swap(Header.CountersDelta); 385 NamesDelta = swap(Header.NamesDelta); 386 auto DataSize = swap(Header.DataSize); 387 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 388 auto CountersSize = swap(Header.CountersSize); 389 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 390 auto NamesSize = swap(Header.NamesSize); 391 ValueKindLast = swap(Header.ValueKindLast); 392 393 auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>); 394 auto PaddingSize = getNumPaddingBytes(NamesSize); 395 396 // Profile data starts after profile header and binary ids if exist. 397 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; 398 ptrdiff_t CountersOffset = 399 DataOffset + DataSizeInBytes + PaddingBytesBeforeCounters; 400 ptrdiff_t NamesOffset = CountersOffset + (sizeof(uint64_t) * CountersSize) + 401 PaddingBytesAfterCounters; 402 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 403 404 auto *Start = reinterpret_cast<const char *>(&Header); 405 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 406 return error(instrprof_error::bad_header); 407 408 if (Correlator) { 409 // These sizes in the raw file are zero because we constructed them in the 410 // Correlator. 411 assert(DataSize == 0 && NamesSize == 0); 412 assert(CountersDelta == 0 && NamesDelta == 0); 413 Data = Correlator->getDataPointer(); 414 DataEnd = Data + Correlator->getDataSize(); 415 NamesStart = Correlator->getCompressedNamesPointer(); 416 NamesEnd = NamesStart + Correlator->getCompressedNamesSize(); 417 } else { 418 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 419 Start + DataOffset); 420 DataEnd = Data + DataSize; 421 NamesStart = Start + NamesOffset; 422 NamesEnd = NamesStart + NamesSize; 423 } 424 425 // Binary ids start just after the header. 426 BinaryIdsStart = 427 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 428 CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset); 429 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 430 431 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 432 if (BinaryIdsStart + BinaryIdsSize > BufferEnd) 433 return error(instrprof_error::bad_header); 434 435 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 436 if (Error E = createSymtab(*NewSymtab.get())) 437 return E; 438 439 Symtab = std::move(NewSymtab); 440 return success(); 441 } 442 443 template <class IntPtrT> 444 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 445 Record.Name = getName(Data->NameRef); 446 return success(); 447 } 448 449 template <class IntPtrT> 450 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 451 Record.Hash = swap(Data->FuncHash); 452 return success(); 453 } 454 455 template <class IntPtrT> 456 Error RawInstrProfReader<IntPtrT>::readRawCounts( 457 InstrProfRecord &Record) { 458 uint32_t NumCounters = swap(Data->NumCounters); 459 if (NumCounters == 0) 460 return error(instrprof_error::malformed, "number of counters is zero"); 461 462 ArrayRef<uint64_t> RawCounts; 463 if (Correlator) { 464 uint64_t CounterOffset = swap<IntPtrT>(Data->CounterPtr) / sizeof(uint64_t); 465 RawCounts = 466 makeArrayRef<uint64_t>(CountersStart + CounterOffset, NumCounters); 467 } else { 468 IntPtrT CounterPtr = Data->CounterPtr; 469 ptrdiff_t CounterOffset = getCounterOffset(CounterPtr); 470 if (CounterOffset < 0) 471 return error( 472 instrprof_error::malformed, 473 ("counter offset " + Twine(CounterOffset) + " is negative").str()); 474 475 // Check bounds. Note that the counter pointer embedded in the data record 476 // may itself be corrupt. 477 auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart); 478 ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart; 479 if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters) 480 return error(instrprof_error::malformed, 481 "counter pointer is out of bounds"); 482 // We need to compute the in-buffer counter offset from the in-memory 483 // address distance. The initial CountersDelta is the in-memory address 484 // difference start(__llvm_prf_cnts)-start(__llvm_prf_data), so 485 // SrcData->CounterPtr - CountersDelta computes the offset into the 486 // in-buffer counter section. 487 if (CounterOffset > MaxNumCounters) 488 return error(instrprof_error::malformed, 489 ("counter offset " + Twine(CounterOffset) + 490 " is greater than the maximum number of counters " + 491 Twine((uint32_t)MaxNumCounters)) 492 .str()); 493 494 if (((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters) 495 return error(instrprof_error::malformed, 496 ("number of counters " + 497 Twine(((uint32_t)CounterOffset + NumCounters)) + 498 " is greater than the maximum number of counters " + 499 Twine((uint32_t)MaxNumCounters)) 500 .str()); 501 // CountersDelta decreases as we advance to the next data record. 502 CountersDelta -= sizeof(*Data); 503 504 RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters); 505 } 506 507 if (ShouldSwapBytes) { 508 Record.Counts.clear(); 509 Record.Counts.reserve(RawCounts.size()); 510 for (uint64_t Count : RawCounts) 511 Record.Counts.push_back(swap(Count)); 512 } else 513 Record.Counts = RawCounts; 514 515 return success(); 516 } 517 518 template <class IntPtrT> 519 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 520 InstrProfRecord &Record) { 521 Record.clearValueData(); 522 CurValueDataSize = 0; 523 // Need to match the logic in value profile dumper code in compiler-rt: 524 uint32_t NumValueKinds = 0; 525 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 526 NumValueKinds += (Data->NumValueSites[I] != 0); 527 528 if (!NumValueKinds) 529 return success(); 530 531 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 532 ValueProfData::getValueProfData( 533 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 534 getDataEndianness()); 535 536 if (Error E = VDataPtrOrErr.takeError()) 537 return E; 538 539 // Note that besides deserialization, this also performs the conversion for 540 // indirect call targets. The function pointers from the raw profile are 541 // remapped into function name hashes. 542 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 543 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 544 return success(); 545 } 546 547 template <class IntPtrT> 548 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 549 if (atEnd()) 550 // At this point, ValueDataStart field points to the next header. 551 if (Error E = readNextHeader(getNextHeaderPos())) 552 return error(std::move(E)); 553 554 // Read name ad set it in Record. 555 if (Error E = readName(Record)) 556 return error(std::move(E)); 557 558 // Read FuncHash and set it in Record. 559 if (Error E = readFuncHash(Record)) 560 return error(std::move(E)); 561 562 // Read raw counts and set Record. 563 if (Error E = readRawCounts(Record)) 564 return error(std::move(E)); 565 566 // Read value data and set Record. 567 if (Error E = readValueProfilingData(Record)) 568 return error(std::move(E)); 569 570 // Iterate. 571 advanceData(); 572 return success(); 573 } 574 575 static size_t RoundUp(size_t size, size_t align) { 576 return (size + align - 1) & ~(align - 1); 577 } 578 579 template <class IntPtrT> 580 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 581 if (BinaryIdsSize == 0) 582 return success(); 583 584 OS << "Binary IDs: \n"; 585 const uint8_t *BI = BinaryIdsStart; 586 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 587 while (BI < BIEnd) { 588 size_t Remaining = BIEnd - BI; 589 590 // There should be enough left to read the binary ID size field. 591 if (Remaining < sizeof(uint64_t)) 592 return make_error<InstrProfError>( 593 instrprof_error::malformed, 594 "not enough data to read binary id length"); 595 596 uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI)); 597 598 // There should be enough left to read the binary ID size field, and the 599 // binary ID. 600 if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) 601 return make_error<InstrProfError>( 602 instrprof_error::malformed, "not enough data to read binary id data"); 603 604 // Increment by binary id length data type size. 605 BI += sizeof(BinaryIdLen); 606 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 607 return make_error<InstrProfError>( 608 instrprof_error::malformed, 609 "binary id that is read is bigger than buffer size"); 610 611 for (uint64_t I = 0; I < BinaryIdLen; I++) 612 OS << format("%02x", BI[I]); 613 OS << "\n"; 614 615 // Increment by binary id data length, rounded to the next 8 bytes. This 616 // accounts for the zero-padding after each build ID. 617 BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); 618 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 619 return make_error<InstrProfError>(instrprof_error::malformed); 620 } 621 622 return success(); 623 } 624 625 namespace llvm { 626 627 template class RawInstrProfReader<uint32_t>; 628 template class RawInstrProfReader<uint64_t>; 629 630 } // end namespace llvm 631 632 InstrProfLookupTrait::hash_value_type 633 InstrProfLookupTrait::ComputeHash(StringRef K) { 634 return IndexedInstrProf::ComputeHash(HashType, K); 635 } 636 637 using data_type = InstrProfLookupTrait::data_type; 638 using offset_type = InstrProfLookupTrait::offset_type; 639 640 bool InstrProfLookupTrait::readValueProfilingData( 641 const unsigned char *&D, const unsigned char *const End) { 642 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 643 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 644 645 if (VDataPtrOrErr.takeError()) 646 return false; 647 648 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 649 D += VDataPtrOrErr.get()->TotalSize; 650 651 return true; 652 } 653 654 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 655 offset_type N) { 656 using namespace support; 657 658 // Check if the data is corrupt. If so, don't try to read it. 659 if (N % sizeof(uint64_t)) 660 return data_type(); 661 662 DataBuffer.clear(); 663 std::vector<uint64_t> CounterBuffer; 664 665 const unsigned char *End = D + N; 666 while (D < End) { 667 // Read hash. 668 if (D + sizeof(uint64_t) >= End) 669 return data_type(); 670 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 671 672 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 673 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 674 // If format version is different then read the number of counters. 675 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 676 if (D + sizeof(uint64_t) > End) 677 return data_type(); 678 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 679 } 680 // Read counter values. 681 if (D + CountsSize * sizeof(uint64_t) > End) 682 return data_type(); 683 684 CounterBuffer.clear(); 685 CounterBuffer.reserve(CountsSize); 686 for (uint64_t J = 0; J < CountsSize; ++J) 687 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 688 689 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 690 691 // Read value profiling data. 692 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 693 !readValueProfilingData(D, End)) { 694 DataBuffer.clear(); 695 return data_type(); 696 } 697 } 698 return DataBuffer; 699 } 700 701 template <typename HashTableImpl> 702 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 703 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 704 auto Iter = HashTable->find(FuncName); 705 if (Iter == HashTable->end()) 706 return make_error<InstrProfError>(instrprof_error::unknown_function); 707 708 Data = (*Iter); 709 if (Data.empty()) 710 return make_error<InstrProfError>(instrprof_error::malformed, 711 "profile data is empty"); 712 713 return Error::success(); 714 } 715 716 template <typename HashTableImpl> 717 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 718 ArrayRef<NamedInstrProfRecord> &Data) { 719 if (atEnd()) 720 return make_error<InstrProfError>(instrprof_error::eof); 721 722 Data = *RecordIterator; 723 724 if (Data.empty()) 725 return make_error<InstrProfError>(instrprof_error::malformed, 726 "profile data is empty"); 727 728 return Error::success(); 729 } 730 731 template <typename HashTableImpl> 732 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 733 const unsigned char *Buckets, const unsigned char *const Payload, 734 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 735 uint64_t Version) { 736 FormatVersion = Version; 737 HashTable.reset(HashTableImpl::Create( 738 Buckets, Payload, Base, 739 typename HashTableImpl::InfoType(HashType, Version))); 740 RecordIterator = HashTable->data_begin(); 741 } 742 743 namespace { 744 /// A remapper that does not apply any remappings. 745 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 746 InstrProfReaderIndexBase &Underlying; 747 748 public: 749 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 750 : Underlying(Underlying) {} 751 752 Error getRecords(StringRef FuncName, 753 ArrayRef<NamedInstrProfRecord> &Data) override { 754 return Underlying.getRecords(FuncName, Data); 755 } 756 }; 757 } // namespace 758 759 /// A remapper that applies remappings based on a symbol remapping file. 760 template <typename HashTableImpl> 761 class llvm::InstrProfReaderItaniumRemapper 762 : public InstrProfReaderRemapper { 763 public: 764 InstrProfReaderItaniumRemapper( 765 std::unique_ptr<MemoryBuffer> RemapBuffer, 766 InstrProfReaderIndex<HashTableImpl> &Underlying) 767 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 768 } 769 770 /// Extract the original function name from a PGO function name. 771 static StringRef extractName(StringRef Name) { 772 // We can have multiple :-separated pieces; there can be pieces both 773 // before and after the mangled name. Find the first part that starts 774 // with '_Z'; we'll assume that's the mangled name we want. 775 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 776 while (true) { 777 Parts = Parts.second.split(':'); 778 if (Parts.first.startswith("_Z")) 779 return Parts.first; 780 if (Parts.second.empty()) 781 return Name; 782 } 783 } 784 785 /// Given a mangled name extracted from a PGO function name, and a new 786 /// form for that mangled name, reconstitute the name. 787 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 788 StringRef Replacement, 789 SmallVectorImpl<char> &Out) { 790 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 791 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 792 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 793 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 794 } 795 796 Error populateRemappings() override { 797 if (Error E = Remappings.read(*RemapBuffer)) 798 return E; 799 for (StringRef Name : Underlying.HashTable->keys()) { 800 StringRef RealName = extractName(Name); 801 if (auto Key = Remappings.insert(RealName)) { 802 // FIXME: We could theoretically map the same equivalence class to 803 // multiple names in the profile data. If that happens, we should 804 // return NamedInstrProfRecords from all of them. 805 MappedNames.insert({Key, RealName}); 806 } 807 } 808 return Error::success(); 809 } 810 811 Error getRecords(StringRef FuncName, 812 ArrayRef<NamedInstrProfRecord> &Data) override { 813 StringRef RealName = extractName(FuncName); 814 if (auto Key = Remappings.lookup(RealName)) { 815 StringRef Remapped = MappedNames.lookup(Key); 816 if (!Remapped.empty()) { 817 if (RealName.begin() == FuncName.begin() && 818 RealName.end() == FuncName.end()) 819 FuncName = Remapped; 820 else { 821 // Try rebuilding the name from the given remapping. 822 SmallString<256> Reconstituted; 823 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 824 Error E = Underlying.getRecords(Reconstituted, Data); 825 if (!E) 826 return E; 827 828 // If we failed because the name doesn't exist, fall back to asking 829 // about the original name. 830 if (Error Unhandled = handleErrors( 831 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 832 return Err->get() == instrprof_error::unknown_function 833 ? Error::success() 834 : Error(std::move(Err)); 835 })) 836 return Unhandled; 837 } 838 } 839 } 840 return Underlying.getRecords(FuncName, Data); 841 } 842 843 private: 844 /// The memory buffer containing the remapping configuration. Remappings 845 /// holds pointers into this buffer. 846 std::unique_ptr<MemoryBuffer> RemapBuffer; 847 848 /// The mangling remapper. 849 SymbolRemappingReader Remappings; 850 851 /// Mapping from mangled name keys to the name used for the key in the 852 /// profile data. 853 /// FIXME: Can we store a location within the on-disk hash table instead of 854 /// redoing lookup? 855 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 856 857 /// The real profile data reader. 858 InstrProfReaderIndex<HashTableImpl> &Underlying; 859 }; 860 861 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 862 using namespace support; 863 864 if (DataBuffer.getBufferSize() < 8) 865 return false; 866 uint64_t Magic = 867 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 868 // Verify that it's magical. 869 return Magic == IndexedInstrProf::Magic; 870 } 871 872 const unsigned char * 873 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 874 const unsigned char *Cur, bool UseCS) { 875 using namespace IndexedInstrProf; 876 using namespace support; 877 878 if (Version >= IndexedInstrProf::Version4) { 879 const IndexedInstrProf::Summary *SummaryInLE = 880 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 881 uint64_t NFields = 882 endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields); 883 uint64_t NEntries = 884 endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries); 885 uint32_t SummarySize = 886 IndexedInstrProf::Summary::getSize(NFields, NEntries); 887 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 888 IndexedInstrProf::allocSummary(SummarySize); 889 890 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 891 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 892 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 893 Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); 894 895 SummaryEntryVector DetailedSummary; 896 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 897 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 898 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 899 Ent.NumBlocks); 900 } 901 std::unique_ptr<llvm::ProfileSummary> &Summary = 902 UseCS ? this->CS_Summary : this->Summary; 903 904 // initialize InstrProfSummary using the SummaryData from disk. 905 Summary = std::make_unique<ProfileSummary>( 906 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 907 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 908 SummaryData->get(Summary::MaxBlockCount), 909 SummaryData->get(Summary::MaxInternalBlockCount), 910 SummaryData->get(Summary::MaxFunctionCount), 911 SummaryData->get(Summary::TotalNumBlocks), 912 SummaryData->get(Summary::TotalNumFunctions)); 913 return Cur + SummarySize; 914 } else { 915 // The older versions do not support a profile summary. This just computes 916 // an empty summary, which will not result in accurate hot/cold detection. 917 // We would need to call addRecord for all NamedInstrProfRecords to get the 918 // correct summary. However, this version is old (prior to early 2016) and 919 // has not been supporting an accurate summary for several years. 920 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 921 Summary = Builder.getSummary(); 922 return Cur; 923 } 924 } 925 926 Error IndexedInstrProfReader::readHeader() { 927 using namespace support; 928 929 const unsigned char *Start = 930 (const unsigned char *)DataBuffer->getBufferStart(); 931 const unsigned char *Cur = Start; 932 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 933 return error(instrprof_error::truncated); 934 935 auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur); 936 Cur += sizeof(IndexedInstrProf::Header); 937 938 // Check the magic number. 939 uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic); 940 if (Magic != IndexedInstrProf::Magic) 941 return error(instrprof_error::bad_magic); 942 943 // Read the version. 944 uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); 945 if (GET_VERSION(FormatVersion) > 946 IndexedInstrProf::ProfVersion::CurrentVersion) 947 return error(instrprof_error::unsupported_version); 948 949 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 950 /* UseCS */ false); 951 if (FormatVersion & VARIANT_MASK_CSIR_PROF) 952 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 953 /* UseCS */ true); 954 955 // Read the hash type and start offset. 956 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 957 endian::byte_swap<uint64_t, little>(Header->HashType)); 958 if (HashType > IndexedInstrProf::HashT::Last) 959 return error(instrprof_error::unsupported_hash_type); 960 961 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 962 963 // The rest of the file is an on disk hash table. 964 auto IndexPtr = 965 std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 966 Start + HashOffset, Cur, Start, HashType, FormatVersion); 967 968 // Load the remapping table now if requested. 969 if (RemappingBuffer) { 970 Remapper = std::make_unique< 971 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 972 std::move(RemappingBuffer), *IndexPtr); 973 if (Error E = Remapper->populateRemappings()) 974 return E; 975 } else { 976 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 977 } 978 Index = std::move(IndexPtr); 979 980 return success(); 981 } 982 983 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 984 if (Symtab.get()) 985 return *Symtab.get(); 986 987 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 988 if (Error E = Index->populateSymtab(*NewSymtab.get())) { 989 consumeError(error(InstrProfError::take(std::move(E)))); 990 } 991 992 Symtab = std::move(NewSymtab); 993 return *Symtab.get(); 994 } 995 996 Expected<InstrProfRecord> 997 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, 998 uint64_t FuncHash) { 999 ArrayRef<NamedInstrProfRecord> Data; 1000 Error Err = Remapper->getRecords(FuncName, Data); 1001 if (Err) 1002 return std::move(Err); 1003 // Found it. Look for counters with the right hash. 1004 for (const NamedInstrProfRecord &I : Data) { 1005 // Check for a match and fill the vector if there is one. 1006 if (I.Hash == FuncHash) 1007 return std::move(I); 1008 } 1009 return error(instrprof_error::hash_mismatch); 1010 } 1011 1012 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 1013 uint64_t FuncHash, 1014 std::vector<uint64_t> &Counts) { 1015 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 1016 if (Error E = Record.takeError()) 1017 return error(std::move(E)); 1018 1019 Counts = Record.get().Counts; 1020 return success(); 1021 } 1022 1023 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 1024 ArrayRef<NamedInstrProfRecord> Data; 1025 1026 Error E = Index->getRecords(Data); 1027 if (E) 1028 return error(std::move(E)); 1029 1030 Record = Data[RecordIndex++]; 1031 if (RecordIndex >= Data.size()) { 1032 Index->advanceToNextKey(); 1033 RecordIndex = 0; 1034 } 1035 return success(); 1036 } 1037 1038 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1039 uint64_t NumFuncs = 0; 1040 for (const auto &Func : *this) { 1041 if (isIRLevelProfile()) { 1042 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1043 if (FuncIsCS != IsCS) 1044 continue; 1045 } 1046 Func.accumulateCounts(Sum); 1047 ++NumFuncs; 1048 } 1049 Sum.NumEntries = NumFuncs; 1050 } 1051