1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/Endian.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include "llvm/Support/SymbolRemappingReader.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <cstddef> 32 #include <cstdint> 33 #include <limits> 34 #include <memory> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 using namespace llvm; 40 41 static Expected<std::unique_ptr<MemoryBuffer>> 42 setupMemoryBuffer(const Twine &Path) { 43 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 44 MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 45 if (std::error_code EC = BufferOrErr.getError()) 46 return errorCodeToError(EC); 47 return std::move(BufferOrErr.get()); 48 } 49 50 static Error initializeReader(InstrProfReader &Reader) { 51 return Reader.readHeader(); 52 } 53 54 Expected<std::unique_ptr<InstrProfReader>> 55 InstrProfReader::create(const Twine &Path) { 56 // Set up the buffer to read. 
57 auto BufferOrError = setupMemoryBuffer(Path); 58 if (Error E = BufferOrError.takeError()) 59 return std::move(E); 60 return InstrProfReader::create(std::move(BufferOrError.get())); 61 } 62 63 Expected<std::unique_ptr<InstrProfReader>> 64 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 65 // Sanity check the buffer. 66 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 67 return make_error<InstrProfError>(instrprof_error::too_large); 68 69 if (Buffer->getBufferSize() == 0) 70 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 71 72 std::unique_ptr<InstrProfReader> Result; 73 // Create the reader. 74 if (IndexedInstrProfReader::hasFormat(*Buffer)) 75 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 76 else if (RawInstrProfReader64::hasFormat(*Buffer)) 77 Result.reset(new RawInstrProfReader64(std::move(Buffer))); 78 else if (RawInstrProfReader32::hasFormat(*Buffer)) 79 Result.reset(new RawInstrProfReader32(std::move(Buffer))); 80 else if (TextInstrProfReader::hasFormat(*Buffer)) 81 Result.reset(new TextInstrProfReader(std::move(Buffer))); 82 else 83 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 84 85 // Initialize the reader and return the result. 86 if (Error E = initializeReader(*Result)) 87 return std::move(E); 88 89 return std::move(Result); 90 } 91 92 Expected<std::unique_ptr<IndexedInstrProfReader>> 93 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 94 // Set up the buffer to read. 95 auto BufferOrError = setupMemoryBuffer(Path); 96 if (Error E = BufferOrError.takeError()) 97 return std::move(E); 98 99 // Set up the remapping buffer if requested. 
100 std::unique_ptr<MemoryBuffer> RemappingBuffer; 101 std::string RemappingPathStr = RemappingPath.str(); 102 if (!RemappingPathStr.empty()) { 103 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 104 if (Error E = RemappingBufferOrError.takeError()) 105 return std::move(E); 106 RemappingBuffer = std::move(RemappingBufferOrError.get()); 107 } 108 109 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 110 std::move(RemappingBuffer)); 111 } 112 113 Expected<std::unique_ptr<IndexedInstrProfReader>> 114 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 115 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 116 // Sanity check the buffer. 117 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 118 return make_error<InstrProfError>(instrprof_error::too_large); 119 120 // Create the reader. 121 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 122 return make_error<InstrProfError>(instrprof_error::bad_magic); 123 auto Result = std::make_unique<IndexedInstrProfReader>( 124 std::move(Buffer), std::move(RemappingBuffer)); 125 126 // Initialize the reader and return the result. 127 if (Error E = initializeReader(*Result)) 128 return std::move(E); 129 130 return std::move(Result); 131 } 132 133 void InstrProfIterator::Increment() { 134 if (auto E = Reader->readNextRecord(Record)) { 135 // Handle errors in the reader. 136 InstrProfError::take(std::move(E)); 137 *this = InstrProfIterator(); 138 } 139 } 140 141 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 142 // Verify that this really looks like plain ASCII text by checking a 143 // 'reasonable' number of characters (up to profile magic size). 
144 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 145 StringRef buffer = Buffer.getBufferStart(); 146 return count == 0 || 147 std::all_of(buffer.begin(), buffer.begin() + count, 148 [](char c) { return isPrint(c) || isSpace(c); }); 149 } 150 151 // Read the profile variant flag from the header: ":FE" means this is a FE 152 // generated profile. ":IR" means this is an IR level profile. Other strings 153 // with a leading ':' will be reported an error format. 154 Error TextInstrProfReader::readHeader() { 155 Symtab.reset(new InstrProfSymtab()); 156 bool IsIRInstr = false; 157 bool IsEntryFirst = false; 158 bool IsCS = false; 159 160 while (Line->startswith(":")) { 161 StringRef Str = Line->substr(1); 162 if (Str.equals_insensitive("ir")) 163 IsIRInstr = true; 164 else if (Str.equals_insensitive("fe")) 165 IsIRInstr = false; 166 else if (Str.equals_insensitive("csir")) { 167 IsIRInstr = true; 168 IsCS = true; 169 } else if (Str.equals_insensitive("entry_first")) 170 IsEntryFirst = true; 171 else if (Str.equals_insensitive("not_entry_first")) 172 IsEntryFirst = false; 173 else 174 return error(instrprof_error::bad_header); 175 ++Line; 176 } 177 IsIRLevelProfile = IsIRInstr; 178 InstrEntryBBEnabled = IsEntryFirst; 179 HasCSIRLevelProfile = IsCS; 180 return success(); 181 } 182 183 Error 184 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 185 186 #define CHECK_LINE_END(Line) \ 187 if (Line.is_at_end()) \ 188 return error(instrprof_error::truncated); 189 #define READ_NUM(Str, Dst) \ 190 if ((Str).getAsInteger(10, (Dst))) \ 191 return error(instrprof_error::malformed); 192 #define VP_READ_ADVANCE(Val) \ 193 CHECK_LINE_END(Line); \ 194 uint32_t Val; \ 195 READ_NUM((*Line), (Val)); \ 196 Line++; 197 198 if (Line.is_at_end()) 199 return success(); 200 201 uint32_t NumValueKinds; 202 if (Line->getAsInteger(10, NumValueKinds)) { 203 // No value profile data 204 return success(); 205 } 206 if (NumValueKinds == 0 || NumValueKinds > 
IPVK_Last + 1) 207 return error(instrprof_error::malformed, 208 "number of value kinds is invalid"); 209 Line++; 210 211 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 212 VP_READ_ADVANCE(ValueKind); 213 if (ValueKind > IPVK_Last) 214 return error(instrprof_error::malformed, "value kind is invalid"); 215 ; 216 VP_READ_ADVANCE(NumValueSites); 217 if (!NumValueSites) 218 continue; 219 220 Record.reserveSites(VK, NumValueSites); 221 for (uint32_t S = 0; S < NumValueSites; S++) { 222 VP_READ_ADVANCE(NumValueData); 223 224 std::vector<InstrProfValueData> CurrentValues; 225 for (uint32_t V = 0; V < NumValueData; V++) { 226 CHECK_LINE_END(Line); 227 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 228 uint64_t TakenCount, Value; 229 if (ValueKind == IPVK_IndirectCallTarget) { 230 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 231 Value = 0; 232 } else { 233 if (Error E = Symtab->addFuncName(VD.first)) 234 return E; 235 Value = IndexedInstrProf::ComputeHash(VD.first); 236 } 237 } else { 238 READ_NUM(VD.first, Value); 239 } 240 READ_NUM(VD.second, TakenCount); 241 CurrentValues.push_back({Value, TakenCount}); 242 Line++; 243 } 244 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 245 nullptr); 246 } 247 } 248 return success(); 249 250 #undef CHECK_LINE_END 251 #undef READ_NUM 252 #undef VP_READ_ADVANCE 253 } 254 255 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 256 // Skip empty lines and comments. 257 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 258 ++Line; 259 // If we hit EOF while looking for a name, we're done. 260 if (Line.is_at_end()) { 261 return error(instrprof_error::eof); 262 } 263 264 // Read the function name. 265 Record.Name = *Line++; 266 if (Error E = Symtab->addFuncName(Record.Name)) 267 return error(std::move(E)); 268 269 // Read the function hash. 
270 if (Line.is_at_end()) 271 return error(instrprof_error::truncated); 272 if ((Line++)->getAsInteger(0, Record.Hash)) 273 return error(instrprof_error::malformed, 274 "function hash is not a valid integer"); 275 276 // Read the number of counters. 277 uint64_t NumCounters; 278 if (Line.is_at_end()) 279 return error(instrprof_error::truncated); 280 if ((Line++)->getAsInteger(10, NumCounters)) 281 return error(instrprof_error::malformed, 282 "number of counters is not a valid integer"); 283 if (NumCounters == 0) 284 return error(instrprof_error::malformed, "number of counters is zero"); 285 286 // Read each counter and fill our internal storage with the values. 287 Record.Clear(); 288 Record.Counts.reserve(NumCounters); 289 for (uint64_t I = 0; I < NumCounters; ++I) { 290 if (Line.is_at_end()) 291 return error(instrprof_error::truncated); 292 uint64_t Count; 293 if ((Line++)->getAsInteger(10, Count)) 294 return error(instrprof_error::malformed, "count is invalid"); 295 Record.Counts.push_back(Count); 296 } 297 298 // Check if value profile data exists and read it if so. 
299 if (Error E = readValueProfileData(Record)) 300 return error(std::move(E)); 301 302 return success(); 303 } 304 305 template <class IntPtrT> 306 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 307 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 308 return false; 309 uint64_t Magic = 310 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 311 return RawInstrProf::getMagic<IntPtrT>() == Magic || 312 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 313 } 314 315 template <class IntPtrT> 316 Error RawInstrProfReader<IntPtrT>::readHeader() { 317 if (!hasFormat(*DataBuffer)) 318 return error(instrprof_error::bad_magic); 319 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 320 return error(instrprof_error::bad_header); 321 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 322 DataBuffer->getBufferStart()); 323 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 324 return readHeader(*Header); 325 } 326 327 template <class IntPtrT> 328 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 329 const char *End = DataBuffer->getBufferEnd(); 330 // Skip zero padding between profiles. 331 while (CurrentPos != End && *CurrentPos == 0) 332 ++CurrentPos; 333 // If there's nothing left, we're done. 334 if (CurrentPos == End) 335 return make_error<InstrProfError>(instrprof_error::eof); 336 // If there isn't enough space for another header, this is probably just 337 // garbage at the end of the file. 338 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 339 return make_error<InstrProfError>(instrprof_error::malformed, 340 "not enough space for another header"); 341 // The writer ensures each profile is padded to start at an aligned address. 
342 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 343 return make_error<InstrProfError>(instrprof_error::malformed, 344 "insufficient padding"); 345 // The magic should have the same byte order as in the previous header. 346 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 347 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 348 return make_error<InstrProfError>(instrprof_error::bad_magic); 349 350 // There's another profile to read, so we need to process the header. 351 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 352 return readHeader(*Header); 353 } 354 355 template <class IntPtrT> 356 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 357 if (Error E = Symtab.create(StringRef(NamesStart, NamesSize))) 358 return error(std::move(E)); 359 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 360 const IntPtrT FPtr = swap(I->FunctionPointer); 361 if (!FPtr) 362 continue; 363 Symtab.mapAddress(FPtr, I->NameRef); 364 } 365 return success(); 366 } 367 368 template <class IntPtrT> 369 Error RawInstrProfReader<IntPtrT>::readHeader( 370 const RawInstrProf::Header &Header) { 371 Version = swap(Header.Version); 372 if (GET_VERSION(Version) != RawInstrProf::Version) 373 return error(instrprof_error::unsupported_version); 374 375 BinaryIdsSize = swap(Header.BinaryIdsSize); 376 if (BinaryIdsSize % sizeof(uint64_t)) 377 return error(instrprof_error::bad_header); 378 379 CountersDelta = swap(Header.CountersDelta); 380 NamesDelta = swap(Header.NamesDelta); 381 auto DataSize = swap(Header.DataSize); 382 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 383 auto CountersSize = swap(Header.CountersSize); 384 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 385 NamesSize = swap(Header.NamesSize); 386 ValueKindLast = swap(Header.ValueKindLast); 387 388 auto DataSizeInBytes = DataSize * 
sizeof(RawInstrProf::ProfileData<IntPtrT>); 389 auto PaddingSize = getNumPaddingBytes(NamesSize); 390 391 // Profile data starts after profile header and binary ids if exist. 392 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; 393 ptrdiff_t CountersOffset = 394 DataOffset + DataSizeInBytes + PaddingBytesBeforeCounters; 395 ptrdiff_t NamesOffset = CountersOffset + (sizeof(uint64_t) * CountersSize) + 396 PaddingBytesAfterCounters; 397 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 398 399 auto *Start = reinterpret_cast<const char *>(&Header); 400 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 401 return error(instrprof_error::bad_header); 402 403 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 404 Start + DataOffset); 405 DataEnd = Data + DataSize; 406 407 // Binary ids start just after the header. 408 BinaryIdsStart = 409 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 410 CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset); 411 NamesStart = Start + NamesOffset; 412 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 413 414 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 415 if (BinaryIdsStart + BinaryIdsSize > BufferEnd) 416 return error(instrprof_error::bad_header); 417 418 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 419 if (Error E = createSymtab(*NewSymtab.get())) 420 return E; 421 422 Symtab = std::move(NewSymtab); 423 return success(); 424 } 425 426 template <class IntPtrT> 427 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 428 Record.Name = getName(Data->NameRef); 429 return success(); 430 } 431 432 template <class IntPtrT> 433 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 434 Record.Hash = swap(Data->FuncHash); 435 return success(); 436 } 437 438 template <class IntPtrT> 439 Error 
RawInstrProfReader<IntPtrT>::readRawCounts( 440 InstrProfRecord &Record) { 441 uint32_t NumCounters = swap(Data->NumCounters); 442 if (NumCounters == 0) 443 return error(instrprof_error::malformed, "number of counters is zero"); 444 445 IntPtrT CounterPtr = Data->CounterPtr; 446 auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart); 447 ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart; 448 449 // Check bounds. Note that the counter pointer embedded in the data record 450 // may itself be corrupt. 451 if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters) 452 return error(instrprof_error::malformed, 453 "counter pointer is out of bounds"); 454 455 // We need to compute the in-buffer counter offset from the in-memory address 456 // distance. The initial CountersDelta is the in-memory address difference 457 // start(__llvm_prf_cnts)-start(__llvm_prf_data), so SrcData->CounterPtr - 458 // CountersDelta computes the offset into the in-buffer counter section. 459 // 460 // CountersDelta decreases as we advance to the next data record. 
461 ptrdiff_t CounterOffset = getCounterOffset(CounterPtr); 462 CountersDelta -= sizeof(*Data); 463 if (CounterOffset < 0) 464 return error( 465 instrprof_error::malformed, 466 ("counter offset " + Twine(CounterOffset) + " is negative").str()); 467 468 if (CounterOffset > MaxNumCounters) 469 return error(instrprof_error::malformed, 470 ("counter offset " + Twine(CounterOffset) + 471 " is greater than the maximum number of counters " + 472 Twine((uint32_t)MaxNumCounters)) 473 .str()); 474 475 if (((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters) 476 return error(instrprof_error::malformed, 477 ("number of counters " + 478 Twine(((uint32_t)CounterOffset + NumCounters)) + 479 " is greater than the maximum number of counters " + 480 Twine((uint32_t)MaxNumCounters)) 481 .str()); 482 483 auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters); 484 485 if (ShouldSwapBytes) { 486 Record.Counts.clear(); 487 Record.Counts.reserve(RawCounts.size()); 488 for (uint64_t Count : RawCounts) 489 Record.Counts.push_back(swap(Count)); 490 } else 491 Record.Counts = RawCounts; 492 493 return success(); 494 } 495 496 template <class IntPtrT> 497 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 498 InstrProfRecord &Record) { 499 Record.clearValueData(); 500 CurValueDataSize = 0; 501 // Need to match the logic in value profile dumper code in compiler-rt: 502 uint32_t NumValueKinds = 0; 503 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 504 NumValueKinds += (Data->NumValueSites[I] != 0); 505 506 if (!NumValueKinds) 507 return success(); 508 509 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 510 ValueProfData::getValueProfData( 511 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 512 getDataEndianness()); 513 514 if (Error E = VDataPtrOrErr.takeError()) 515 return E; 516 517 // Note that besides deserialization, this also performs the conversion for 518 // indirect call targets. 
The function pointers from the raw profile are 519 // remapped into function name hashes. 520 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 521 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 522 return success(); 523 } 524 525 template <class IntPtrT> 526 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 527 if (atEnd()) 528 // At this point, ValueDataStart field points to the next header. 529 if (Error E = readNextHeader(getNextHeaderPos())) 530 return error(std::move(E)); 531 532 // Read name ad set it in Record. 533 if (Error E = readName(Record)) 534 return error(std::move(E)); 535 536 // Read FuncHash and set it in Record. 537 if (Error E = readFuncHash(Record)) 538 return error(std::move(E)); 539 540 // Read raw counts and set Record. 541 if (Error E = readRawCounts(Record)) 542 return error(std::move(E)); 543 544 // Read value data and set Record. 545 if (Error E = readValueProfilingData(Record)) 546 return error(std::move(E)); 547 548 // Iterate. 549 advanceData(); 550 return success(); 551 } 552 553 static size_t RoundUp(size_t size, size_t align) { 554 return (size + align - 1) & ~(align - 1); 555 } 556 557 template <class IntPtrT> 558 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 559 if (BinaryIdsSize == 0) 560 return success(); 561 562 OS << "Binary IDs: \n"; 563 const uint8_t *BI = BinaryIdsStart; 564 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 565 while (BI < BIEnd) { 566 size_t Remaining = BIEnd - BI; 567 568 // There should be enough left to read the binary ID size field. 569 if (Remaining < sizeof(uint64_t)) 570 return make_error<InstrProfError>( 571 instrprof_error::malformed, 572 "not enough data to read binary id length"); 573 574 uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI)); 575 576 // There should be enough left to read the binary ID size field, and the 577 // binary ID. 
578 if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) 579 return make_error<InstrProfError>( 580 instrprof_error::malformed, "not enough data to read binary id data"); 581 582 // Increment by binary id length data type size. 583 BI += sizeof(BinaryIdLen); 584 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 585 return make_error<InstrProfError>( 586 instrprof_error::malformed, 587 "binary id that is read is bigger than buffer size"); 588 589 for (uint64_t I = 0; I < BinaryIdLen; I++) 590 OS << format("%02x", BI[I]); 591 OS << "\n"; 592 593 // Increment by binary id data length, rounded to the next 8 bytes. This 594 // accounts for the zero-padding after each build ID. 595 BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); 596 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 597 return make_error<InstrProfError>(instrprof_error::malformed); 598 } 599 600 return success(); 601 } 602 603 namespace llvm { 604 605 template class RawInstrProfReader<uint32_t>; 606 template class RawInstrProfReader<uint64_t>; 607 608 } // end namespace llvm 609 610 InstrProfLookupTrait::hash_value_type 611 InstrProfLookupTrait::ComputeHash(StringRef K) { 612 return IndexedInstrProf::ComputeHash(HashType, K); 613 } 614 615 using data_type = InstrProfLookupTrait::data_type; 616 using offset_type = InstrProfLookupTrait::offset_type; 617 618 bool InstrProfLookupTrait::readValueProfilingData( 619 const unsigned char *&D, const unsigned char *const End) { 620 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 621 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 622 623 if (VDataPtrOrErr.takeError()) 624 return false; 625 626 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 627 D += VDataPtrOrErr.get()->TotalSize; 628 629 return true; 630 } 631 632 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 633 offset_type N) { 634 using namespace support; 635 636 // Check if the data is corrupt. If so, don't try to read it. 
637 if (N % sizeof(uint64_t)) 638 return data_type(); 639 640 DataBuffer.clear(); 641 std::vector<uint64_t> CounterBuffer; 642 643 const unsigned char *End = D + N; 644 while (D < End) { 645 // Read hash. 646 if (D + sizeof(uint64_t) >= End) 647 return data_type(); 648 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 649 650 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 651 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 652 // If format version is different then read the number of counters. 653 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 654 if (D + sizeof(uint64_t) > End) 655 return data_type(); 656 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 657 } 658 // Read counter values. 659 if (D + CountsSize * sizeof(uint64_t) > End) 660 return data_type(); 661 662 CounterBuffer.clear(); 663 CounterBuffer.reserve(CountsSize); 664 for (uint64_t J = 0; J < CountsSize; ++J) 665 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 666 667 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 668 669 // Read value profiling data. 
670 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 671 !readValueProfilingData(D, End)) { 672 DataBuffer.clear(); 673 return data_type(); 674 } 675 } 676 return DataBuffer; 677 } 678 679 template <typename HashTableImpl> 680 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 681 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 682 auto Iter = HashTable->find(FuncName); 683 if (Iter == HashTable->end()) 684 return make_error<InstrProfError>(instrprof_error::unknown_function); 685 686 Data = (*Iter); 687 if (Data.empty()) 688 return make_error<InstrProfError>(instrprof_error::malformed, 689 "profile data is empty"); 690 691 return Error::success(); 692 } 693 694 template <typename HashTableImpl> 695 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 696 ArrayRef<NamedInstrProfRecord> &Data) { 697 if (atEnd()) 698 return make_error<InstrProfError>(instrprof_error::eof); 699 700 Data = *RecordIterator; 701 702 if (Data.empty()) 703 return make_error<InstrProfError>(instrprof_error::malformed, 704 "profile data is empty"); 705 706 return Error::success(); 707 } 708 709 template <typename HashTableImpl> 710 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 711 const unsigned char *Buckets, const unsigned char *const Payload, 712 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 713 uint64_t Version) { 714 FormatVersion = Version; 715 HashTable.reset(HashTableImpl::Create( 716 Buckets, Payload, Base, 717 typename HashTableImpl::InfoType(HashType, Version))); 718 RecordIterator = HashTable->data_begin(); 719 } 720 721 namespace { 722 /// A remapper that does not apply any remappings. 
723 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 724 InstrProfReaderIndexBase &Underlying; 725 726 public: 727 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 728 : Underlying(Underlying) {} 729 730 Error getRecords(StringRef FuncName, 731 ArrayRef<NamedInstrProfRecord> &Data) override { 732 return Underlying.getRecords(FuncName, Data); 733 } 734 }; 735 } // namespace 736 737 /// A remapper that applies remappings based on a symbol remapping file. 738 template <typename HashTableImpl> 739 class llvm::InstrProfReaderItaniumRemapper 740 : public InstrProfReaderRemapper { 741 public: 742 InstrProfReaderItaniumRemapper( 743 std::unique_ptr<MemoryBuffer> RemapBuffer, 744 InstrProfReaderIndex<HashTableImpl> &Underlying) 745 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 746 } 747 748 /// Extract the original function name from a PGO function name. 749 static StringRef extractName(StringRef Name) { 750 // We can have multiple :-separated pieces; there can be pieces both 751 // before and after the mangled name. Find the first part that starts 752 // with '_Z'; we'll assume that's the mangled name we want. 753 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 754 while (true) { 755 Parts = Parts.second.split(':'); 756 if (Parts.first.startswith("_Z")) 757 return Parts.first; 758 if (Parts.second.empty()) 759 return Name; 760 } 761 } 762 763 /// Given a mangled name extracted from a PGO function name, and a new 764 /// form for that mangled name, reconstitute the name. 
765 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 766 StringRef Replacement, 767 SmallVectorImpl<char> &Out) { 768 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 769 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 770 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 771 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 772 } 773 774 Error populateRemappings() override { 775 if (Error E = Remappings.read(*RemapBuffer)) 776 return E; 777 for (StringRef Name : Underlying.HashTable->keys()) { 778 StringRef RealName = extractName(Name); 779 if (auto Key = Remappings.insert(RealName)) { 780 // FIXME: We could theoretically map the same equivalence class to 781 // multiple names in the profile data. If that happens, we should 782 // return NamedInstrProfRecords from all of them. 783 MappedNames.insert({Key, RealName}); 784 } 785 } 786 return Error::success(); 787 } 788 789 Error getRecords(StringRef FuncName, 790 ArrayRef<NamedInstrProfRecord> &Data) override { 791 StringRef RealName = extractName(FuncName); 792 if (auto Key = Remappings.lookup(RealName)) { 793 StringRef Remapped = MappedNames.lookup(Key); 794 if (!Remapped.empty()) { 795 if (RealName.begin() == FuncName.begin() && 796 RealName.end() == FuncName.end()) 797 FuncName = Remapped; 798 else { 799 // Try rebuilding the name from the given remapping. 800 SmallString<256> Reconstituted; 801 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 802 Error E = Underlying.getRecords(Reconstituted, Data); 803 if (!E) 804 return E; 805 806 // If we failed because the name doesn't exist, fall back to asking 807 // about the original name. 808 if (Error Unhandled = handleErrors( 809 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 810 return Err->get() == instrprof_error::unknown_function 811 ? 
Error::success() 812 : Error(std::move(Err)); 813 })) 814 return Unhandled; 815 } 816 } 817 } 818 return Underlying.getRecords(FuncName, Data); 819 } 820 821 private: 822 /// The memory buffer containing the remapping configuration. Remappings 823 /// holds pointers into this buffer. 824 std::unique_ptr<MemoryBuffer> RemapBuffer; 825 826 /// The mangling remapper. 827 SymbolRemappingReader Remappings; 828 829 /// Mapping from mangled name keys to the name used for the key in the 830 /// profile data. 831 /// FIXME: Can we store a location within the on-disk hash table instead of 832 /// redoing lookup? 833 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 834 835 /// The real profile data reader. 836 InstrProfReaderIndex<HashTableImpl> &Underlying; 837 }; 838 839 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 840 using namespace support; 841 842 if (DataBuffer.getBufferSize() < 8) 843 return false; 844 uint64_t Magic = 845 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 846 // Verify that it's magical. 
return Magic == IndexedInstrProf::Magic;
}

/// Read a profile summary located at \p Cur and store it in this reader.
///
/// For format versions at or above Version4 the on-disk summary is copied
/// word by word out of its little-endian representation, its cutoff entries
/// are converted into a detailed summary, and the result is stored in
/// CS_Summary when \p UseCS is set and in Summary otherwise. For older
/// versions an empty summary built from the default cutoffs is stored in
/// Summary (UseCS is not consulted on that path).
///
/// \returns the position just past the summary that was read, or \p Cur
/// unchanged for versions that carry no summary.
const unsigned char *
IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
                                    const unsigned char *Cur, bool UseCS) {
  using namespace IndexedInstrProf;
  using namespace support;

  if (Version >= IndexedInstrProf::Version4) {
    const IndexedInstrProf::Summary *SummaryInLE =
        reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
    // The two counts are needed up front to know how large the summary is.
    uint64_t NFields =
        endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields);
    uint64_t NEntries =
        endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries);
    uint32_t SummarySize =
        IndexedInstrProf::Summary::getSize(NFields, NEntries);
    std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
        IndexedInstrProf::allocSummary(SummarySize);

    // Copy the summary as a sequence of 64-bit words, converting each word
    // from little-endian as it is copied.
    const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
    uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
    for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
      Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);

    SummaryEntryVector DetailedSummary;
    for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
      const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
      DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
                                   Ent.NumBlocks);
    }
    // This local reference selects which member receives the summary; the
    // Summary::... names used below are the summary field enumerators.
    std::unique_ptr<llvm::ProfileSummary> &Summary =
        UseCS ? this->CS_Summary : this->Summary;

    // initialize InstrProfSummary using the SummaryData from disk.
    Summary = std::make_unique<ProfileSummary>(
        UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
        DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
        SummaryData->get(Summary::MaxBlockCount),
        SummaryData->get(Summary::MaxInternalBlockCount),
        SummaryData->get(Summary::MaxFunctionCount),
        SummaryData->get(Summary::TotalNumBlocks),
        SummaryData->get(Summary::TotalNumFunctions));
    return Cur + SummarySize;
  } else {
    // The older versions do not support a profile summary. This just computes
    // an empty summary, which will not result in accurate hot/cold detection.
    // We would need to call addRecord for all NamedInstrProfRecords to get the
    // correct summary. However, this version is old (prior to early 2016) and
    // has not been supporting an accurate summary for several years.
    InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
    Summary = Builder.getSummary();
    return Cur;
  }
}

Error IndexedInstrProfReader::readHeader() {
  using namespace support;

  const unsigned char *Start =
      (const unsigned char *)DataBuffer->getBufferStart();
  const unsigned char *Cur = Start;
  // Reject buffers with fewer than 24 bytes before touching the header.
  if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
    return error(instrprof_error::truncated);

  auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
  Cur += sizeof(IndexedInstrProf::Header);

  // Check the magic number.
  uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
  if (Magic != IndexedInstrProf::Magic)
    return error(instrprof_error::bad_magic);

  // Read the version.
922 uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); 923 if (GET_VERSION(FormatVersion) > 924 IndexedInstrProf::ProfVersion::CurrentVersion) 925 return error(instrprof_error::unsupported_version); 926 927 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 928 /* UseCS */ false); 929 if (FormatVersion & VARIANT_MASK_CSIR_PROF) 930 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 931 /* UseCS */ true); 932 933 // Read the hash type and start offset. 934 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 935 endian::byte_swap<uint64_t, little>(Header->HashType)); 936 if (HashType > IndexedInstrProf::HashT::Last) 937 return error(instrprof_error::unsupported_hash_type); 938 939 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 940 941 // The rest of the file is an on disk hash table. 942 auto IndexPtr = 943 std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 944 Start + HashOffset, Cur, Start, HashType, FormatVersion); 945 946 // Load the remapping table now if requested. 
947 if (RemappingBuffer) { 948 Remapper = std::make_unique< 949 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 950 std::move(RemappingBuffer), *IndexPtr); 951 if (Error E = Remapper->populateRemappings()) 952 return E; 953 } else { 954 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 955 } 956 Index = std::move(IndexPtr); 957 958 return success(); 959 } 960 961 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 962 if (Symtab.get()) 963 return *Symtab.get(); 964 965 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 966 if (Error E = Index->populateSymtab(*NewSymtab.get())) { 967 consumeError(error(InstrProfError::take(std::move(E)))); 968 } 969 970 Symtab = std::move(NewSymtab); 971 return *Symtab.get(); 972 } 973 974 Expected<InstrProfRecord> 975 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, 976 uint64_t FuncHash) { 977 ArrayRef<NamedInstrProfRecord> Data; 978 Error Err = Remapper->getRecords(FuncName, Data); 979 if (Err) 980 return std::move(Err); 981 // Found it. Look for counters with the right hash. 982 for (unsigned I = 0, E = Data.size(); I < E; ++I) { 983 // Check for a match and fill the vector if there is one. 
984 if (Data[I].Hash == FuncHash) { 985 return std::move(Data[I]); 986 } 987 } 988 return error(instrprof_error::hash_mismatch); 989 } 990 991 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 992 uint64_t FuncHash, 993 std::vector<uint64_t> &Counts) { 994 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 995 if (Error E = Record.takeError()) 996 return error(std::move(E)); 997 998 Counts = Record.get().Counts; 999 return success(); 1000 } 1001 1002 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 1003 ArrayRef<NamedInstrProfRecord> Data; 1004 1005 Error E = Index->getRecords(Data); 1006 if (E) 1007 return error(std::move(E)); 1008 1009 Record = Data[RecordIndex++]; 1010 if (RecordIndex >= Data.size()) { 1011 Index->advanceToNextKey(); 1012 RecordIndex = 0; 1013 } 1014 return success(); 1015 } 1016 1017 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1018 uint64_t NumFuncs = 0; 1019 for (const auto &Func : *this) { 1020 if (isIRLevelProfile()) { 1021 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1022 if (FuncIsCS != IsCS) 1023 continue; 1024 } 1025 Func.accumulateCounts(Sum); 1026 ++NumFuncs; 1027 } 1028 Sum.NumEntries = NumFuncs; 1029 } 1030