1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for instrumentation 10 // based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H 15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/IR/ProfileSummary.h" 20 #include "llvm/ProfileData/InstrProf.h" 21 #include "llvm/ProfileData/InstrProfCorrelator.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/LineIterator.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/OnDiskHashTable.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include <algorithm> 29 #include <cassert> 30 #include <cstddef> 31 #include <cstdint> 32 #include <iterator> 33 #include <memory> 34 #include <utility> 35 #include <vector> 36 37 namespace llvm { 38 39 class InstrProfReader; 40 41 /// A file format agnostic iterator over profiling data. 42 class InstrProfIterator { 43 public: 44 using iterator_category = std::input_iterator_tag; 45 using value_type = NamedInstrProfRecord; 46 using difference_type = std::ptrdiff_t; 47 using pointer = value_type *; 48 using reference = value_type &; 49 50 private: 51 InstrProfReader *Reader = nullptr; 52 value_type Record; 53 54 void Increment(); 55 56 public: 57 InstrProfIterator() = default; 58 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } 59 60 InstrProfIterator &operator++() { Increment(); return *this; } 61 bool operator==(const InstrProfIterator &RHS) const { 62 return Reader == RHS.Reader; 63 } 64 bool operator!=(const InstrProfIterator &RHS) const { 65 return Reader != RHS.Reader; 66 } 67 value_type &operator*() { return Record; } 68 value_type *operator->() { return &Record; } 69 }; 70 71 /// Base class and interface for reading profiling data of any known instrprof 72 /// format. Provides an iterator over NamedInstrProfRecords. 73 class InstrProfReader { 74 instrprof_error LastError = instrprof_error::success; 75 std::string LastErrorMsg; 76 77 public: 78 InstrProfReader() = default; 79 virtual ~InstrProfReader() = default; 80 81 /// Read the header. Required before reading first record. 82 virtual Error readHeader() = 0; 83 84 /// Read a single record. 85 virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; 86 87 /// Print binary ids on stream OS. 88 virtual Error printBinaryIds(raw_ostream &OS) { return success(); }; 89 90 /// Iterator over profile data. 91 InstrProfIterator begin() { return InstrProfIterator(this); } 92 InstrProfIterator end() { return InstrProfIterator(); } 93 94 virtual bool isIRLevelProfile() const = 0; 95 96 virtual bool hasCSIRLevelProfile() const = 0; 97 98 virtual bool instrEntryBBEnabled() const = 0; 99 100 /// Return true if we must provide debug info to create PGO profiles. 101 virtual bool useDebugInfoCorrelate() const { return false; } 102 103 /// Return the PGO symtab. There are three different readers: 104 /// Raw, Text, and Indexed profile readers. The first two types 105 /// of readers are used only by llvm-profdata tool, while the indexed 106 /// profile reader is also used by llvm-cov tool and the compiler ( 107 /// backend or frontend). Since creating PGO symtab can create 108 /// significant runtime and memory overhead (as it touches data 109 /// for the whole program), InstrProfSymtab for the indexed profile 110 /// reader should be created on demand and it is recommended to be 111 /// only used for dumping purpose with llvm-proftool, not with the 112 /// compiler. 113 virtual InstrProfSymtab &getSymtab() = 0; 114 115 /// Compute the sum of counts and return in Sum. 116 void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); 117 118 protected: 119 std::unique_ptr<InstrProfSymtab> Symtab; 120 121 /// Set the current error and return same. 122 Error error(instrprof_error Err, const std::string &ErrMsg = "") { 123 LastError = Err; 124 LastErrorMsg = ErrMsg; 125 if (Err == instrprof_error::success) 126 return Error::success(); 127 return make_error<InstrProfError>(Err, ErrMsg); 128 } 129 130 Error error(Error &&E) { 131 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 132 LastError = IPE.get(); 133 LastErrorMsg = IPE.getMessage(); 134 }); 135 return make_error<InstrProfError>(LastError, LastErrorMsg); 136 } 137 138 /// Clear the current error and return a successful one. 139 Error success() { return error(instrprof_error::success); } 140 141 public: 142 /// Return true if the reader has finished reading the profile data. 143 bool isEOF() { return LastError == instrprof_error::eof; } 144 145 /// Return true if the reader encountered an error reading profiling data. 146 bool hasError() { return LastError != instrprof_error::success && !isEOF(); } 147 148 /// Get the current error. 149 Error getError() { 150 if (hasError()) 151 return make_error<InstrProfError>(LastError, LastErrorMsg); 152 return Error::success(); 153 } 154 155 /// Factory method to create an appropriately typed reader for the given 156 /// instrprof file. 157 static Expected<std::unique_ptr<InstrProfReader>> 158 create(const Twine &Path, const InstrProfCorrelator *Correlator = nullptr); 159 160 static Expected<std::unique_ptr<InstrProfReader>> 161 create(std::unique_ptr<MemoryBuffer> Buffer, 162 const InstrProfCorrelator *Correlator = nullptr); 163 }; 164 165 /// Reader for the simple text based instrprof format. 166 /// 167 /// This format is a simple text format that's suitable for test data. Records 168 /// are separated by one or more blank lines, and record fields are separated by 169 /// new lines. 170 /// 171 /// Each record consists of a function name, a function hash, a number of 172 /// counters, and then each counter value, in that order. 173 class TextInstrProfReader : public InstrProfReader { 174 private: 175 /// The profile data file contents. 176 std::unique_ptr<MemoryBuffer> DataBuffer; 177 /// Iterator over the profile data. 178 line_iterator Line; 179 bool IsIRLevelProfile = false; 180 bool HasCSIRLevelProfile = false; 181 bool InstrEntryBBEnabled = false; 182 183 Error readValueProfileData(InstrProfRecord &Record); 184 185 public: 186 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) 187 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} 188 TextInstrProfReader(const TextInstrProfReader &) = delete; 189 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; 190 191 /// Return true if the given buffer is in text instrprof format. 192 static bool hasFormat(const MemoryBuffer &Buffer); 193 194 bool isIRLevelProfile() const override { return IsIRLevelProfile; } 195 196 bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } 197 198 bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; } 199 200 /// Read the header. 201 Error readHeader() override; 202 203 /// Read a single record. 204 Error readNextRecord(NamedInstrProfRecord &Record) override; 205 206 InstrProfSymtab &getSymtab() override { 207 assert(Symtab.get()); 208 return *Symtab.get(); 209 } 210 }; 211 212 /// Reader for the raw instrprof binary format from runtime. 213 /// 214 /// This format is a raw memory dump of the instrumentation-based profiling data 215 /// from the runtime. It has no index. 216 /// 217 /// Templated on the unsigned type whose size matches pointers on the platform 218 /// that wrote the profile. 219 template <class IntPtrT> 220 class RawInstrProfReader : public InstrProfReader { 221 private: 222 /// The profile data file contents. 223 std::unique_ptr<MemoryBuffer> DataBuffer; 224 /// If available, this hold the ProfileData array used to correlate raw 225 /// instrumentation data to their functions. 226 const InstrProfCorrelatorImpl<IntPtrT> *Correlator; 227 bool ShouldSwapBytes; 228 // The value of the version field of the raw profile data header. The lower 56 229 // bits specifies the format version and the most significant 8 bits specify 230 // the variant types of the profile. 231 uint64_t Version; 232 uint64_t CountersDelta; 233 uint64_t NamesDelta; 234 const RawInstrProf::ProfileData<IntPtrT> *Data; 235 const RawInstrProf::ProfileData<IntPtrT> *DataEnd; 236 const uint64_t *CountersStart; 237 const char *NamesStart; 238 const char *NamesEnd; 239 // After value profile is all read, this pointer points to 240 // the header of next profile data (if exists) 241 const uint8_t *ValueDataStart; 242 uint32_t ValueKindLast; 243 uint32_t CurValueDataSize; 244 245 uint64_t BinaryIdsSize; 246 const uint8_t *BinaryIdsStart; 247 248 public: 249 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer, 250 const InstrProfCorrelator *Correlator) 251 : DataBuffer(std::move(DataBuffer)), 252 Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>( 253 Correlator)) {} 254 RawInstrProfReader(const RawInstrProfReader &) = delete; 255 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; 256 257 static bool hasFormat(const MemoryBuffer &DataBuffer); 258 Error readHeader() override; 259 Error readNextRecord(NamedInstrProfRecord &Record) override; 260 Error printBinaryIds(raw_ostream &OS) override; 261 262 bool isIRLevelProfile() const override { 263 return (Version & VARIANT_MASK_IR_PROF) != 0; 264 } 265 266 bool hasCSIRLevelProfile() const override { 267 return (Version & VARIANT_MASK_CSIR_PROF) != 0; 268 } 269 270 bool instrEntryBBEnabled() const override { 271 return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; 272 } 273 274 bool useDebugInfoCorrelate() const override { 275 return (Version & VARIANT_MASK_DBG_CORRELATE) != 0; 276 } 277 278 InstrProfSymtab &getSymtab() override { 279 assert(Symtab.get()); 280 return *Symtab.get(); 281 } 282 283 private: 284 Error createSymtab(InstrProfSymtab &Symtab); 285 Error readNextHeader(const char *CurrentPos); 286 Error readHeader(const RawInstrProf::Header &Header); 287 288 template <class IntT> IntT swap(IntT Int) const { 289 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; 290 } 291 292 support::endianness getDataEndianness() const { 293 support::endianness HostEndian = getHostEndianness(); 294 if (!ShouldSwapBytes) 295 return HostEndian; 296 if (HostEndian == support::little) 297 return support::big; 298 else 299 return support::little; 300 } 301 302 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { 303 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); 304 } 305 306 Error readName(NamedInstrProfRecord &Record); 307 Error readFuncHash(NamedInstrProfRecord &Record); 308 Error readRawCounts(InstrProfRecord &Record); 309 Error readValueProfilingData(InstrProfRecord &Record); 310 bool atEnd() const { return Data == DataEnd; } 311 312 void advanceData() { 313 Data++; 314 ValueDataStart += CurValueDataSize; 315 } 316 317 const char *getNextHeaderPos() const { 318 assert(atEnd()); 319 return (const char *)ValueDataStart; 320 } 321 322 /// Get the offset of \p CounterPtr from the start of the counters section of 323 /// the profile. The offset has units of "number of counters", i.e. increasing 324 /// the offset by 1 corresponds to an increase in the *byte offset* by 8. 325 ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const { 326 return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); 327 } 328 329 const uint64_t *getCounter(ptrdiff_t Offset) const { 330 return CountersStart + Offset; 331 } 332 333 StringRef getName(uint64_t NameRef) const { 334 return Symtab->getFuncName(swap(NameRef)); 335 } 336 }; 337 338 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; 339 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>; 340 341 namespace IndexedInstrProf { 342 343 enum class HashT : uint32_t; 344 345 } // end namespace IndexedInstrProf 346 347 /// Trait for lookups into the on-disk hash table for the binary instrprof 348 /// format. 349 class InstrProfLookupTrait { 350 std::vector<NamedInstrProfRecord> DataBuffer; 351 IndexedInstrProf::HashT HashType; 352 unsigned FormatVersion; 353 // Endianness of the input value profile data. 354 // It should be LE by default, but can be changed 355 // for testing purpose. 356 support::endianness ValueProfDataEndianness = support::little; 357 358 public: 359 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) 360 : HashType(HashType), FormatVersion(FormatVersion) {} 361 362 using data_type = ArrayRef<NamedInstrProfRecord>; 363 364 using internal_key_type = StringRef; 365 using external_key_type = StringRef; 366 using hash_value_type = uint64_t; 367 using offset_type = uint64_t; 368 369 static bool EqualKey(StringRef A, StringRef B) { return A == B; } 370 static StringRef GetInternalKey(StringRef K) { return K; } 371 static StringRef GetExternalKey(StringRef K) { return K; } 372 373 hash_value_type ComputeHash(StringRef K); 374 375 static std::pair<offset_type, offset_type> 376 ReadKeyDataLength(const unsigned char *&D) { 377 using namespace support; 378 379 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); 380 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); 381 return std::make_pair(KeyLen, DataLen); 382 } 383 384 StringRef ReadKey(const unsigned char *D, offset_type N) { 385 return StringRef((const char *)D, N); 386 } 387 388 bool readValueProfilingData(const unsigned char *&D, 389 const unsigned char *const End); 390 data_type ReadData(StringRef K, const unsigned char *D, offset_type N); 391 392 // Used for testing purpose only. 393 void setValueProfDataEndianness(support::endianness Endianness) { 394 ValueProfDataEndianness = Endianness; 395 } 396 }; 397 398 struct InstrProfReaderIndexBase { 399 virtual ~InstrProfReaderIndexBase() = default; 400 401 // Read all the profile records with the same key pointed to the current 402 // iterator. 403 virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0; 404 405 // Read all the profile records with the key equal to FuncName 406 virtual Error getRecords(StringRef FuncName, 407 ArrayRef<NamedInstrProfRecord> &Data) = 0; 408 virtual void advanceToNextKey() = 0; 409 virtual bool atEnd() const = 0; 410 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; 411 virtual uint64_t getVersion() const = 0; 412 virtual bool isIRLevelProfile() const = 0; 413 virtual bool hasCSIRLevelProfile() const = 0; 414 virtual bool instrEntryBBEnabled() const = 0; 415 virtual Error populateSymtab(InstrProfSymtab &) = 0; 416 }; 417 418 using OnDiskHashTableImplV3 = 419 OnDiskIterableChainedHashTable<InstrProfLookupTrait>; 420 421 template <typename HashTableImpl> 422 class InstrProfReaderItaniumRemapper; 423 424 template <typename HashTableImpl> 425 class InstrProfReaderIndex : public InstrProfReaderIndexBase { 426 private: 427 std::unique_ptr<HashTableImpl> HashTable; 428 typename HashTableImpl::data_iterator RecordIterator; 429 uint64_t FormatVersion; 430 431 friend class InstrProfReaderItaniumRemapper<HashTableImpl>; 432 433 public: 434 InstrProfReaderIndex(const unsigned char *Buckets, 435 const unsigned char *const Payload, 436 const unsigned char *const Base, 437 IndexedInstrProf::HashT HashType, uint64_t Version); 438 ~InstrProfReaderIndex() override = default; 439 440 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override; 441 Error getRecords(StringRef FuncName, 442 ArrayRef<NamedInstrProfRecord> &Data) override; 443 void advanceToNextKey() override { RecordIterator++; } 444 445 bool atEnd() const override { 446 return RecordIterator == HashTable->data_end(); 447 } 448 449 void setValueProfDataEndianness(support::endianness Endianness) override { 450 HashTable->getInfoObj().setValueProfDataEndianness(Endianness); 451 } 452 453 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); } 454 455 bool isIRLevelProfile() const override { 456 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; 457 } 458 459 bool hasCSIRLevelProfile() const override { 460 return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; 461 } 462 463 bool instrEntryBBEnabled() const override { 464 return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0; 465 } 466 467 Error populateSymtab(InstrProfSymtab &Symtab) override { 468 return Symtab.create(HashTable->keys()); 469 } 470 }; 471 472 /// Name matcher supporting fuzzy matching of symbol names to names in profiles. 473 class InstrProfReaderRemapper { 474 public: 475 virtual ~InstrProfReaderRemapper() {} 476 virtual Error populateRemappings() { return Error::success(); } 477 virtual Error getRecords(StringRef FuncName, 478 ArrayRef<NamedInstrProfRecord> &Data) = 0; 479 }; 480 481 /// Reader for the indexed binary instrprof format. 482 class IndexedInstrProfReader : public InstrProfReader { 483 private: 484 /// The profile data file contents. 485 std::unique_ptr<MemoryBuffer> DataBuffer; 486 /// The profile remapping file contents. 487 std::unique_ptr<MemoryBuffer> RemappingBuffer; 488 /// The index into the profile data. 489 std::unique_ptr<InstrProfReaderIndexBase> Index; 490 /// The profile remapping file contents. 491 std::unique_ptr<InstrProfReaderRemapper> Remapper; 492 /// Profile summary data. 493 std::unique_ptr<ProfileSummary> Summary; 494 /// Context sensitive profile summary data. 495 std::unique_ptr<ProfileSummary> CS_Summary; 496 // Index to the current record in the record array. 497 unsigned RecordIndex; 498 499 // Read the profile summary. Return a pointer pointing to one byte past the 500 // end of the summary data if it exists or the input \c Cur. 501 // \c UseCS indicates whether to use the context-sensitive profile summary. 502 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, 503 const unsigned char *Cur, bool UseCS); 504 505 public: 506 IndexedInstrProfReader( 507 std::unique_ptr<MemoryBuffer> DataBuffer, 508 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr) 509 : DataBuffer(std::move(DataBuffer)), 510 RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} 511 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; 512 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; 513 514 /// Return the profile version. 515 uint64_t getVersion() const { return Index->getVersion(); } 516 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } 517 bool hasCSIRLevelProfile() const override { 518 return Index->hasCSIRLevelProfile(); 519 } 520 521 bool instrEntryBBEnabled() const override { 522 return Index->instrEntryBBEnabled(); 523 } 524 525 /// Return true if the given buffer is in an indexed instrprof format. 526 static bool hasFormat(const MemoryBuffer &DataBuffer); 527 528 /// Read the file header. 529 Error readHeader() override; 530 /// Read a single record. 531 Error readNextRecord(NamedInstrProfRecord &Record) override; 532 533 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash 534 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, 535 uint64_t FuncHash); 536 537 /// Fill Counts with the profile data for the given function name. 538 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, 539 std::vector<uint64_t> &Counts); 540 541 /// Return the maximum of all known function counts. 542 /// \c UseCS indicates whether to use the context-sensitive count. 543 uint64_t getMaximumFunctionCount(bool UseCS) { 544 if (UseCS) { 545 assert(CS_Summary && "No context sensitive profile summary"); 546 return CS_Summary->getMaxFunctionCount(); 547 } else { 548 assert(Summary && "No profile summary"); 549 return Summary->getMaxFunctionCount(); 550 } 551 } 552 553 /// Factory method to create an indexed reader. 554 static Expected<std::unique_ptr<IndexedInstrProfReader>> 555 create(const Twine &Path, const Twine &RemappingPath = ""); 556 557 static Expected<std::unique_ptr<IndexedInstrProfReader>> 558 create(std::unique_ptr<MemoryBuffer> Buffer, 559 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr); 560 561 // Used for testing purpose only. 562 void setValueProfDataEndianness(support::endianness Endianness) { 563 Index->setValueProfDataEndianness(Endianness); 564 } 565 566 // See description in the base class. This interface is designed 567 // to be used by llvm-profdata (for dumping). Avoid using this when 568 // the client is the compiler. 569 InstrProfSymtab &getSymtab() override; 570 571 /// Return the profile summary. 572 /// \c UseCS indicates whether to use the context-sensitive summary. 573 ProfileSummary &getSummary(bool UseCS) { 574 if (UseCS) { 575 assert(CS_Summary && "No context sensitive summary"); 576 return *(CS_Summary.get()); 577 } else { 578 assert(Summary && "No profile summary"); 579 return *(Summary.get()); 580 } 581 } 582 }; 583 584 } // end namespace llvm 585 586 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H 587