1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for instrumentation 10 // based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H 15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/IR/ProfileSummary.h" 20 #include "llvm/ProfileData/InstrProf.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/LineIterator.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/OnDiskHashTable.h" 26 #include "llvm/Support/SwapByteOrder.h" 27 #include <algorithm> 28 #include <cassert> 29 #include <cstddef> 30 #include <cstdint> 31 #include <iterator> 32 #include <memory> 33 #include <utility> 34 #include <vector> 35 36 namespace llvm { 37 38 class InstrProfReader; 39 40 /// A file format agnostic iterator over profiling data. 41 class InstrProfIterator { 42 public: 43 using iterator_category = std::input_iterator_tag; 44 using value_type = NamedInstrProfRecord; 45 using difference_type = std::ptrdiff_t; 46 using pointer = value_type *; 47 using reference = value_type &; 48 49 private: 50 InstrProfReader *Reader = nullptr; 51 value_type Record; 52 53 void Increment(); 54 55 public: 56 InstrProfIterator() = default; InstrProfIterator(InstrProfReader * Reader)57 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } 58 59 InstrProfIterator &operator++() { Increment(); return *this; } 60 bool operator==(const InstrProfIterator &RHS) const { 61 return Reader == RHS.Reader; 62 } 63 bool operator!=(const InstrProfIterator &RHS) const { 64 return Reader != RHS.Reader; 65 } 66 value_type &operator*() { return Record; } 67 value_type *operator->() { return &Record; } 68 }; 69 70 /// Base class and interface for reading profiling data of any known instrprof 71 /// format. Provides an iterator over NamedInstrProfRecords. 72 class InstrProfReader { 73 instrprof_error LastError = instrprof_error::success; 74 75 public: 76 InstrProfReader() = default; 77 virtual ~InstrProfReader() = default; 78 79 /// Read the header. Required before reading first record. 80 virtual Error readHeader() = 0; 81 82 /// Read a single record. 83 virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; 84 85 /// Iterator over profile data. begin()86 InstrProfIterator begin() { return InstrProfIterator(this); } end()87 InstrProfIterator end() { return InstrProfIterator(); } 88 89 virtual bool isIRLevelProfile() const = 0; 90 91 virtual bool hasCSIRLevelProfile() const = 0; 92 93 virtual bool instrEntryBBEnabled() const = 0; 94 95 /// Return the PGO symtab. There are three different readers: 96 /// Raw, Text, and Indexed profile readers. The first two types 97 /// of readers are used only by llvm-profdata tool, while the indexed 98 /// profile reader is also used by llvm-cov tool and the compiler ( 99 /// backend or frontend). Since creating PGO symtab can create 100 /// significant runtime and memory overhead (as it touches data 101 /// for the whole program), InstrProfSymtab for the indexed profile 102 /// reader should be created on demand and it is recommended to be 103 /// only used for dumping purpose with llvm-proftool, not with the 104 /// compiler. 105 virtual InstrProfSymtab &getSymtab() = 0; 106 107 /// Compute the sum of counts and return in Sum. 108 void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); 109 110 protected: 111 std::unique_ptr<InstrProfSymtab> Symtab; 112 113 /// Set the current error and return same. error(instrprof_error Err)114 Error error(instrprof_error Err) { 115 LastError = Err; 116 if (Err == instrprof_error::success) 117 return Error::success(); 118 return make_error<InstrProfError>(Err); 119 } 120 error(Error && E)121 Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); } 122 123 /// Clear the current error and return a successful one. success()124 Error success() { return error(instrprof_error::success); } 125 126 public: 127 /// Return true if the reader has finished reading the profile data. isEOF()128 bool isEOF() { return LastError == instrprof_error::eof; } 129 130 /// Return true if the reader encountered an error reading profiling data. hasError()131 bool hasError() { return LastError != instrprof_error::success && !isEOF(); } 132 133 /// Get the current error. getError()134 Error getError() { 135 if (hasError()) 136 return make_error<InstrProfError>(LastError); 137 return Error::success(); 138 } 139 140 /// Factory method to create an appropriately typed reader for the given 141 /// instrprof file. 142 static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path); 143 144 static Expected<std::unique_ptr<InstrProfReader>> 145 create(std::unique_ptr<MemoryBuffer> Buffer); 146 }; 147 148 /// Reader for the simple text based instrprof format. 149 /// 150 /// This format is a simple text format that's suitable for test data. Records 151 /// are separated by one or more blank lines, and record fields are separated by 152 /// new lines. 153 /// 154 /// Each record consists of a function name, a function hash, a number of 155 /// counters, and then each counter value, in that order. 156 class TextInstrProfReader : public InstrProfReader { 157 private: 158 /// The profile data file contents. 159 std::unique_ptr<MemoryBuffer> DataBuffer; 160 /// Iterator over the profile data. 161 line_iterator Line; 162 bool IsIRLevelProfile = false; 163 bool HasCSIRLevelProfile = false; 164 bool InstrEntryBBEnabled = false; 165 166 Error readValueProfileData(InstrProfRecord &Record); 167 168 public: TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)169 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) 170 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} 171 TextInstrProfReader(const TextInstrProfReader &) = delete; 172 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; 173 174 /// Return true if the given buffer is in text instrprof format. 175 static bool hasFormat(const MemoryBuffer &Buffer); 176 isIRLevelProfile()177 bool isIRLevelProfile() const override { return IsIRLevelProfile; } 178 hasCSIRLevelProfile()179 bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } 180 instrEntryBBEnabled()181 bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; } 182 183 /// Read the header. 184 Error readHeader() override; 185 186 /// Read a single record. 187 Error readNextRecord(NamedInstrProfRecord &Record) override; 188 getSymtab()189 InstrProfSymtab &getSymtab() override { 190 assert(Symtab.get()); 191 return *Symtab.get(); 192 } 193 }; 194 195 /// Reader for the raw instrprof binary format from runtime. 196 /// 197 /// This format is a raw memory dump of the instrumentation-baed profiling data 198 /// from the runtime. It has no index. 199 /// 200 /// Templated on the unsigned type whose size matches pointers on the platform 201 /// that wrote the profile. 202 template <class IntPtrT> 203 class RawInstrProfReader : public InstrProfReader { 204 private: 205 /// The profile data file contents. 206 std::unique_ptr<MemoryBuffer> DataBuffer; 207 bool ShouldSwapBytes; 208 // The value of the version field of the raw profile data header. The lower 56 209 // bits specifies the format version and the most significant 8 bits specify 210 // the variant types of the profile. 211 uint64_t Version; 212 uint64_t CountersDelta; 213 uint64_t NamesDelta; 214 const RawInstrProf::ProfileData<IntPtrT> *Data; 215 const RawInstrProf::ProfileData<IntPtrT> *DataEnd; 216 const uint64_t *CountersStart; 217 const char *NamesStart; 218 uint64_t NamesSize; 219 // After value profile is all read, this pointer points to 220 // the header of next profile data (if exists) 221 const uint8_t *ValueDataStart; 222 uint32_t ValueKindLast; 223 uint32_t CurValueDataSize; 224 225 public: RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)226 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) 227 : DataBuffer(std::move(DataBuffer)) {} 228 RawInstrProfReader(const RawInstrProfReader &) = delete; 229 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; 230 231 static bool hasFormat(const MemoryBuffer &DataBuffer); 232 Error readHeader() override; 233 Error readNextRecord(NamedInstrProfRecord &Record) override; 234 isIRLevelProfile()235 bool isIRLevelProfile() const override { 236 return (Version & VARIANT_MASK_IR_PROF) != 0; 237 } 238 hasCSIRLevelProfile()239 bool hasCSIRLevelProfile() const override { 240 return (Version & VARIANT_MASK_CSIR_PROF) != 0; 241 } 242 instrEntryBBEnabled()243 bool instrEntryBBEnabled() const override { 244 return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; 245 } 246 getSymtab()247 InstrProfSymtab &getSymtab() override { 248 assert(Symtab.get()); 249 return *Symtab.get(); 250 } 251 252 private: 253 Error createSymtab(InstrProfSymtab &Symtab); 254 Error readNextHeader(const char *CurrentPos); 255 Error readHeader(const RawInstrProf::Header &Header); 256 swap(IntT Int)257 template <class IntT> IntT swap(IntT Int) const { 258 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; 259 } 260 getDataEndianness()261 support::endianness getDataEndianness() const { 262 support::endianness HostEndian = getHostEndianness(); 263 if (!ShouldSwapBytes) 264 return HostEndian; 265 if (HostEndian == support::little) 266 return support::big; 267 else 268 return support::little; 269 } 270 getNumPaddingBytes(uint64_t SizeInBytes)271 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { 272 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); 273 } 274 275 Error readName(NamedInstrProfRecord &Record); 276 Error readFuncHash(NamedInstrProfRecord &Record); 277 Error readRawCounts(InstrProfRecord &Record); 278 Error readValueProfilingData(InstrProfRecord &Record); atEnd()279 bool atEnd() const { return Data == DataEnd; } 280 advanceData()281 void advanceData() { 282 Data++; 283 ValueDataStart += CurValueDataSize; 284 } 285 getNextHeaderPos()286 const char *getNextHeaderPos() const { 287 assert(atEnd()); 288 return (const char *)ValueDataStart; 289 } 290 291 /// Get the offset of \p CounterPtr from the start of the counters section of 292 /// the profile. The offset has units of "number of counters", i.e. increasing 293 /// the offset by 1 corresponds to an increase in the *byte offset* by 8. getCounterOffset(IntPtrT CounterPtr)294 ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const { 295 return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); 296 } 297 getCounter(ptrdiff_t Offset)298 const uint64_t *getCounter(ptrdiff_t Offset) const { 299 return CountersStart + Offset; 300 } 301 getName(uint64_t NameRef)302 StringRef getName(uint64_t NameRef) const { 303 return Symtab->getFuncName(swap(NameRef)); 304 } 305 }; 306 307 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; 308 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>; 309 310 namespace IndexedInstrProf { 311 312 enum class HashT : uint32_t; 313 314 } // end namespace IndexedInstrProf 315 316 /// Trait for lookups into the on-disk hash table for the binary instrprof 317 /// format. 318 class InstrProfLookupTrait { 319 std::vector<NamedInstrProfRecord> DataBuffer; 320 IndexedInstrProf::HashT HashType; 321 unsigned FormatVersion; 322 // Endianness of the input value profile data. 323 // It should be LE by default, but can be changed 324 // for testing purpose. 325 support::endianness ValueProfDataEndianness = support::little; 326 327 public: InstrProfLookupTrait(IndexedInstrProf::HashT HashType,unsigned FormatVersion)328 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) 329 : HashType(HashType), FormatVersion(FormatVersion) {} 330 331 using data_type = ArrayRef<NamedInstrProfRecord>; 332 333 using internal_key_type = StringRef; 334 using external_key_type = StringRef; 335 using hash_value_type = uint64_t; 336 using offset_type = uint64_t; 337 EqualKey(StringRef A,StringRef B)338 static bool EqualKey(StringRef A, StringRef B) { return A == B; } GetInternalKey(StringRef K)339 static StringRef GetInternalKey(StringRef K) { return K; } GetExternalKey(StringRef K)340 static StringRef GetExternalKey(StringRef K) { return K; } 341 342 hash_value_type ComputeHash(StringRef K); 343 344 static std::pair<offset_type, offset_type> ReadKeyDataLength(const unsigned char * & D)345 ReadKeyDataLength(const unsigned char *&D) { 346 using namespace support; 347 348 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); 349 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); 350 return std::make_pair(KeyLen, DataLen); 351 } 352 ReadKey(const unsigned char * D,offset_type N)353 StringRef ReadKey(const unsigned char *D, offset_type N) { 354 return StringRef((const char *)D, N); 355 } 356 357 bool readValueProfilingData(const unsigned char *&D, 358 const unsigned char *const End); 359 data_type ReadData(StringRef K, const unsigned char *D, offset_type N); 360 361 // Used for testing purpose only. setValueProfDataEndianness(support::endianness Endianness)362 void setValueProfDataEndianness(support::endianness Endianness) { 363 ValueProfDataEndianness = Endianness; 364 } 365 }; 366 367 struct InstrProfReaderIndexBase { 368 virtual ~InstrProfReaderIndexBase() = default; 369 370 // Read all the profile records with the same key pointed to the current 371 // iterator. 372 virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0; 373 374 // Read all the profile records with the key equal to FuncName 375 virtual Error getRecords(StringRef FuncName, 376 ArrayRef<NamedInstrProfRecord> &Data) = 0; 377 virtual void advanceToNextKey() = 0; 378 virtual bool atEnd() const = 0; 379 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; 380 virtual uint64_t getVersion() const = 0; 381 virtual bool isIRLevelProfile() const = 0; 382 virtual bool hasCSIRLevelProfile() const = 0; 383 virtual bool instrEntryBBEnabled() const = 0; 384 virtual Error populateSymtab(InstrProfSymtab &) = 0; 385 }; 386 387 using OnDiskHashTableImplV3 = 388 OnDiskIterableChainedHashTable<InstrProfLookupTrait>; 389 390 template <typename HashTableImpl> 391 class InstrProfReaderItaniumRemapper; 392 393 template <typename HashTableImpl> 394 class InstrProfReaderIndex : public InstrProfReaderIndexBase { 395 private: 396 std::unique_ptr<HashTableImpl> HashTable; 397 typename HashTableImpl::data_iterator RecordIterator; 398 uint64_t FormatVersion; 399 400 friend class InstrProfReaderItaniumRemapper<HashTableImpl>; 401 402 public: 403 InstrProfReaderIndex(const unsigned char *Buckets, 404 const unsigned char *const Payload, 405 const unsigned char *const Base, 406 IndexedInstrProf::HashT HashType, uint64_t Version); 407 ~InstrProfReaderIndex() override = default; 408 409 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override; 410 Error getRecords(StringRef FuncName, 411 ArrayRef<NamedInstrProfRecord> &Data) override; advanceToNextKey()412 void advanceToNextKey() override { RecordIterator++; } 413 atEnd()414 bool atEnd() const override { 415 return RecordIterator == HashTable->data_end(); 416 } 417 setValueProfDataEndianness(support::endianness Endianness)418 void setValueProfDataEndianness(support::endianness Endianness) override { 419 HashTable->getInfoObj().setValueProfDataEndianness(Endianness); 420 } 421 getVersion()422 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); } 423 isIRLevelProfile()424 bool isIRLevelProfile() const override { 425 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; 426 } 427 hasCSIRLevelProfile()428 bool hasCSIRLevelProfile() const override { 429 return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; 430 } 431 instrEntryBBEnabled()432 bool instrEntryBBEnabled() const override { 433 return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0; 434 } 435 populateSymtab(InstrProfSymtab & Symtab)436 Error populateSymtab(InstrProfSymtab &Symtab) override { 437 return Symtab.create(HashTable->keys()); 438 } 439 }; 440 441 /// Name matcher supporting fuzzy matching of symbol names to names in profiles. 442 class InstrProfReaderRemapper { 443 public: ~InstrProfReaderRemapper()444 virtual ~InstrProfReaderRemapper() {} populateRemappings()445 virtual Error populateRemappings() { return Error::success(); } 446 virtual Error getRecords(StringRef FuncName, 447 ArrayRef<NamedInstrProfRecord> &Data) = 0; 448 }; 449 450 /// Reader for the indexed binary instrprof format. 451 class IndexedInstrProfReader : public InstrProfReader { 452 private: 453 /// The profile data file contents. 454 std::unique_ptr<MemoryBuffer> DataBuffer; 455 /// The profile remapping file contents. 456 std::unique_ptr<MemoryBuffer> RemappingBuffer; 457 /// The index into the profile data. 458 std::unique_ptr<InstrProfReaderIndexBase> Index; 459 /// The profile remapping file contents. 460 std::unique_ptr<InstrProfReaderRemapper> Remapper; 461 /// Profile summary data. 462 std::unique_ptr<ProfileSummary> Summary; 463 /// Context sensitive profile summary data. 464 std::unique_ptr<ProfileSummary> CS_Summary; 465 // Index to the current record in the record array. 466 unsigned RecordIndex; 467 468 // Read the profile summary. Return a pointer pointing to one byte past the 469 // end of the summary data if it exists or the input \c Cur. 470 // \c UseCS indicates whether to use the context-sensitive profile summary. 471 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, 472 const unsigned char *Cur, bool UseCS); 473 474 public: 475 IndexedInstrProfReader( 476 std::unique_ptr<MemoryBuffer> DataBuffer, 477 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr) DataBuffer(std::move (DataBuffer))478 : DataBuffer(std::move(DataBuffer)), 479 RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} 480 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; 481 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; 482 483 /// Return the profile version. getVersion()484 uint64_t getVersion() const { return Index->getVersion(); } isIRLevelProfile()485 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } hasCSIRLevelProfile()486 bool hasCSIRLevelProfile() const override { 487 return Index->hasCSIRLevelProfile(); 488 } 489 instrEntryBBEnabled()490 bool instrEntryBBEnabled() const override { 491 return Index->instrEntryBBEnabled(); 492 } 493 494 /// Return true if the given buffer is in an indexed instrprof format. 495 static bool hasFormat(const MemoryBuffer &DataBuffer); 496 497 /// Read the file header. 498 Error readHeader() override; 499 /// Read a single record. 500 Error readNextRecord(NamedInstrProfRecord &Record) override; 501 502 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash 503 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, 504 uint64_t FuncHash); 505 506 /// Fill Counts with the profile data for the given function name. 507 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, 508 std::vector<uint64_t> &Counts); 509 510 /// Return the maximum of all known function counts. 511 /// \c UseCS indicates whether to use the context-sensitive count. getMaximumFunctionCount(bool UseCS)512 uint64_t getMaximumFunctionCount(bool UseCS) { 513 if (UseCS) { 514 assert(CS_Summary && "No context sensitive profile summary"); 515 return CS_Summary->getMaxFunctionCount(); 516 } else { 517 assert(Summary && "No profile summary"); 518 return Summary->getMaxFunctionCount(); 519 } 520 } 521 522 /// Factory method to create an indexed reader. 523 static Expected<std::unique_ptr<IndexedInstrProfReader>> 524 create(const Twine &Path, const Twine &RemappingPath = ""); 525 526 static Expected<std::unique_ptr<IndexedInstrProfReader>> 527 create(std::unique_ptr<MemoryBuffer> Buffer, 528 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr); 529 530 // Used for testing purpose only. setValueProfDataEndianness(support::endianness Endianness)531 void setValueProfDataEndianness(support::endianness Endianness) { 532 Index->setValueProfDataEndianness(Endianness); 533 } 534 535 // See description in the base class. This interface is designed 536 // to be used by llvm-profdata (for dumping). Avoid using this when 537 // the client is the compiler. 538 InstrProfSymtab &getSymtab() override; 539 540 /// Return the profile summary. 541 /// \c UseCS indicates whether to use the context-sensitive summary. getSummary(bool UseCS)542 ProfileSummary &getSummary(bool UseCS) { 543 if (UseCS) { 544 assert(CS_Summary && "No context sensitive summary"); 545 return *(CS_Summary.get()); 546 } else { 547 assert(Summary && "No profile summary"); 548 return *(Summary.get()); 549 } 550 } 551 }; 552 553 } // end namespace llvm 554 555 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H 556