1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for instrumentation 10 // based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H 15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/IR/ProfileSummary.h" 20 #include "llvm/ProfileData/InstrProf.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/LineIterator.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/OnDiskHashTable.h" 26 #include "llvm/Support/SwapByteOrder.h" 27 #include <algorithm> 28 #include <cassert> 29 #include <cstddef> 30 #include <cstdint> 31 #include <iterator> 32 #include <memory> 33 #include <utility> 34 #include <vector> 35 36 namespace llvm { 37 38 class InstrProfReader; 39 40 /// A file format agnostic iterator over profiling data. 41 class InstrProfIterator { 42 public: 43 using iterator_category = std::input_iterator_tag; 44 using value_type = NamedInstrProfRecord; 45 using difference_type = std::ptrdiff_t; 46 using pointer = value_type *; 47 using reference = value_type &; 48 49 private: 50 InstrProfReader *Reader = nullptr; 51 value_type Record; 52 53 void Increment(); 54 55 public: 56 InstrProfIterator() = default; 57 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } 58 59 InstrProfIterator &operator++() { Increment(); return *this; } 60 bool operator==(const InstrProfIterator &RHS) const { 61 return Reader == RHS.Reader; 62 } 63 bool operator!=(const InstrProfIterator &RHS) const { 64 return Reader != RHS.Reader; 65 } 66 value_type &operator*() { return Record; } 67 value_type *operator->() { return &Record; } 68 }; 69 70 /// Base class and interface for reading profiling data of any known instrprof 71 /// format. Provides an iterator over NamedInstrProfRecords. 72 class InstrProfReader { 73 instrprof_error LastError = instrprof_error::success; 74 std::string LastErrorMsg; 75 76 public: 77 InstrProfReader() = default; 78 virtual ~InstrProfReader() = default; 79 80 /// Read the header. Required before reading first record. 81 virtual Error readHeader() = 0; 82 83 /// Read a single record. 84 virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; 85 86 /// Print binary ids on stream OS. 87 virtual Error printBinaryIds(raw_ostream &OS) { return success(); }; 88 89 /// Iterator over profile data. 90 InstrProfIterator begin() { return InstrProfIterator(this); } 91 InstrProfIterator end() { return InstrProfIterator(); } 92 93 virtual bool isIRLevelProfile() const = 0; 94 95 virtual bool hasCSIRLevelProfile() const = 0; 96 97 virtual bool instrEntryBBEnabled() const = 0; 98 99 /// Return the PGO symtab. There are three different readers: 100 /// Raw, Text, and Indexed profile readers. The first two types 101 /// of readers are used only by llvm-profdata tool, while the indexed 102 /// profile reader is also used by llvm-cov tool and the compiler ( 103 /// backend or frontend). Since creating PGO symtab can create 104 /// significant runtime and memory overhead (as it touches data 105 /// for the whole program), InstrProfSymtab for the indexed profile 106 /// reader should be created on demand and it is recommended to be 107 /// only used for dumping purpose with llvm-proftool, not with the 108 /// compiler. 109 virtual InstrProfSymtab &getSymtab() = 0; 110 111 /// Compute the sum of counts and return in Sum. 112 void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); 113 114 protected: 115 std::unique_ptr<InstrProfSymtab> Symtab; 116 117 /// Set the current error and return same. 118 Error error(instrprof_error Err, const std::string &ErrMsg = "") { 119 LastError = Err; 120 LastErrorMsg = ErrMsg; 121 if (Err == instrprof_error::success) 122 return Error::success(); 123 return make_error<InstrProfError>(Err, ErrMsg); 124 } 125 126 Error error(Error &&E) { 127 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 128 LastError = IPE.get(); 129 LastErrorMsg = IPE.getMessage(); 130 }); 131 return make_error<InstrProfError>(LastError, LastErrorMsg); 132 } 133 134 /// Clear the current error and return a successful one. 135 Error success() { return error(instrprof_error::success); } 136 137 public: 138 /// Return true if the reader has finished reading the profile data. 139 bool isEOF() { return LastError == instrprof_error::eof; } 140 141 /// Return true if the reader encountered an error reading profiling data. 142 bool hasError() { return LastError != instrprof_error::success && !isEOF(); } 143 144 /// Get the current error. 145 Error getError() { 146 if (hasError()) 147 return make_error<InstrProfError>(LastError, LastErrorMsg); 148 return Error::success(); 149 } 150 151 /// Factory method to create an appropriately typed reader for the given 152 /// instrprof file. 153 static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path); 154 155 static Expected<std::unique_ptr<InstrProfReader>> 156 create(std::unique_ptr<MemoryBuffer> Buffer); 157 }; 158 159 /// Reader for the simple text based instrprof format. 160 /// 161 /// This format is a simple text format that's suitable for test data. Records 162 /// are separated by one or more blank lines, and record fields are separated by 163 /// new lines. 164 /// 165 /// Each record consists of a function name, a function hash, a number of 166 /// counters, and then each counter value, in that order. 167 class TextInstrProfReader : public InstrProfReader { 168 private: 169 /// The profile data file contents. 170 std::unique_ptr<MemoryBuffer> DataBuffer; 171 /// Iterator over the profile data. 172 line_iterator Line; 173 bool IsIRLevelProfile = false; 174 bool HasCSIRLevelProfile = false; 175 bool InstrEntryBBEnabled = false; 176 177 Error readValueProfileData(InstrProfRecord &Record); 178 179 public: 180 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) 181 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} 182 TextInstrProfReader(const TextInstrProfReader &) = delete; 183 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; 184 185 /// Return true if the given buffer is in text instrprof format. 186 static bool hasFormat(const MemoryBuffer &Buffer); 187 188 bool isIRLevelProfile() const override { return IsIRLevelProfile; } 189 190 bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } 191 192 bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; } 193 194 /// Read the header. 195 Error readHeader() override; 196 197 /// Read a single record. 198 Error readNextRecord(NamedInstrProfRecord &Record) override; 199 200 InstrProfSymtab &getSymtab() override { 201 assert(Symtab.get()); 202 return *Symtab.get(); 203 } 204 }; 205 206 /// Reader for the raw instrprof binary format from runtime. 207 /// 208 /// This format is a raw memory dump of the instrumentation-based profiling data 209 /// from the runtime. It has no index. 210 /// 211 /// Templated on the unsigned type whose size matches pointers on the platform 212 /// that wrote the profile. 213 template <class IntPtrT> 214 class RawInstrProfReader : public InstrProfReader { 215 private: 216 /// The profile data file contents. 217 std::unique_ptr<MemoryBuffer> DataBuffer; 218 bool ShouldSwapBytes; 219 // The value of the version field of the raw profile data header. The lower 56 220 // bits specifies the format version and the most significant 8 bits specify 221 // the variant types of the profile. 222 uint64_t Version; 223 uint64_t CountersDelta; 224 uint64_t NamesDelta; 225 const RawInstrProf::ProfileData<IntPtrT> *Data; 226 const RawInstrProf::ProfileData<IntPtrT> *DataEnd; 227 const uint64_t *CountersStart; 228 const char *NamesStart; 229 uint64_t NamesSize; 230 // After value profile is all read, this pointer points to 231 // the header of next profile data (if exists) 232 const uint8_t *ValueDataStart; 233 uint32_t ValueKindLast; 234 uint32_t CurValueDataSize; 235 236 uint64_t BinaryIdsSize; 237 const uint8_t *BinaryIdsStart; 238 239 public: 240 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) 241 : DataBuffer(std::move(DataBuffer)) {} 242 RawInstrProfReader(const RawInstrProfReader &) = delete; 243 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; 244 245 static bool hasFormat(const MemoryBuffer &DataBuffer); 246 Error readHeader() override; 247 Error readNextRecord(NamedInstrProfRecord &Record) override; 248 Error printBinaryIds(raw_ostream &OS) override; 249 250 bool isIRLevelProfile() const override { 251 return (Version & VARIANT_MASK_IR_PROF) != 0; 252 } 253 254 bool hasCSIRLevelProfile() const override { 255 return (Version & VARIANT_MASK_CSIR_PROF) != 0; 256 } 257 258 bool instrEntryBBEnabled() const override { 259 return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; 260 } 261 262 InstrProfSymtab &getSymtab() override { 263 assert(Symtab.get()); 264 return *Symtab.get(); 265 } 266 267 private: 268 Error createSymtab(InstrProfSymtab &Symtab); 269 Error readNextHeader(const char *CurrentPos); 270 Error readHeader(const RawInstrProf::Header &Header); 271 272 template <class IntT> IntT swap(IntT Int) const { 273 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; 274 } 275 276 support::endianness getDataEndianness() const { 277 support::endianness HostEndian = getHostEndianness(); 278 if (!ShouldSwapBytes) 279 return HostEndian; 280 if (HostEndian == support::little) 281 return support::big; 282 else 283 return support::little; 284 } 285 286 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { 287 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); 288 } 289 290 Error readName(NamedInstrProfRecord &Record); 291 Error readFuncHash(NamedInstrProfRecord &Record); 292 Error readRawCounts(InstrProfRecord &Record); 293 Error readValueProfilingData(InstrProfRecord &Record); 294 bool atEnd() const { return Data == DataEnd; } 295 296 void advanceData() { 297 Data++; 298 ValueDataStart += CurValueDataSize; 299 } 300 301 const char *getNextHeaderPos() const { 302 assert(atEnd()); 303 return (const char *)ValueDataStart; 304 } 305 306 /// Get the offset of \p CounterPtr from the start of the counters section of 307 /// the profile. The offset has units of "number of counters", i.e. increasing 308 /// the offset by 1 corresponds to an increase in the *byte offset* by 8. 309 ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const { 310 return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); 311 } 312 313 const uint64_t *getCounter(ptrdiff_t Offset) const { 314 return CountersStart + Offset; 315 } 316 317 StringRef getName(uint64_t NameRef) const { 318 return Symtab->getFuncName(swap(NameRef)); 319 } 320 }; 321 322 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; 323 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>; 324 325 namespace IndexedInstrProf { 326 327 enum class HashT : uint32_t; 328 329 } // end namespace IndexedInstrProf 330 331 /// Trait for lookups into the on-disk hash table for the binary instrprof 332 /// format. 333 class InstrProfLookupTrait { 334 std::vector<NamedInstrProfRecord> DataBuffer; 335 IndexedInstrProf::HashT HashType; 336 unsigned FormatVersion; 337 // Endianness of the input value profile data. 338 // It should be LE by default, but can be changed 339 // for testing purpose. 340 support::endianness ValueProfDataEndianness = support::little; 341 342 public: 343 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) 344 : HashType(HashType), FormatVersion(FormatVersion) {} 345 346 using data_type = ArrayRef<NamedInstrProfRecord>; 347 348 using internal_key_type = StringRef; 349 using external_key_type = StringRef; 350 using hash_value_type = uint64_t; 351 using offset_type = uint64_t; 352 353 static bool EqualKey(StringRef A, StringRef B) { return A == B; } 354 static StringRef GetInternalKey(StringRef K) { return K; } 355 static StringRef GetExternalKey(StringRef K) { return K; } 356 357 hash_value_type ComputeHash(StringRef K); 358 359 static std::pair<offset_type, offset_type> 360 ReadKeyDataLength(const unsigned char *&D) { 361 using namespace support; 362 363 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); 364 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); 365 return std::make_pair(KeyLen, DataLen); 366 } 367 368 StringRef ReadKey(const unsigned char *D, offset_type N) { 369 return StringRef((const char *)D, N); 370 } 371 372 bool readValueProfilingData(const unsigned char *&D, 373 const unsigned char *const End); 374 data_type ReadData(StringRef K, const unsigned char *D, offset_type N); 375 376 // Used for testing purpose only. 377 void setValueProfDataEndianness(support::endianness Endianness) { 378 ValueProfDataEndianness = Endianness; 379 } 380 }; 381 382 struct InstrProfReaderIndexBase { 383 virtual ~InstrProfReaderIndexBase() = default; 384 385 // Read all the profile records with the same key pointed to the current 386 // iterator. 387 virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0; 388 389 // Read all the profile records with the key equal to FuncName 390 virtual Error getRecords(StringRef FuncName, 391 ArrayRef<NamedInstrProfRecord> &Data) = 0; 392 virtual void advanceToNextKey() = 0; 393 virtual bool atEnd() const = 0; 394 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; 395 virtual uint64_t getVersion() const = 0; 396 virtual bool isIRLevelProfile() const = 0; 397 virtual bool hasCSIRLevelProfile() const = 0; 398 virtual bool instrEntryBBEnabled() const = 0; 399 virtual Error populateSymtab(InstrProfSymtab &) = 0; 400 }; 401 402 using OnDiskHashTableImplV3 = 403 OnDiskIterableChainedHashTable<InstrProfLookupTrait>; 404 405 template <typename HashTableImpl> 406 class InstrProfReaderItaniumRemapper; 407 408 template <typename HashTableImpl> 409 class InstrProfReaderIndex : public InstrProfReaderIndexBase { 410 private: 411 std::unique_ptr<HashTableImpl> HashTable; 412 typename HashTableImpl::data_iterator RecordIterator; 413 uint64_t FormatVersion; 414 415 friend class InstrProfReaderItaniumRemapper<HashTableImpl>; 416 417 public: 418 InstrProfReaderIndex(const unsigned char *Buckets, 419 const unsigned char *const Payload, 420 const unsigned char *const Base, 421 IndexedInstrProf::HashT HashType, uint64_t Version); 422 ~InstrProfReaderIndex() override = default; 423 424 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override; 425 Error getRecords(StringRef FuncName, 426 ArrayRef<NamedInstrProfRecord> &Data) override; 427 void advanceToNextKey() override { RecordIterator++; } 428 429 bool atEnd() const override { 430 return RecordIterator == HashTable->data_end(); 431 } 432 433 void setValueProfDataEndianness(support::endianness Endianness) override { 434 HashTable->getInfoObj().setValueProfDataEndianness(Endianness); 435 } 436 437 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); } 438 439 bool isIRLevelProfile() const override { 440 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; 441 } 442 443 bool hasCSIRLevelProfile() const override { 444 return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; 445 } 446 447 bool instrEntryBBEnabled() const override { 448 return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0; 449 } 450 451 Error populateSymtab(InstrProfSymtab &Symtab) override { 452 return Symtab.create(HashTable->keys()); 453 } 454 }; 455 456 /// Name matcher supporting fuzzy matching of symbol names to names in profiles. 457 class InstrProfReaderRemapper { 458 public: 459 virtual ~InstrProfReaderRemapper() {} 460 virtual Error populateRemappings() { return Error::success(); } 461 virtual Error getRecords(StringRef FuncName, 462 ArrayRef<NamedInstrProfRecord> &Data) = 0; 463 }; 464 465 /// Reader for the indexed binary instrprof format. 466 class IndexedInstrProfReader : public InstrProfReader { 467 private: 468 /// The profile data file contents. 469 std::unique_ptr<MemoryBuffer> DataBuffer; 470 /// The profile remapping file contents. 471 std::unique_ptr<MemoryBuffer> RemappingBuffer; 472 /// The index into the profile data. 473 std::unique_ptr<InstrProfReaderIndexBase> Index; 474 /// The profile remapping file contents. 475 std::unique_ptr<InstrProfReaderRemapper> Remapper; 476 /// Profile summary data. 477 std::unique_ptr<ProfileSummary> Summary; 478 /// Context sensitive profile summary data. 479 std::unique_ptr<ProfileSummary> CS_Summary; 480 // Index to the current record in the record array. 481 unsigned RecordIndex; 482 483 // Read the profile summary. Return a pointer pointing to one byte past the 484 // end of the summary data if it exists or the input \c Cur. 485 // \c UseCS indicates whether to use the context-sensitive profile summary. 486 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, 487 const unsigned char *Cur, bool UseCS); 488 489 public: 490 IndexedInstrProfReader( 491 std::unique_ptr<MemoryBuffer> DataBuffer, 492 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr) 493 : DataBuffer(std::move(DataBuffer)), 494 RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} 495 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; 496 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; 497 498 /// Return the profile version. 499 uint64_t getVersion() const { return Index->getVersion(); } 500 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } 501 bool hasCSIRLevelProfile() const override { 502 return Index->hasCSIRLevelProfile(); 503 } 504 505 bool instrEntryBBEnabled() const override { 506 return Index->instrEntryBBEnabled(); 507 } 508 509 /// Return true if the given buffer is in an indexed instrprof format. 510 static bool hasFormat(const MemoryBuffer &DataBuffer); 511 512 /// Read the file header. 513 Error readHeader() override; 514 /// Read a single record. 515 Error readNextRecord(NamedInstrProfRecord &Record) override; 516 517 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash 518 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, 519 uint64_t FuncHash); 520 521 /// Fill Counts with the profile data for the given function name. 522 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, 523 std::vector<uint64_t> &Counts); 524 525 /// Return the maximum of all known function counts. 526 /// \c UseCS indicates whether to use the context-sensitive count. 527 uint64_t getMaximumFunctionCount(bool UseCS) { 528 if (UseCS) { 529 assert(CS_Summary && "No context sensitive profile summary"); 530 return CS_Summary->getMaxFunctionCount(); 531 } else { 532 assert(Summary && "No profile summary"); 533 return Summary->getMaxFunctionCount(); 534 } 535 } 536 537 /// Factory method to create an indexed reader. 538 static Expected<std::unique_ptr<IndexedInstrProfReader>> 539 create(const Twine &Path, const Twine &RemappingPath = ""); 540 541 static Expected<std::unique_ptr<IndexedInstrProfReader>> 542 create(std::unique_ptr<MemoryBuffer> Buffer, 543 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr); 544 545 // Used for testing purpose only. 546 void setValueProfDataEndianness(support::endianness Endianness) { 547 Index->setValueProfDataEndianness(Endianness); 548 } 549 550 // See description in the base class. This interface is designed 551 // to be used by llvm-profdata (for dumping). Avoid using this when 552 // the client is the compiler. 553 InstrProfSymtab &getSymtab() override; 554 555 /// Return the profile summary. 556 /// \c UseCS indicates whether to use the context-sensitive summary. 557 ProfileSummary &getSummary(bool UseCS) { 558 if (UseCS) { 559 assert(CS_Summary && "No context sensitive summary"); 560 return *(CS_Summary.get()); 561 } else { 562 assert(Summary && "No profile summary"); 563 return *(Summary.get()); 564 } 565 } 566 }; 567 568 } // end namespace llvm 569 570 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H 571