xref: /llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h (revision 66edefaee5e87baabe2367cf1dd82ef40cee8c86)
1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/Object/BuildID.h"
21 #include "llvm/ProfileData/InstrProf.h"
22 #include "llvm/ProfileData/InstrProfCorrelator.h"
23 #include "llvm/ProfileData/MemProf.h"
24 #include "llvm/ProfileData/MemProfYAML.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/Error.h"
27 #include "llvm/Support/LineIterator.h"
28 #include "llvm/Support/MathExtras.h"
29 #include "llvm/Support/MemoryBuffer.h"
30 #include "llvm/Support/OnDiskHashTable.h"
31 #include "llvm/Support/SwapByteOrder.h"
32 #include <algorithm>
33 #include <cassert>
34 #include <cstddef>
35 #include <cstdint>
36 #include <iterator>
37 #include <memory>
38 #include <utility>
39 #include <vector>
40 
41 namespace llvm {
42 
namespace vfs {
// Only used by reference in the reader factory signatures declared in this
// header, so a forward declaration is enough.
class FileSystem;
} // namespace vfs

// Declared ahead of its definition because InstrProfIterator names it as the
// default reader_type.
class InstrProfReader;
48 
49 /// A file format agnostic iterator over profiling data.
50 template <class record_type = NamedInstrProfRecord,
51           class reader_type = InstrProfReader>
52 class InstrProfIterator {
53 public:
54   using iterator_category = std::input_iterator_tag;
55   using value_type = record_type;
56   using difference_type = std::ptrdiff_t;
57   using pointer = value_type *;
58   using reference = value_type &;
59 
60 private:
61   reader_type *Reader = nullptr;
62   value_type Record;
63 
64   void increment() {
65     if (Error E = Reader->readNextRecord(Record)) {
66       // Handle errors in the reader.
67       InstrProfError::take(std::move(E));
68       *this = InstrProfIterator();
69     }
70   }
71 
72 public:
73   InstrProfIterator() = default;
74   InstrProfIterator(reader_type *Reader) : Reader(Reader) { increment(); }
75 
76   InstrProfIterator &operator++() {
77     increment();
78     return *this;
79   }
80   bool operator==(const InstrProfIterator &RHS) const {
81     return Reader == RHS.Reader;
82   }
83   bool operator!=(const InstrProfIterator &RHS) const {
84     return Reader != RHS.Reader;
85   }
86   value_type &operator*() { return Record; }
87   value_type *operator->() { return &Record; }
88 };
89 
90 /// Base class and interface for reading profiling data of any known instrprof
91 /// format. Provides an iterator over NamedInstrProfRecords.
92 class InstrProfReader {
93   instrprof_error LastError = instrprof_error::success;
94   std::string LastErrorMsg;
95 
96 public:
97   InstrProfReader() = default;
98   virtual ~InstrProfReader() = default;
99 
100   /// Read the header.  Required before reading first record.
101   virtual Error readHeader() = 0;
102 
103   /// Read a single record.
104   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
105 
106   /// Read a list of binary ids.
107   virtual Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) {
108     return success();
109   }
110 
111   /// Print binary ids.
112   virtual Error printBinaryIds(raw_ostream &OS) { return success(); };
113 
114   /// Iterator over profile data.
115   InstrProfIterator<> begin() { return InstrProfIterator<>(this); }
116   InstrProfIterator<> end() { return InstrProfIterator<>(); }
117 
118   /// Return the profile version.
119   virtual uint64_t getVersion() const = 0;
120 
121   virtual bool isIRLevelProfile() const = 0;
122 
123   virtual bool hasCSIRLevelProfile() const = 0;
124 
125   virtual bool instrEntryBBEnabled() const = 0;
126 
127   /// Return true if the profile instruments all loop entries.
128   virtual bool instrLoopEntriesEnabled() const = 0;
129 
130   /// Return true if the profile has single byte counters representing coverage.
131   virtual bool hasSingleByteCoverage() const = 0;
132 
133   /// Return true if the profile only instruments function entries.
134   virtual bool functionEntryOnly() const = 0;
135 
136   /// Return true if profile includes a memory profile.
137   virtual bool hasMemoryProfile() const = 0;
138 
139   /// Return true if this has a temporal profile.
140   virtual bool hasTemporalProfile() const = 0;
141 
142   /// Returns a BitsetEnum describing the attributes of the profile. To check
143   /// individual attributes prefer using the helpers above.
144   virtual InstrProfKind getProfileKind() const = 0;
145 
146   /// Return the PGO symtab. There are three different readers:
147   /// Raw, Text, and Indexed profile readers. The first two types
148   /// of readers are used only by llvm-profdata tool, while the indexed
149   /// profile reader is also used by llvm-cov tool and the compiler (
150   /// backend or frontend). Since creating PGO symtab can create
151   /// significant runtime and memory overhead (as it touches data
152   /// for the whole program), InstrProfSymtab for the indexed profile
153   /// reader should be created on demand and it is recommended to be
154   /// only used for dumping purpose with llvm-proftool, not with the
155   /// compiler.
156   virtual InstrProfSymtab &getSymtab() = 0;
157 
158   /// Compute the sum of counts and return in Sum.
159   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
160 
161 protected:
162   std::unique_ptr<InstrProfSymtab> Symtab;
163   /// A list of temporal profile traces.
164   SmallVector<TemporalProfTraceTy> TemporalProfTraces;
165   /// The total number of temporal profile traces seen.
166   uint64_t TemporalProfTraceStreamSize = 0;
167 
168   /// Set the current error and return same.
169   Error error(instrprof_error Err, const std::string &ErrMsg = "") {
170     LastError = Err;
171     LastErrorMsg = ErrMsg;
172     if (Err == instrprof_error::success)
173       return Error::success();
174     return make_error<InstrProfError>(Err, ErrMsg);
175   }
176 
177   Error error(Error &&E) {
178     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
179       LastError = IPE.get();
180       LastErrorMsg = IPE.getMessage();
181     });
182     return make_error<InstrProfError>(LastError, LastErrorMsg);
183   }
184 
185   /// Clear the current error and return a successful one.
186   Error success() { return error(instrprof_error::success); }
187 
188 public:
189   /// Return true if the reader has finished reading the profile data.
190   bool isEOF() { return LastError == instrprof_error::eof; }
191 
192   /// Return true if the reader encountered an error reading profiling data.
193   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
194 
195   /// Get the current error.
196   Error getError() {
197     if (hasError())
198       return make_error<InstrProfError>(LastError, LastErrorMsg);
199     return Error::success();
200   }
201 
202   /// Factory method to create an appropriately typed reader for the given
203   /// instrprof file.
204   static Expected<std::unique_ptr<InstrProfReader>> create(
205       const Twine &Path, vfs::FileSystem &FS,
206       const InstrProfCorrelator *Correlator = nullptr,
207       const object::BuildIDFetcher *BIDFetcher = nullptr,
208       const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind =
209           InstrProfCorrelator::ProfCorrelatorKind::NONE,
210       std::function<void(Error)> Warn = nullptr);
211 
212   static Expected<std::unique_ptr<InstrProfReader>> create(
213       std::unique_ptr<MemoryBuffer> Buffer,
214       const InstrProfCorrelator *Correlator = nullptr,
215       const object::BuildIDFetcher *BIDFetcher = nullptr,
216       const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind =
217           InstrProfCorrelator::ProfCorrelatorKind::NONE,
218       std::function<void(Error)> Warn = nullptr);
219 
220   /// \param Weight for raw profiles use this as the temporal profile trace
221   ///               weight
222   /// \returns a list of temporal profile traces.
223   virtual SmallVector<TemporalProfTraceTy> &
224   getTemporalProfTraces(std::optional<uint64_t> Weight = {}) {
225     // For non-raw profiles we ignore the input weight and instead use the
226     // weights already in the traces.
227     return TemporalProfTraces;
228   }
229   /// \returns the total number of temporal profile traces seen.
230   uint64_t getTemporalProfTraceStreamSize() {
231     return TemporalProfTraceStreamSize;
232   }
233 };
234 
235 /// Reader for the simple text based instrprof format.
236 ///
237 /// This format is a simple text format that's suitable for test data. Records
238 /// are separated by one or more blank lines, and record fields are separated by
239 /// new lines.
240 ///
241 /// Each record consists of a function name, a function hash, a number of
242 /// counters, and then each counter value, in that order.
243 class TextInstrProfReader : public InstrProfReader {
244 private:
245   /// The profile data file contents.
246   std::unique_ptr<MemoryBuffer> DataBuffer;
247   /// Iterator over the profile data.
248   line_iterator Line;
249   /// The attributes of the current profile.
250   InstrProfKind ProfileKind = InstrProfKind::Unknown;
251 
252   Error readValueProfileData(InstrProfRecord &Record);
253 
254   Error readTemporalProfTraceData();
255 
256 public:
257   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
258       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
259   TextInstrProfReader(const TextInstrProfReader &) = delete;
260   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
261 
262   /// Return true if the given buffer is in text instrprof format.
263   static bool hasFormat(const MemoryBuffer &Buffer);
264 
265   // Text format does not have version, so return 0.
266   uint64_t getVersion() const override { return 0; }
267 
268   bool isIRLevelProfile() const override {
269     return static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation);
270   }
271 
272   bool hasCSIRLevelProfile() const override {
273     return static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive);
274   }
275 
276   bool instrEntryBBEnabled() const override {
277     return static_cast<bool>(ProfileKind &
278                              InstrProfKind::FunctionEntryInstrumentation);
279   }
280 
281   bool instrLoopEntriesEnabled() const override {
282     return static_cast<bool>(ProfileKind &
283                              InstrProfKind::LoopEntriesInstrumentation);
284   }
285 
286   bool hasSingleByteCoverage() const override {
287     return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
288   }
289 
290   bool functionEntryOnly() const override {
291     return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly);
292   }
293 
294   bool hasMemoryProfile() const override {
295     // TODO: Add support for text format memory profiles.
296     return false;
297   }
298 
299   bool hasTemporalProfile() const override {
300     return static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile);
301   }
302 
303   InstrProfKind getProfileKind() const override { return ProfileKind; }
304 
305   /// Read the header.
306   Error readHeader() override;
307 
308   /// Read a single record.
309   Error readNextRecord(NamedInstrProfRecord &Record) override;
310 
311   InstrProfSymtab &getSymtab() override {
312     assert(Symtab);
313     return *Symtab;
314   }
315 };
316 
317 /// Reader for the raw instrprof binary format from runtime.
318 ///
319 /// This format is a raw memory dump of the instrumentation-based profiling data
320 /// from the runtime.  It has no index.
321 ///
322 /// Templated on the unsigned type whose size matches pointers on the platform
323 /// that wrote the profile.
324 template <class IntPtrT>
325 class RawInstrProfReader : public InstrProfReader {
326 private:
327   /// The profile data file contents.
328   std::unique_ptr<MemoryBuffer> DataBuffer;
329   /// If available, this hold the ProfileData array used to correlate raw
330   /// instrumentation data to their functions.
331   const InstrProfCorrelatorImpl<IntPtrT> *Correlator;
332   /// Fetches debuginfo by build id to correlate profiles.
333   const object::BuildIDFetcher *BIDFetcher;
334   /// Correlates profiles with build id fetcher by fetching debuginfo with build
335   /// ID.
336   std::unique_ptr<InstrProfCorrelator> BIDFetcherCorrelator;
337   /// Indicates if should use debuginfo or binary to correlate with build id
338   /// fetcher.
339   InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind;
340   /// A list of timestamps paired with a function name reference.
341   std::vector<std::pair<uint64_t, uint64_t>> TemporalProfTimestamps;
342   bool ShouldSwapBytes;
343   // The value of the version field of the raw profile data header. The lower 32
344   // bits specifies the format version and the most significant 32 bits specify
345   // the variant types of the profile.
346   uint64_t Version;
347   uint64_t CountersDelta;
348   uint64_t BitmapDelta;
349   uint64_t NamesDelta;
350   const RawInstrProf::ProfileData<IntPtrT> *Data;
351   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
352   const RawInstrProf::VTableProfileData<IntPtrT> *VTableBegin = nullptr;
353   const RawInstrProf::VTableProfileData<IntPtrT> *VTableEnd = nullptr;
354   const char *CountersStart;
355   const char *CountersEnd;
356   const char *BitmapStart;
357   const char *BitmapEnd;
358   const char *NamesStart;
359   const char *NamesEnd;
360   const char *VNamesStart = nullptr;
361   const char *VNamesEnd = nullptr;
362   // After value profile is all read, this pointer points to
363   // the header of next profile data (if exists)
364   const uint8_t *ValueDataStart;
365   uint32_t ValueKindLast;
366   uint32_t CurValueDataSize;
367   std::vector<llvm::object::BuildID> BinaryIds;
368 
369   std::function<void(Error)> Warn;
370 
371   /// Maxium counter value 2^56.
372   static const uint64_t MaxCounterValue = (1ULL << 56);
373 
374 public:
375   RawInstrProfReader(
376       std::unique_ptr<MemoryBuffer> DataBuffer,
377       const InstrProfCorrelator *Correlator,
378       const object::BuildIDFetcher *BIDFetcher,
379       const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind,
380       std::function<void(Error)> Warn)
381       : DataBuffer(std::move(DataBuffer)),
382         Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>(
383             Correlator)),
384         BIDFetcher(BIDFetcher),
385         BIDFetcherCorrelatorKind(BIDFetcherCorrelatorKind), Warn(Warn) {}
386 
387   RawInstrProfReader(const RawInstrProfReader &) = delete;
388   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
389 
390   static bool hasFormat(const MemoryBuffer &DataBuffer);
391   Error readHeader() override;
392   Error readNextRecord(NamedInstrProfRecord &Record) override;
393   Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
394   Error printBinaryIds(raw_ostream &OS) override;
395 
396   uint64_t getVersion() const override { return Version; }
397 
398   bool isIRLevelProfile() const override {
399     return (Version & VARIANT_MASK_IR_PROF) != 0;
400   }
401 
402   bool hasCSIRLevelProfile() const override {
403     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
404   }
405 
406   bool instrEntryBBEnabled() const override {
407     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
408   }
409 
410   bool instrLoopEntriesEnabled() const override {
411     return (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
412   }
413 
414   bool hasSingleByteCoverage() const override {
415     return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0;
416   }
417 
418   bool functionEntryOnly() const override {
419     return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
420   }
421 
422   bool hasMemoryProfile() const override {
423     // Memory profiles have a separate raw format, so this should never be set.
424     assert(!(Version & VARIANT_MASK_MEMPROF));
425     return false;
426   }
427 
428   bool hasTemporalProfile() const override {
429     return (Version & VARIANT_MASK_TEMPORAL_PROF) != 0;
430   }
431 
432   /// Returns a BitsetEnum describing the attributes of the raw instr profile.
433   InstrProfKind getProfileKind() const override;
434 
435   InstrProfSymtab &getSymtab() override {
436     assert(Symtab.get());
437     return *Symtab.get();
438   }
439 
440   SmallVector<TemporalProfTraceTy> &
441   getTemporalProfTraces(std::optional<uint64_t> Weight = {}) override;
442 
443 private:
444   Error createSymtab(InstrProfSymtab &Symtab);
445   Error readNextHeader(const char *CurrentPos);
446   Error readHeader(const RawInstrProf::Header &Header);
447 
448   template <class IntT> IntT swap(IntT Int) const {
449     return ShouldSwapBytes ? llvm::byteswap(Int) : Int;
450   }
451 
452   llvm::endianness getDataEndianness() const {
453     if (!ShouldSwapBytes)
454       return llvm::endianness::native;
455     if (llvm::endianness::native == llvm::endianness::little)
456       return llvm::endianness::big;
457     else
458       return llvm::endianness::little;
459   }
460 
461   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
462     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
463   }
464 
465   Error readName(NamedInstrProfRecord &Record);
466   Error readFuncHash(NamedInstrProfRecord &Record);
467   Error readRawCounts(InstrProfRecord &Record);
468   Error readRawBitmapBytes(InstrProfRecord &Record);
469   Error readValueProfilingData(InstrProfRecord &Record);
470   bool atEnd() const { return Data == DataEnd; }
471 
472   void advanceData() {
473     // `CountersDelta` is a constant zero when using debug info correlation.
474     if (!Correlator && !BIDFetcherCorrelator) {
475       // The initial CountersDelta is the in-memory address difference between
476       // the data and counts sections:
477       // start(__llvm_prf_cnts) - start(__llvm_prf_data)
478       // As we advance to the next record, we maintain the correct CountersDelta
479       // with respect to the next record.
480       CountersDelta -= sizeof(*Data);
481       BitmapDelta -= sizeof(*Data);
482     }
483     Data++;
484     ValueDataStart += CurValueDataSize;
485   }
486 
487   const char *getNextHeaderPos() const {
488       assert(atEnd());
489       return (const char *)ValueDataStart;
490   }
491 
492   StringRef getName(uint64_t NameRef) const {
493     return Symtab->getFuncOrVarName(swap(NameRef));
494   }
495 
496   int getCounterTypeSize() const {
497     return hasSingleByteCoverage() ? sizeof(uint8_t) : sizeof(uint64_t);
498   }
499 };
500 
501 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
502 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
503 
504 namespace IndexedInstrProf {
505 
506 enum class HashT : uint32_t;
507 
508 } // end namespace IndexedInstrProf
509 
510 /// Trait for lookups into the on-disk hash table for the binary instrprof
511 /// format.
512 class InstrProfLookupTrait {
513   std::vector<NamedInstrProfRecord> DataBuffer;
514   IndexedInstrProf::HashT HashType;
515   unsigned FormatVersion;
516   // Endianness of the input value profile data.
517   // It should be LE by default, but can be changed
518   // for testing purpose.
519   llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
520 
521 public:
522   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
523       : HashType(HashType), FormatVersion(FormatVersion) {}
524 
525   using data_type = ArrayRef<NamedInstrProfRecord>;
526 
527   using internal_key_type = StringRef;
528   using external_key_type = StringRef;
529   using hash_value_type = uint64_t;
530   using offset_type = uint64_t;
531 
532   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
533   static StringRef GetInternalKey(StringRef K) { return K; }
534   static StringRef GetExternalKey(StringRef K) { return K; }
535 
536   hash_value_type ComputeHash(StringRef K);
537 
538   static std::pair<offset_type, offset_type>
539   ReadKeyDataLength(const unsigned char *&D) {
540     using namespace support;
541 
542     offset_type KeyLen =
543         endian::readNext<offset_type, llvm::endianness::little>(D);
544     offset_type DataLen =
545         endian::readNext<offset_type, llvm::endianness::little>(D);
546     return std::make_pair(KeyLen, DataLen);
547   }
548 
549   StringRef ReadKey(const unsigned char *D, offset_type N) {
550     return StringRef((const char *)D, N);
551   }
552 
553   bool readValueProfilingData(const unsigned char *&D,
554                               const unsigned char *const End);
555   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
556 
557   // Used for testing purpose only.
558   void setValueProfDataEndianness(llvm::endianness Endianness) {
559     ValueProfDataEndianness = Endianness;
560   }
561 };
562 
563 struct InstrProfReaderIndexBase {
564   virtual ~InstrProfReaderIndexBase() = default;
565 
566   // Read all the profile records with the same key pointed to the current
567   // iterator.
568   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
569 
570   // Read all the profile records with the key equal to FuncName
571   virtual Error getRecords(StringRef FuncName,
572                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
573   virtual void advanceToNextKey() = 0;
574   virtual bool atEnd() const = 0;
575   virtual void setValueProfDataEndianness(llvm::endianness Endianness) = 0;
576   virtual uint64_t getVersion() const = 0;
577   virtual bool isIRLevelProfile() const = 0;
578   virtual bool hasCSIRLevelProfile() const = 0;
579   virtual bool instrEntryBBEnabled() const = 0;
580   virtual bool instrLoopEntriesEnabled() const = 0;
581   virtual bool hasSingleByteCoverage() const = 0;
582   virtual bool functionEntryOnly() const = 0;
583   virtual bool hasMemoryProfile() const = 0;
584   virtual bool hasTemporalProfile() const = 0;
585   virtual InstrProfKind getProfileKind() const = 0;
586   virtual Error populateSymtab(InstrProfSymtab &) = 0;
587 };
588 
589 using OnDiskHashTableImplV3 =
590     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
591 
592 using MemProfRecordHashTable =
593     OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
594 using MemProfFrameHashTable =
595     OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
596 using MemProfCallStackHashTable =
597     OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>;
598 
599 template <typename HashTableImpl>
600 class InstrProfReaderItaniumRemapper;
601 
602 template <typename HashTableImpl>
603 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
604 private:
605   std::unique_ptr<HashTableImpl> HashTable;
606   typename HashTableImpl::data_iterator RecordIterator;
607   uint64_t FormatVersion;
608 
609   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
610 
611 public:
612   InstrProfReaderIndex(const unsigned char *Buckets,
613                        const unsigned char *const Payload,
614                        const unsigned char *const Base,
615                        IndexedInstrProf::HashT HashType, uint64_t Version);
616   ~InstrProfReaderIndex() override = default;
617 
618   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
619   Error getRecords(StringRef FuncName,
620                    ArrayRef<NamedInstrProfRecord> &Data) override;
621   void advanceToNextKey() override { RecordIterator++; }
622 
623   bool atEnd() const override {
624     return RecordIterator == HashTable->data_end();
625   }
626 
627   void setValueProfDataEndianness(llvm::endianness Endianness) override {
628     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
629   }
630 
631   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
632 
633   bool isIRLevelProfile() const override {
634     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
635   }
636 
637   bool hasCSIRLevelProfile() const override {
638     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
639   }
640 
641   bool instrEntryBBEnabled() const override {
642     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
643   }
644 
645   bool instrLoopEntriesEnabled() const override {
646     return (FormatVersion & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
647   }
648 
649   bool hasSingleByteCoverage() const override {
650     return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0;
651   }
652 
653   bool functionEntryOnly() const override {
654     return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
655   }
656 
657   bool hasMemoryProfile() const override {
658     return (FormatVersion & VARIANT_MASK_MEMPROF) != 0;
659   }
660 
661   bool hasTemporalProfile() const override {
662     return (FormatVersion & VARIANT_MASK_TEMPORAL_PROF) != 0;
663   }
664 
665   InstrProfKind getProfileKind() const override;
666 
667   Error populateSymtab(InstrProfSymtab &Symtab) override {
668     // FIXME: the create method calls 'finalizeSymtab' and sorts a bunch of
669     // arrays/maps. Since there are other data sources other than 'HashTable' to
670     // populate a symtab, it might make sense to have something like this
671     // 1. Let each data source populate Symtab and init the arrays/maps without
672     // calling 'finalizeSymtab'
673     // 2. Call 'finalizeSymtab' once to get all arrays/maps sorted if needed.
674     return Symtab.create(HashTable->keys());
675   }
676 };
677 
678 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
679 class InstrProfReaderRemapper {
680 public:
681   virtual ~InstrProfReaderRemapper() = default;
682   virtual Error populateRemappings() { return Error::success(); }
683   virtual Error getRecords(StringRef FuncName,
684                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
685 };
686 
687 class IndexedMemProfReader {
688 private:
689   /// The MemProf version.
690   memprof::IndexedVersion Version =
691       static_cast<memprof::IndexedVersion>(memprof::MinimumSupportedVersion);
692   /// MemProf profile schema (if available).
693   memprof::MemProfSchema Schema;
694   /// MemProf record profile data on-disk indexed via llvm::md5(FunctionName).
695   std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
696   /// MemProf frame profile data on-disk indexed via frame id.
697   std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
698   /// MemProf call stack data on-disk indexed via call stack id.
699   std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
700   /// The starting address of the frame array.
701   const unsigned char *FrameBase = nullptr;
702   /// The starting address of the call stack array.
703   const unsigned char *CallStackBase = nullptr;
704   // The number of elements in the radix tree array.
705   unsigned RadixTreeSize = 0;
706 
707   Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
708   Error deserializeV3(const unsigned char *Start, const unsigned char *Ptr);
709 
710 public:
711   IndexedMemProfReader() = default;
712 
713   Error deserialize(const unsigned char *Start, uint64_t MemProfOffset);
714 
715   Expected<memprof::MemProfRecord>
716   getMemProfRecord(const uint64_t FuncNameHash) const;
717 
718   DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
719   getMemProfCallerCalleePairs() const;
720 
721   // Return the entire MemProf profile.
722   memprof::AllMemProfData getAllMemProfData() const;
723 };
724 
725 /// Reader for the indexed binary instrprof format.
726 class IndexedInstrProfReader : public InstrProfReader {
727 private:
728   /// The profile data file contents.
729   std::unique_ptr<MemoryBuffer> DataBuffer;
730   /// The profile remapping file contents.
731   std::unique_ptr<MemoryBuffer> RemappingBuffer;
732   /// The index into the profile data.
733   std::unique_ptr<InstrProfReaderIndexBase> Index;
734   /// The profile remapping file contents.
735   std::unique_ptr<InstrProfReaderRemapper> Remapper;
736   /// Profile summary data.
737   std::unique_ptr<ProfileSummary> Summary;
738   /// Context sensitive profile summary data.
739   std::unique_ptr<ProfileSummary> CS_Summary;
740   IndexedMemProfReader MemProfReader;
741   /// The compressed vtable names, to be used for symtab construction.
742   /// A compiler that reads indexed profiles could construct symtab from module
743   /// IR so it doesn't need the decompressed names.
744   StringRef VTableName;
745   /// A memory buffer holding binary ids.
746   ArrayRef<uint8_t> BinaryIdsBuffer;
747 
748   // Index to the current record in the record array.
749   unsigned RecordIndex = 0;
750 
751   // Read the profile summary. Return a pointer pointing to one byte past the
752   // end of the summary data if it exists or the input \c Cur.
753   // \c UseCS indicates whether to use the context-sensitive profile summary.
754   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
755                                    const unsigned char *Cur, bool UseCS);
756 
757 public:
758   IndexedInstrProfReader(
759       std::unique_ptr<MemoryBuffer> DataBuffer,
760       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
761       : DataBuffer(std::move(DataBuffer)),
762         RemappingBuffer(std::move(RemappingBuffer)) {}
763   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
764   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
765 
766   /// Return the profile version.
767   uint64_t getVersion() const override { return Index->getVersion(); }
768   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
769   bool hasCSIRLevelProfile() const override {
770     return Index->hasCSIRLevelProfile();
771   }
772 
773   bool instrEntryBBEnabled() const override {
774     return Index->instrEntryBBEnabled();
775   }
776 
777   bool instrLoopEntriesEnabled() const override {
778     return Index->instrLoopEntriesEnabled();
779   }
780 
781   bool hasSingleByteCoverage() const override {
782     return Index->hasSingleByteCoverage();
783   }
784 
785   bool functionEntryOnly() const override { return Index->functionEntryOnly(); }
786 
787   bool hasMemoryProfile() const override { return Index->hasMemoryProfile(); }
788 
789   bool hasTemporalProfile() const override {
790     return Index->hasTemporalProfile();
791   }
792 
793   /// Returns a BitsetEnum describing the attributes of the indexed instr
794   /// profile.
795   InstrProfKind getProfileKind() const override {
796     return Index->getProfileKind();
797   }
798 
799   /// Return true if the given buffer is in an indexed instrprof format.
800   static bool hasFormat(const MemoryBuffer &DataBuffer);
801 
802   /// Read the file header.
803   Error readHeader() override;
804   /// Read a single record.
805   Error readNextRecord(NamedInstrProfRecord &Record) override;
806 
807   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash.
808   /// When return a hash_mismatch error and MismatchedFuncSum is not nullptr,
809   /// the sum of all counters in the mismatched function will be set to
810   /// MismatchedFuncSum. If there are multiple instances of mismatched
811   /// functions, MismatchedFuncSum returns the maximum. If \c FuncName is not
812   /// found, try to lookup \c DeprecatedFuncName to handle profiles built by
813   /// older compilers.
814   Expected<InstrProfRecord>
815   getInstrProfRecord(StringRef FuncName, uint64_t FuncHash,
816                      StringRef DeprecatedFuncName = "",
817                      uint64_t *MismatchedFuncSum = nullptr);
818 
819   /// Return the memprof record for the function identified by
820   /// llvm::md5(Name).
821   Expected<memprof::MemProfRecord> getMemProfRecord(uint64_t FuncNameHash) {
822     return MemProfReader.getMemProfRecord(FuncNameHash);
823   }
824 
825   DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
826   getMemProfCallerCalleePairs() {
827     return MemProfReader.getMemProfCallerCalleePairs();
828   }
829 
830   memprof::AllMemProfData getAllMemProfData() const {
831     return MemProfReader.getAllMemProfData();
832   }
833 
834   /// Fill Counts with the profile data for the given function name.
835   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
836                           std::vector<uint64_t> &Counts);
837 
838   /// Fill Bitmap with the profile data for the given function name.
839   Error getFunctionBitmap(StringRef FuncName, uint64_t FuncHash,
840                           BitVector &Bitmap);
841 
842   /// Return the maximum of all known function counts.
843   /// \c UseCS indicates whether to use the context-sensitive count.
844   uint64_t getMaximumFunctionCount(bool UseCS) {
845     if (UseCS) {
846       assert(CS_Summary && "No context sensitive profile summary");
847       return CS_Summary->getMaxFunctionCount();
848     } else {
849       assert(Summary && "No profile summary");
850       return Summary->getMaxFunctionCount();
851     }
852   }
853 
854   /// Factory method to create an indexed reader.
855   static Expected<std::unique_ptr<IndexedInstrProfReader>>
856   create(const Twine &Path, vfs::FileSystem &FS,
857          const Twine &RemappingPath = "");
858 
859   static Expected<std::unique_ptr<IndexedInstrProfReader>>
860   create(std::unique_ptr<MemoryBuffer> Buffer,
861          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
862 
863   // Used for testing purpose only.
864   void setValueProfDataEndianness(llvm::endianness Endianness) {
865     Index->setValueProfDataEndianness(Endianness);
866   }
867 
868   // See description in the base class. This interface is designed
869   // to be used by llvm-profdata (for dumping). Avoid using this when
870   // the client is the compiler.
871   InstrProfSymtab &getSymtab() override;
872 
873   /// Return the profile summary.
874   /// \c UseCS indicates whether to use the context-sensitive summary.
875   ProfileSummary &getSummary(bool UseCS) {
876     if (UseCS) {
877       assert(CS_Summary && "No context sensitive summary");
878       return *CS_Summary;
879     } else {
880       assert(Summary && "No profile summary");
881       return *Summary;
882     }
883   }
884 
885   Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
886   Error printBinaryIds(raw_ostream &OS) override;
887 };
888 
889 } // end namespace llvm
890 
891 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
892