xref: /freebsd-src/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h (revision 4824e7fd18a1223177218d4aec1b3c6c5c4a444e)
1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/SwapByteOrder.h"
27 #include <algorithm>
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <iterator>
32 #include <memory>
33 #include <utility>
34 #include <vector>
35 
36 namespace llvm {
37 
38 class InstrProfReader;
39 
40 /// A file format agnostic iterator over profiling data.
41 class InstrProfIterator {
42 public:
43   using iterator_category = std::input_iterator_tag;
44   using value_type = NamedInstrProfRecord;
45   using difference_type = std::ptrdiff_t;
46   using pointer = value_type *;
47   using reference = value_type &;
48 
49 private:
50   InstrProfReader *Reader = nullptr;
51   value_type Record;
52 
53   void Increment();
54 
55 public:
56   InstrProfIterator() = default;
57   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
58 
59   InstrProfIterator &operator++() { Increment(); return *this; }
60   bool operator==(const InstrProfIterator &RHS) const {
61     return Reader == RHS.Reader;
62   }
63   bool operator!=(const InstrProfIterator &RHS) const {
64     return Reader != RHS.Reader;
65   }
66   value_type &operator*() { return Record; }
67   value_type *operator->() { return &Record; }
68 };
69 
70 /// Base class and interface for reading profiling data of any known instrprof
71 /// format. Provides an iterator over NamedInstrProfRecords.
72 class InstrProfReader {
73   instrprof_error LastError = instrprof_error::success;
74   std::string LastErrorMsg;
75 
76 public:
77   InstrProfReader() = default;
78   virtual ~InstrProfReader() = default;
79 
80   /// Read the header.  Required before reading first record.
81   virtual Error readHeader() = 0;
82 
83   /// Read a single record.
84   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
85 
86   /// Print binary ids on stream OS.
87   virtual Error printBinaryIds(raw_ostream &OS) { return success(); };
88 
89   /// Iterator over profile data.
90   InstrProfIterator begin() { return InstrProfIterator(this); }
91   InstrProfIterator end() { return InstrProfIterator(); }
92 
93   virtual bool isIRLevelProfile() const = 0;
94 
95   virtual bool hasCSIRLevelProfile() const = 0;
96 
97   virtual bool instrEntryBBEnabled() const = 0;
98 
99   /// Return the PGO symtab. There are three different readers:
100   /// Raw, Text, and Indexed profile readers. The first two types
101   /// of readers are used only by llvm-profdata tool, while the indexed
102   /// profile reader is also used by llvm-cov tool and the compiler (
103   /// backend or frontend). Since creating PGO symtab can create
104   /// significant runtime and memory overhead (as it touches data
105   /// for the whole program), InstrProfSymtab for the indexed profile
106   /// reader should be created on demand and it is recommended to be
107   /// only used for dumping purpose with llvm-proftool, not with the
108   /// compiler.
109   virtual InstrProfSymtab &getSymtab() = 0;
110 
111   /// Compute the sum of counts and return in Sum.
112   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
113 
114 protected:
115   std::unique_ptr<InstrProfSymtab> Symtab;
116 
117   /// Set the current error and return same.
118   Error error(instrprof_error Err, const std::string &ErrMsg = "") {
119     LastError = Err;
120     LastErrorMsg = ErrMsg;
121     if (Err == instrprof_error::success)
122       return Error::success();
123     return make_error<InstrProfError>(Err, ErrMsg);
124   }
125 
126   Error error(Error &&E) {
127     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
128       LastError = IPE.get();
129       LastErrorMsg = IPE.getMessage();
130     });
131     return make_error<InstrProfError>(LastError, LastErrorMsg);
132   }
133 
134   /// Clear the current error and return a successful one.
135   Error success() { return error(instrprof_error::success); }
136 
137 public:
138   /// Return true if the reader has finished reading the profile data.
139   bool isEOF() { return LastError == instrprof_error::eof; }
140 
141   /// Return true if the reader encountered an error reading profiling data.
142   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
143 
144   /// Get the current error.
145   Error getError() {
146     if (hasError())
147       return make_error<InstrProfError>(LastError, LastErrorMsg);
148     return Error::success();
149   }
150 
151   /// Factory method to create an appropriately typed reader for the given
152   /// instrprof file.
153   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
154 
155   static Expected<std::unique_ptr<InstrProfReader>>
156   create(std::unique_ptr<MemoryBuffer> Buffer);
157 };
158 
159 /// Reader for the simple text based instrprof format.
160 ///
161 /// This format is a simple text format that's suitable for test data. Records
162 /// are separated by one or more blank lines, and record fields are separated by
163 /// new lines.
164 ///
165 /// Each record consists of a function name, a function hash, a number of
166 /// counters, and then each counter value, in that order.
167 class TextInstrProfReader : public InstrProfReader {
168 private:
169   /// The profile data file contents.
170   std::unique_ptr<MemoryBuffer> DataBuffer;
171   /// Iterator over the profile data.
172   line_iterator Line;
173   bool IsIRLevelProfile = false;
174   bool HasCSIRLevelProfile = false;
175   bool InstrEntryBBEnabled = false;
176 
177   Error readValueProfileData(InstrProfRecord &Record);
178 
179 public:
180   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
181       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
182   TextInstrProfReader(const TextInstrProfReader &) = delete;
183   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
184 
185   /// Return true if the given buffer is in text instrprof format.
186   static bool hasFormat(const MemoryBuffer &Buffer);
187 
188   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
189 
190   bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
191 
192   bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
193 
194   /// Read the header.
195   Error readHeader() override;
196 
197   /// Read a single record.
198   Error readNextRecord(NamedInstrProfRecord &Record) override;
199 
200   InstrProfSymtab &getSymtab() override {
201     assert(Symtab.get());
202     return *Symtab.get();
203   }
204 };
205 
206 /// Reader for the raw instrprof binary format from runtime.
207 ///
208 /// This format is a raw memory dump of the instrumentation-based profiling data
209 /// from the runtime.  It has no index.
210 ///
211 /// Templated on the unsigned type whose size matches pointers on the platform
212 /// that wrote the profile.
213 template <class IntPtrT>
214 class RawInstrProfReader : public InstrProfReader {
215 private:
216   /// The profile data file contents.
217   std::unique_ptr<MemoryBuffer> DataBuffer;
218   bool ShouldSwapBytes;
219   // The value of the version field of the raw profile data header. The lower 56
220   // bits specifies the format version and the most significant 8 bits specify
221   // the variant types of the profile.
222   uint64_t Version;
223   uint64_t CountersDelta;
224   uint64_t NamesDelta;
225   const RawInstrProf::ProfileData<IntPtrT> *Data;
226   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
227   const uint64_t *CountersStart;
228   const char *NamesStart;
229   uint64_t NamesSize;
230   // After value profile is all read, this pointer points to
231   // the header of next profile data (if exists)
232   const uint8_t *ValueDataStart;
233   uint32_t ValueKindLast;
234   uint32_t CurValueDataSize;
235 
236   uint64_t BinaryIdsSize;
237   const uint8_t *BinaryIdsStart;
238 
239 public:
240   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
241       : DataBuffer(std::move(DataBuffer)) {}
242   RawInstrProfReader(const RawInstrProfReader &) = delete;
243   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
244 
245   static bool hasFormat(const MemoryBuffer &DataBuffer);
246   Error readHeader() override;
247   Error readNextRecord(NamedInstrProfRecord &Record) override;
248   Error printBinaryIds(raw_ostream &OS) override;
249 
250   bool isIRLevelProfile() const override {
251     return (Version & VARIANT_MASK_IR_PROF) != 0;
252   }
253 
254   bool hasCSIRLevelProfile() const override {
255     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
256   }
257 
258   bool instrEntryBBEnabled() const override {
259     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
260   }
261 
262   InstrProfSymtab &getSymtab() override {
263     assert(Symtab.get());
264     return *Symtab.get();
265   }
266 
267 private:
268   Error createSymtab(InstrProfSymtab &Symtab);
269   Error readNextHeader(const char *CurrentPos);
270   Error readHeader(const RawInstrProf::Header &Header);
271 
272   template <class IntT> IntT swap(IntT Int) const {
273     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
274   }
275 
276   support::endianness getDataEndianness() const {
277     support::endianness HostEndian = getHostEndianness();
278     if (!ShouldSwapBytes)
279       return HostEndian;
280     if (HostEndian == support::little)
281       return support::big;
282     else
283       return support::little;
284   }
285 
286   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
287     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
288   }
289 
290   Error readName(NamedInstrProfRecord &Record);
291   Error readFuncHash(NamedInstrProfRecord &Record);
292   Error readRawCounts(InstrProfRecord &Record);
293   Error readValueProfilingData(InstrProfRecord &Record);
294   bool atEnd() const { return Data == DataEnd; }
295 
296   void advanceData() {
297     Data++;
298     ValueDataStart += CurValueDataSize;
299   }
300 
301   const char *getNextHeaderPos() const {
302       assert(atEnd());
303       return (const char *)ValueDataStart;
304   }
305 
306   /// Get the offset of \p CounterPtr from the start of the counters section of
307   /// the profile. The offset has units of "number of counters", i.e. increasing
308   /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
309   ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
310     return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
311   }
312 
313   const uint64_t *getCounter(ptrdiff_t Offset) const {
314     return CountersStart + Offset;
315   }
316 
317   StringRef getName(uint64_t NameRef) const {
318     return Symtab->getFuncName(swap(NameRef));
319   }
320 };
321 
322 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
323 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
324 
325 namespace IndexedInstrProf {
326 
327 enum class HashT : uint32_t;
328 
329 } // end namespace IndexedInstrProf
330 
331 /// Trait for lookups into the on-disk hash table for the binary instrprof
332 /// format.
333 class InstrProfLookupTrait {
334   std::vector<NamedInstrProfRecord> DataBuffer;
335   IndexedInstrProf::HashT HashType;
336   unsigned FormatVersion;
337   // Endianness of the input value profile data.
338   // It should be LE by default, but can be changed
339   // for testing purpose.
340   support::endianness ValueProfDataEndianness = support::little;
341 
342 public:
343   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
344       : HashType(HashType), FormatVersion(FormatVersion) {}
345 
346   using data_type = ArrayRef<NamedInstrProfRecord>;
347 
348   using internal_key_type = StringRef;
349   using external_key_type = StringRef;
350   using hash_value_type = uint64_t;
351   using offset_type = uint64_t;
352 
353   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
354   static StringRef GetInternalKey(StringRef K) { return K; }
355   static StringRef GetExternalKey(StringRef K) { return K; }
356 
357   hash_value_type ComputeHash(StringRef K);
358 
359   static std::pair<offset_type, offset_type>
360   ReadKeyDataLength(const unsigned char *&D) {
361     using namespace support;
362 
363     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
364     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
365     return std::make_pair(KeyLen, DataLen);
366   }
367 
368   StringRef ReadKey(const unsigned char *D, offset_type N) {
369     return StringRef((const char *)D, N);
370   }
371 
372   bool readValueProfilingData(const unsigned char *&D,
373                               const unsigned char *const End);
374   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
375 
376   // Used for testing purpose only.
377   void setValueProfDataEndianness(support::endianness Endianness) {
378     ValueProfDataEndianness = Endianness;
379   }
380 };
381 
382 struct InstrProfReaderIndexBase {
383   virtual ~InstrProfReaderIndexBase() = default;
384 
385   // Read all the profile records with the same key pointed to the current
386   // iterator.
387   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
388 
389   // Read all the profile records with the key equal to FuncName
390   virtual Error getRecords(StringRef FuncName,
391                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
392   virtual void advanceToNextKey() = 0;
393   virtual bool atEnd() const = 0;
394   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
395   virtual uint64_t getVersion() const = 0;
396   virtual bool isIRLevelProfile() const = 0;
397   virtual bool hasCSIRLevelProfile() const = 0;
398   virtual bool instrEntryBBEnabled() const = 0;
399   virtual Error populateSymtab(InstrProfSymtab &) = 0;
400 };
401 
402 using OnDiskHashTableImplV3 =
403     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
404 
405 template <typename HashTableImpl>
406 class InstrProfReaderItaniumRemapper;
407 
408 template <typename HashTableImpl>
409 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
410 private:
411   std::unique_ptr<HashTableImpl> HashTable;
412   typename HashTableImpl::data_iterator RecordIterator;
413   uint64_t FormatVersion;
414 
415   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
416 
417 public:
418   InstrProfReaderIndex(const unsigned char *Buckets,
419                        const unsigned char *const Payload,
420                        const unsigned char *const Base,
421                        IndexedInstrProf::HashT HashType, uint64_t Version);
422   ~InstrProfReaderIndex() override = default;
423 
424   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
425   Error getRecords(StringRef FuncName,
426                    ArrayRef<NamedInstrProfRecord> &Data) override;
427   void advanceToNextKey() override { RecordIterator++; }
428 
429   bool atEnd() const override {
430     return RecordIterator == HashTable->data_end();
431   }
432 
433   void setValueProfDataEndianness(support::endianness Endianness) override {
434     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
435   }
436 
437   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
438 
439   bool isIRLevelProfile() const override {
440     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
441   }
442 
443   bool hasCSIRLevelProfile() const override {
444     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
445   }
446 
447   bool instrEntryBBEnabled() const override {
448     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
449   }
450 
451   Error populateSymtab(InstrProfSymtab &Symtab) override {
452     return Symtab.create(HashTable->keys());
453   }
454 };
455 
456 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
457 class InstrProfReaderRemapper {
458 public:
459   virtual ~InstrProfReaderRemapper() {}
460   virtual Error populateRemappings() { return Error::success(); }
461   virtual Error getRecords(StringRef FuncName,
462                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
463 };
464 
465 /// Reader for the indexed binary instrprof format.
466 class IndexedInstrProfReader : public InstrProfReader {
467 private:
468   /// The profile data file contents.
469   std::unique_ptr<MemoryBuffer> DataBuffer;
470   /// The profile remapping file contents.
471   std::unique_ptr<MemoryBuffer> RemappingBuffer;
472   /// The index into the profile data.
473   std::unique_ptr<InstrProfReaderIndexBase> Index;
474   /// The profile remapping file contents.
475   std::unique_ptr<InstrProfReaderRemapper> Remapper;
476   /// Profile summary data.
477   std::unique_ptr<ProfileSummary> Summary;
478   /// Context sensitive profile summary data.
479   std::unique_ptr<ProfileSummary> CS_Summary;
480   // Index to the current record in the record array.
481   unsigned RecordIndex;
482 
483   // Read the profile summary. Return a pointer pointing to one byte past the
484   // end of the summary data if it exists or the input \c Cur.
485   // \c UseCS indicates whether to use the context-sensitive profile summary.
486   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
487                                    const unsigned char *Cur, bool UseCS);
488 
489 public:
490   IndexedInstrProfReader(
491       std::unique_ptr<MemoryBuffer> DataBuffer,
492       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
493       : DataBuffer(std::move(DataBuffer)),
494         RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
495   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
496   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
497 
498   /// Return the profile version.
499   uint64_t getVersion() const { return Index->getVersion(); }
500   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
501   bool hasCSIRLevelProfile() const override {
502     return Index->hasCSIRLevelProfile();
503   }
504 
505   bool instrEntryBBEnabled() const override {
506     return Index->instrEntryBBEnabled();
507   }
508 
509   /// Return true if the given buffer is in an indexed instrprof format.
510   static bool hasFormat(const MemoryBuffer &DataBuffer);
511 
512   /// Read the file header.
513   Error readHeader() override;
514   /// Read a single record.
515   Error readNextRecord(NamedInstrProfRecord &Record) override;
516 
517   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
518   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
519                                                uint64_t FuncHash);
520 
521   /// Fill Counts with the profile data for the given function name.
522   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
523                           std::vector<uint64_t> &Counts);
524 
525   /// Return the maximum of all known function counts.
526   /// \c UseCS indicates whether to use the context-sensitive count.
527   uint64_t getMaximumFunctionCount(bool UseCS) {
528     if (UseCS) {
529       assert(CS_Summary && "No context sensitive profile summary");
530       return CS_Summary->getMaxFunctionCount();
531     } else {
532       assert(Summary && "No profile summary");
533       return Summary->getMaxFunctionCount();
534     }
535   }
536 
537   /// Factory method to create an indexed reader.
538   static Expected<std::unique_ptr<IndexedInstrProfReader>>
539   create(const Twine &Path, const Twine &RemappingPath = "");
540 
541   static Expected<std::unique_ptr<IndexedInstrProfReader>>
542   create(std::unique_ptr<MemoryBuffer> Buffer,
543          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
544 
545   // Used for testing purpose only.
546   void setValueProfDataEndianness(support::endianness Endianness) {
547     Index->setValueProfDataEndianness(Endianness);
548   }
549 
550   // See description in the base class. This interface is designed
551   // to be used by llvm-profdata (for dumping). Avoid using this when
552   // the client is the compiler.
553   InstrProfSymtab &getSymtab() override;
554 
555   /// Return the profile summary.
556   /// \c UseCS indicates whether to use the context-sensitive summary.
557   ProfileSummary &getSummary(bool UseCS) {
558     if (UseCS) {
559       assert(CS_Summary && "No context sensitive summary");
560       return *(CS_Summary.get());
561     } else {
562       assert(Summary && "No profile summary");
563       return *(Summary.get());
564     }
565   }
566 };
567 
568 } // end namespace llvm
569 
570 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
571