xref: /netbsd-src/external/apache2/llvm/dist/llvm/include/llvm/ProfileData/InstrProfReader.h (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/SwapByteOrder.h"
27 #include <algorithm>
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <iterator>
32 #include <memory>
33 #include <utility>
34 #include <vector>
35 
36 namespace llvm {
37 
38 class InstrProfReader;
39 
40 /// A file format agnostic iterator over profiling data.
41 class InstrProfIterator {
42 public:
43   using iterator_category = std::input_iterator_tag;
44   using value_type = NamedInstrProfRecord;
45   using difference_type = std::ptrdiff_t;
46   using pointer = value_type *;
47   using reference = value_type &;
48 
49 private:
50   InstrProfReader *Reader = nullptr;
51   value_type Record;
52 
53   void Increment();
54 
55 public:
56   InstrProfIterator() = default;
InstrProfIterator(InstrProfReader * Reader)57   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
58 
59   InstrProfIterator &operator++() { Increment(); return *this; }
60   bool operator==(const InstrProfIterator &RHS) const {
61     return Reader == RHS.Reader;
62   }
63   bool operator!=(const InstrProfIterator &RHS) const {
64     return Reader != RHS.Reader;
65   }
66   value_type &operator*() { return Record; }
67   value_type *operator->() { return &Record; }
68 };
69 
70 /// Base class and interface for reading profiling data of any known instrprof
71 /// format. Provides an iterator over NamedInstrProfRecords.
72 class InstrProfReader {
73   instrprof_error LastError = instrprof_error::success;
74 
75 public:
76   InstrProfReader() = default;
77   virtual ~InstrProfReader() = default;
78 
79   /// Read the header.  Required before reading first record.
80   virtual Error readHeader() = 0;
81 
82   /// Read a single record.
83   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
84 
85   /// Iterator over profile data.
begin()86   InstrProfIterator begin() { return InstrProfIterator(this); }
end()87   InstrProfIterator end() { return InstrProfIterator(); }
88 
89   virtual bool isIRLevelProfile() const = 0;
90 
91   virtual bool hasCSIRLevelProfile() const = 0;
92 
93   virtual bool instrEntryBBEnabled() const = 0;
94 
95   /// Return the PGO symtab. There are three different readers:
96   /// Raw, Text, and Indexed profile readers. The first two types
97   /// of readers are used only by llvm-profdata tool, while the indexed
98   /// profile reader is also used by llvm-cov tool and the compiler (
99   /// backend or frontend). Since creating PGO symtab can create
100   /// significant runtime and memory overhead (as it touches data
101   /// for the whole program), InstrProfSymtab for the indexed profile
102   /// reader should be created on demand and it is recommended to be
103   /// only used for dumping purpose with llvm-proftool, not with the
104   /// compiler.
105   virtual InstrProfSymtab &getSymtab() = 0;
106 
107   /// Compute the sum of counts and return in Sum.
108   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
109 
110 protected:
111   std::unique_ptr<InstrProfSymtab> Symtab;
112 
113   /// Set the current error and return same.
error(instrprof_error Err)114   Error error(instrprof_error Err) {
115     LastError = Err;
116     if (Err == instrprof_error::success)
117       return Error::success();
118     return make_error<InstrProfError>(Err);
119   }
120 
error(Error && E)121   Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
122 
123   /// Clear the current error and return a successful one.
success()124   Error success() { return error(instrprof_error::success); }
125 
126 public:
127   /// Return true if the reader has finished reading the profile data.
isEOF()128   bool isEOF() { return LastError == instrprof_error::eof; }
129 
130   /// Return true if the reader encountered an error reading profiling data.
hasError()131   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
132 
133   /// Get the current error.
getError()134   Error getError() {
135     if (hasError())
136       return make_error<InstrProfError>(LastError);
137     return Error::success();
138   }
139 
140   /// Factory method to create an appropriately typed reader for the given
141   /// instrprof file.
142   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
143 
144   static Expected<std::unique_ptr<InstrProfReader>>
145   create(std::unique_ptr<MemoryBuffer> Buffer);
146 };
147 
148 /// Reader for the simple text based instrprof format.
149 ///
150 /// This format is a simple text format that's suitable for test data. Records
151 /// are separated by one or more blank lines, and record fields are separated by
152 /// new lines.
153 ///
154 /// Each record consists of a function name, a function hash, a number of
155 /// counters, and then each counter value, in that order.
156 class TextInstrProfReader : public InstrProfReader {
157 private:
158   /// The profile data file contents.
159   std::unique_ptr<MemoryBuffer> DataBuffer;
160   /// Iterator over the profile data.
161   line_iterator Line;
162   bool IsIRLevelProfile = false;
163   bool HasCSIRLevelProfile = false;
164   bool InstrEntryBBEnabled = false;
165 
166   Error readValueProfileData(InstrProfRecord &Record);
167 
168 public:
TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)169   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
170       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
171   TextInstrProfReader(const TextInstrProfReader &) = delete;
172   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
173 
174   /// Return true if the given buffer is in text instrprof format.
175   static bool hasFormat(const MemoryBuffer &Buffer);
176 
isIRLevelProfile()177   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
178 
hasCSIRLevelProfile()179   bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
180 
instrEntryBBEnabled()181   bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
182 
183   /// Read the header.
184   Error readHeader() override;
185 
186   /// Read a single record.
187   Error readNextRecord(NamedInstrProfRecord &Record) override;
188 
getSymtab()189   InstrProfSymtab &getSymtab() override {
190     assert(Symtab.get());
191     return *Symtab.get();
192   }
193 };
194 
195 /// Reader for the raw instrprof binary format from runtime.
196 ///
197 /// This format is a raw memory dump of the instrumentation-baed profiling data
198 /// from the runtime.  It has no index.
199 ///
200 /// Templated on the unsigned type whose size matches pointers on the platform
201 /// that wrote the profile.
202 template <class IntPtrT>
203 class RawInstrProfReader : public InstrProfReader {
204 private:
205   /// The profile data file contents.
206   std::unique_ptr<MemoryBuffer> DataBuffer;
207   bool ShouldSwapBytes;
208   // The value of the version field of the raw profile data header. The lower 56
209   // bits specifies the format version and the most significant 8 bits specify
210   // the variant types of the profile.
211   uint64_t Version;
212   uint64_t CountersDelta;
213   uint64_t NamesDelta;
214   const RawInstrProf::ProfileData<IntPtrT> *Data;
215   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
216   const uint64_t *CountersStart;
217   const char *NamesStart;
218   uint64_t NamesSize;
219   // After value profile is all read, this pointer points to
220   // the header of next profile data (if exists)
221   const uint8_t *ValueDataStart;
222   uint32_t ValueKindLast;
223   uint32_t CurValueDataSize;
224 
225 public:
RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)226   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
227       : DataBuffer(std::move(DataBuffer)) {}
228   RawInstrProfReader(const RawInstrProfReader &) = delete;
229   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
230 
231   static bool hasFormat(const MemoryBuffer &DataBuffer);
232   Error readHeader() override;
233   Error readNextRecord(NamedInstrProfRecord &Record) override;
234 
isIRLevelProfile()235   bool isIRLevelProfile() const override {
236     return (Version & VARIANT_MASK_IR_PROF) != 0;
237   }
238 
hasCSIRLevelProfile()239   bool hasCSIRLevelProfile() const override {
240     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
241   }
242 
instrEntryBBEnabled()243   bool instrEntryBBEnabled() const override {
244     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
245   }
246 
getSymtab()247   InstrProfSymtab &getSymtab() override {
248     assert(Symtab.get());
249     return *Symtab.get();
250   }
251 
252 private:
253   Error createSymtab(InstrProfSymtab &Symtab);
254   Error readNextHeader(const char *CurrentPos);
255   Error readHeader(const RawInstrProf::Header &Header);
256 
swap(IntT Int)257   template <class IntT> IntT swap(IntT Int) const {
258     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
259   }
260 
getDataEndianness()261   support::endianness getDataEndianness() const {
262     support::endianness HostEndian = getHostEndianness();
263     if (!ShouldSwapBytes)
264       return HostEndian;
265     if (HostEndian == support::little)
266       return support::big;
267     else
268       return support::little;
269   }
270 
getNumPaddingBytes(uint64_t SizeInBytes)271   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
272     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
273   }
274 
275   Error readName(NamedInstrProfRecord &Record);
276   Error readFuncHash(NamedInstrProfRecord &Record);
277   Error readRawCounts(InstrProfRecord &Record);
278   Error readValueProfilingData(InstrProfRecord &Record);
atEnd()279   bool atEnd() const { return Data == DataEnd; }
280 
advanceData()281   void advanceData() {
282     Data++;
283     ValueDataStart += CurValueDataSize;
284   }
285 
getNextHeaderPos()286   const char *getNextHeaderPos() const {
287       assert(atEnd());
288       return (const char *)ValueDataStart;
289   }
290 
291   /// Get the offset of \p CounterPtr from the start of the counters section of
292   /// the profile. The offset has units of "number of counters", i.e. increasing
293   /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
getCounterOffset(IntPtrT CounterPtr)294   ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
295     return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
296   }
297 
getCounter(ptrdiff_t Offset)298   const uint64_t *getCounter(ptrdiff_t Offset) const {
299     return CountersStart + Offset;
300   }
301 
getName(uint64_t NameRef)302   StringRef getName(uint64_t NameRef) const {
303     return Symtab->getFuncName(swap(NameRef));
304   }
305 };
306 
307 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
308 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
309 
namespace IndexedInstrProf {

// Opaque declaration only; the enumerators are not listed in this header.
enum class HashT : uint32_t;

} // end namespace IndexedInstrProf
315 
316 /// Trait for lookups into the on-disk hash table for the binary instrprof
317 /// format.
318 class InstrProfLookupTrait {
319   std::vector<NamedInstrProfRecord> DataBuffer;
320   IndexedInstrProf::HashT HashType;
321   unsigned FormatVersion;
322   // Endianness of the input value profile data.
323   // It should be LE by default, but can be changed
324   // for testing purpose.
325   support::endianness ValueProfDataEndianness = support::little;
326 
327 public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType,unsigned FormatVersion)328   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
329       : HashType(HashType), FormatVersion(FormatVersion) {}
330 
331   using data_type = ArrayRef<NamedInstrProfRecord>;
332 
333   using internal_key_type = StringRef;
334   using external_key_type = StringRef;
335   using hash_value_type = uint64_t;
336   using offset_type = uint64_t;
337 
EqualKey(StringRef A,StringRef B)338   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
GetInternalKey(StringRef K)339   static StringRef GetInternalKey(StringRef K) { return K; }
GetExternalKey(StringRef K)340   static StringRef GetExternalKey(StringRef K) { return K; }
341 
342   hash_value_type ComputeHash(StringRef K);
343 
344   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)345   ReadKeyDataLength(const unsigned char *&D) {
346     using namespace support;
347 
348     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
349     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
350     return std::make_pair(KeyLen, DataLen);
351   }
352 
ReadKey(const unsigned char * D,offset_type N)353   StringRef ReadKey(const unsigned char *D, offset_type N) {
354     return StringRef((const char *)D, N);
355   }
356 
357   bool readValueProfilingData(const unsigned char *&D,
358                               const unsigned char *const End);
359   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
360 
361   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)362   void setValueProfDataEndianness(support::endianness Endianness) {
363     ValueProfDataEndianness = Endianness;
364   }
365 };
366 
367 struct InstrProfReaderIndexBase {
368   virtual ~InstrProfReaderIndexBase() = default;
369 
370   // Read all the profile records with the same key pointed to the current
371   // iterator.
372   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
373 
374   // Read all the profile records with the key equal to FuncName
375   virtual Error getRecords(StringRef FuncName,
376                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
377   virtual void advanceToNextKey() = 0;
378   virtual bool atEnd() const = 0;
379   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
380   virtual uint64_t getVersion() const = 0;
381   virtual bool isIRLevelProfile() const = 0;
382   virtual bool hasCSIRLevelProfile() const = 0;
383   virtual bool instrEntryBBEnabled() const = 0;
384   virtual Error populateSymtab(InstrProfSymtab &) = 0;
385 };
386 
387 using OnDiskHashTableImplV3 =
388     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
389 
390 template <typename HashTableImpl>
391 class InstrProfReaderItaniumRemapper;
392 
393 template <typename HashTableImpl>
394 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
395 private:
396   std::unique_ptr<HashTableImpl> HashTable;
397   typename HashTableImpl::data_iterator RecordIterator;
398   uint64_t FormatVersion;
399 
400   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
401 
402 public:
403   InstrProfReaderIndex(const unsigned char *Buckets,
404                        const unsigned char *const Payload,
405                        const unsigned char *const Base,
406                        IndexedInstrProf::HashT HashType, uint64_t Version);
407   ~InstrProfReaderIndex() override = default;
408 
409   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
410   Error getRecords(StringRef FuncName,
411                    ArrayRef<NamedInstrProfRecord> &Data) override;
advanceToNextKey()412   void advanceToNextKey() override { RecordIterator++; }
413 
atEnd()414   bool atEnd() const override {
415     return RecordIterator == HashTable->data_end();
416   }
417 
setValueProfDataEndianness(support::endianness Endianness)418   void setValueProfDataEndianness(support::endianness Endianness) override {
419     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
420   }
421 
getVersion()422   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
423 
isIRLevelProfile()424   bool isIRLevelProfile() const override {
425     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
426   }
427 
hasCSIRLevelProfile()428   bool hasCSIRLevelProfile() const override {
429     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
430   }
431 
instrEntryBBEnabled()432   bool instrEntryBBEnabled() const override {
433     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
434   }
435 
populateSymtab(InstrProfSymtab & Symtab)436   Error populateSymtab(InstrProfSymtab &Symtab) override {
437     return Symtab.create(HashTable->keys());
438   }
439 };
440 
441 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
442 class InstrProfReaderRemapper {
443 public:
~InstrProfReaderRemapper()444   virtual ~InstrProfReaderRemapper() {}
populateRemappings()445   virtual Error populateRemappings() { return Error::success(); }
446   virtual Error getRecords(StringRef FuncName,
447                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
448 };
449 
450 /// Reader for the indexed binary instrprof format.
451 class IndexedInstrProfReader : public InstrProfReader {
452 private:
453   /// The profile data file contents.
454   std::unique_ptr<MemoryBuffer> DataBuffer;
455   /// The profile remapping file contents.
456   std::unique_ptr<MemoryBuffer> RemappingBuffer;
457   /// The index into the profile data.
458   std::unique_ptr<InstrProfReaderIndexBase> Index;
459   /// The profile remapping file contents.
460   std::unique_ptr<InstrProfReaderRemapper> Remapper;
461   /// Profile summary data.
462   std::unique_ptr<ProfileSummary> Summary;
463   /// Context sensitive profile summary data.
464   std::unique_ptr<ProfileSummary> CS_Summary;
465   // Index to the current record in the record array.
466   unsigned RecordIndex;
467 
468   // Read the profile summary. Return a pointer pointing to one byte past the
469   // end of the summary data if it exists or the input \c Cur.
470   // \c UseCS indicates whether to use the context-sensitive profile summary.
471   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
472                                    const unsigned char *Cur, bool UseCS);
473 
474 public:
475   IndexedInstrProfReader(
476       std::unique_ptr<MemoryBuffer> DataBuffer,
477       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
DataBuffer(std::move (DataBuffer))478       : DataBuffer(std::move(DataBuffer)),
479         RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
480   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
481   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
482 
483   /// Return the profile version.
getVersion()484   uint64_t getVersion() const { return Index->getVersion(); }
isIRLevelProfile()485   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
hasCSIRLevelProfile()486   bool hasCSIRLevelProfile() const override {
487     return Index->hasCSIRLevelProfile();
488   }
489 
instrEntryBBEnabled()490   bool instrEntryBBEnabled() const override {
491     return Index->instrEntryBBEnabled();
492   }
493 
494   /// Return true if the given buffer is in an indexed instrprof format.
495   static bool hasFormat(const MemoryBuffer &DataBuffer);
496 
497   /// Read the file header.
498   Error readHeader() override;
499   /// Read a single record.
500   Error readNextRecord(NamedInstrProfRecord &Record) override;
501 
502   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
503   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
504                                                uint64_t FuncHash);
505 
506   /// Fill Counts with the profile data for the given function name.
507   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
508                           std::vector<uint64_t> &Counts);
509 
510   /// Return the maximum of all known function counts.
511   /// \c UseCS indicates whether to use the context-sensitive count.
getMaximumFunctionCount(bool UseCS)512   uint64_t getMaximumFunctionCount(bool UseCS) {
513     if (UseCS) {
514       assert(CS_Summary && "No context sensitive profile summary");
515       return CS_Summary->getMaxFunctionCount();
516     } else {
517       assert(Summary && "No profile summary");
518       return Summary->getMaxFunctionCount();
519     }
520   }
521 
522   /// Factory method to create an indexed reader.
523   static Expected<std::unique_ptr<IndexedInstrProfReader>>
524   create(const Twine &Path, const Twine &RemappingPath = "");
525 
526   static Expected<std::unique_ptr<IndexedInstrProfReader>>
527   create(std::unique_ptr<MemoryBuffer> Buffer,
528          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
529 
530   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)531   void setValueProfDataEndianness(support::endianness Endianness) {
532     Index->setValueProfDataEndianness(Endianness);
533   }
534 
535   // See description in the base class. This interface is designed
536   // to be used by llvm-profdata (for dumping). Avoid using this when
537   // the client is the compiler.
538   InstrProfSymtab &getSymtab() override;
539 
540   /// Return the profile summary.
541   /// \c UseCS indicates whether to use the context-sensitive summary.
getSummary(bool UseCS)542   ProfileSummary &getSummary(bool UseCS) {
543     if (UseCS) {
544       assert(CS_Summary && "No context sensitive summary");
545       return *(CS_Summary.get());
546     } else {
547       assert(Summary && "No profile summary");
548       return *(Summary.get());
549     }
550   }
551 };
552 
553 } // end namespace llvm
554 
555 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
556