xref: /freebsd-src/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/ProfileData/InstrProfCorrelator.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/LineIterator.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/OnDiskHashTable.h"
27 #include "llvm/Support/SwapByteOrder.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <iterator>
33 #include <memory>
34 #include <utility>
35 #include <vector>
36 
37 namespace llvm {
38 
39 class InstrProfReader;
40 
41 /// A file format agnostic iterator over profiling data.
42 class InstrProfIterator {
43 public:
44   using iterator_category = std::input_iterator_tag;
45   using value_type = NamedInstrProfRecord;
46   using difference_type = std::ptrdiff_t;
47   using pointer = value_type *;
48   using reference = value_type &;
49 
50 private:
51   InstrProfReader *Reader = nullptr;
52   value_type Record;
53 
54   void Increment();
55 
56 public:
57   InstrProfIterator() = default;
58   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
59 
60   InstrProfIterator &operator++() { Increment(); return *this; }
61   bool operator==(const InstrProfIterator &RHS) const {
62     return Reader == RHS.Reader;
63   }
64   bool operator!=(const InstrProfIterator &RHS) const {
65     return Reader != RHS.Reader;
66   }
67   value_type &operator*() { return Record; }
68   value_type *operator->() { return &Record; }
69 };
70 
71 /// Base class and interface for reading profiling data of any known instrprof
72 /// format. Provides an iterator over NamedInstrProfRecords.
73 class InstrProfReader {
74   instrprof_error LastError = instrprof_error::success;
75   std::string LastErrorMsg;
76 
77 public:
78   InstrProfReader() = default;
79   virtual ~InstrProfReader() = default;
80 
81   /// Read the header.  Required before reading first record.
82   virtual Error readHeader() = 0;
83 
84   /// Read a single record.
85   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
86 
87   /// Print binary ids on stream OS.
88   virtual Error printBinaryIds(raw_ostream &OS) { return success(); };
89 
90   /// Iterator over profile data.
91   InstrProfIterator begin() { return InstrProfIterator(this); }
92   InstrProfIterator end() { return InstrProfIterator(); }
93 
94   virtual bool isIRLevelProfile() const = 0;
95 
96   virtual bool hasCSIRLevelProfile() const = 0;
97 
98   virtual bool instrEntryBBEnabled() const = 0;
99 
100   /// Return true if we must provide debug info to create PGO profiles.
101   virtual bool useDebugInfoCorrelate() const { return false; }
102 
103   /// Return the PGO symtab. There are three different readers:
104   /// Raw, Text, and Indexed profile readers. The first two types
105   /// of readers are used only by llvm-profdata tool, while the indexed
106   /// profile reader is also used by llvm-cov tool and the compiler (
107   /// backend or frontend). Since creating PGO symtab can create
108   /// significant runtime and memory overhead (as it touches data
109   /// for the whole program), InstrProfSymtab for the indexed profile
110   /// reader should be created on demand and it is recommended to be
111   /// only used for dumping purpose with llvm-proftool, not with the
112   /// compiler.
113   virtual InstrProfSymtab &getSymtab() = 0;
114 
115   /// Compute the sum of counts and return in Sum.
116   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
117 
118 protected:
119   std::unique_ptr<InstrProfSymtab> Symtab;
120 
121   /// Set the current error and return same.
122   Error error(instrprof_error Err, const std::string &ErrMsg = "") {
123     LastError = Err;
124     LastErrorMsg = ErrMsg;
125     if (Err == instrprof_error::success)
126       return Error::success();
127     return make_error<InstrProfError>(Err, ErrMsg);
128   }
129 
130   Error error(Error &&E) {
131     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
132       LastError = IPE.get();
133       LastErrorMsg = IPE.getMessage();
134     });
135     return make_error<InstrProfError>(LastError, LastErrorMsg);
136   }
137 
138   /// Clear the current error and return a successful one.
139   Error success() { return error(instrprof_error::success); }
140 
141 public:
142   /// Return true if the reader has finished reading the profile data.
143   bool isEOF() { return LastError == instrprof_error::eof; }
144 
145   /// Return true if the reader encountered an error reading profiling data.
146   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
147 
148   /// Get the current error.
149   Error getError() {
150     if (hasError())
151       return make_error<InstrProfError>(LastError, LastErrorMsg);
152     return Error::success();
153   }
154 
155   /// Factory method to create an appropriately typed reader for the given
156   /// instrprof file.
157   static Expected<std::unique_ptr<InstrProfReader>>
158   create(const Twine &Path, const InstrProfCorrelator *Correlator = nullptr);
159 
160   static Expected<std::unique_ptr<InstrProfReader>>
161   create(std::unique_ptr<MemoryBuffer> Buffer,
162          const InstrProfCorrelator *Correlator = nullptr);
163 };
164 
165 /// Reader for the simple text based instrprof format.
166 ///
167 /// This format is a simple text format that's suitable for test data. Records
168 /// are separated by one or more blank lines, and record fields are separated by
169 /// new lines.
170 ///
171 /// Each record consists of a function name, a function hash, a number of
172 /// counters, and then each counter value, in that order.
173 class TextInstrProfReader : public InstrProfReader {
174 private:
175   /// The profile data file contents.
176   std::unique_ptr<MemoryBuffer> DataBuffer;
177   /// Iterator over the profile data.
178   line_iterator Line;
179   bool IsIRLevelProfile = false;
180   bool HasCSIRLevelProfile = false;
181   bool InstrEntryBBEnabled = false;
182 
183   Error readValueProfileData(InstrProfRecord &Record);
184 
185 public:
186   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
187       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
188   TextInstrProfReader(const TextInstrProfReader &) = delete;
189   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
190 
191   /// Return true if the given buffer is in text instrprof format.
192   static bool hasFormat(const MemoryBuffer &Buffer);
193 
194   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
195 
196   bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
197 
198   bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
199 
200   /// Read the header.
201   Error readHeader() override;
202 
203   /// Read a single record.
204   Error readNextRecord(NamedInstrProfRecord &Record) override;
205 
206   InstrProfSymtab &getSymtab() override {
207     assert(Symtab.get());
208     return *Symtab.get();
209   }
210 };
211 
212 /// Reader for the raw instrprof binary format from runtime.
213 ///
214 /// This format is a raw memory dump of the instrumentation-based profiling data
215 /// from the runtime.  It has no index.
216 ///
217 /// Templated on the unsigned type whose size matches pointers on the platform
218 /// that wrote the profile.
219 template <class IntPtrT>
220 class RawInstrProfReader : public InstrProfReader {
221 private:
222   /// The profile data file contents.
223   std::unique_ptr<MemoryBuffer> DataBuffer;
224   /// If available, this hold the ProfileData array used to correlate raw
225   /// instrumentation data to their functions.
226   const InstrProfCorrelatorImpl<IntPtrT> *Correlator;
227   bool ShouldSwapBytes;
228   // The value of the version field of the raw profile data header. The lower 56
229   // bits specifies the format version and the most significant 8 bits specify
230   // the variant types of the profile.
231   uint64_t Version;
232   uint64_t CountersDelta;
233   uint64_t NamesDelta;
234   const RawInstrProf::ProfileData<IntPtrT> *Data;
235   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
236   const uint64_t *CountersStart;
237   const char *NamesStart;
238   const char *NamesEnd;
239   // After value profile is all read, this pointer points to
240   // the header of next profile data (if exists)
241   const uint8_t *ValueDataStart;
242   uint32_t ValueKindLast;
243   uint32_t CurValueDataSize;
244 
245   uint64_t BinaryIdsSize;
246   const uint8_t *BinaryIdsStart;
247 
248 public:
249   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,
250                      const InstrProfCorrelator *Correlator)
251       : DataBuffer(std::move(DataBuffer)),
252         Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>(
253             Correlator)) {}
254   RawInstrProfReader(const RawInstrProfReader &) = delete;
255   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
256 
257   static bool hasFormat(const MemoryBuffer &DataBuffer);
258   Error readHeader() override;
259   Error readNextRecord(NamedInstrProfRecord &Record) override;
260   Error printBinaryIds(raw_ostream &OS) override;
261 
262   bool isIRLevelProfile() const override {
263     return (Version & VARIANT_MASK_IR_PROF) != 0;
264   }
265 
266   bool hasCSIRLevelProfile() const override {
267     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
268   }
269 
270   bool instrEntryBBEnabled() const override {
271     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
272   }
273 
274   bool useDebugInfoCorrelate() const override {
275     return (Version & VARIANT_MASK_DBG_CORRELATE) != 0;
276   }
277 
278   InstrProfSymtab &getSymtab() override {
279     assert(Symtab.get());
280     return *Symtab.get();
281   }
282 
283 private:
284   Error createSymtab(InstrProfSymtab &Symtab);
285   Error readNextHeader(const char *CurrentPos);
286   Error readHeader(const RawInstrProf::Header &Header);
287 
288   template <class IntT> IntT swap(IntT Int) const {
289     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
290   }
291 
292   support::endianness getDataEndianness() const {
293     support::endianness HostEndian = getHostEndianness();
294     if (!ShouldSwapBytes)
295       return HostEndian;
296     if (HostEndian == support::little)
297       return support::big;
298     else
299       return support::little;
300   }
301 
302   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
303     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
304   }
305 
306   Error readName(NamedInstrProfRecord &Record);
307   Error readFuncHash(NamedInstrProfRecord &Record);
308   Error readRawCounts(InstrProfRecord &Record);
309   Error readValueProfilingData(InstrProfRecord &Record);
310   bool atEnd() const { return Data == DataEnd; }
311 
312   void advanceData() {
313     Data++;
314     ValueDataStart += CurValueDataSize;
315   }
316 
317   const char *getNextHeaderPos() const {
318       assert(atEnd());
319       return (const char *)ValueDataStart;
320   }
321 
322   /// Get the offset of \p CounterPtr from the start of the counters section of
323   /// the profile. The offset has units of "number of counters", i.e. increasing
324   /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
325   ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
326     return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
327   }
328 
329   const uint64_t *getCounter(ptrdiff_t Offset) const {
330     return CountersStart + Offset;
331   }
332 
333   StringRef getName(uint64_t NameRef) const {
334     return Symtab->getFuncName(swap(NameRef));
335   }
336 };
337 
/// Convenience names for the raw reader instantiated with a 32-bit and a
/// 64-bit unsigned pointer-sized integer type, respectively.
using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
340 
341 namespace IndexedInstrProf {
342 
343 enum class HashT : uint32_t;
344 
345 } // end namespace IndexedInstrProf
346 
347 /// Trait for lookups into the on-disk hash table for the binary instrprof
348 /// format.
349 class InstrProfLookupTrait {
350   std::vector<NamedInstrProfRecord> DataBuffer;
351   IndexedInstrProf::HashT HashType;
352   unsigned FormatVersion;
353   // Endianness of the input value profile data.
354   // It should be LE by default, but can be changed
355   // for testing purpose.
356   support::endianness ValueProfDataEndianness = support::little;
357 
358 public:
359   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
360       : HashType(HashType), FormatVersion(FormatVersion) {}
361 
362   using data_type = ArrayRef<NamedInstrProfRecord>;
363 
364   using internal_key_type = StringRef;
365   using external_key_type = StringRef;
366   using hash_value_type = uint64_t;
367   using offset_type = uint64_t;
368 
369   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
370   static StringRef GetInternalKey(StringRef K) { return K; }
371   static StringRef GetExternalKey(StringRef K) { return K; }
372 
373   hash_value_type ComputeHash(StringRef K);
374 
375   static std::pair<offset_type, offset_type>
376   ReadKeyDataLength(const unsigned char *&D) {
377     using namespace support;
378 
379     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
380     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
381     return std::make_pair(KeyLen, DataLen);
382   }
383 
384   StringRef ReadKey(const unsigned char *D, offset_type N) {
385     return StringRef((const char *)D, N);
386   }
387 
388   bool readValueProfilingData(const unsigned char *&D,
389                               const unsigned char *const End);
390   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
391 
392   // Used for testing purpose only.
393   void setValueProfDataEndianness(support::endianness Endianness) {
394     ValueProfDataEndianness = Endianness;
395   }
396 };
397 
398 struct InstrProfReaderIndexBase {
399   virtual ~InstrProfReaderIndexBase() = default;
400 
401   // Read all the profile records with the same key pointed to the current
402   // iterator.
403   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
404 
405   // Read all the profile records with the key equal to FuncName
406   virtual Error getRecords(StringRef FuncName,
407                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
408   virtual void advanceToNextKey() = 0;
409   virtual bool atEnd() const = 0;
410   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
411   virtual uint64_t getVersion() const = 0;
412   virtual bool isIRLevelProfile() const = 0;
413   virtual bool hasCSIRLevelProfile() const = 0;
414   virtual bool instrEntryBBEnabled() const = 0;
415   virtual Error populateSymtab(InstrProfSymtab &) = 0;
416 };
417 
/// On-disk iterable chained hash table whose lookups go through
/// InstrProfLookupTrait.
using OnDiskHashTableImplV3 =
    OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
420 
421 template <typename HashTableImpl>
422 class InstrProfReaderItaniumRemapper;
423 
424 template <typename HashTableImpl>
425 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
426 private:
427   std::unique_ptr<HashTableImpl> HashTable;
428   typename HashTableImpl::data_iterator RecordIterator;
429   uint64_t FormatVersion;
430 
431   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
432 
433 public:
434   InstrProfReaderIndex(const unsigned char *Buckets,
435                        const unsigned char *const Payload,
436                        const unsigned char *const Base,
437                        IndexedInstrProf::HashT HashType, uint64_t Version);
438   ~InstrProfReaderIndex() override = default;
439 
440   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
441   Error getRecords(StringRef FuncName,
442                    ArrayRef<NamedInstrProfRecord> &Data) override;
443   void advanceToNextKey() override { RecordIterator++; }
444 
445   bool atEnd() const override {
446     return RecordIterator == HashTable->data_end();
447   }
448 
449   void setValueProfDataEndianness(support::endianness Endianness) override {
450     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
451   }
452 
453   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
454 
455   bool isIRLevelProfile() const override {
456     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
457   }
458 
459   bool hasCSIRLevelProfile() const override {
460     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
461   }
462 
463   bool instrEntryBBEnabled() const override {
464     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
465   }
466 
467   Error populateSymtab(InstrProfSymtab &Symtab) override {
468     return Symtab.create(HashTable->keys());
469   }
470 };
471 
472 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
473 class InstrProfReaderRemapper {
474 public:
475   virtual ~InstrProfReaderRemapper() {}
476   virtual Error populateRemappings() { return Error::success(); }
477   virtual Error getRecords(StringRef FuncName,
478                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
479 };
480 
481 /// Reader for the indexed binary instrprof format.
482 class IndexedInstrProfReader : public InstrProfReader {
483 private:
484   /// The profile data file contents.
485   std::unique_ptr<MemoryBuffer> DataBuffer;
486   /// The profile remapping file contents.
487   std::unique_ptr<MemoryBuffer> RemappingBuffer;
488   /// The index into the profile data.
489   std::unique_ptr<InstrProfReaderIndexBase> Index;
490   /// The profile remapping file contents.
491   std::unique_ptr<InstrProfReaderRemapper> Remapper;
492   /// Profile summary data.
493   std::unique_ptr<ProfileSummary> Summary;
494   /// Context sensitive profile summary data.
495   std::unique_ptr<ProfileSummary> CS_Summary;
496   // Index to the current record in the record array.
497   unsigned RecordIndex;
498 
499   // Read the profile summary. Return a pointer pointing to one byte past the
500   // end of the summary data if it exists or the input \c Cur.
501   // \c UseCS indicates whether to use the context-sensitive profile summary.
502   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
503                                    const unsigned char *Cur, bool UseCS);
504 
505 public:
506   IndexedInstrProfReader(
507       std::unique_ptr<MemoryBuffer> DataBuffer,
508       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
509       : DataBuffer(std::move(DataBuffer)),
510         RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
511   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
512   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
513 
514   /// Return the profile version.
515   uint64_t getVersion() const { return Index->getVersion(); }
516   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
517   bool hasCSIRLevelProfile() const override {
518     return Index->hasCSIRLevelProfile();
519   }
520 
521   bool instrEntryBBEnabled() const override {
522     return Index->instrEntryBBEnabled();
523   }
524 
525   /// Return true if the given buffer is in an indexed instrprof format.
526   static bool hasFormat(const MemoryBuffer &DataBuffer);
527 
528   /// Read the file header.
529   Error readHeader() override;
530   /// Read a single record.
531   Error readNextRecord(NamedInstrProfRecord &Record) override;
532 
533   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
534   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
535                                                uint64_t FuncHash);
536 
537   /// Fill Counts with the profile data for the given function name.
538   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
539                           std::vector<uint64_t> &Counts);
540 
541   /// Return the maximum of all known function counts.
542   /// \c UseCS indicates whether to use the context-sensitive count.
543   uint64_t getMaximumFunctionCount(bool UseCS) {
544     if (UseCS) {
545       assert(CS_Summary && "No context sensitive profile summary");
546       return CS_Summary->getMaxFunctionCount();
547     } else {
548       assert(Summary && "No profile summary");
549       return Summary->getMaxFunctionCount();
550     }
551   }
552 
553   /// Factory method to create an indexed reader.
554   static Expected<std::unique_ptr<IndexedInstrProfReader>>
555   create(const Twine &Path, const Twine &RemappingPath = "");
556 
557   static Expected<std::unique_ptr<IndexedInstrProfReader>>
558   create(std::unique_ptr<MemoryBuffer> Buffer,
559          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
560 
561   // Used for testing purpose only.
562   void setValueProfDataEndianness(support::endianness Endianness) {
563     Index->setValueProfDataEndianness(Endianness);
564   }
565 
566   // See description in the base class. This interface is designed
567   // to be used by llvm-profdata (for dumping). Avoid using this when
568   // the client is the compiler.
569   InstrProfSymtab &getSymtab() override;
570 
571   /// Return the profile summary.
572   /// \c UseCS indicates whether to use the context-sensitive summary.
573   ProfileSummary &getSummary(bool UseCS) {
574     if (UseCS) {
575       assert(CS_Summary && "No context sensitive summary");
576       return *(CS_Summary.get());
577     } else {
578       assert(Summary && "No profile summary");
579       return *(Summary.get());
580     }
581   }
582 };
583 
584 } // end namespace llvm
585 
586 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
587