xref: /llvm-project/llvm/tools/llvm-profgen/ProfileGenerator.h (revision b9d40a7ae4b3e5b9829eca8a497637c9fab6dd3e)
1 //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10 #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11 #include "CSPreInliner.h"
12 #include "ErrorHandling.h"
13 #include "PerfReader.h"
14 #include "ProfiledBinary.h"
15 #include "llvm/IR/DebugInfoMetadata.h"
16 #include "llvm/ProfileData/SampleProfWriter.h"
17 #include <memory>
18 #include <unordered_set>
19 
20 using namespace llvm;
21 using namespace sampleprof;
22 
23 namespace llvm {
24 namespace sampleprof {
25 
26 using ProbeCounterMap =
27     std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
28 
29 // This base class for profile generation of sample-based PGO. We reuse all
30 // structures relating to function profiles and profile writers as seen in
31 // /ProfileData/SampleProf.h.
32 class ProfileGeneratorBase {
33 
34 public:
ProfileGeneratorBase(ProfiledBinary * Binary)35   ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){};
ProfileGeneratorBase(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)36   ProfileGeneratorBase(ProfiledBinary *Binary,
37                        const ContextSampleCounterMap *Counters)
38       : Binary(Binary), SampleCounters(Counters){};
ProfileGeneratorBase(ProfiledBinary * Binary,const SampleProfileMap && Profiles)39   ProfileGeneratorBase(ProfiledBinary *Binary,
40                        const SampleProfileMap &&Profiles)
41       : Binary(Binary), ProfileMap(std::move(Profiles)){};
42 
43   virtual ~ProfileGeneratorBase() = default;
44   static std::unique_ptr<ProfileGeneratorBase>
45   create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters,
46          bool profileIsCS);
47   static std::unique_ptr<ProfileGeneratorBase>
48   create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap,
49          bool profileIsCS);
50   virtual void generateProfile() = 0;
51   void write();
52 
53   static uint32_t
54   getDuplicationFactor(unsigned Discriminator,
55                        bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
56     return UseFSD ? 1
57                   : llvm::DILocation::getDuplicationFactorFromDiscriminator(
58                         Discriminator);
59   }
60 
61   static uint32_t
62   getBaseDiscriminator(unsigned Discriminator,
63                        bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
64     return UseFSD ? Discriminator
65                   : DILocation::getBaseDiscriminatorFromDiscriminator(
66                         Discriminator, /* IsFSDiscriminator */ false);
67   }
68 
69   static bool UseFSDiscriminator;
70 
71 protected:
72   // Use SampleProfileWriter to serialize profile map
73   void write(std::unique_ptr<SampleProfileWriter> Writer,
74              SampleProfileMap &ProfileMap);
75   /*
76   For each region boundary point, mark if it is begin or end (or both) of
77   the region. Boundary points are inclusive. Log the sample count as well
78   so we can use it when we compute the sample count of each disjoint region
79   later. Note that there might be multiple ranges with different sample
80   count that share same begin/end point. We need to accumulate the sample
81   count for the boundary point for such case, because for the example
82   below,
83 
84   |<--100-->|
85   |<------200------>|
86   A         B       C
87 
88   sample count for disjoint region [A,B] would be 300.
89   */
90   void findDisjointRanges(RangeSample &DisjointRanges,
91                           const RangeSample &Ranges);
92 
93   // Go through each address from range to extract the top frame probe by
94   // looking up in the Address2ProbeMap
95   void extractProbesFromRange(const RangeSample &RangeCounter,
96                               ProbeCounterMap &ProbeCounter,
97                               bool FindDisjointRanges = true);
98 
99   // Helper function for updating body sample for a leaf location in
100   // FunctionProfile
101   void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
102                                            const SampleContextFrame &LeafLoc,
103                                            uint64_t Count);
104 
105   void updateFunctionSamples();
106 
107   void updateTotalSamples();
108 
109   void updateCallsiteSamples();
110 
111   void filterAmbiguousProfile(SampleProfileMap &Profiles);
112 
113   bool filterAmbiguousProfile(FunctionSamples &FS);
114 
115   StringRef getCalleeNameForAddress(uint64_t TargetAddress);
116 
117   void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
118 
119   void calculateBodySamplesAndSize(const FunctionSamples &FSamples,
120                                    uint64_t &TotalBodySamples,
121                                    uint64_t &FuncBodySize);
122 
123   double calculateDensity(const SampleProfileMap &Profiles);
124 
125   void calculateAndShowDensity(const SampleProfileMap &Profiles);
126 
127   void showDensitySuggestion(double Density);
128 
129   void collectProfiledFunctions();
130 
131   bool collectFunctionsFromRawProfile(
132       std::unordered_set<const BinaryFunction *> &ProfiledFunctions);
133 
134   // Collect profiled Functions for llvm sample profile input.
135   virtual bool collectFunctionsFromLLVMProfile(
136       std::unordered_set<const BinaryFunction *> &ProfiledFunctions) = 0;
137 
138   // List of function prefix to filter out.
139   static constexpr const char *FuncPrefixsToFilter[] = {"__cxx_global_var_init",
140                                                         "__tls_init"};
141 
142   // Thresholds from profile summary to answer isHotCount/isColdCount queries.
143   uint64_t HotCountThreshold;
144 
145   uint64_t ColdCountThreshold;
146 
147   ProfiledBinary *Binary = nullptr;
148 
149   std::unique_ptr<ProfileSummary> Summary;
150 
151   // Used by SampleProfileWriter
152   SampleProfileMap ProfileMap;
153 
154   const ContextSampleCounterMap *SampleCounters = nullptr;
155 };
156 
157 class ProfileGenerator : public ProfileGeneratorBase {
158 
159 public:
ProfileGenerator(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)160   ProfileGenerator(ProfiledBinary *Binary,
161                    const ContextSampleCounterMap *Counters)
162       : ProfileGeneratorBase(Binary, Counters){};
ProfileGenerator(ProfiledBinary * Binary,const SampleProfileMap && Profiles)163   ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles)
164       : ProfileGeneratorBase(Binary, std::move(Profiles)){};
165   void generateProfile() override;
166 
167 private:
168   void generateLineNumBasedProfile();
169   void generateProbeBasedProfile();
170   RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
171   FunctionSamples &getTopLevelFunctionProfile(FunctionId FuncName);
172   // Helper function to get the leaf frame's FunctionProfile by traversing the
173   // inline stack and meanwhile it adds the total samples for each frame's
174   // function profile.
175   FunctionSamples &
176   getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
177                                    uint64_t Count);
178   void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
179   void
180   populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
181   void
182   populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
183   void populateBoundarySamplesWithProbesForAllFunctions(
184       const BranchSample &BranchCounters);
185   void postProcessProfiles();
186   void trimColdProfiles(const SampleProfileMap &Profiles,
187                         uint64_t ColdCntThreshold);
188   bool collectFunctionsFromLLVMProfile(
189       std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
190 };
191 
192 class CSProfileGenerator : public ProfileGeneratorBase {
193 public:
CSProfileGenerator(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)194   CSProfileGenerator(ProfiledBinary *Binary,
195                      const ContextSampleCounterMap *Counters)
196       : ProfileGeneratorBase(Binary, Counters){};
CSProfileGenerator(ProfiledBinary * Binary,SampleProfileMap & Profiles)197   CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles)
198       : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr){};
199   void generateProfile() override;
200 
201   // Trim the context stack at a given depth.
202   template <typename T>
203   static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
204     if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
205       return;
206     std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
207               S.begin());
208     S.resize(Depth);
209   }
210 
211   // Remove adjacent repeated context sequences up to a given sequence length,
212   // -1 means no size limit. Note that repeated sequences are identified based
213   // on the exact call site, this is finer granularity than function recursion.
214   template <typename T>
215   static void compressRecursionContext(SmallVectorImpl<T> &Context,
216                                        int32_t CSize = MaxCompressionSize) {
217     uint32_t I = 1;
218     uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
219     uint32_t MaxDedupSize =
220         CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
221     auto BeginIter = Context.begin();
222     // Use an in-place algorithm to save memory copy
223     // End indicates the end location of current iteration's data
224     uint32_t End = 0;
225     // Deduplicate from length 1 to the max possible size of a repeated
226     // sequence.
227     while (I <= MaxDedupSize) {
228       // This is a linear algorithm that deduplicates adjacent repeated
229       // sequences of size I. The deduplication detection runs on a sliding
230       // window whose size is 2*I and it keeps sliding the window to deduplicate
231       // the data inside. Once duplication is detected, deduplicate it by
232       // skipping the right half part of the window, otherwise just copy back
233       // the new one by appending them at the back of End pointer(for the next
234       // iteration).
235       //
236       // For example:
237       // Input: [a1, a2, b1, b2]
238       // (Added index to distinguish the same char, the origin is [a, a, b,
239       // b], the size of the dedup window is 2(I = 1) at the beginning)
240       //
241       // 1) The initial status is a dummy window[null, a1], then just copy the
242       // right half of the window(End = 0), then slide the window.
243       // Result: [a1], a2, b1, b2 (End points to the element right before ],
244       // after ] is the data of the previous iteration)
245       //
246       // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
247       // the window i.e the duplication happen. Only slide the window.
248       // Result: [a1], a2, b1, b2
249       //
250       // 3) Next window is [a2, b1], copy the right half of the window(b1 is
251       // new) to the End and slide the window.
252       // Result: [a1, b1], b1, b2
253       //
254       // 4) Next window is [b1, b2], same to 2), skip b2.
255       // Result: [a1, b1], b1, b2
256       // After resize, it will be [a, b]
257 
258       // Use pointers like below to do comparison inside the window
259       //    [a         b         c        a       b        c]
260       //     |         |         |                |        |
261       // LeftBoundary Left     Right           Left+I    Right+I
262       // A duplication found if Left < LeftBoundry.
263 
264       int32_t Right = I - 1;
265       End = I;
266       int32_t LeftBoundary = 0;
267       while (Right + I < Context.size()) {
268         // To avoids scanning a part of a sequence repeatedly, it finds out
269         // the common suffix of two hald in the window. The common suffix will
270         // serve as the common prefix of next possible pair of duplicate
271         // sequences. The non-common part will be ignored and never scanned
272         // again.
273 
274         // For example.
275         // Input: [a, b1], c1, b2, c2
276         // I = 2
277         //
278         // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
279         // part is 'c1', copy it and only slide the window 1 step.
280         // Result: [a, b1, c1], b2, c2
281         //
282         // 2) Next window is [b1, c1, b2, c2], so duplication happen.
283         // Result after resize: [a, b, c]
284 
285         int32_t Left = Right;
286         while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
287           // Find the longest suffix inside the window. When stops, Left points
288           // at the diverging point in the current sequence.
289           Left--;
290         }
291 
292         bool DuplicationFound = (Left < LeftBoundary);
293         // Don't need to recheck the data before Right
294         LeftBoundary = Right + 1;
295         if (DuplicationFound) {
296           // Duplication found, skip right half of the window.
297           Right += I;
298         } else {
299           // Copy the non-common-suffix part of the adjacent sequence.
300           std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
301                     BeginIter + End);
302           End += Left + I - Right;
303           // Only slide the window by the size of non-common-suffix
304           Right = Left + I;
305         }
306       }
307       // Don't forget the remaining part that's not scanned.
308       std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
309       End += Context.size() - Right - 1;
310       I++;
311       Context.resize(End);
312       MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
313     }
314   }
315 
316 private:
317   void generateLineNumBasedProfile();
318 
319   FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
320                                               bool WasLeafInlined = false);
321 
322   // Lookup or create ContextTrieNode for the context, FunctionSamples is
323   // created inside this function.
324   ContextTrieNode *getOrCreateContextNode(const SampleContextFrames Context,
325                                           bool WasLeafInlined = false);
326 
327   // For profiled only functions, on-demand compute their inline context
328   // function byte size which is used by the pre-inliner.
329   void computeSizeForProfiledFunctions();
330   // Post processing for profiles before writing out, such as mermining
331   // and trimming cold profiles, running preinliner on profiles.
332   void postProcessProfiles();
333 
334   void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
335                                       const RangeSample &RangeCounters);
336 
337   void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode,
338                                           const BranchSample &BranchCounters);
339 
340   void populateInferredFunctionSamples(ContextTrieNode &Node);
341 
342   void updateFunctionSamples();
343 
344   void generateProbeBasedProfile();
345 
346   // Fill in function body samples from probes
347   void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
348                                      const AddrBasedCtxKey *CtxKey);
349   // Fill in boundary samples for a call probe
350   void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
351                                          const AddrBasedCtxKey *CtxKey);
352 
353   ContextTrieNode *
354   getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey,
355                              const MCDecodedPseudoProbe *LeafProbe);
356 
357   // Helper function to get FunctionSamples for the leaf probe
358   FunctionSamples &
359   getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey,
360                                  const MCDecodedPseudoProbe *LeafProbe);
361 
362   void convertToProfileMap(ContextTrieNode &Node,
363                            SampleContextFrameVector &Context);
364 
365   void convertToProfileMap();
366 
367   void computeSummaryAndThreshold();
368 
369   bool collectFunctionsFromLLVMProfile(
370       std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
371 
372   void initializeMissingFrameInferrer();
373 
374   // Given an input `Context`, output `NewContext` with inferred missing tail
375   // call frames.
376   void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context,
377                           SmallVectorImpl<uint64_t> &NewContext);
378 
getRootContext()379   ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); };
380 
381   // The container for holding the FunctionSamples used by context trie.
382   std::list<FunctionSamples> FSamplesList;
383 
384   // Underlying context table serves for sample profile writer.
385   std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
386 
387   SampleContextTracker ContextTracker;
388 
389   bool IsProfileValidOnTrie = true;
390 
391 public:
392   // Deduplicate adjacent repeated context sequences up to a given sequence
393   // length. -1 means no size limit.
394   static int32_t MaxCompressionSize;
395   static int MaxContextDepth;
396 };
397 
398 } // end namespace sampleprof
399 } // end namespace llvm
400 
401 #endif
402