1 //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H 10 #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H 11 #include "CSPreInliner.h" 12 #include "ErrorHandling.h" 13 #include "PerfReader.h" 14 #include "ProfiledBinary.h" 15 #include "llvm/IR/DebugInfoMetadata.h" 16 #include "llvm/ProfileData/SampleProfWriter.h" 17 #include <memory> 18 #include <unordered_set> 19 20 using namespace llvm; 21 using namespace sampleprof; 22 23 namespace llvm { 24 namespace sampleprof { 25 26 using ProbeCounterMap = 27 std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>; 28 29 // This base class for profile generation of sample-based PGO. We reuse all 30 // structures relating to function profiles and profile writers as seen in 31 // /ProfileData/SampleProf.h. 32 class ProfileGeneratorBase { 33 34 public: ProfileGeneratorBase(ProfiledBinary * Binary)35 ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){}; ProfileGeneratorBase(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)36 ProfileGeneratorBase(ProfiledBinary *Binary, 37 const ContextSampleCounterMap *Counters) 38 : Binary(Binary), SampleCounters(Counters){}; ProfileGeneratorBase(ProfiledBinary * Binary,const SampleProfileMap && Profiles)39 ProfileGeneratorBase(ProfiledBinary *Binary, 40 const SampleProfileMap &&Profiles) 41 : Binary(Binary), ProfileMap(std::move(Profiles)){}; 42 43 virtual ~ProfileGeneratorBase() = default; 44 static std::unique_ptr<ProfileGeneratorBase> 45 create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters, 46 bool profileIsCS); 47 static std::unique_ptr<ProfileGeneratorBase> 48 create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap, 49 bool profileIsCS); 50 virtual void generateProfile() = 0; 51 void write(); 52 53 static uint32_t 54 getDuplicationFactor(unsigned Discriminator, 55 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) { 56 return UseFSD ? 1 57 : llvm::DILocation::getDuplicationFactorFromDiscriminator( 58 Discriminator); 59 } 60 61 static uint32_t 62 getBaseDiscriminator(unsigned Discriminator, 63 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) { 64 return UseFSD ? Discriminator 65 : DILocation::getBaseDiscriminatorFromDiscriminator( 66 Discriminator, /* IsFSDiscriminator */ false); 67 } 68 69 static bool UseFSDiscriminator; 70 71 protected: 72 // Use SampleProfileWriter to serialize profile map 73 void write(std::unique_ptr<SampleProfileWriter> Writer, 74 SampleProfileMap &ProfileMap); 75 /* 76 For each region boundary point, mark if it is begin or end (or both) of 77 the region. Boundary points are inclusive. Log the sample count as well 78 so we can use it when we compute the sample count of each disjoint region 79 later. Note that there might be multiple ranges with different sample 80 count that share same begin/end point. We need to accumulate the sample 81 count for the boundary point for such case, because for the example 82 below, 83 84 |<--100-->| 85 |<------200------>| 86 A B C 87 88 sample count for disjoint region [A,B] would be 300. 89 */ 90 void findDisjointRanges(RangeSample &DisjointRanges, 91 const RangeSample &Ranges); 92 93 // Go through each address from range to extract the top frame probe by 94 // looking up in the Address2ProbeMap 95 void extractProbesFromRange(const RangeSample &RangeCounter, 96 ProbeCounterMap &ProbeCounter, 97 bool FindDisjointRanges = true); 98 99 // Helper function for updating body sample for a leaf location in 100 // FunctionProfile 101 void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, 102 const SampleContextFrame &LeafLoc, 103 uint64_t Count); 104 105 void updateFunctionSamples(); 106 107 void updateTotalSamples(); 108 109 void updateCallsiteSamples(); 110 111 void filterAmbiguousProfile(SampleProfileMap &Profiles); 112 113 bool filterAmbiguousProfile(FunctionSamples &FS); 114 115 StringRef getCalleeNameForAddress(uint64_t TargetAddress); 116 117 void computeSummaryAndThreshold(SampleProfileMap &ProfileMap); 118 119 void calculateBodySamplesAndSize(const FunctionSamples &FSamples, 120 uint64_t &TotalBodySamples, 121 uint64_t &FuncBodySize); 122 123 double calculateDensity(const SampleProfileMap &Profiles); 124 125 void calculateAndShowDensity(const SampleProfileMap &Profiles); 126 127 void showDensitySuggestion(double Density); 128 129 void collectProfiledFunctions(); 130 131 bool collectFunctionsFromRawProfile( 132 std::unordered_set<const BinaryFunction *> &ProfiledFunctions); 133 134 // Collect profiled Functions for llvm sample profile input. 135 virtual bool collectFunctionsFromLLVMProfile( 136 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) = 0; 137 138 // List of function prefix to filter out. 139 static constexpr const char *FuncPrefixsToFilter[] = {"__cxx_global_var_init", 140 "__tls_init"}; 141 142 // Thresholds from profile summary to answer isHotCount/isColdCount queries. 143 uint64_t HotCountThreshold; 144 145 uint64_t ColdCountThreshold; 146 147 ProfiledBinary *Binary = nullptr; 148 149 std::unique_ptr<ProfileSummary> Summary; 150 151 // Used by SampleProfileWriter 152 SampleProfileMap ProfileMap; 153 154 const ContextSampleCounterMap *SampleCounters = nullptr; 155 }; 156 157 class ProfileGenerator : public ProfileGeneratorBase { 158 159 public: ProfileGenerator(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)160 ProfileGenerator(ProfiledBinary *Binary, 161 const ContextSampleCounterMap *Counters) 162 : ProfileGeneratorBase(Binary, Counters){}; ProfileGenerator(ProfiledBinary * Binary,const SampleProfileMap && Profiles)163 ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles) 164 : ProfileGeneratorBase(Binary, std::move(Profiles)){}; 165 void generateProfile() override; 166 167 private: 168 void generateLineNumBasedProfile(); 169 void generateProbeBasedProfile(); 170 RangeSample preprocessRangeCounter(const RangeSample &RangeCounter); 171 FunctionSamples &getTopLevelFunctionProfile(FunctionId FuncName); 172 // Helper function to get the leaf frame's FunctionProfile by traversing the 173 // inline stack and meanwhile it adds the total samples for each frame's 174 // function profile. 175 FunctionSamples & 176 getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec, 177 uint64_t Count); 178 void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter); 179 void 180 populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); 181 void 182 populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter); 183 void populateBoundarySamplesWithProbesForAllFunctions( 184 const BranchSample &BranchCounters); 185 void postProcessProfiles(); 186 void trimColdProfiles(const SampleProfileMap &Profiles, 187 uint64_t ColdCntThreshold); 188 bool collectFunctionsFromLLVMProfile( 189 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override; 190 }; 191 192 class CSProfileGenerator : public ProfileGeneratorBase { 193 public: CSProfileGenerator(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)194 CSProfileGenerator(ProfiledBinary *Binary, 195 const ContextSampleCounterMap *Counters) 196 : ProfileGeneratorBase(Binary, Counters){}; CSProfileGenerator(ProfiledBinary * Binary,SampleProfileMap & Profiles)197 CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles) 198 : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr){}; 199 void generateProfile() override; 200 201 // Trim the context stack at a given depth. 202 template <typename T> 203 static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) { 204 if (Depth < 0 || static_cast<size_t>(Depth) >= S.size()) 205 return; 206 std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(), 207 S.begin()); 208 S.resize(Depth); 209 } 210 211 // Remove adjacent repeated context sequences up to a given sequence length, 212 // -1 means no size limit. Note that repeated sequences are identified based 213 // on the exact call site, this is finer granularity than function recursion. 214 template <typename T> 215 static void compressRecursionContext(SmallVectorImpl<T> &Context, 216 int32_t CSize = MaxCompressionSize) { 217 uint32_t I = 1; 218 uint32_t HS = static_cast<uint32_t>(Context.size() / 2); 219 uint32_t MaxDedupSize = 220 CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS); 221 auto BeginIter = Context.begin(); 222 // Use an in-place algorithm to save memory copy 223 // End indicates the end location of current iteration's data 224 uint32_t End = 0; 225 // Deduplicate from length 1 to the max possible size of a repeated 226 // sequence. 227 while (I <= MaxDedupSize) { 228 // This is a linear algorithm that deduplicates adjacent repeated 229 // sequences of size I. The deduplication detection runs on a sliding 230 // window whose size is 2*I and it keeps sliding the window to deduplicate 231 // the data inside. Once duplication is detected, deduplicate it by 232 // skipping the right half part of the window, otherwise just copy back 233 // the new one by appending them at the back of End pointer(for the next 234 // iteration). 235 // 236 // For example: 237 // Input: [a1, a2, b1, b2] 238 // (Added index to distinguish the same char, the origin is [a, a, b, 239 // b], the size of the dedup window is 2(I = 1) at the beginning) 240 // 241 // 1) The initial status is a dummy window[null, a1], then just copy the 242 // right half of the window(End = 0), then slide the window. 243 // Result: [a1], a2, b1, b2 (End points to the element right before ], 244 // after ] is the data of the previous iteration) 245 // 246 // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of 247 // the window i.e the duplication happen. Only slide the window. 248 // Result: [a1], a2, b1, b2 249 // 250 // 3) Next window is [a2, b1], copy the right half of the window(b1 is 251 // new) to the End and slide the window. 252 // Result: [a1, b1], b1, b2 253 // 254 // 4) Next window is [b1, b2], same to 2), skip b2. 255 // Result: [a1, b1], b1, b2 256 // After resize, it will be [a, b] 257 258 // Use pointers like below to do comparison inside the window 259 // [a b c a b c] 260 // | | | | | 261 // LeftBoundary Left Right Left+I Right+I 262 // A duplication found if Left < LeftBoundry. 263 264 int32_t Right = I - 1; 265 End = I; 266 int32_t LeftBoundary = 0; 267 while (Right + I < Context.size()) { 268 // To avoids scanning a part of a sequence repeatedly, it finds out 269 // the common suffix of two hald in the window. The common suffix will 270 // serve as the common prefix of next possible pair of duplicate 271 // sequences. The non-common part will be ignored and never scanned 272 // again. 273 274 // For example. 275 // Input: [a, b1], c1, b2, c2 276 // I = 2 277 // 278 // 1) For the window [a, b1, c1, b2], non-common-suffix for the right 279 // part is 'c1', copy it and only slide the window 1 step. 280 // Result: [a, b1, c1], b2, c2 281 // 282 // 2) Next window is [b1, c1, b2, c2], so duplication happen. 283 // Result after resize: [a, b, c] 284 285 int32_t Left = Right; 286 while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) { 287 // Find the longest suffix inside the window. When stops, Left points 288 // at the diverging point in the current sequence. 289 Left--; 290 } 291 292 bool DuplicationFound = (Left < LeftBoundary); 293 // Don't need to recheck the data before Right 294 LeftBoundary = Right + 1; 295 if (DuplicationFound) { 296 // Duplication found, skip right half of the window. 297 Right += I; 298 } else { 299 // Copy the non-common-suffix part of the adjacent sequence. 300 std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1, 301 BeginIter + End); 302 End += Left + I - Right; 303 // Only slide the window by the size of non-common-suffix 304 Right = Left + I; 305 } 306 } 307 // Don't forget the remaining part that's not scanned. 308 std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End); 309 End += Context.size() - Right - 1; 310 I++; 311 Context.resize(End); 312 MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize); 313 } 314 } 315 316 private: 317 void generateLineNumBasedProfile(); 318 319 FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode, 320 bool WasLeafInlined = false); 321 322 // Lookup or create ContextTrieNode for the context, FunctionSamples is 323 // created inside this function. 324 ContextTrieNode *getOrCreateContextNode(const SampleContextFrames Context, 325 bool WasLeafInlined = false); 326 327 // For profiled only functions, on-demand compute their inline context 328 // function byte size which is used by the pre-inliner. 329 void computeSizeForProfiledFunctions(); 330 // Post processing for profiles before writing out, such as mermining 331 // and trimming cold profiles, running preinliner on profiles. 332 void postProcessProfiles(); 333 334 void populateBodySamplesForFunction(FunctionSamples &FunctionProfile, 335 const RangeSample &RangeCounters); 336 337 void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode, 338 const BranchSample &BranchCounters); 339 340 void populateInferredFunctionSamples(ContextTrieNode &Node); 341 342 void updateFunctionSamples(); 343 344 void generateProbeBasedProfile(); 345 346 // Fill in function body samples from probes 347 void populateBodySamplesWithProbes(const RangeSample &RangeCounter, 348 const AddrBasedCtxKey *CtxKey); 349 // Fill in boundary samples for a call probe 350 void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, 351 const AddrBasedCtxKey *CtxKey); 352 353 ContextTrieNode * 354 getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey, 355 const MCDecodedPseudoProbe *LeafProbe); 356 357 // Helper function to get FunctionSamples for the leaf probe 358 FunctionSamples & 359 getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey, 360 const MCDecodedPseudoProbe *LeafProbe); 361 362 void convertToProfileMap(ContextTrieNode &Node, 363 SampleContextFrameVector &Context); 364 365 void convertToProfileMap(); 366 367 void computeSummaryAndThreshold(); 368 369 bool collectFunctionsFromLLVMProfile( 370 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override; 371 372 void initializeMissingFrameInferrer(); 373 374 // Given an input `Context`, output `NewContext` with inferred missing tail 375 // call frames. 376 void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context, 377 SmallVectorImpl<uint64_t> &NewContext); 378 getRootContext()379 ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); }; 380 381 // The container for holding the FunctionSamples used by context trie. 382 std::list<FunctionSamples> FSamplesList; 383 384 // Underlying context table serves for sample profile writer. 385 std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts; 386 387 SampleContextTracker ContextTracker; 388 389 bool IsProfileValidOnTrie = true; 390 391 public: 392 // Deduplicate adjacent repeated context sequences up to a given sequence 393 // length. -1 means no size limit. 394 static int32_t MaxCompressionSize; 395 static int MaxContextDepth; 396 }; 397 398 } // end namespace sampleprof 399 } // end namespace llvm 400 401 #endif 402