xref: /freebsd-src/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp (revision cb14a3fe5122c879eae1fb480ed7ce82a699ddb6)
1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the basic block sections profile reader pass. It parses
10 // and stores the basic block sections profile file (which is specified via the
11 // `-basic-block-sections` flag).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/LineIterator.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Path.h"
29 #include <llvm/ADT/STLExtras.h>
30 
31 using namespace llvm;
32 
33 char BasicBlockSectionsProfileReader::ID = 0;
34 INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
35                 "Reads and parses a basic block sections profile.", false,
36                 false)
37 
38 Expected<UniqueBBID>
39 BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const {
40   SmallVector<StringRef, 2> Parts;
41   S.split(Parts, '.');
42   if (Parts.size() > 2)
43     return createProfileParseError(Twine("unable to parse basic block id: '") +
44                                    S + "'");
45   unsigned long long BaseBBID;
46   if (getAsUnsignedInteger(Parts[0], 10, BaseBBID))
47     return createProfileParseError(
48         Twine("unable to parse BB id: '" + Parts[0]) +
49         "': unsigned integer expected");
50   unsigned long long CloneID = 0;
51   if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID))
52     return createProfileParseError(Twine("unable to parse clone id: '") +
53                                    Parts[1] + "': unsigned integer expected");
54   return UniqueBBID{static_cast<unsigned>(BaseBBID),
55                     static_cast<unsigned>(CloneID)};
56 }
57 
58 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
59   return getClusterInfoForFunction(FuncName).first;
60 }
61 
62 std::pair<bool, SmallVector<BBClusterInfo>>
63 BasicBlockSectionsProfileReader::getClusterInfoForFunction(
64     StringRef FuncName) const {
65   auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
66   return R != ProgramPathAndClusterInfo.end()
67              ? std::pair(true, R->second.ClusterInfo)
68              : std::pair(false, SmallVector<BBClusterInfo>());
69 }
70 
71 SmallVector<SmallVector<unsigned>>
72 BasicBlockSectionsProfileReader::getClonePathsForFunction(
73     StringRef FuncName) const {
74   return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths;
75 }
76 
77 // Reads the version 1 basic block sections profile. Profile for each function
78 // is encoded as follows:
79 //   m <module_name>
80 //   f <function_name_1> <function_name_2> ...
81 //   c <bb_id_1> <bb_id_2> <bb_id_3>
82 //   c <bb_id_4> <bb_id_5>
83 //   ...
84 // Module name specifier (starting with 'm') is optional and allows
85 // distinguishing profile for internal-linkage functions with the same name. If
86 // not specified, it will apply to any function with the same name. Function
87 // name specifier (starting with 'f') can specify multiple function name
88 // aliases. Basic block clusters are specified by 'c' and specify the cluster of
89 // basic blocks, and the internal order in which they must be placed in the same
90 // section.
91 // This profile can also specify cloning paths which instruct the compiler to
92 // clone basic blocks along a path. The cloned blocks are then specified in the
93 // cluster information.
94 // The following profile lists two cloning paths (starting with 'p') for
95 // function bar and places the total 9 blocks within two clusters. The first two
96 // blocks of a cloning path specify the edge along which the path is cloned. For
97 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along
98 // the edge 1->3. Within the given clusters, each cloned block is identified by
99 // "<original block id>.<clone id>". For instance, 3.1 represents the first
100 // clone of block 3. Original blocks are specified just with their block ids. A
101 // block cloned multiple times appears with distinct clone ids. The CFG for bar
102 // is shown below before and after cloning with its final clusters labeled.
103 //
104 // f main
105 // f bar
106 // p 1 3 4           # cloning path 1
107 // p 4 2             # cloning path 2
108 // c 1 3.1 4.1 6     # basic block cluster 1
109 // c 0 2 3 4 2.1 5   # basic block cluster 2
110 // ****************************************************************************
111 // function bar before and after cloning with basic block clusters shown.
112 // ****************************************************************************
113 //                                ....      ..............
114 //      0 -------+                : 0 :---->: 1 ---> 3.1 :
115 //      |        |                : | :     :........ |  :
116 //      v        v                : v :             : v  :
117 // +--> 2 --> 5  1   ~~~~~~>  +---: 2 :             : 4.1: clsuter 1
118 // |    |        |            |   : | :             : |  :
119 // |    v        |            |   : v .......       : v  :
120 // |    3 <------+            |   : 3 <--+  :       : 6  :
121 // |    |                     |   : |    |  :       :....:
122 // |    v                     |   : v    |  :
123 // +--- 4 ---> 6              |   : 4    |  :
124 //                            |   : |    |  :
125 //                            |   : v    |  :
126 //                            |   :2.1---+  : cluster 2
127 //                            |   : | ......:
128 //                            |   : v :
129 //                            +-->: 5 :
130 //                                ....
131 // ****************************************************************************
132 Error BasicBlockSectionsProfileReader::ReadV1Profile() {
133   auto FI = ProgramPathAndClusterInfo.end();
134 
135   // Current cluster ID corresponding to this function.
136   unsigned CurrentCluster = 0;
137   // Current position in the current cluster.
138   unsigned CurrentPosition = 0;
139 
140   // Temporary set to ensure every basic block ID appears once in the clusters
141   // of a function.
142   DenseSet<UniqueBBID> FuncBBIDs;
143 
144   // Debug-info-based module filename for the current function. Empty string
145   // means no filename.
146   StringRef DIFilename;
147 
148   for (; !LineIt.is_at_eof(); ++LineIt) {
149     StringRef S(*LineIt);
150     char Specifier = S[0];
151     S = S.drop_front().trim();
152     SmallVector<StringRef, 4> Values;
153     S.split(Values, ' ');
154     switch (Specifier) {
155     case '@':
156       continue;
157     case 'm': // Module name speicifer.
158       if (Values.size() != 1) {
159         return createProfileParseError(Twine("invalid module name value: '") +
160                                        S + "'");
161       }
162       DIFilename = sys::path::remove_leading_dotslash(Values[0]);
163       continue;
164     case 'f': { // Function names specifier.
165       bool FunctionFound = any_of(Values, [&](StringRef Alias) {
166         auto It = FunctionNameToDIFilename.find(Alias);
167         // No match if this function name is not found in this module.
168         if (It == FunctionNameToDIFilename.end())
169           return false;
170         // Return a match if debug-info-filename is not specified. Otherwise,
171         // check for equality.
172         return DIFilename.empty() || It->second.equals(DIFilename);
173       });
174       if (!FunctionFound) {
175         // Skip the following profile by setting the profile iterator (FI) to
176         // the past-the-end element.
177         FI = ProgramPathAndClusterInfo.end();
178         DIFilename = "";
179         continue;
180       }
181       for (size_t i = 1; i < Values.size(); ++i)
182         FuncAliasMap.try_emplace(Values[i], Values.front());
183 
184       // Prepare for parsing clusters of this function name.
185       // Start a new cluster map for this function name.
186       auto R = ProgramPathAndClusterInfo.try_emplace(Values.front());
187       // Report error when multiple profiles have been specified for the same
188       // function.
189       if (!R.second)
190         return createProfileParseError("duplicate profile for function '" +
191                                        Values.front() + "'");
192       FI = R.first;
193       CurrentCluster = 0;
194       FuncBBIDs.clear();
195       // We won't need DIFilename anymore. Clean it up to avoid its application
196       // on the next function.
197       DIFilename = "";
198       continue;
199     }
200     case 'c': // Basic block cluster specifier.
201       // Skip the profile when we the profile iterator (FI) refers to the
202       // past-the-end element.
203       if (FI == ProgramPathAndClusterInfo.end())
204         continue;
205       // Reset current cluster position.
206       CurrentPosition = 0;
207       for (auto BasicBlockIDStr : Values) {
208         auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr);
209         if (!BasicBlockID)
210           return BasicBlockID.takeError();
211         if (!FuncBBIDs.insert(*BasicBlockID).second)
212           return createProfileParseError(
213               Twine("duplicate basic block id found '") + BasicBlockIDStr +
214               "'");
215 
216         if (!BasicBlockID->BaseID && CurrentPosition)
217           return createProfileParseError(
218               "entry BB (0) does not begin a cluster.");
219 
220         FI->second.ClusterInfo.emplace_back(BBClusterInfo{
221             *std::move(BasicBlockID), CurrentCluster, CurrentPosition++});
222       }
223       CurrentCluster++;
224       continue;
225     case 'p': { // Basic block cloning path specifier.
226       // Skip the profile when we the profile iterator (FI) refers to the
227       // past-the-end element.
228       if (FI == ProgramPathAndClusterInfo.end())
229         continue;
230       SmallSet<unsigned, 5> BBsInPath;
231       FI->second.ClonePaths.push_back({});
232       for (size_t I = 0; I < Values.size(); ++I) {
233         auto BaseBBIDStr = Values[I];
234         unsigned long long BaseBBID = 0;
235         if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID))
236           return createProfileParseError(Twine("unsigned integer expected: '") +
237                                          BaseBBIDStr + "'");
238         if (I != 0 && !BBsInPath.insert(BaseBBID).second)
239           return createProfileParseError(
240               Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'");
241         FI->second.ClonePaths.back().push_back(BaseBBID);
242       }
243       continue;
244     }
245     default:
246       return createProfileParseError(Twine("invalid specifier: '") +
247                                      Twine(Specifier) + "'");
248     }
249     llvm_unreachable("should not break from this switch statement");
250   }
251   return Error::success();
252 }
253 
254 Error BasicBlockSectionsProfileReader::ReadV0Profile() {
255   auto FI = ProgramPathAndClusterInfo.end();
256   // Current cluster ID corresponding to this function.
257   unsigned CurrentCluster = 0;
258   // Current position in the current cluster.
259   unsigned CurrentPosition = 0;
260 
261   // Temporary set to ensure every basic block ID appears once in the clusters
262   // of a function.
263   SmallSet<unsigned, 4> FuncBBIDs;
264 
265   for (; !LineIt.is_at_eof(); ++LineIt) {
266     StringRef S(*LineIt);
267     if (S[0] == '@')
268       continue;
269     // Check for the leading "!"
270     if (!S.consume_front("!") || S.empty())
271       break;
272     // Check for second "!" which indicates a cluster of basic blocks.
273     if (S.consume_front("!")) {
274       // Skip the profile when we the profile iterator (FI) refers to the
275       // past-the-end element.
276       if (FI == ProgramPathAndClusterInfo.end())
277         continue;
278       SmallVector<StringRef, 4> BBIDs;
279       S.split(BBIDs, ' ');
280       // Reset current cluster position.
281       CurrentPosition = 0;
282       for (auto BBIDStr : BBIDs) {
283         unsigned long long BBID;
284         if (getAsUnsignedInteger(BBIDStr, 10, BBID))
285           return createProfileParseError(Twine("unsigned integer expected: '") +
286                                          BBIDStr + "'");
287         if (!FuncBBIDs.insert(BBID).second)
288           return createProfileParseError(
289               Twine("duplicate basic block id found '") + BBIDStr + "'");
290         if (BBID == 0 && CurrentPosition)
291           return createProfileParseError(
292               "entry BB (0) does not begin a cluster");
293 
294         FI->second.ClusterInfo.emplace_back(
295             BBClusterInfo({{static_cast<unsigned>(BBID), 0},
296                            CurrentCluster,
297                            CurrentPosition++}));
298       }
299       CurrentCluster++;
300     } else {
301       // This is a function name specifier. It may include a debug info filename
302       // specifier starting with `M=`.
303       auto [AliasesStr, DIFilenameStr] = S.split(' ');
304       SmallString<128> DIFilename;
305       if (DIFilenameStr.starts_with("M=")) {
306         DIFilename =
307             sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
308         if (DIFilename.empty())
309           return createProfileParseError("empty module name specifier");
310       } else if (!DIFilenameStr.empty()) {
311         return createProfileParseError("unknown string found: '" +
312                                        DIFilenameStr + "'");
313       }
314       // Function aliases are separated using '/'. We use the first function
315       // name for the cluster info mapping and delegate all other aliases to
316       // this one.
317       SmallVector<StringRef, 4> Aliases;
318       AliasesStr.split(Aliases, '/');
319       bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
320         auto It = FunctionNameToDIFilename.find(Alias);
321         // No match if this function name is not found in this module.
322         if (It == FunctionNameToDIFilename.end())
323           return false;
324         // Return a match if debug-info-filename is not specified. Otherwise,
325         // check for equality.
326         return DIFilename.empty() || It->second.equals(DIFilename);
327       });
328       if (!FunctionFound) {
329         // Skip the following profile by setting the profile iterator (FI) to
330         // the past-the-end element.
331         FI = ProgramPathAndClusterInfo.end();
332         continue;
333       }
334       for (size_t i = 1; i < Aliases.size(); ++i)
335         FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
336 
337       // Prepare for parsing clusters of this function name.
338       // Start a new cluster map for this function name.
339       auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front());
340       // Report error when multiple profiles have been specified for the same
341       // function.
342       if (!R.second)
343         return createProfileParseError("duplicate profile for function '" +
344                                        Aliases.front() + "'");
345       FI = R.first;
346       CurrentCluster = 0;
347       FuncBBIDs.clear();
348     }
349   }
350   return Error::success();
351 }
352 
353 // Basic Block Sections can be enabled for a subset of machine basic blocks.
354 // This is done by passing a file containing names of functions for which basic
355 // block sections are desired. Additionally, machine basic block ids of the
356 // functions can also be specified for a finer granularity. Moreover, a cluster
357 // of basic blocks could be assigned to the same section.
358 // Optionally, a debug-info filename can be specified for each function to allow
359 // distinguishing internal-linkage functions of the same name.
360 // A file with basic block sections for all of function main and three blocks
361 // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
362 // (Profile for function foo is only loaded when its debug-info filename
363 // matches 'path/to/foo_file.cc').
364 // ----------------------------
365 // list.txt:
366 // !main
367 // !foo M=path/to/foo_file.cc
368 // !!1 2
369 // !!4
370 Error BasicBlockSectionsProfileReader::ReadProfile() {
371   assert(MBuf);
372 
373   unsigned long long Version = 0;
374   StringRef FirstLine(*LineIt);
375   if (FirstLine.consume_front("v")) {
376     if (getAsUnsignedInteger(FirstLine, 10, Version)) {
377       return createProfileParseError(Twine("version number expected: '") +
378                                      FirstLine + "'");
379     }
380     if (Version > 1) {
381       return createProfileParseError(Twine("invalid profile version: ") +
382                                      Twine(Version));
383     }
384     ++LineIt;
385   }
386 
387   switch (Version) {
388   case 0:
389     // TODO: Deprecate V0 once V1 is fully integrated downstream.
390     return ReadV0Profile();
391   case 1:
392     return ReadV1Profile();
393   default:
394     llvm_unreachable("Invalid profile version.");
395   }
396 }
397 
398 bool BasicBlockSectionsProfileReader::doInitialization(Module &M) {
399   if (!MBuf)
400     return false;
401   // Get the function name to debug info filename mapping.
402   FunctionNameToDIFilename.clear();
403   for (const Function &F : M) {
404     SmallString<128> DIFilename;
405     if (F.isDeclaration())
406       continue;
407     DISubprogram *Subprogram = F.getSubprogram();
408     if (Subprogram) {
409       llvm::DICompileUnit *CU = Subprogram->getUnit();
410       if (CU)
411         DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
412     }
413     [[maybe_unused]] bool inserted =
414         FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second;
415     assert(inserted);
416   }
417   if (auto Err = ReadProfile())
418     report_fatal_error(std::move(Err));
419   return false;
420 }
421 
422 ImmutablePass *
423 llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) {
424   return new BasicBlockSectionsProfileReader(Buf);
425 }
426