xref: /llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp (revision 28b912687900bc0a67cd61c374fce296b09963c4)
1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the basic block sections profile reader pass. It parses
10 // and stores the basic block sections profile file (which is specified via the
11 // `-basic-block-sections` flag).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/LineIterator.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Path.h"
29 #include <llvm/ADT/STLExtras.h>
30 
31 using namespace llvm;
32 
33 char BasicBlockSectionsProfileReader::ID = 0;
34 INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
35                 "Reads and parses a basic block sections profile.", false,
36                 false)
37 
38 Expected<ProfileBBID>
39 BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const {
40   SmallVector<StringRef, 2> Parts;
41   S.split(Parts, '.');
42   if (Parts.size() > 2)
43     return createProfileParseError(Twine("unable to parse basic block id: '") +
44                                    S + "'");
45   unsigned long long BBID;
46   if (getAsUnsignedInteger(Parts[0], 10, BBID))
47     return createProfileParseError(
48         Twine("unable to parse BB id: '" + Parts[0]) +
49         "': unsigned integer expected");
50   unsigned long long CloneID = 0;
51   if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID))
52     return createProfileParseError(Twine("unable to parse clone id: '") +
53                                    Parts[1] + "': unsigned integer expected");
54   return ProfileBBID{static_cast<unsigned>(BBID),
55                      static_cast<unsigned>(CloneID)};
56 }
57 
58 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
59   return getPathAndClusterInfoForFunction(FuncName).first;
60 }
61 
62 std::pair<bool, FunctionPathAndClusterInfo>
63 BasicBlockSectionsProfileReader::getPathAndClusterInfoForFunction(
64     StringRef FuncName) const {
65   auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
66   return R != ProgramPathAndClusterInfo.end()
67              ? std::pair(true, R->second)
68              : std::pair(false, FunctionPathAndClusterInfo());
69 }
70 
71 // Reads the version 1 basic block sections profile. Profile for each function
72 // is encoded as follows:
73 //   m <module_name>
74 //   f <function_name_1> <function_name_2> ...
75 //   c <bb_id_1> <bb_id_2> <bb_id_3>
76 //   c <bb_id_4> <bb_id_5>
77 //   ...
78 // Module name specifier (starting with 'm') is optional and allows
79 // distinguishing profile for internal-linkage functions with the same name. If
80 // not specified, it will apply to any function with the same name. Function
81 // name specifier (starting with 'f') can specify multiple function name
82 // aliases. Basic block clusters are specified by 'c' and specify the cluster of
83 // basic blocks, and the internal order in which they must be placed in the same
84 // section.
85 // This profile can also specify cloning paths which instruct the compiler to
86 // clone basic blocks along a path. The cloned blocks are then specified in the
87 // cluster information.
88 // The following profile lists two cloning paths (starting with 'p') for
89 // function bar and places the total 9 blocks within two clusters. The first two
90 // blocks of a cloning path specify the edge along which the path is cloned. For
91 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along
92 // the edge 1->3. Within the given clusters, each cloned block is identified by
93 // "<original block id>.<clone id>". For instance, 3.1 represents the first
94 // clone of block 3. Original blocks are specified just with their block ids. A
95 // block cloned multiple times appears with distinct clone ids. The CFG for bar
96 // is shown below before and after cloning with its final clusters labeled.
97 //
98 // f main
99 // f bar
100 // p 1 3 4           # cloning path 1
101 // p 4 2             # cloning path 2
102 // c 1 3.1 4.1 6     # basic block cluster 1
103 // c 0 2 3 4 2.1 5   # basic block cluster 2
104 // ****************************************************************************
105 // function bar before and after cloning with basic block clusters shown.
106 // ****************************************************************************
107 //                                ....      ..............
108 //      0 -------+                : 0 :---->: 1 ---> 3.1 :
109 //      |        |                : | :     :........ |  :
110 //      v        v                : v :             : v  :
111 // +--> 2 --> 5  1   ~~~~~~>  +---: 2 :             : 4.1: clsuter 1
112 // |    |        |            |   : | :             : |  :
113 // |    v        |            |   : v .......       : v  :
114 // |    3 <------+            |   : 3 <--+  :       : 6  :
115 // |    |                     |   : |    |  :       :....:
116 // |    v                     |   : v    |  :
117 // +--- 4 ---> 6              |   : 4    |  :
118 //                            |   : |    |  :
119 //                            |   : v    |  :
120 //                            |   :2.1---+  : cluster 2
121 //                            |   : | ......:
122 //                            |   : v :
123 //                            +-->: 5 :
124 //                                ....
125 // ****************************************************************************
126 Error BasicBlockSectionsProfileReader::ReadV1Profile() {
127   auto FI = ProgramPathAndClusterInfo.end();
128 
129   // Current cluster ID corresponding to this function.
130   unsigned CurrentCluster = 0;
131   // Current position in the current cluster.
132   unsigned CurrentPosition = 0;
133 
134   // Temporary set to ensure every basic block ID appears once in the clusters
135   // of a function.
136   DenseSet<ProfileBBID> FuncBBIDs;
137 
138   // Debug-info-based module filename for the current function. Empty string
139   // means no filename.
140   StringRef DIFilename;
141 
142   for (; !LineIt.is_at_eof(); ++LineIt) {
143     StringRef S(*LineIt);
144     char Specifier = S[0];
145     S = S.drop_front().trim();
146     SmallVector<StringRef, 4> Values;
147     S.split(Values, ' ');
148     switch (Specifier) {
149     case '@':
150       continue;
151     case 'm': // Module name speicifer.
152       if (Values.size() != 1) {
153         return createProfileParseError(Twine("invalid module name value: '") +
154                                        S + "'");
155       }
156       DIFilename = sys::path::remove_leading_dotslash(Values[0]);
157       continue;
158     case 'f': { // Function names specifier.
159       bool FunctionFound = any_of(Values, [&](StringRef Alias) {
160         auto It = FunctionNameToDIFilename.find(Alias);
161         // No match if this function name is not found in this module.
162         if (It == FunctionNameToDIFilename.end())
163           return false;
164         // Return a match if debug-info-filename is not specified. Otherwise,
165         // check for equality.
166         return DIFilename.empty() || It->second.equals(DIFilename);
167       });
168       if (!FunctionFound) {
169         // Skip the following profile by setting the profile iterator (FI) to
170         // the past-the-end element.
171         FI = ProgramPathAndClusterInfo.end();
172         DIFilename = "";
173         continue;
174       }
175       for (size_t i = 1; i < Values.size(); ++i)
176         FuncAliasMap.try_emplace(Values[i], Values.front());
177 
178       // Prepare for parsing clusters of this function name.
179       // Start a new cluster map for this function name.
180       auto R = ProgramPathAndClusterInfo.try_emplace(Values.front());
181       // Report error when multiple profiles have been specified for the same
182       // function.
183       if (!R.second)
184         return createProfileParseError("duplicate profile for function '" +
185                                        Values.front() + "'");
186       FI = R.first;
187       CurrentCluster = 0;
188       FuncBBIDs.clear();
189       // We won't need DIFilename anymore. Clean it up to avoid its application
190       // on the next function.
191       DIFilename = "";
192       continue;
193     }
194     case 'c': // Basic block cluster specifier.
195       // Skip the profile when we the profile iterator (FI) refers to the
196       // past-the-end element.
197       if (FI == ProgramPathAndClusterInfo.end())
198         continue;
199       // Reset current cluster position.
200       CurrentPosition = 0;
201       for (auto BasicBlockIDStr : Values) {
202         auto BasicBlockID = parseProfileBBID(BasicBlockIDStr);
203         if (!BasicBlockID)
204           return BasicBlockID.takeError();
205         if (!FuncBBIDs.insert(*BasicBlockID).second)
206           return createProfileParseError(
207               Twine("duplicate basic block id found '") + BasicBlockIDStr +
208               "'");
209 
210         if (!BasicBlockID->BBID && CurrentPosition)
211           return createProfileParseError(
212               "entry BB (0) does not begin a cluster.");
213 
214         FI->second.ClusterInfo.emplace_back(BBClusterInfo<ProfileBBID>{
215             *std::move(BasicBlockID), CurrentCluster, CurrentPosition++});
216       }
217       CurrentCluster++;
218       continue;
219     case 'p': { // Basic block cloning path specifier.
220       SmallSet<unsigned, 5> BBsInPath;
221       FI->second.ClonePaths.push_back({});
222       for (size_t I = 0; I < Values.size(); ++I) {
223         auto BBIDStr = Values[I];
224         unsigned long long BBID = 0;
225         if (getAsUnsignedInteger(BBIDStr, 10, BBID))
226           return createProfileParseError(Twine("unsigned integer expected: '") +
227                                          BBIDStr + "'");
228         if (I != 0 && !BBsInPath.insert(BBID).second)
229           return createProfileParseError(
230               Twine("duplicate cloned block in path: '") + BBIDStr + "'");
231         FI->second.ClonePaths.back().push_back(BBID);
232       }
233       continue;
234     }
235     default:
236       return createProfileParseError(Twine("invalid specifier: '") +
237                                      Twine(Specifier) + "'");
238     }
239     llvm_unreachable("should not break from this switch statement");
240   }
241   return Error::success();
242 }
243 
244 Error BasicBlockSectionsProfileReader::ReadV0Profile() {
245   auto FI = ProgramPathAndClusterInfo.end();
246   // Current cluster ID corresponding to this function.
247   unsigned CurrentCluster = 0;
248   // Current position in the current cluster.
249   unsigned CurrentPosition = 0;
250 
251   // Temporary set to ensure every basic block ID appears once in the clusters
252   // of a function.
253   SmallSet<unsigned, 4> FuncBBIDs;
254 
255   for (; !LineIt.is_at_eof(); ++LineIt) {
256     StringRef S(*LineIt);
257     if (S[0] == '@')
258       continue;
259     // Check for the leading "!"
260     if (!S.consume_front("!") || S.empty())
261       break;
262     // Check for second "!" which indicates a cluster of basic blocks.
263     if (S.consume_front("!")) {
264       // Skip the profile when we the profile iterator (FI) refers to the
265       // past-the-end element.
266       if (FI == ProgramPathAndClusterInfo.end())
267         continue;
268       SmallVector<StringRef, 4> BBIDs;
269       S.split(BBIDs, ' ');
270       // Reset current cluster position.
271       CurrentPosition = 0;
272       for (auto BBIDStr : BBIDs) {
273         unsigned long long BBID;
274         if (getAsUnsignedInteger(BBIDStr, 10, BBID))
275           return createProfileParseError(Twine("unsigned integer expected: '") +
276                                          BBIDStr + "'");
277         if (!FuncBBIDs.insert(BBID).second)
278           return createProfileParseError(
279               Twine("duplicate basic block id found '") + BBIDStr + "'");
280         if (BBID == 0 && CurrentPosition)
281           return createProfileParseError(
282               "entry BB (0) does not begin a cluster");
283 
284         FI->second.ClusterInfo.emplace_back(
285             BBClusterInfo<ProfileBBID>({{static_cast<unsigned>(BBID), 0},
286                                         CurrentCluster,
287                                         CurrentPosition++}));
288       }
289       CurrentCluster++;
290     } else {
291       // This is a function name specifier. It may include a debug info filename
292       // specifier starting with `M=`.
293       auto [AliasesStr, DIFilenameStr] = S.split(' ');
294       SmallString<128> DIFilename;
295       if (DIFilenameStr.startswith("M=")) {
296         DIFilename =
297             sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
298         if (DIFilename.empty())
299           return createProfileParseError("empty module name specifier");
300       } else if (!DIFilenameStr.empty()) {
301         return createProfileParseError("unknown string found: '" +
302                                        DIFilenameStr + "'");
303       }
304       // Function aliases are separated using '/'. We use the first function
305       // name for the cluster info mapping and delegate all other aliases to
306       // this one.
307       SmallVector<StringRef, 4> Aliases;
308       AliasesStr.split(Aliases, '/');
309       bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
310         auto It = FunctionNameToDIFilename.find(Alias);
311         // No match if this function name is not found in this module.
312         if (It == FunctionNameToDIFilename.end())
313           return false;
314         // Return a match if debug-info-filename is not specified. Otherwise,
315         // check for equality.
316         return DIFilename.empty() || It->second.equals(DIFilename);
317       });
318       if (!FunctionFound) {
319         // Skip the following profile by setting the profile iterator (FI) to
320         // the past-the-end element.
321         FI = ProgramPathAndClusterInfo.end();
322         continue;
323       }
324       for (size_t i = 1; i < Aliases.size(); ++i)
325         FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
326 
327       // Prepare for parsing clusters of this function name.
328       // Start a new cluster map for this function name.
329       auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front());
330       // Report error when multiple profiles have been specified for the same
331       // function.
332       if (!R.second)
333         return createProfileParseError("duplicate profile for function '" +
334                                        Aliases.front() + "'");
335       FI = R.first;
336       CurrentCluster = 0;
337       FuncBBIDs.clear();
338     }
339   }
340   return Error::success();
341 }
342 
343 // Basic Block Sections can be enabled for a subset of machine basic blocks.
344 // This is done by passing a file containing names of functions for which basic
345 // block sections are desired. Additionally, machine basic block ids of the
346 // functions can also be specified for a finer granularity. Moreover, a cluster
347 // of basic blocks could be assigned to the same section.
348 // Optionally, a debug-info filename can be specified for each function to allow
349 // distinguishing internal-linkage functions of the same name.
350 // A file with basic block sections for all of function main and three blocks
351 // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
352 // (Profile for function foo is only loaded when its debug-info filename
353 // matches 'path/to/foo_file.cc').
354 // ----------------------------
355 // list.txt:
356 // !main
357 // !foo M=path/to/foo_file.cc
358 // !!1 2
359 // !!4
360 Error BasicBlockSectionsProfileReader::ReadProfile() {
361   assert(MBuf);
362 
363   unsigned long long Version = 0;
364   StringRef FirstLine(*LineIt);
365   if (FirstLine.consume_front("v")) {
366     if (getAsUnsignedInteger(FirstLine, 10, Version)) {
367       return createProfileParseError(Twine("version number expected: '") +
368                                      FirstLine + "'");
369     }
370     if (Version > 1) {
371       return createProfileParseError(Twine("invalid profile version: ") +
372                                      Twine(Version));
373     }
374     ++LineIt;
375   }
376 
377   switch (Version) {
378   case 0:
379     // TODO: Deprecate V0 once V1 is fully integrated downstream.
380     return ReadV0Profile();
381   case 1:
382     return ReadV1Profile();
383   default:
384     llvm_unreachable("Invalid profile version.");
385   }
386 }
387 
388 bool BasicBlockSectionsProfileReader::doInitialization(Module &M) {
389   if (!MBuf)
390     return false;
391   // Get the function name to debug info filename mapping.
392   FunctionNameToDIFilename.clear();
393   for (const Function &F : M) {
394     SmallString<128> DIFilename;
395     if (F.isDeclaration())
396       continue;
397     DISubprogram *Subprogram = F.getSubprogram();
398     if (Subprogram) {
399       llvm::DICompileUnit *CU = Subprogram->getUnit();
400       if (CU)
401         DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
402     }
403     [[maybe_unused]] bool inserted =
404         FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second;
405     assert(inserted);
406   }
407   if (auto Err = ReadProfile())
408     report_fatal_error(std::move(Err));
409   return false;
410 }
411 
412 ImmutablePass *
413 llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) {
414   return new BasicBlockSectionsProfileReader(Buf);
415 }
416