xref: /llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp (revision 6ac71a0149cf92afac71b8b61f17f93af2731bfd)
1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the basic block sections profile reader pass. It parses
10 // and stores the basic block sections profile file (which is specified via the
11 // `-basic-block-sections` flag).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/IR/DebugInfoMetadata.h"
22 #include "llvm/Pass.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/LineIterator.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/Path.h"
28 #include <llvm/ADT/STLExtras.h>
29 
30 using namespace llvm;
31 
32 char BasicBlockSectionsProfileReader::ID = 0;
33 INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
34                 "Reads and parses a basic block sections profile.", false,
35                 false)
36 
37 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
38   return getBBClusterInfoForFunction(FuncName).first;
39 }
40 
41 std::pair<bool, SmallVector<BBClusterInfo>>
42 BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
43     StringRef FuncName) const {
44   auto R = ProgramBBClusterInfo.find(getAliasName(FuncName));
45   return R != ProgramBBClusterInfo.end()
46              ? std::pair(true, R->second)
47              : std::pair(false, SmallVector<BBClusterInfo>{});
48 }
49 
50 // Reads the version 1 basic block sections profile. Profile for each function
51 // is encoded as follows:
52 //   m <module_name>
53 //   f <function_name_1> <function_name_2> ...
54 //   c <bb_id_1> <bb_id_2> <bb_id_3>
55 //   c <bb_id_4> <bb_id_5>
56 //   ...
57 // Module name specifier (starting with 'm') is optional and allows
58 // distinguishing profile for internal-linkage functions with the same name. If
59 // not specified, it will apply to any function with the same name. Function
60 // name specifier (starting with 'f') can specify multiple function name
61 // aliases. Basic block clusters are specified by 'c' and specify the cluster of
62 // basic blocks, and the internal order in which they must be placed in the same
63 // section.
64 Error BasicBlockSectionsProfileReader::ReadV1Profile() {
65   auto FI = ProgramBBClusterInfo.end();
66 
67   // Current cluster ID corresponding to this function.
68   unsigned CurrentCluster = 0;
69   // Current position in the current cluster.
70   unsigned CurrentPosition = 0;
71 
72   // Temporary set to ensure every basic block ID appears once in the clusters
73   // of a function.
74   SmallSet<unsigned, 4> FuncBBIDs;
75 
76   // Debug-info-based module filename for the current function. Empty string
77   // means no filename.
78   StringRef DIFilename;
79 
80   for (; !LineIt.is_at_eof(); ++LineIt) {
81     StringRef S(*LineIt);
82     char Specifier = S[0];
83     S = S.drop_front().trim();
84     SmallVector<StringRef, 4> Values;
85     S.split(Values, ' ');
86     switch (Specifier) {
87     case '@':
88       break;
89     case 'm': // Module name speicifer.
90       if (Values.size() != 1) {
91         return createProfileParseError(Twine("invalid module name value: '") +
92                                        S + "'");
93       }
94       DIFilename = sys::path::remove_leading_dotslash(Values[0]);
95       continue;
96     case 'f': { // Function names specifier.
97       bool FunctionFound = any_of(Values, [&](StringRef Alias) {
98         auto It = FunctionNameToDIFilename.find(Alias);
99         // No match if this function name is not found in this module.
100         if (It == FunctionNameToDIFilename.end())
101           return false;
102         // Return a match if debug-info-filename is not specified. Otherwise,
103         // check for equality.
104         return DIFilename.empty() || It->second.equals(DIFilename);
105       });
106       if (!FunctionFound) {
107         // Skip the following profile by setting the profile iterator (FI) to
108         // the past-the-end element.
109         FI = ProgramBBClusterInfo.end();
110         DIFilename = "";
111         continue;
112       }
113       for (size_t i = 1; i < Values.size(); ++i)
114         FuncAliasMap.try_emplace(Values[i], Values.front());
115 
116       // Prepare for parsing clusters of this function name.
117       // Start a new cluster map for this function name.
118       auto R = ProgramBBClusterInfo.try_emplace(Values.front());
119       // Report error when multiple profiles have been specified for the same
120       // function.
121       if (!R.second)
122         return createProfileParseError("duplicate profile for function '" +
123                                        Values.front() + "'");
124       FI = R.first;
125       CurrentCluster = 0;
126       FuncBBIDs.clear();
127       // We won't need DIFilename anymore. Clean it up to avoid its application
128       // on the next function.
129       DIFilename = "";
130       continue;
131     }
132     case 'c': // Basic block cluster specifier.
133       // Skip the profile when we the profile iterator (FI) refers to the
134       // past-the-end element.
135       if (FI == ProgramBBClusterInfo.end())
136         break;
137       // Reset current cluster position.
138       CurrentPosition = 0;
139       for (auto BBIDStr : Values) {
140         unsigned long long BBID;
141         if (getAsUnsignedInteger(BBIDStr, 10, BBID))
142           return createProfileParseError(Twine("unsigned integer expected: '") +
143                                          BBIDStr + "'");
144         if (!FuncBBIDs.insert(BBID).second)
145           return createProfileParseError(
146               Twine("duplicate basic block id found '") + BBIDStr + "'");
147         if (BBID == 0 && CurrentPosition)
148           return createProfileParseError(
149               "entry BB (0) does not begin a cluster");
150 
151         FI->second.emplace_back(
152             BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++});
153       }
154       CurrentCluster++;
155       continue;
156     default:
157       return createProfileParseError(Twine("invalid specifier: '") +
158                                      Twine(Specifier) + "'");
159     }
160   }
161   return Error::success();
162 }
163 
164 Error BasicBlockSectionsProfileReader::ReadV0Profile() {
165   auto FI = ProgramBBClusterInfo.end();
166 
167   // Current cluster ID corresponding to this function.
168   unsigned CurrentCluster = 0;
169   // Current position in the current cluster.
170   unsigned CurrentPosition = 0;
171 
172   // Temporary set to ensure every basic block ID appears once in the clusters
173   // of a function.
174   SmallSet<unsigned, 4> FuncBBIDs;
175 
176   for (; !LineIt.is_at_eof(); ++LineIt) {
177     StringRef S(*LineIt);
178     if (S[0] == '@')
179       continue;
180     // Check for the leading "!"
181     if (!S.consume_front("!") || S.empty())
182       break;
183     // Check for second "!" which indicates a cluster of basic blocks.
184     if (S.consume_front("!")) {
185       // Skip the profile when we the profile iterator (FI) refers to the
186       // past-the-end element.
187       if (FI == ProgramBBClusterInfo.end())
188         continue;
189       SmallVector<StringRef, 4> BBIDs;
190       S.split(BBIDs, ' ');
191       // Reset current cluster position.
192       CurrentPosition = 0;
193       for (auto BBIDStr : BBIDs) {
194         unsigned long long BBID;
195         if (getAsUnsignedInteger(BBIDStr, 10, BBID))
196           return createProfileParseError(Twine("unsigned integer expected: '") +
197                                          BBIDStr + "'");
198         if (!FuncBBIDs.insert(BBID).second)
199           return createProfileParseError(
200               Twine("duplicate basic block id found '") + BBIDStr + "'");
201         if (BBID == 0 && CurrentPosition)
202           return createProfileParseError(
203               "entry BB (0) does not begin a cluster");
204 
205         FI->second.emplace_back(
206             BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++});
207       }
208       CurrentCluster++;
209     } else {
210       // This is a function name specifier. It may include a debug info filename
211       // specifier starting with `M=`.
212       auto [AliasesStr, DIFilenameStr] = S.split(' ');
213       SmallString<128> DIFilename;
214       if (DIFilenameStr.startswith("M=")) {
215         DIFilename =
216             sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
217         if (DIFilename.empty())
218           return createProfileParseError("empty module name specifier");
219       } else if (!DIFilenameStr.empty()) {
220         return createProfileParseError("unknown string found: '" +
221                                        DIFilenameStr + "'");
222       }
223       // Function aliases are separated using '/'. We use the first function
224       // name for the cluster info mapping and delegate all other aliases to
225       // this one.
226       SmallVector<StringRef, 4> Aliases;
227       AliasesStr.split(Aliases, '/');
228       bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
229         auto It = FunctionNameToDIFilename.find(Alias);
230         // No match if this function name is not found in this module.
231         if (It == FunctionNameToDIFilename.end())
232           return false;
233         // Return a match if debug-info-filename is not specified. Otherwise,
234         // check for equality.
235         return DIFilename.empty() || It->second.equals(DIFilename);
236       });
237       if (!FunctionFound) {
238         // Skip the following profile by setting the profile iterator (FI) to
239         // the past-the-end element.
240         FI = ProgramBBClusterInfo.end();
241         continue;
242       }
243       for (size_t i = 1; i < Aliases.size(); ++i)
244         FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
245 
246       // Prepare for parsing clusters of this function name.
247       // Start a new cluster map for this function name.
248       auto R = ProgramBBClusterInfo.try_emplace(Aliases.front());
249       // Report error when multiple profiles have been specified for the same
250       // function.
251       if (!R.second)
252         return createProfileParseError("duplicate profile for function '" +
253                                        Aliases.front() + "'");
254       FI = R.first;
255       CurrentCluster = 0;
256       FuncBBIDs.clear();
257     }
258   }
259   return Error::success();
260 }
261 
262 // Basic Block Sections can be enabled for a subset of machine basic blocks.
263 // This is done by passing a file containing names of functions for which basic
264 // block sections are desired.  Additionally, machine basic block ids of the
265 // functions can also be specified for a finer granularity. Moreover, a cluster
266 // of basic blocks could be assigned to the same section.
267 // Optionally, a debug-info filename can be specified for each function to allow
268 // distinguishing internal-linkage functions of the same name.
269 // A file with basic block sections for all of function main and three blocks
270 // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
271 // (Profile for function foo is only loaded when its debug-info filename
272 // matches 'path/to/foo_file.cc').
273 // ----------------------------
274 // list.txt:
275 // !main
276 // !foo M=path/to/foo_file.cc
277 // !!1 2
278 // !!4
279 Error BasicBlockSectionsProfileReader::ReadProfile() {
280   assert(MBuf);
281 
282   unsigned long long Version = 0;
283   StringRef FirstLine(*LineIt);
284   if (FirstLine.consume_front("v")) {
285     if (getAsUnsignedInteger(FirstLine, 10, Version)) {
286       return createProfileParseError(Twine("version number expected: '") +
287                                      FirstLine + "'");
288     }
289     if (Version > 1) {
290       return createProfileParseError(Twine("invalid profile version: ") +
291                                      Twine(Version));
292     }
293     ++LineIt;
294   }
295 
296   switch (Version) {
297   case 0:
298     // TODO: Deprecate V0 once V1 is fully integrated downstream.
299     return ReadV0Profile();
300   case 1:
301     return ReadV1Profile();
302   default:
303     llvm_unreachable("Invalid profile version.");
304   }
305 }
306 
307 bool BasicBlockSectionsProfileReader::doInitialization(Module &M) {
308   if (!MBuf)
309     return false;
310   // Get the function name to debug info filename mapping.
311   FunctionNameToDIFilename.clear();
312   for (const Function &F : M) {
313     SmallString<128> DIFilename;
314     if (F.isDeclaration())
315       continue;
316     DISubprogram *Subprogram = F.getSubprogram();
317     if (Subprogram) {
318       llvm::DICompileUnit *CU = Subprogram->getUnit();
319       if (CU)
320         DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
321     }
322     [[maybe_unused]] bool inserted =
323         FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second;
324     assert(inserted);
325   }
326   if (auto Err = ReadProfile())
327     report_fatal_error(std::move(Err));
328   return false;
329 }
330 
331 ImmutablePass *
332 llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) {
333   return new BasicBlockSectionsProfileReader(Buf);
334 }
335