xref: /freebsd-src/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
181ad6265SDimitry Andric //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
281ad6265SDimitry Andric //
381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
681ad6265SDimitry Andric //
781ad6265SDimitry Andric //===----------------------------------------------------------------------===//
881ad6265SDimitry Andric //
981ad6265SDimitry Andric // Implementation of the basic block sections profile reader pass. It parses
1081ad6265SDimitry Andric // and stores the basic block sections profile file (which is specified via the
1181ad6265SDimitry Andric // `-basic-block-sections` flag).
1281ad6265SDimitry Andric //
1381ad6265SDimitry Andric //===----------------------------------------------------------------------===//
1481ad6265SDimitry Andric 
1581ad6265SDimitry Andric #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
1681ad6265SDimitry Andric #include "llvm/ADT/SmallSet.h"
17*06c3fb27SDimitry Andric #include "llvm/ADT/SmallString.h"
1881ad6265SDimitry Andric #include "llvm/ADT/SmallVector.h"
1981ad6265SDimitry Andric #include "llvm/ADT/StringMap.h"
2081ad6265SDimitry Andric #include "llvm/ADT/StringRef.h"
21*06c3fb27SDimitry Andric #include "llvm/CodeGen/Passes.h"
22*06c3fb27SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
2381ad6265SDimitry Andric #include "llvm/Support/Error.h"
2481ad6265SDimitry Andric #include "llvm/Support/LineIterator.h"
2581ad6265SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
26*06c3fb27SDimitry Andric #include "llvm/Support/Path.h"
27*06c3fb27SDimitry Andric #include <llvm/ADT/STLExtras.h>
2881ad6265SDimitry Andric 
2981ad6265SDimitry Andric using namespace llvm;
3081ad6265SDimitry Andric 
3181ad6265SDimitry Andric char BasicBlockSectionsProfileReader::ID = 0;
3281ad6265SDimitry Andric INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
3381ad6265SDimitry Andric                 "Reads and parses a basic block sections profile.", false,
3481ad6265SDimitry Andric                 false)
3581ad6265SDimitry Andric 
3681ad6265SDimitry Andric bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
3781ad6265SDimitry Andric   return getBBClusterInfoForFunction(FuncName).first;
3881ad6265SDimitry Andric }
3981ad6265SDimitry Andric 
4081ad6265SDimitry Andric std::pair<bool, SmallVector<BBClusterInfo>>
4181ad6265SDimitry Andric BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
4281ad6265SDimitry Andric     StringRef FuncName) const {
4381ad6265SDimitry Andric   auto R = ProgramBBClusterInfo.find(getAliasName(FuncName));
44*06c3fb27SDimitry Andric   return R != ProgramBBClusterInfo.end()
45*06c3fb27SDimitry Andric              ? std::pair(true, R->second)
46*06c3fb27SDimitry Andric              : std::pair(false, SmallVector<BBClusterInfo>{});
4781ad6265SDimitry Andric }
4881ad6265SDimitry Andric 
4981ad6265SDimitry Andric // Basic Block Sections can be enabled for a subset of machine basic blocks.
5081ad6265SDimitry Andric // This is done by passing a file containing names of functions for which basic
5181ad6265SDimitry Andric // block sections are desired.  Additionally, machine basic block ids of the
5281ad6265SDimitry Andric // functions can also be specified for a finer granularity. Moreover, a cluster
5381ad6265SDimitry Andric // of basic blocks could be assigned to the same section.
54*06c3fb27SDimitry Andric // Optionally, a debug-info filename can be specified for each function to allow
55*06c3fb27SDimitry Andric // distinguishing internal-linkage functions of the same name.
5681ad6265SDimitry Andric // A file with basic block sections for all of function main and three blocks
5781ad6265SDimitry Andric // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
58*06c3fb27SDimitry Andric // (Profile for function foo is only loaded when its debug-info filename
59*06c3fb27SDimitry Andric // matches 'path/to/foo_file.cc').
6081ad6265SDimitry Andric // ----------------------------
6181ad6265SDimitry Andric // list.txt:
6281ad6265SDimitry Andric // !main
63*06c3fb27SDimitry Andric // !foo M=path/to/foo_file.cc
6481ad6265SDimitry Andric // !!1 2
6581ad6265SDimitry Andric // !!4
66*06c3fb27SDimitry Andric Error BasicBlockSectionsProfileReader::ReadProfile() {
6781ad6265SDimitry Andric   assert(MBuf);
6881ad6265SDimitry Andric   line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
6981ad6265SDimitry Andric 
7081ad6265SDimitry Andric   auto invalidProfileError = [&](auto Message) {
7181ad6265SDimitry Andric     return make_error<StringError>(
7281ad6265SDimitry Andric         Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
7381ad6265SDimitry Andric               Twine(LineIt.line_number()) + ": " + Message),
7481ad6265SDimitry Andric         inconvertibleErrorCode());
7581ad6265SDimitry Andric   };
7681ad6265SDimitry Andric 
7781ad6265SDimitry Andric   auto FI = ProgramBBClusterInfo.end();
7881ad6265SDimitry Andric 
7981ad6265SDimitry Andric   // Current cluster ID corresponding to this function.
8081ad6265SDimitry Andric   unsigned CurrentCluster = 0;
8181ad6265SDimitry Andric   // Current position in the current cluster.
8281ad6265SDimitry Andric   unsigned CurrentPosition = 0;
8381ad6265SDimitry Andric 
8481ad6265SDimitry Andric   // Temporary set to ensure every basic block ID appears once in the clusters
8581ad6265SDimitry Andric   // of a function.
8681ad6265SDimitry Andric   SmallSet<unsigned, 4> FuncBBIDs;
8781ad6265SDimitry Andric 
8881ad6265SDimitry Andric   for (; !LineIt.is_at_eof(); ++LineIt) {
8981ad6265SDimitry Andric     StringRef S(*LineIt);
9081ad6265SDimitry Andric     if (S[0] == '@')
9181ad6265SDimitry Andric       continue;
9281ad6265SDimitry Andric     // Check for the leading "!"
9381ad6265SDimitry Andric     if (!S.consume_front("!") || S.empty())
9481ad6265SDimitry Andric       break;
9581ad6265SDimitry Andric     // Check for second "!" which indicates a cluster of basic blocks.
9681ad6265SDimitry Andric     if (S.consume_front("!")) {
97*06c3fb27SDimitry Andric       // Skip the profile when we the profile iterator (FI) refers to the
98*06c3fb27SDimitry Andric       // past-the-end element.
9981ad6265SDimitry Andric       if (FI == ProgramBBClusterInfo.end())
100*06c3fb27SDimitry Andric         continue;
101bdd1243dSDimitry Andric       SmallVector<StringRef, 4> BBIDs;
102bdd1243dSDimitry Andric       S.split(BBIDs, ' ');
10381ad6265SDimitry Andric       // Reset current cluster position.
10481ad6265SDimitry Andric       CurrentPosition = 0;
105bdd1243dSDimitry Andric       for (auto BBIDStr : BBIDs) {
106bdd1243dSDimitry Andric         unsigned long long BBID;
107bdd1243dSDimitry Andric         if (getAsUnsignedInteger(BBIDStr, 10, BBID))
10881ad6265SDimitry Andric           return invalidProfileError(Twine("Unsigned integer expected: '") +
109bdd1243dSDimitry Andric                                      BBIDStr + "'.");
110bdd1243dSDimitry Andric         if (!FuncBBIDs.insert(BBID).second)
11181ad6265SDimitry Andric           return invalidProfileError(Twine("Duplicate basic block id found '") +
112bdd1243dSDimitry Andric                                      BBIDStr + "'.");
113bdd1243dSDimitry Andric         if (BBID == 0 && CurrentPosition)
11481ad6265SDimitry Andric           return invalidProfileError("Entry BB (0) does not begin a cluster.");
11581ad6265SDimitry Andric 
116bdd1243dSDimitry Andric         FI->second.emplace_back(
117bdd1243dSDimitry Andric             BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++});
11881ad6265SDimitry Andric       }
11981ad6265SDimitry Andric       CurrentCluster++;
120*06c3fb27SDimitry Andric     } else {
121*06c3fb27SDimitry Andric       // This is a function name specifier. It may include a debug info filename
122*06c3fb27SDimitry Andric       // specifier starting with `M=`.
123*06c3fb27SDimitry Andric       auto [AliasesStr, DIFilenameStr] = S.split(' ');
124*06c3fb27SDimitry Andric       SmallString<128> DIFilename;
125*06c3fb27SDimitry Andric       if (DIFilenameStr.startswith("M=")) {
126*06c3fb27SDimitry Andric         DIFilename =
127*06c3fb27SDimitry Andric             sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
128*06c3fb27SDimitry Andric         if (DIFilename.empty())
129*06c3fb27SDimitry Andric           return invalidProfileError("Empty module name specifier.");
130*06c3fb27SDimitry Andric       } else if (!DIFilenameStr.empty()) {
131*06c3fb27SDimitry Andric         return invalidProfileError("Unknown string found: '" + DIFilenameStr +
132*06c3fb27SDimitry Andric                                    "'.");
133*06c3fb27SDimitry Andric       }
13481ad6265SDimitry Andric       // Function aliases are separated using '/'. We use the first function
13581ad6265SDimitry Andric       // name for the cluster info mapping and delegate all other aliases to
13681ad6265SDimitry Andric       // this one.
13781ad6265SDimitry Andric       SmallVector<StringRef, 4> Aliases;
138*06c3fb27SDimitry Andric       AliasesStr.split(Aliases, '/');
139*06c3fb27SDimitry Andric       bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
140*06c3fb27SDimitry Andric         auto It = FunctionNameToDIFilename.find(Alias);
141*06c3fb27SDimitry Andric         // No match if this function name is not found in this module.
142*06c3fb27SDimitry Andric         if (It == FunctionNameToDIFilename.end())
143*06c3fb27SDimitry Andric           return false;
144*06c3fb27SDimitry Andric         // Return a match if debug-info-filename is not specified. Otherwise,
145*06c3fb27SDimitry Andric         // check for equality.
146*06c3fb27SDimitry Andric         return DIFilename.empty() || It->second.equals(DIFilename);
147*06c3fb27SDimitry Andric       });
148*06c3fb27SDimitry Andric       if (!FunctionFound) {
149*06c3fb27SDimitry Andric         // Skip the following profile by setting the profile iterator (FI) to
150*06c3fb27SDimitry Andric         // the past-the-end element.
151*06c3fb27SDimitry Andric         FI = ProgramBBClusterInfo.end();
152*06c3fb27SDimitry Andric         continue;
153*06c3fb27SDimitry Andric       }
15481ad6265SDimitry Andric       for (size_t i = 1; i < Aliases.size(); ++i)
15581ad6265SDimitry Andric         FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
15681ad6265SDimitry Andric 
15781ad6265SDimitry Andric       // Prepare for parsing clusters of this function name.
15881ad6265SDimitry Andric       // Start a new cluster map for this function name.
159*06c3fb27SDimitry Andric       auto R = ProgramBBClusterInfo.try_emplace(Aliases.front());
160*06c3fb27SDimitry Andric       // Report error when multiple profiles have been specified for the same
161*06c3fb27SDimitry Andric       // function.
162*06c3fb27SDimitry Andric       if (!R.second)
163*06c3fb27SDimitry Andric         return invalidProfileError("Duplicate profile for function '" +
164*06c3fb27SDimitry Andric                                    Aliases.front() + "'.");
165*06c3fb27SDimitry Andric       FI = R.first;
16681ad6265SDimitry Andric       CurrentCluster = 0;
16781ad6265SDimitry Andric       FuncBBIDs.clear();
16881ad6265SDimitry Andric     }
16981ad6265SDimitry Andric   }
17081ad6265SDimitry Andric   return Error::success();
17181ad6265SDimitry Andric }
17281ad6265SDimitry Andric 
173*06c3fb27SDimitry Andric bool BasicBlockSectionsProfileReader::doInitialization(Module &M) {
17481ad6265SDimitry Andric   if (!MBuf)
175*06c3fb27SDimitry Andric     return false;
176*06c3fb27SDimitry Andric   // Get the function name to debug info filename mapping.
177*06c3fb27SDimitry Andric   FunctionNameToDIFilename.clear();
178*06c3fb27SDimitry Andric   for (const Function &F : M) {
179*06c3fb27SDimitry Andric     SmallString<128> DIFilename;
180*06c3fb27SDimitry Andric     if (F.isDeclaration())
181*06c3fb27SDimitry Andric       continue;
182*06c3fb27SDimitry Andric     DISubprogram *Subprogram = F.getSubprogram();
183*06c3fb27SDimitry Andric     if (Subprogram) {
184*06c3fb27SDimitry Andric       llvm::DICompileUnit *CU = Subprogram->getUnit();
185*06c3fb27SDimitry Andric       if (CU)
186*06c3fb27SDimitry Andric         DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
187*06c3fb27SDimitry Andric     }
188*06c3fb27SDimitry Andric     [[maybe_unused]] bool inserted =
189*06c3fb27SDimitry Andric         FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second;
190*06c3fb27SDimitry Andric     assert(inserted);
191*06c3fb27SDimitry Andric   }
192*06c3fb27SDimitry Andric   if (auto Err = ReadProfile())
19381ad6265SDimitry Andric     report_fatal_error(std::move(Err));
194*06c3fb27SDimitry Andric   return false;
19581ad6265SDimitry Andric }
19681ad6265SDimitry Andric 
19781ad6265SDimitry Andric ImmutablePass *
19881ad6265SDimitry Andric llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) {
19981ad6265SDimitry Andric   return new BasicBlockSectionsProfileReader(Buf);
20081ad6265SDimitry Andric }
201