1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the basic block sections profile reader pass. It parses 10 // and stores the basic block sections profile file (which is specified via the 11 // `-basic-block-sections` flag). 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallSet.h" 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringMap.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/IR/DebugInfoMetadata.h" 23 #include "llvm/Pass.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include "llvm/Support/LineIterator.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/Path.h" 29 #include <llvm/ADT/STLExtras.h> 30 31 using namespace llvm; 32 33 char BasicBlockSectionsProfileReader::ID = 0; 34 INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", 35 "Reads and parses a basic block sections profile.", false, 36 false) 37 38 Expected<UniqueBBID> 39 BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { 40 SmallVector<StringRef, 2> Parts; 41 S.split(Parts, '.'); 42 if (Parts.size() > 2) 43 return createProfileParseError(Twine("unable to parse basic block id: '") + 44 S + "'"); 45 unsigned long long BaseBBID; 46 if (getAsUnsignedInteger(Parts[0], 10, BaseBBID)) 47 return createProfileParseError( 48 Twine("unable to parse BB id: '" + Parts[0]) + 49 "': unsigned integer expected"); 50 unsigned long long CloneID = 0; 51 if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) 52 return createProfileParseError(Twine("unable to parse clone id: '") + 53 Parts[1] + "': unsigned integer expected"); 54 return UniqueBBID{static_cast<unsigned>(BaseBBID), 55 static_cast<unsigned>(CloneID)}; 56 } 57 58 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { 59 return getClusterInfoForFunction(FuncName).first; 60 } 61 62 std::pair<bool, SmallVector<BBClusterInfo>> 63 BasicBlockSectionsProfileReader::getClusterInfoForFunction( 64 StringRef FuncName) const { 65 auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); 66 return R != ProgramPathAndClusterInfo.end() 67 ? std::pair(true, R->second.ClusterInfo) 68 : std::pair(false, SmallVector<BBClusterInfo>()); 69 } 70 71 SmallVector<SmallVector<unsigned>> 72 BasicBlockSectionsProfileReader::getClonePathsForFunction( 73 StringRef FuncName) const { 74 return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; 75 } 76 77 // Reads the version 1 basic block sections profile. Profile for each function 78 // is encoded as follows: 79 // m <module_name> 80 // f <function_name_1> <function_name_2> ... 81 // c <bb_id_1> <bb_id_2> <bb_id_3> 82 // c <bb_id_4> <bb_id_5> 83 // ... 84 // Module name specifier (starting with 'm') is optional and allows 85 // distinguishing profile for internal-linkage functions with the same name. If 86 // not specified, it will apply to any function with the same name. Function 87 // name specifier (starting with 'f') can specify multiple function name 88 // aliases. Basic block clusters are specified by 'c' and specify the cluster of 89 // basic blocks, and the internal order in which they must be placed in the same 90 // section. 91 // This profile can also specify cloning paths which instruct the compiler to 92 // clone basic blocks along a path. The cloned blocks are then specified in the 93 // cluster information. 94 // The following profile lists two cloning paths (starting with 'p') for 95 // function bar and places the total 9 blocks within two clusters. The first two 96 // blocks of a cloning path specify the edge along which the path is cloned. For 97 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along 98 // the edge 1->3. Within the given clusters, each cloned block is identified by 99 // "<original block id>.<clone id>". For instance, 3.1 represents the first 100 // clone of block 3. Original blocks are specified just with their block ids. A 101 // block cloned multiple times appears with distinct clone ids. The CFG for bar 102 // is shown below before and after cloning with its final clusters labeled. 103 // 104 // f main 105 // f bar 106 // p 1 3 4 # cloning path 1 107 // p 4 2 # cloning path 2 108 // c 1 3.1 4.1 6 # basic block cluster 1 109 // c 0 2 3 4 2.1 5 # basic block cluster 2 110 // **************************************************************************** 111 // function bar before and after cloning with basic block clusters shown. 112 // **************************************************************************** 113 // .... .............. 114 // 0 -------+ : 0 :---->: 1 ---> 3.1 : 115 // | | : | : :........ | : 116 // v v : v : : v : 117 // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1 118 // | | | | : | : : | : 119 // | v | | : v ....... : v : 120 // | 3 <------+ | : 3 <--+ : : 6 : 121 // | | | : | | : :....: 122 // | v | : v | : 123 // +--- 4 ---> 6 | : 4 | : 124 // | : | | : 125 // | : v | : 126 // | :2.1---+ : cluster 2 127 // | : | ......: 128 // | : v : 129 // +-->: 5 : 130 // .... 131 // **************************************************************************** 132 Error BasicBlockSectionsProfileReader::ReadV1Profile() { 133 auto FI = ProgramPathAndClusterInfo.end(); 134 135 // Current cluster ID corresponding to this function. 136 unsigned CurrentCluster = 0; 137 // Current position in the current cluster. 138 unsigned CurrentPosition = 0; 139 140 // Temporary set to ensure every basic block ID appears once in the clusters 141 // of a function. 142 DenseSet<UniqueBBID> FuncBBIDs; 143 144 // Debug-info-based module filename for the current function. Empty string 145 // means no filename. 146 StringRef DIFilename; 147 148 for (; !LineIt.is_at_eof(); ++LineIt) { 149 StringRef S(*LineIt); 150 char Specifier = S[0]; 151 S = S.drop_front().trim(); 152 SmallVector<StringRef, 4> Values; 153 S.split(Values, ' '); 154 switch (Specifier) { 155 case '@': 156 continue; 157 case 'm': // Module name speicifer. 158 if (Values.size() != 1) { 159 return createProfileParseError(Twine("invalid module name value: '") + 160 S + "'"); 161 } 162 DIFilename = sys::path::remove_leading_dotslash(Values[0]); 163 continue; 164 case 'f': { // Function names specifier. 165 bool FunctionFound = any_of(Values, [&](StringRef Alias) { 166 auto It = FunctionNameToDIFilename.find(Alias); 167 // No match if this function name is not found in this module. 168 if (It == FunctionNameToDIFilename.end()) 169 return false; 170 // Return a match if debug-info-filename is not specified. Otherwise, 171 // check for equality. 172 return DIFilename.empty() || It->second.equals(DIFilename); 173 }); 174 if (!FunctionFound) { 175 // Skip the following profile by setting the profile iterator (FI) to 176 // the past-the-end element. 177 FI = ProgramPathAndClusterInfo.end(); 178 DIFilename = ""; 179 continue; 180 } 181 for (size_t i = 1; i < Values.size(); ++i) 182 FuncAliasMap.try_emplace(Values[i], Values.front()); 183 184 // Prepare for parsing clusters of this function name. 185 // Start a new cluster map for this function name. 186 auto R = ProgramPathAndClusterInfo.try_emplace(Values.front()); 187 // Report error when multiple profiles have been specified for the same 188 // function. 189 if (!R.second) 190 return createProfileParseError("duplicate profile for function '" + 191 Values.front() + "'"); 192 FI = R.first; 193 CurrentCluster = 0; 194 FuncBBIDs.clear(); 195 // We won't need DIFilename anymore. Clean it up to avoid its application 196 // on the next function. 197 DIFilename = ""; 198 continue; 199 } 200 case 'c': // Basic block cluster specifier. 201 // Skip the profile when we the profile iterator (FI) refers to the 202 // past-the-end element. 203 if (FI == ProgramPathAndClusterInfo.end()) 204 continue; 205 // Reset current cluster position. 206 CurrentPosition = 0; 207 for (auto BasicBlockIDStr : Values) { 208 auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr); 209 if (!BasicBlockID) 210 return BasicBlockID.takeError(); 211 if (!FuncBBIDs.insert(*BasicBlockID).second) 212 return createProfileParseError( 213 Twine("duplicate basic block id found '") + BasicBlockIDStr + 214 "'"); 215 216 if (!BasicBlockID->BaseID && CurrentPosition) 217 return createProfileParseError( 218 "entry BB (0) does not begin a cluster."); 219 220 FI->second.ClusterInfo.emplace_back(BBClusterInfo{ 221 *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); 222 } 223 CurrentCluster++; 224 continue; 225 case 'p': { // Basic block cloning path specifier. 226 // Skip the profile when we the profile iterator (FI) refers to the 227 // past-the-end element. 228 if (FI == ProgramPathAndClusterInfo.end()) 229 continue; 230 SmallSet<unsigned, 5> BBsInPath; 231 FI->second.ClonePaths.push_back({}); 232 for (size_t I = 0; I < Values.size(); ++I) { 233 auto BaseBBIDStr = Values[I]; 234 unsigned long long BaseBBID = 0; 235 if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID)) 236 return createProfileParseError(Twine("unsigned integer expected: '") + 237 BaseBBIDStr + "'"); 238 if (I != 0 && !BBsInPath.insert(BaseBBID).second) 239 return createProfileParseError( 240 Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'"); 241 FI->second.ClonePaths.back().push_back(BaseBBID); 242 } 243 continue; 244 } 245 default: 246 return createProfileParseError(Twine("invalid specifier: '") + 247 Twine(Specifier) + "'"); 248 } 249 llvm_unreachable("should not break from this switch statement"); 250 } 251 return Error::success(); 252 } 253 254 Error BasicBlockSectionsProfileReader::ReadV0Profile() { 255 auto FI = ProgramPathAndClusterInfo.end(); 256 // Current cluster ID corresponding to this function. 257 unsigned CurrentCluster = 0; 258 // Current position in the current cluster. 259 unsigned CurrentPosition = 0; 260 261 // Temporary set to ensure every basic block ID appears once in the clusters 262 // of a function. 263 SmallSet<unsigned, 4> FuncBBIDs; 264 265 for (; !LineIt.is_at_eof(); ++LineIt) { 266 StringRef S(*LineIt); 267 if (S[0] == '@') 268 continue; 269 // Check for the leading "!" 270 if (!S.consume_front("!") || S.empty()) 271 break; 272 // Check for second "!" which indicates a cluster of basic blocks. 273 if (S.consume_front("!")) { 274 // Skip the profile when we the profile iterator (FI) refers to the 275 // past-the-end element. 276 if (FI == ProgramPathAndClusterInfo.end()) 277 continue; 278 SmallVector<StringRef, 4> BBIDs; 279 S.split(BBIDs, ' '); 280 // Reset current cluster position. 281 CurrentPosition = 0; 282 for (auto BBIDStr : BBIDs) { 283 unsigned long long BBID; 284 if (getAsUnsignedInteger(BBIDStr, 10, BBID)) 285 return createProfileParseError(Twine("unsigned integer expected: '") + 286 BBIDStr + "'"); 287 if (!FuncBBIDs.insert(BBID).second) 288 return createProfileParseError( 289 Twine("duplicate basic block id found '") + BBIDStr + "'"); 290 if (BBID == 0 && CurrentPosition) 291 return createProfileParseError( 292 "entry BB (0) does not begin a cluster"); 293 294 FI->second.ClusterInfo.emplace_back( 295 BBClusterInfo({{static_cast<unsigned>(BBID), 0}, 296 CurrentCluster, 297 CurrentPosition++})); 298 } 299 CurrentCluster++; 300 } else { 301 // This is a function name specifier. It may include a debug info filename 302 // specifier starting with `M=`. 303 auto [AliasesStr, DIFilenameStr] = S.split(' '); 304 SmallString<128> DIFilename; 305 if (DIFilenameStr.starts_with("M=")) { 306 DIFilename = 307 sys::path::remove_leading_dotslash(DIFilenameStr.substr(2)); 308 if (DIFilename.empty()) 309 return createProfileParseError("empty module name specifier"); 310 } else if (!DIFilenameStr.empty()) { 311 return createProfileParseError("unknown string found: '" + 312 DIFilenameStr + "'"); 313 } 314 // Function aliases are separated using '/'. We use the first function 315 // name for the cluster info mapping and delegate all other aliases to 316 // this one. 317 SmallVector<StringRef, 4> Aliases; 318 AliasesStr.split(Aliases, '/'); 319 bool FunctionFound = any_of(Aliases, [&](StringRef Alias) { 320 auto It = FunctionNameToDIFilename.find(Alias); 321 // No match if this function name is not found in this module. 322 if (It == FunctionNameToDIFilename.end()) 323 return false; 324 // Return a match if debug-info-filename is not specified. Otherwise, 325 // check for equality. 326 return DIFilename.empty() || It->second.equals(DIFilename); 327 }); 328 if (!FunctionFound) { 329 // Skip the following profile by setting the profile iterator (FI) to 330 // the past-the-end element. 331 FI = ProgramPathAndClusterInfo.end(); 332 continue; 333 } 334 for (size_t i = 1; i < Aliases.size(); ++i) 335 FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); 336 337 // Prepare for parsing clusters of this function name. 338 // Start a new cluster map for this function name. 339 auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front()); 340 // Report error when multiple profiles have been specified for the same 341 // function. 342 if (!R.second) 343 return createProfileParseError("duplicate profile for function '" + 344 Aliases.front() + "'"); 345 FI = R.first; 346 CurrentCluster = 0; 347 FuncBBIDs.clear(); 348 } 349 } 350 return Error::success(); 351 } 352 353 // Basic Block Sections can be enabled for a subset of machine basic blocks. 354 // This is done by passing a file containing names of functions for which basic 355 // block sections are desired. Additionally, machine basic block ids of the 356 // functions can also be specified for a finer granularity. Moreover, a cluster 357 // of basic blocks could be assigned to the same section. 358 // Optionally, a debug-info filename can be specified for each function to allow 359 // distinguishing internal-linkage functions of the same name. 360 // A file with basic block sections for all of function main and three blocks 361 // for function foo (of which 1 and 2 are placed in a cluster) looks like this: 362 // (Profile for function foo is only loaded when its debug-info filename 363 // matches 'path/to/foo_file.cc'). 364 // ---------------------------- 365 // list.txt: 366 // !main 367 // !foo M=path/to/foo_file.cc 368 // !!1 2 369 // !!4 370 Error BasicBlockSectionsProfileReader::ReadProfile() { 371 assert(MBuf); 372 373 unsigned long long Version = 0; 374 StringRef FirstLine(*LineIt); 375 if (FirstLine.consume_front("v")) { 376 if (getAsUnsignedInteger(FirstLine, 10, Version)) { 377 return createProfileParseError(Twine("version number expected: '") + 378 FirstLine + "'"); 379 } 380 if (Version > 1) { 381 return createProfileParseError(Twine("invalid profile version: ") + 382 Twine(Version)); 383 } 384 ++LineIt; 385 } 386 387 switch (Version) { 388 case 0: 389 // TODO: Deprecate V0 once V1 is fully integrated downstream. 390 return ReadV0Profile(); 391 case 1: 392 return ReadV1Profile(); 393 default: 394 llvm_unreachable("Invalid profile version."); 395 } 396 } 397 398 bool BasicBlockSectionsProfileReader::doInitialization(Module &M) { 399 if (!MBuf) 400 return false; 401 // Get the function name to debug info filename mapping. 402 FunctionNameToDIFilename.clear(); 403 for (const Function &F : M) { 404 SmallString<128> DIFilename; 405 if (F.isDeclaration()) 406 continue; 407 DISubprogram *Subprogram = F.getSubprogram(); 408 if (Subprogram) { 409 llvm::DICompileUnit *CU = Subprogram->getUnit(); 410 if (CU) 411 DIFilename = sys::path::remove_leading_dotslash(CU->getFilename()); 412 } 413 [[maybe_unused]] bool inserted = 414 FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second; 415 assert(inserted); 416 } 417 if (auto Err = ReadProfile()) 418 report_fatal_error(std::move(Err)); 419 return false; 420 } 421 422 ImmutablePass * 423 llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) { 424 return new BasicBlockSectionsProfileReader(Buf); 425 } 426