1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the basic block sections profile reader pass. It parses 10 // and stores the basic block sections profile file (which is specified via the 11 // `-basic-block-sections` flag). 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallSet.h" 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringMap.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/IR/DebugInfoMetadata.h" 23 #include "llvm/Pass.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include "llvm/Support/LineIterator.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/Path.h" 29 #include <llvm/ADT/STLExtras.h> 30 31 using namespace llvm; 32 33 char BasicBlockSectionsProfileReader::ID = 0; 34 INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", 35 "Reads and parses a basic block sections profile.", false, 36 false) 37 38 Expected<ProfileBBID> 39 BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const { 40 SmallVector<StringRef, 2> Parts; 41 S.split(Parts, '.'); 42 if (Parts.size() > 2) 43 return createProfileParseError(Twine("unable to parse basic block id: '") + 44 S + "'"); 45 unsigned long long BBID; 46 if (getAsUnsignedInteger(Parts[0], 10, BBID)) 47 return createProfileParseError( 48 Twine("unable to parse BB id: '" + Parts[0]) + 49 "': unsigned integer expected"); 50 unsigned long long CloneID = 0; 51 if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) 52 return createProfileParseError(Twine("unable to parse clone id: '") + 53 Parts[1] + "': unsigned integer expected"); 54 return ProfileBBID{static_cast<unsigned>(BBID), 55 static_cast<unsigned>(CloneID)}; 56 } 57 58 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { 59 return getPathAndClusterInfoForFunction(FuncName).first; 60 } 61 62 std::pair<bool, FunctionPathAndClusterInfo> 63 BasicBlockSectionsProfileReader::getPathAndClusterInfoForFunction( 64 StringRef FuncName) const { 65 auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); 66 return R != ProgramPathAndClusterInfo.end() 67 ? std::pair(true, R->second) 68 : std::pair(false, FunctionPathAndClusterInfo()); 69 } 70 71 // Reads the version 1 basic block sections profile. Profile for each function 72 // is encoded as follows: 73 // m <module_name> 74 // f <function_name_1> <function_name_2> ... 75 // c <bb_id_1> <bb_id_2> <bb_id_3> 76 // c <bb_id_4> <bb_id_5> 77 // ... 78 // Module name specifier (starting with 'm') is optional and allows 79 // distinguishing profile for internal-linkage functions with the same name. If 80 // not specified, it will apply to any function with the same name. Function 81 // name specifier (starting with 'f') can specify multiple function name 82 // aliases. Basic block clusters are specified by 'c' and specify the cluster of 83 // basic blocks, and the internal order in which they must be placed in the same 84 // section. 85 // This profile can also specify cloning paths which instruct the compiler to 86 // clone basic blocks along a path. The cloned blocks are then specified in the 87 // cluster information. 88 // The following profile lists two cloning paths (starting with 'p') for 89 // function bar and places the total 9 blocks within two clusters. The first two 90 // blocks of a cloning path specify the edge along which the path is cloned. For 91 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along 92 // the edge 1->3. Within the given clusters, each cloned block is identified by 93 // "<original block id>.<clone id>". For instance, 3.1 represents the first 94 // clone of block 3. Original blocks are specified just with their block ids. A 95 // block cloned multiple times appears with distinct clone ids. The CFG for bar 96 // is shown below before and after cloning with its final clusters labeled. 97 // 98 // f main 99 // f bar 100 // p 1 3 4 # cloning path 1 101 // p 4 2 # cloning path 2 102 // c 1 3.1 4.1 6 # basic block cluster 1 103 // c 0 2 3 4 2.1 5 # basic block cluster 2 104 // **************************************************************************** 105 // function bar before and after cloning with basic block clusters shown. 106 // **************************************************************************** 107 // .... .............. 108 // 0 -------+ : 0 :---->: 1 ---> 3.1 : 109 // | | : | : :........ | : 110 // v v : v : : v : 111 // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1 112 // | | | | : | : : | : 113 // | v | | : v ....... : v : 114 // | 3 <------+ | : 3 <--+ : : 6 : 115 // | | | : | | : :....: 116 // | v | : v | : 117 // +--- 4 ---> 6 | : 4 | : 118 // | : | | : 119 // | : v | : 120 // | :2.1---+ : cluster 2 121 // | : | ......: 122 // | : v : 123 // +-->: 5 : 124 // .... 125 // **************************************************************************** 126 Error BasicBlockSectionsProfileReader::ReadV1Profile() { 127 auto FI = ProgramPathAndClusterInfo.end(); 128 129 // Current cluster ID corresponding to this function. 130 unsigned CurrentCluster = 0; 131 // Current position in the current cluster. 132 unsigned CurrentPosition = 0; 133 134 // Temporary set to ensure every basic block ID appears once in the clusters 135 // of a function. 136 DenseSet<ProfileBBID> FuncBBIDs; 137 138 // Debug-info-based module filename for the current function. Empty string 139 // means no filename. 140 StringRef DIFilename; 141 142 for (; !LineIt.is_at_eof(); ++LineIt) { 143 StringRef S(*LineIt); 144 char Specifier = S[0]; 145 S = S.drop_front().trim(); 146 SmallVector<StringRef, 4> Values; 147 S.split(Values, ' '); 148 switch (Specifier) { 149 case '@': 150 continue; 151 case 'm': // Module name speicifer. 152 if (Values.size() != 1) { 153 return createProfileParseError(Twine("invalid module name value: '") + 154 S + "'"); 155 } 156 DIFilename = sys::path::remove_leading_dotslash(Values[0]); 157 continue; 158 case 'f': { // Function names specifier. 159 bool FunctionFound = any_of(Values, [&](StringRef Alias) { 160 auto It = FunctionNameToDIFilename.find(Alias); 161 // No match if this function name is not found in this module. 162 if (It == FunctionNameToDIFilename.end()) 163 return false; 164 // Return a match if debug-info-filename is not specified. Otherwise, 165 // check for equality. 166 return DIFilename.empty() || It->second.equals(DIFilename); 167 }); 168 if (!FunctionFound) { 169 // Skip the following profile by setting the profile iterator (FI) to 170 // the past-the-end element. 171 FI = ProgramPathAndClusterInfo.end(); 172 DIFilename = ""; 173 continue; 174 } 175 for (size_t i = 1; i < Values.size(); ++i) 176 FuncAliasMap.try_emplace(Values[i], Values.front()); 177 178 // Prepare for parsing clusters of this function name. 179 // Start a new cluster map for this function name. 180 auto R = ProgramPathAndClusterInfo.try_emplace(Values.front()); 181 // Report error when multiple profiles have been specified for the same 182 // function. 183 if (!R.second) 184 return createProfileParseError("duplicate profile for function '" + 185 Values.front() + "'"); 186 FI = R.first; 187 CurrentCluster = 0; 188 FuncBBIDs.clear(); 189 // We won't need DIFilename anymore. Clean it up to avoid its application 190 // on the next function. 191 DIFilename = ""; 192 continue; 193 } 194 case 'c': // Basic block cluster specifier. 195 // Skip the profile when we the profile iterator (FI) refers to the 196 // past-the-end element. 197 if (FI == ProgramPathAndClusterInfo.end()) 198 continue; 199 // Reset current cluster position. 200 CurrentPosition = 0; 201 for (auto BasicBlockIDStr : Values) { 202 auto BasicBlockID = parseProfileBBID(BasicBlockIDStr); 203 if (!BasicBlockID) 204 return BasicBlockID.takeError(); 205 if (!FuncBBIDs.insert(*BasicBlockID).second) 206 return createProfileParseError( 207 Twine("duplicate basic block id found '") + BasicBlockIDStr + 208 "'"); 209 210 if (!BasicBlockID->BBID && CurrentPosition) 211 return createProfileParseError( 212 "entry BB (0) does not begin a cluster."); 213 214 FI->second.ClusterInfo.emplace_back(BBClusterInfo<ProfileBBID>{ 215 *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); 216 } 217 CurrentCluster++; 218 continue; 219 case 'p': { // Basic block cloning path specifier. 220 SmallSet<unsigned, 5> BBsInPath; 221 FI->second.ClonePaths.push_back({}); 222 for (size_t I = 0; I < Values.size(); ++I) { 223 auto BBIDStr = Values[I]; 224 unsigned long long BBID = 0; 225 if (getAsUnsignedInteger(BBIDStr, 10, BBID)) 226 return createProfileParseError(Twine("unsigned integer expected: '") + 227 BBIDStr + "'"); 228 if (I != 0 && !BBsInPath.insert(BBID).second) 229 return createProfileParseError( 230 Twine("duplicate cloned block in path: '") + BBIDStr + "'"); 231 FI->second.ClonePaths.back().push_back(BBID); 232 } 233 continue; 234 } 235 default: 236 return createProfileParseError(Twine("invalid specifier: '") + 237 Twine(Specifier) + "'"); 238 } 239 llvm_unreachable("should not break from this switch statement"); 240 } 241 return Error::success(); 242 } 243 244 Error BasicBlockSectionsProfileReader::ReadV0Profile() { 245 auto FI = ProgramPathAndClusterInfo.end(); 246 // Current cluster ID corresponding to this function. 247 unsigned CurrentCluster = 0; 248 // Current position in the current cluster. 249 unsigned CurrentPosition = 0; 250 251 // Temporary set to ensure every basic block ID appears once in the clusters 252 // of a function. 253 SmallSet<unsigned, 4> FuncBBIDs; 254 255 for (; !LineIt.is_at_eof(); ++LineIt) { 256 StringRef S(*LineIt); 257 if (S[0] == '@') 258 continue; 259 // Check for the leading "!" 260 if (!S.consume_front("!") || S.empty()) 261 break; 262 // Check for second "!" which indicates a cluster of basic blocks. 263 if (S.consume_front("!")) { 264 // Skip the profile when we the profile iterator (FI) refers to the 265 // past-the-end element. 266 if (FI == ProgramPathAndClusterInfo.end()) 267 continue; 268 SmallVector<StringRef, 4> BBIDs; 269 S.split(BBIDs, ' '); 270 // Reset current cluster position. 271 CurrentPosition = 0; 272 for (auto BBIDStr : BBIDs) { 273 unsigned long long BBID; 274 if (getAsUnsignedInteger(BBIDStr, 10, BBID)) 275 return createProfileParseError(Twine("unsigned integer expected: '") + 276 BBIDStr + "'"); 277 if (!FuncBBIDs.insert(BBID).second) 278 return createProfileParseError( 279 Twine("duplicate basic block id found '") + BBIDStr + "'"); 280 if (BBID == 0 && CurrentPosition) 281 return createProfileParseError( 282 "entry BB (0) does not begin a cluster"); 283 284 FI->second.ClusterInfo.emplace_back( 285 BBClusterInfo<ProfileBBID>({{static_cast<unsigned>(BBID), 0}, 286 CurrentCluster, 287 CurrentPosition++})); 288 } 289 CurrentCluster++; 290 } else { 291 // This is a function name specifier. It may include a debug info filename 292 // specifier starting with `M=`. 293 auto [AliasesStr, DIFilenameStr] = S.split(' '); 294 SmallString<128> DIFilename; 295 if (DIFilenameStr.startswith("M=")) { 296 DIFilename = 297 sys::path::remove_leading_dotslash(DIFilenameStr.substr(2)); 298 if (DIFilename.empty()) 299 return createProfileParseError("empty module name specifier"); 300 } else if (!DIFilenameStr.empty()) { 301 return createProfileParseError("unknown string found: '" + 302 DIFilenameStr + "'"); 303 } 304 // Function aliases are separated using '/'. We use the first function 305 // name for the cluster info mapping and delegate all other aliases to 306 // this one. 307 SmallVector<StringRef, 4> Aliases; 308 AliasesStr.split(Aliases, '/'); 309 bool FunctionFound = any_of(Aliases, [&](StringRef Alias) { 310 auto It = FunctionNameToDIFilename.find(Alias); 311 // No match if this function name is not found in this module. 312 if (It == FunctionNameToDIFilename.end()) 313 return false; 314 // Return a match if debug-info-filename is not specified. Otherwise, 315 // check for equality. 316 return DIFilename.empty() || It->second.equals(DIFilename); 317 }); 318 if (!FunctionFound) { 319 // Skip the following profile by setting the profile iterator (FI) to 320 // the past-the-end element. 321 FI = ProgramPathAndClusterInfo.end(); 322 continue; 323 } 324 for (size_t i = 1; i < Aliases.size(); ++i) 325 FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); 326 327 // Prepare for parsing clusters of this function name. 328 // Start a new cluster map for this function name. 329 auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front()); 330 // Report error when multiple profiles have been specified for the same 331 // function. 332 if (!R.second) 333 return createProfileParseError("duplicate profile for function '" + 334 Aliases.front() + "'"); 335 FI = R.first; 336 CurrentCluster = 0; 337 FuncBBIDs.clear(); 338 } 339 } 340 return Error::success(); 341 } 342 343 // Basic Block Sections can be enabled for a subset of machine basic blocks. 344 // This is done by passing a file containing names of functions for which basic 345 // block sections are desired. Additionally, machine basic block ids of the 346 // functions can also be specified for a finer granularity. Moreover, a cluster 347 // of basic blocks could be assigned to the same section. 348 // Optionally, a debug-info filename can be specified for each function to allow 349 // distinguishing internal-linkage functions of the same name. 350 // A file with basic block sections for all of function main and three blocks 351 // for function foo (of which 1 and 2 are placed in a cluster) looks like this: 352 // (Profile for function foo is only loaded when its debug-info filename 353 // matches 'path/to/foo_file.cc'). 354 // ---------------------------- 355 // list.txt: 356 // !main 357 // !foo M=path/to/foo_file.cc 358 // !!1 2 359 // !!4 360 Error BasicBlockSectionsProfileReader::ReadProfile() { 361 assert(MBuf); 362 363 unsigned long long Version = 0; 364 StringRef FirstLine(*LineIt); 365 if (FirstLine.consume_front("v")) { 366 if (getAsUnsignedInteger(FirstLine, 10, Version)) { 367 return createProfileParseError(Twine("version number expected: '") + 368 FirstLine + "'"); 369 } 370 if (Version > 1) { 371 return createProfileParseError(Twine("invalid profile version: ") + 372 Twine(Version)); 373 } 374 ++LineIt; 375 } 376 377 switch (Version) { 378 case 0: 379 // TODO: Deprecate V0 once V1 is fully integrated downstream. 380 return ReadV0Profile(); 381 case 1: 382 return ReadV1Profile(); 383 default: 384 llvm_unreachable("Invalid profile version."); 385 } 386 } 387 388 bool BasicBlockSectionsProfileReader::doInitialization(Module &M) { 389 if (!MBuf) 390 return false; 391 // Get the function name to debug info filename mapping. 392 FunctionNameToDIFilename.clear(); 393 for (const Function &F : M) { 394 SmallString<128> DIFilename; 395 if (F.isDeclaration()) 396 continue; 397 DISubprogram *Subprogram = F.getSubprogram(); 398 if (Subprogram) { 399 llvm::DICompileUnit *CU = Subprogram->getUnit(); 400 if (CU) 401 DIFilename = sys::path::remove_leading_dotslash(CU->getFilename()); 402 } 403 [[maybe_unused]] bool inserted = 404 FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second; 405 assert(inserted); 406 } 407 if (auto Err = ReadProfile()) 408 report_fatal_error(std::move(Err)); 409 return false; 410 } 411 412 ImmutablePass * 413 llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) { 414 return new BasicBlockSectionsProfileReader(Buf); 415 } 416