1 //===- FunctionPropertiesAnalysis.cpp - Function Properties Analysis ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis 10 // classes used to extract function properties. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/FunctionPropertiesAnalysis.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/SetVector.h" 17 #include "llvm/Analysis/LoopInfo.h" 18 #include "llvm/IR/CFG.h" 19 #include "llvm/IR/Constants.h" 20 #include "llvm/IR/Dominators.h" 21 #include "llvm/IR/Instructions.h" 22 #include "llvm/IR/IntrinsicInst.h" 23 #include "llvm/Support/CommandLine.h" 24 #include <deque> 25 26 using namespace llvm; 27 28 namespace llvm { 29 cl::opt<bool> EnableDetailedFunctionProperties( 30 "enable-detailed-function-properties", cl::Hidden, cl::init(false), 31 cl::desc("Whether or not to compute detailed function properties.")); 32 33 cl::opt<unsigned> BigBasicBlockInstructionThreshold( 34 "big-basic-block-instruction-threshold", cl::Hidden, cl::init(500), 35 cl::desc("The minimum number of instructions a basic block should contain " 36 "before being considered big.")); 37 38 cl::opt<unsigned> MediumBasicBlockInstructionThreshold( 39 "medium-basic-block-instruction-threshold", cl::Hidden, cl::init(15), 40 cl::desc("The minimum number of instructions a basic block should contain " 41 "before being considered medium-sized.")); 42 } // namespace llvm 43 44 static cl::opt<unsigned> CallWithManyArgumentsThreshold( 45 "call-with-many-arguments-threshold", cl::Hidden, cl::init(4), 46 cl::desc("The minimum number of arguments a function call must have before " 47 "it is considered having many arguments.")); 48 49 namespace { 50 int64_t getNumBlocksFromCond(const BasicBlock &BB) { 51 int64_t Ret = 0; 52 if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) { 53 if (BI->isConditional()) 54 Ret += BI->getNumSuccessors(); 55 } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) { 56 Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest())); 57 } 58 return Ret; 59 } 60 61 int64_t getUses(const Function &F) { 62 return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses(); 63 } 64 } // namespace 65 66 void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB) { 67 updateForBB(BB, +1); 68 } 69 70 void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB, 71 int64_t Direction) { 72 assert(Direction == 1 || Direction == -1); 73 BasicBlockCount += Direction; 74 BlocksReachedFromConditionalInstruction += 75 (Direction * getNumBlocksFromCond(BB)); 76 for (const auto &I : BB) { 77 if (auto *CS = dyn_cast<CallBase>(&I)) { 78 const auto *Callee = CS->getCalledFunction(); 79 if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) 80 DirectCallsToDefinedFunctions += Direction; 81 } 82 if (I.getOpcode() == Instruction::Load) { 83 LoadInstCount += Direction; 84 } else if (I.getOpcode() == Instruction::Store) { 85 StoreInstCount += Direction; 86 } 87 } 88 TotalInstructionCount += Direction * BB.sizeWithoutDebug(); 89 90 if (EnableDetailedFunctionProperties) { 91 unsigned SuccessorCount = succ_size(&BB); 92 if (SuccessorCount == 1) 93 BasicBlocksWithSingleSuccessor += Direction; 94 else if (SuccessorCount == 2) 95 BasicBlocksWithTwoSuccessors += Direction; 96 else if (SuccessorCount > 2) 97 BasicBlocksWithMoreThanTwoSuccessors += Direction; 98 99 unsigned PredecessorCount = pred_size(&BB); 100 if (PredecessorCount == 1) 101 BasicBlocksWithSinglePredecessor += Direction; 102 else if (PredecessorCount == 2) 103 BasicBlocksWithTwoPredecessors += Direction; 104 else if (PredecessorCount > 2) 105 BasicBlocksWithMoreThanTwoPredecessors += Direction; 106 107 if (TotalInstructionCount > BigBasicBlockInstructionThreshold) 108 BigBasicBlocks += Direction; 109 else if (TotalInstructionCount > MediumBasicBlockInstructionThreshold) 110 MediumBasicBlocks += Direction; 111 else 112 SmallBasicBlocks += Direction; 113 114 // Calculate critical edges by looking through all successors of a basic 115 // block that has multiple successors and finding ones that have multiple 116 // predecessors, which represent critical edges. 117 if (SuccessorCount > 1) { 118 for (const auto *Successor : successors(&BB)) { 119 if (pred_size(Successor) > 1) 120 CriticalEdgeCount += Direction; 121 } 122 } 123 124 ControlFlowEdgeCount += Direction * SuccessorCount; 125 126 if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) { 127 if (!BI->isConditional()) 128 UnconditionalBranchCount += Direction; 129 } 130 131 for (const Instruction &I : BB.instructionsWithoutDebug()) { 132 if (I.isCast()) 133 CastInstructionCount += Direction; 134 135 if (I.getType()->isFloatTy()) 136 FloatingPointInstructionCount += Direction; 137 else if (I.getType()->isIntegerTy()) 138 IntegerInstructionCount += Direction; 139 140 if (isa<IntrinsicInst>(I)) 141 ++IntrinsicCount; 142 143 if (const auto *Call = dyn_cast<CallInst>(&I)) { 144 if (Call->isIndirectCall()) 145 IndirectCallCount += Direction; 146 else 147 DirectCallCount += Direction; 148 149 if (Call->getType()->isIntegerTy()) 150 CallReturnsIntegerCount += Direction; 151 else if (Call->getType()->isFloatingPointTy()) 152 CallReturnsFloatCount += Direction; 153 else if (Call->getType()->isPointerTy()) 154 CallReturnsPointerCount += Direction; 155 else if (Call->getType()->isVectorTy()) { 156 if (Call->getType()->getScalarType()->isIntegerTy()) 157 CallReturnsVectorIntCount += Direction; 158 else if (Call->getType()->getScalarType()->isFloatingPointTy()) 159 CallReturnsVectorFloatCount += Direction; 160 else if (Call->getType()->getScalarType()->isPointerTy()) 161 CallReturnsVectorPointerCount += Direction; 162 } 163 164 if (Call->arg_size() > CallWithManyArgumentsThreshold) 165 CallWithManyArgumentsCount += Direction; 166 167 for (const auto &Arg : Call->args()) { 168 if (Arg->getType()->isPointerTy()) { 169 CallWithPointerArgumentCount += Direction; 170 break; 171 } 172 } 173 } 174 175 #define COUNT_OPERAND(OPTYPE) \ 176 if (isa<OPTYPE>(Operand)) { \ 177 OPTYPE##OperandCount += Direction; \ 178 continue; \ 179 } 180 181 for (unsigned int OperandIndex = 0; OperandIndex < I.getNumOperands(); 182 ++OperandIndex) { 183 Value *Operand = I.getOperand(OperandIndex); 184 COUNT_OPERAND(GlobalValue) 185 COUNT_OPERAND(ConstantInt) 186 COUNT_OPERAND(ConstantFP) 187 COUNT_OPERAND(Constant) 188 COUNT_OPERAND(Instruction) 189 COUNT_OPERAND(BasicBlock) 190 COUNT_OPERAND(InlineAsm) 191 COUNT_OPERAND(Argument) 192 193 // We only get to this point if we haven't matched any of the other 194 // operand types. 195 UnknownOperandCount += Direction; 196 } 197 198 #undef CHECK_OPERAND 199 } 200 } 201 } 202 203 void FunctionPropertiesInfo::updateAggregateStats(const Function &F, 204 const LoopInfo &LI) { 205 206 Uses = getUses(F); 207 TopLevelLoopCount = llvm::size(LI); 208 MaxLoopDepth = 0; 209 std::deque<const Loop *> Worklist; 210 llvm::append_range(Worklist, LI); 211 while (!Worklist.empty()) { 212 const auto *L = Worklist.front(); 213 MaxLoopDepth = 214 std::max(MaxLoopDepth, static_cast<int64_t>(L->getLoopDepth())); 215 Worklist.pop_front(); 216 llvm::append_range(Worklist, L->getSubLoops()); 217 } 218 } 219 220 FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo( 221 Function &F, FunctionAnalysisManager &FAM) { 222 return getFunctionPropertiesInfo(F, FAM.getResult<DominatorTreeAnalysis>(F), 223 FAM.getResult<LoopAnalysis>(F)); 224 } 225 226 FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo( 227 const Function &F, const DominatorTree &DT, const LoopInfo &LI) { 228 229 FunctionPropertiesInfo FPI; 230 for (const auto &BB : F) 231 if (DT.isReachableFromEntry(&BB)) 232 FPI.reIncludeBB(BB); 233 FPI.updateAggregateStats(F, LI); 234 return FPI; 235 } 236 237 void FunctionPropertiesInfo::print(raw_ostream &OS) const { 238 #define PRINT_PROPERTY(PROP_NAME) OS << #PROP_NAME ": " << PROP_NAME << "\n"; 239 240 PRINT_PROPERTY(BasicBlockCount) 241 PRINT_PROPERTY(BlocksReachedFromConditionalInstruction) 242 PRINT_PROPERTY(Uses) 243 PRINT_PROPERTY(DirectCallsToDefinedFunctions) 244 PRINT_PROPERTY(LoadInstCount) 245 PRINT_PROPERTY(StoreInstCount) 246 PRINT_PROPERTY(MaxLoopDepth) 247 PRINT_PROPERTY(TopLevelLoopCount) 248 PRINT_PROPERTY(TotalInstructionCount) 249 250 if (EnableDetailedFunctionProperties) { 251 PRINT_PROPERTY(BasicBlocksWithSingleSuccessor) 252 PRINT_PROPERTY(BasicBlocksWithTwoSuccessors) 253 PRINT_PROPERTY(BasicBlocksWithMoreThanTwoSuccessors) 254 PRINT_PROPERTY(BasicBlocksWithSinglePredecessor) 255 PRINT_PROPERTY(BasicBlocksWithTwoPredecessors) 256 PRINT_PROPERTY(BasicBlocksWithMoreThanTwoPredecessors) 257 PRINT_PROPERTY(BigBasicBlocks) 258 PRINT_PROPERTY(MediumBasicBlocks) 259 PRINT_PROPERTY(SmallBasicBlocks) 260 PRINT_PROPERTY(CastInstructionCount) 261 PRINT_PROPERTY(FloatingPointInstructionCount) 262 PRINT_PROPERTY(IntegerInstructionCount) 263 PRINT_PROPERTY(ConstantIntOperandCount) 264 PRINT_PROPERTY(ConstantFPOperandCount) 265 PRINT_PROPERTY(ConstantOperandCount) 266 PRINT_PROPERTY(InstructionOperandCount) 267 PRINT_PROPERTY(BasicBlockOperandCount) 268 PRINT_PROPERTY(GlobalValueOperandCount) 269 PRINT_PROPERTY(InlineAsmOperandCount) 270 PRINT_PROPERTY(ArgumentOperandCount) 271 PRINT_PROPERTY(UnknownOperandCount) 272 PRINT_PROPERTY(CriticalEdgeCount) 273 PRINT_PROPERTY(ControlFlowEdgeCount) 274 PRINT_PROPERTY(UnconditionalBranchCount) 275 PRINT_PROPERTY(IntrinsicCount) 276 PRINT_PROPERTY(DirectCallCount) 277 PRINT_PROPERTY(IndirectCallCount) 278 PRINT_PROPERTY(CallReturnsIntegerCount) 279 PRINT_PROPERTY(CallReturnsFloatCount) 280 PRINT_PROPERTY(CallReturnsPointerCount) 281 PRINT_PROPERTY(CallReturnsVectorIntCount) 282 PRINT_PROPERTY(CallReturnsVectorFloatCount) 283 PRINT_PROPERTY(CallReturnsVectorPointerCount) 284 PRINT_PROPERTY(CallWithManyArgumentsCount) 285 PRINT_PROPERTY(CallWithPointerArgumentCount) 286 } 287 288 #undef PRINT_PROPERTY 289 290 OS << "\n"; 291 } 292 293 AnalysisKey FunctionPropertiesAnalysis::Key; 294 295 FunctionPropertiesInfo 296 FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 297 return FunctionPropertiesInfo::getFunctionPropertiesInfo(F, FAM); 298 } 299 300 PreservedAnalyses 301 FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { 302 OS << "Printing analysis results of CFA for function " 303 << "'" << F.getName() << "':" 304 << "\n"; 305 AM.getResult<FunctionPropertiesAnalysis>(F).print(OS); 306 return PreservedAnalyses::all(); 307 } 308 309 FunctionPropertiesUpdater::FunctionPropertiesUpdater( 310 FunctionPropertiesInfo &FPI, CallBase &CB) 311 : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) { 312 assert(isa<CallInst>(CB) || isa<InvokeInst>(CB)); 313 // For BBs that are likely to change, we subtract from feature totals their 314 // contribution. Some features, like max loop counts or depths, are left 315 // invalid, as they will be updated post-inlining. 316 SmallPtrSet<const BasicBlock *, 4> LikelyToChangeBBs; 317 // The CB BB will change - it'll either be split or the callee's body (single 318 // BB) will be pasted in. 319 LikelyToChangeBBs.insert(&CallSiteBB); 320 321 // The caller's entry BB may change due to new alloca instructions. 322 LikelyToChangeBBs.insert(&*Caller.begin()); 323 324 // The successors may become unreachable in the case of `invoke` inlining. 325 // We track successors separately, too, because they form a boundary, together 326 // with the CB BB ('Entry') between which the inlined callee will be pasted. 327 Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB)); 328 329 // the outcome of the inlining may be that some edges get lost (DCEd BBs 330 // because inlining brought some constant, for example). We don't know which 331 // edges will be removed, so we list all of them as potentially removable. 332 // Some BBs have (at this point) duplicate edges. Remove duplicates, otherwise 333 // the DT updater will not apply changes correctly. 334 DenseSet<const BasicBlock *> Inserted; 335 for (auto *Succ : successors(&CallSiteBB)) 336 if (Inserted.insert(Succ).second) 337 DomTreeUpdates.emplace_back(DominatorTree::UpdateKind::Delete, 338 const_cast<BasicBlock *>(&CallSiteBB), 339 const_cast<BasicBlock *>(Succ)); 340 // Reuse Inserted (which has some allocated capacity at this point) below, if 341 // we have an invoke. 342 Inserted.clear(); 343 // Inlining only handles invoke and calls. If this is an invoke, and inlining 344 // it pulls another invoke, the original landing pad may get split, so as to 345 // share its content with other potential users. So the edge up to which we 346 // need to invalidate and then re-account BB data is the successors of the 347 // current landing pad. We can leave the current lp, too - if it doesn't get 348 // split, then it will be the place traversal stops. Either way, the 349 // discounted BBs will be checked if reachable and re-added. 350 if (const auto *II = dyn_cast<InvokeInst>(&CB)) { 351 const auto *UnwindDest = II->getUnwindDest(); 352 Successors.insert(succ_begin(UnwindDest), succ_end(UnwindDest)); 353 // Same idea as above, we pretend we lose all these edges. 354 for (auto *Succ : successors(UnwindDest)) 355 if (Inserted.insert(Succ).second) 356 DomTreeUpdates.emplace_back(DominatorTree::UpdateKind::Delete, 357 const_cast<BasicBlock *>(UnwindDest), 358 const_cast<BasicBlock *>(Succ)); 359 } 360 361 // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop). 362 // We are only interested in BBs the graph moves past the callsite BB to 363 // define the frontier past which we don't want to re-process BBs. Including 364 // the callsite BB in this case would prematurely stop the traversal in 365 // finish(). 366 Successors.erase(&CallSiteBB); 367 368 for (const auto *BB : Successors) 369 LikelyToChangeBBs.insert(BB); 370 371 // Commit the change. While some of the BBs accounted for above may play dual 372 // role - e.g. caller's entry BB may be the same as the callsite BB - set 373 // insertion semantics make sure we account them once. This needs to be 374 // followed in `finish`, too. 375 for (const auto *BB : LikelyToChangeBBs) 376 FPI.updateForBB(*BB, -1); 377 } 378 379 DominatorTree &FunctionPropertiesUpdater::getUpdatedDominatorTree( 380 FunctionAnalysisManager &FAM) const { 381 auto &DT = 382 FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(Caller)); 383 384 SmallVector<DominatorTree::UpdateType, 2> FinalDomTreeUpdates; 385 386 DenseSet<const BasicBlock *> Inserted; 387 for (auto *Succ : successors(&CallSiteBB)) 388 if (Inserted.insert(Succ).second) 389 FinalDomTreeUpdates.push_back({DominatorTree::UpdateKind::Insert, 390 const_cast<BasicBlock *>(&CallSiteBB), 391 const_cast<BasicBlock *>(Succ)}); 392 393 // Perform the deletes last, so that any new nodes connected to nodes 394 // participating in the edge deletion are known to the DT. 395 for (auto &Upd : DomTreeUpdates) 396 if (!llvm::is_contained(successors(Upd.getFrom()), Upd.getTo())) 397 FinalDomTreeUpdates.push_back(Upd); 398 399 DT.applyUpdates(FinalDomTreeUpdates); 400 #ifdef EXPENSIVE_CHECKS 401 assert(DT.verify(DominatorTree::VerificationLevel::Full)); 402 #endif 403 return DT; 404 } 405 406 void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const { 407 // Update feature values from the BBs that were copied from the callee, or 408 // might have been modified because of inlining. The latter have been 409 // subtracted in the FunctionPropertiesUpdater ctor. 410 // There could be successors that were reached before but now are only 411 // reachable from elsewhere in the CFG. 412 // One example is the following diamond CFG (lines are arrows pointing down): 413 // A 414 // / \ 415 // B C 416 // | | 417 // | D 418 // | | 419 // | E 420 // \ / 421 // F 422 // There's a call site in C that is inlined. Upon doing that, it turns out 423 // it expands to 424 // call void @llvm.trap() 425 // unreachable 426 // F isn't reachable from C anymore, but we did discount it when we set up 427 // FunctionPropertiesUpdater, so we need to re-include it here. 428 // At the same time, D and E were reachable before, but now are not anymore, 429 // so we need to leave D out (we discounted it at setup), and explicitly 430 // remove E. 431 SetVector<const BasicBlock *> Reinclude; 432 SetVector<const BasicBlock *> Unreachable; 433 auto &DT = getUpdatedDominatorTree(FAM); 434 435 if (&CallSiteBB != &*Caller.begin()) 436 Reinclude.insert(&*Caller.begin()); 437 438 // Distribute the successors to the 2 buckets. 439 for (const auto *Succ : Successors) 440 if (DT.isReachableFromEntry(Succ)) 441 Reinclude.insert(Succ); 442 else 443 Unreachable.insert(Succ); 444 445 // For reinclusion, we want to stop at the reachable successors, who are at 446 // the beginning of the worklist; but, starting from the callsite bb and 447 // ending at those successors, we also want to perform a traversal. 448 // IncludeSuccessorsMark is the index after which we include successors. 449 const auto IncludeSuccessorsMark = Reinclude.size(); 450 bool CSInsertion = Reinclude.insert(&CallSiteBB); 451 (void)CSInsertion; 452 assert(CSInsertion); 453 for (size_t I = 0; I < Reinclude.size(); ++I) { 454 const auto *BB = Reinclude[I]; 455 FPI.reIncludeBB(*BB); 456 if (I >= IncludeSuccessorsMark) 457 Reinclude.insert(succ_begin(BB), succ_end(BB)); 458 } 459 460 // For exclusion, we don't need to exclude the set of BBs that were successors 461 // before and are now unreachable, because we already did that at setup. For 462 // the rest, as long as a successor is unreachable, we want to explicitly 463 // exclude it. 464 const auto AlreadyExcludedMark = Unreachable.size(); 465 for (size_t I = 0; I < Unreachable.size(); ++I) { 466 const auto *U = Unreachable[I]; 467 if (I >= AlreadyExcludedMark) 468 FPI.updateForBB(*U, -1); 469 for (const auto *Succ : successors(U)) 470 if (!DT.isReachableFromEntry(Succ)) 471 Unreachable.insert(Succ); 472 } 473 474 const auto &LI = FAM.getResult<LoopAnalysis>(const_cast<Function &>(Caller)); 475 FPI.updateAggregateStats(Caller, LI); 476 #ifdef EXPENSIVE_CHECKS 477 assert(isUpdateValid(Caller, FPI, FAM)); 478 #endif 479 } 480 481 bool FunctionPropertiesUpdater::isUpdateValid(Function &F, 482 const FunctionPropertiesInfo &FPI, 483 FunctionAnalysisManager &FAM) { 484 if (!FAM.getResult<DominatorTreeAnalysis>(F).verify( 485 DominatorTree::VerificationLevel::Full)) 486 return false; 487 DominatorTree DT(F); 488 LoopInfo LI(DT); 489 auto Fresh = FunctionPropertiesInfo::getFunctionPropertiesInfo(F, DT, LI); 490 return FPI == Fresh; 491 } 492