//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");
STATISTIC(NumCold, "Number of functions marked as cold");

STATISTIC(NumThinLinkNoRecurse,
          "Number of functions marked as norecurse during thinlink");
STATISTIC(NumThinLinkNoUnwind,
          "Number of functions marked as nounwind during thinlink");

static cl::opt<bool> EnableNonnullArgPropagation(
    "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull argument attributes from callsites to "
             "caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop inferring nofree attribute during function-attrs pass"));

static cl::opt<bool> DisableThinLTOPropagation(
    "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
    cl::desc("Don't propagate function-attrs in thinLTO"));

namespace {

using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace

static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
                         ModRefInfo MR, AAResults &AAR) {
  // Ignore accesses to known-invariant or local memory.
  MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocals=*/true);
  if (isNoModRef(MR))
    return;

  const Value *UO = getUnderlyingObjectAggressive(Loc.Ptr);
  if (isa<AllocaInst>(UO))
    return;
  if (isa<Argument>(UO)) {
    ME |= MemoryEffects::argMemOnly(MR);
    return;
  }

  // If it's not an identified object, it might be an argument.
  if (!isIdentifiedObject(UO))
    ME |= MemoryEffects::argMemOnly(MR);
  ME |= MemoryEffects(IRMemLocation::Other, MR);
}

static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
                       ModRefInfo ArgMR, AAResults &AAR) {
  for (const Value *Arg : Call->args()) {
    if (!Arg->getType()->isPtrOrPtrVectorTy())
      continue;

    addLocAccess(ME,
                 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()),
                 ArgMR, AAR);
  }
}

/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false, the
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
///
/// The return value is split into two parts: memory effects that always apply,
/// and additional memory effects that apply if any of the functions in the SCC
/// can access argmem.
static std::pair<MemoryEffects, MemoryEffects>
checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
                          const SCCNodeSet &SCCNodes) {
  MemoryEffects OrigME = AAR.getMemoryEffects(&F);
  if (OrigME.doesNotAccessMemory())
    // Already perfect!
    return {OrigME, MemoryEffects::none()};

  if (!ThisBody)
    return {OrigME, MemoryEffects::none()};

  MemoryEffects ME = MemoryEffects::none();
  // Additional locations accessed if the SCC accesses argmem.
  MemoryEffects RecursiveArgME = MemoryEffects::none();

  // Inalloca and preallocated arguments are always clobbered by the call.
  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef);

  // Scan the function body for instructions that may read or write memory.
  for (Instruction &I : instructions(F)) {
    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      // We can optimistically ignore calls to functions in the same SCC, with
      // two caveats:
      //   * Calls with operand bundles may have additional effects.
      //   * Argument memory accesses may imply additional effects depending on
      //     what the argument location is.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction())) {
        // Keep track of which additional locations are accessed if the SCC
        // turns out to access argmem.
        addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR);
        continue;
      }

      MemoryEffects CallME = AAR.getMemoryEffects(Call);

      // If the call doesn't access memory, we're done.
      if (CallME.doesNotAccessMemory())
        continue;

      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It carries a memory access
      // tag to keep itself from being removed by optimizations, without
      // blocking the optimization of other instructions.
      if (isa<PseudoProbeInst>(I))
        continue;

      ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem);

      // If the call accesses captured memory (currently part of "other") and
      // an argument is captured (currently not tracked), then it may also
      // access argument memory.
      ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other);
      ME |= MemoryEffects::argMemOnly(OtherMR);

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
      if (ArgMR != ModRefInfo::NoModRef)
        addArgLocs(ME, Call, ArgMR, AAR);
      continue;
    }

    ModRefInfo MR = ModRefInfo::NoModRef;
    if (I.mayWriteToMemory())
      MR |= ModRefInfo::Mod;
    if (I.mayReadFromMemory())
      MR |= ModRefInfo::Ref;
    if (MR == ModRefInfo::NoModRef)
      continue;

    std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I);
    if (!Loc) {
      // If no location is known, conservatively assume anything can be
      // accessed.
      ME |= MemoryEffects(MR);
      continue;
    }

    // Volatile operations may access inaccessible memory.
    if (I.isVolatile())
      ME |= MemoryEffects::inaccessibleMemOnly(MR);

    addLocAccess(ME, *Loc, MR, AAR);
  }

  return {OrigME & ME, RecursiveArgME};
}

MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                    AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first;
}

/// Deduce readonly/readnone/writeonly attributes for the SCC.
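/// The effects of all SCC members are unioned into one summary; the
/// argmem-dependent effects collected during the scan are folded back in only
/// if the combined summary still accesses argument memory.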
template <typename AARGetterT>
static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
                           SmallSet<Function *, 8> &Changed) {
  MemoryEffects ME = MemoryEffects::none();
  MemoryEffects RecursiveArgME = MemoryEffects::none();
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);
    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    auto [FnME, FnRecursiveArgME] =
        checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
    ME |= FnME;
    RecursiveArgME |= FnRecursiveArgME;
    // Reached the bottom of the lattice; we will not be able to improve the
    // result.
    if (ME == MemoryEffects::unknown())
      return;
  }

  // If the SCC accesses argmem, add recursive accesses resulting from that.
  ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
  if (ArgMR != ModRefInfo::NoModRef)
    ME |= RecursiveArgME & MemoryEffects(ArgMR);

  for (Function *F : SCCNodes) {
    MemoryEffects OldME = F->getMemoryEffects();
    MemoryEffects NewME = ME & OldME;
    if (NewME != OldME) {
      ++NumMemoryAttr;
      F->setMemoryEffects(NewME);
      // Remove conflicting writable attributes.
      if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem)))
        for (Argument &A : F->args())
          A.removeAttr(Attribute::Writable);
      Changed.insert(F);
    }
  }
}

// Compute definitive function attributes for a function taking into account
// prevailing definitions and linkage types.
static FunctionSummary *calculatePrevailingSummary(
    ValueInfo VI,
    DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {

  if (CachedPrevailingSummary.count(VI))
    return CachedPrevailingSummary[VI];

  /// At this point, prevailing symbols have been resolved. The following leads
  /// to returning a conservative result:
  /// - Multiple instances with local linkage. Normally local linkage would be
  ///   unique per module as the GUID includes the module path. We could have a
  ///   GUID alias if there wasn't any distinguishing path when each file was
  ///   compiled, but that should be rare so we'll punt on those.

  /// These next 2 cases should not happen and will assert:
  /// - Multiple instances with external linkage. This should be caught in
  ///   symbol resolution.
  /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
  ///   knowledge meaning we have to go conservative.

  /// Otherwise, we calculate attributes for a function as:
  /// 1. If we have a local linkage, take its attributes. If there's somehow
  ///    multiple, bail and go conservative.
  /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
  ///    prevailing, take its attributes.
  /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
  ///    differences. However, if the prevailing copy is known it will be used
  ///    so take its attributes. If the prevailing copy is in a native file
  ///    all IR copies will be dead and propagation will go conservative.
  /// 4. AvailableExternally summaries without a prevailing copy are known to
  ///    occur in a couple of circumstances:
  ///    a. An internal function gets imported due to its caller getting
  ///       imported; it becomes AvailableExternally but no prevailing
  ///       definition exists. Because it has to get imported along with its
  ///       caller the attributes will be captured by propagating on its
  ///       caller.
  ///    b. C++11 [temp.explicit]p10 can generate AvailableExternally
  ///       definitions of explicitly instantiated template declarations
  ///       for inlining which are ultimately dropped from the TU. Since this
  ///       is localized to the TU the attributes will have already made it to
  ///       the callers.
  ///    These are edge cases and already captured by their callers so we
  ///    ignore them for now. If they become relevant to optimize in the
  ///    future this can be revisited.
  /// 5. Otherwise, go conservative.

  CachedPrevailingSummary[VI] = nullptr;
  FunctionSummary *Local = nullptr;
  FunctionSummary *Prevailing = nullptr;

  for (const auto &GVS : VI.getSummaryList()) {
    if (!GVS->isLive())
      continue;

    FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
    // Virtual and Unknown (e.g. indirect) calls require going conservative.
    if (!FS || FS->fflags().HasUnknownCall)
      return nullptr;

    const auto &Linkage = GVS->linkage();
    if (GlobalValue::isLocalLinkage(Linkage)) {
      if (Local) {
        LLVM_DEBUG(
            dbgs()
            << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
               "function "
            << VI.name() << " from " << FS->modulePath() << ". Previous module "
            << Local->modulePath() << "\n");
        return nullptr;
      }
      Local = FS;
    } else if (GlobalValue::isExternalLinkage(Linkage)) {
      assert(IsPrevailing(VI.getGUID(), GVS.get()));
      Prevailing = FS;
      break;
    } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
               GlobalValue::isLinkOnceODRLinkage(Linkage) ||
               GlobalValue::isWeakAnyLinkage(Linkage) ||
               GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
      if (IsPrevailing(VI.getGUID(), GVS.get())) {
        Prevailing = FS;
        break;
      }
    } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
      // TODO: Handle these cases if they become meaningful.
      continue;
    }
  }

  if (Local) {
    assert(!Prevailing);
    CachedPrevailingSummary[VI] = Local;
  } else if (Prevailing) {
    assert(!Local);
    CachedPrevailingSummary[VI] = Prevailing;
  }

  return CachedPrevailingSummary[VI];
}

bool llvm::thinLTOPropagateFunctionAttrs(
    ModuleSummaryIndex &Index,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {
  // TODO: implement addNoAliasAttrs once
  // there's more information about the return type in the summary.
  if (DisableThinLTOPropagation)
    return false;

  DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
  bool Changed = false;

  auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
    // Assume we can propagate unless we discover otherwise.
    FunctionSummary::FFlags InferredFlags;
    InferredFlags.NoRecurse = (SCCNodes.size() == 1);
    InferredFlags.NoUnwind = true;

    for (auto &V : SCCNodes) {
      FunctionSummary *CallerSummary =
          calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);

      // Function summaries can fail to contain information, e.g. for
      // declarations.
      if (!CallerSummary)
        return;

      if (CallerSummary->fflags().MayThrow)
        InferredFlags.NoUnwind = false;

      for (const auto &Callee : CallerSummary->calls()) {
        FunctionSummary *CalleeSummary = calculatePrevailingSummary(
            Callee.first, CachedPrevailingSummary, IsPrevailing);

        if (!CalleeSummary)
          return;

        if (!CalleeSummary->fflags().NoRecurse)
          InferredFlags.NoRecurse = false;

        if (!CalleeSummary->fflags().NoUnwind)
          InferredFlags.NoUnwind = false;

        if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
          break;
      }
    }

    if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
      Changed = true;
      for (auto &V : SCCNodes) {
        if (InferredFlags.NoRecurse) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
                            << V.name() << "\n");
          ++NumThinLinkNoRecurse;
        }

        if (InferredFlags.NoUnwind) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
                            << V.name() << "\n");
          ++NumThinLinkNoUnwind;
        }

        for (const auto &S : V.getSummaryList()) {
          if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
            if (InferredFlags.NoRecurse)
              FS->setNoRecurse();

            if (InferredFlags.NoUnwind)
              FS->setNoUnwind();
          }
        }
      }
    }
  };

  // Call propagation functions on each SCC in the Index.
  for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
       ++I) {
    std::vector<ValueInfo> Nodes(*I);
    PropagateAttributes(Nodes);
  }
  return Changed;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
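/// For example, in "void f(int *x) { g(x); }" where g is in the same SCC,
/// passing x to g is recorded as a use of g's corresponding parameter instead
/// of being treated as a capture.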
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { Captured = true; }

  bool captured(const Use *U) override {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      Captured = true;
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      Captured = true;
      return true;
    }

    assert(!CB->isCallee(U) && "callee operand reported captured?");
    const unsigned UseIndex = CB->getDataOperandNo(U);
    if (UseIndex >= CB->arg_size()) {
      // Data operand, but not an argument operand -- must be a bundle operand.
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use. In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      Captured = true;
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      Captured = true;
      return true;
    }

    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // True only if certainly captured (used outside our SCC).
  bool Captured = false;

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

/// A struct of argument use: a Use and the offset it accesses. This struct
/// tracks uses of the argument reached inside the function via GEPs. If a GEP
/// has a non-constant index, the Offset field is nullopt.
struct ArgumentUse {
  Use *U;
  std::optional<int64_t> Offset;
};

/// A struct of argument access info. "Unknown" accesses are the cases like
/// unrecognized instructions, instructions that have more than one use of
/// the argument, or volatile memory accesses. "WriteWithSideEffect" are call
/// instructions that not only write an argument but also capture it.
struct ArgumentAccessInfo {
  enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown };
  AccessType ArgAccessType;
  ConstantRangeList AccessRanges;
};

/// A struct to wrap the argument use info per block.
struct UsesPerBlockInfo {
  SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts;
  bool HasWrites = false;
  bool HasUnknownAccess = false;
};

/// A struct to summarize the argument use info in a function.
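/// HasAnyWrite and HasWriteOutsideEntryBB let inferInitializes decide cheaply
/// whether scanning the entry block suffices or a full post-order walk of the
/// CFG is needed.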
struct ArgumentUsesSummary {
  bool HasAnyWrite = false;
  bool HasWriteOutsideEntryBB = false;
  SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock;
};

ArgumentAccessInfo getArgumentAccessInfo(const Instruction *I,
                                         const ArgumentUse &ArgUse,
                                         const DataLayout &DL) {
  auto GetTypeAccessRange =
      [&DL](Type *Ty,
            std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto TypeSize = DL.getTypeStoreSize(Ty);
    if (!TypeSize.isScalable() && Offset) {
      int64_t Size = TypeSize.getFixedValue();
      return ConstantRange(APInt(64, *Offset, true),
                           APInt(64, *Offset + Size, true));
    }
    return std::nullopt;
  };
  auto GetConstantIntRange =
      [](Value *Length,
         std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto *ConstantLength = dyn_cast<ConstantInt>(Length);
    if (ConstantLength && Offset &&
        ConstantLength->getValue().isStrictlyPositive()) {
      return ConstantRange(
          APInt(64, *Offset, true),
          APInt(64, *Offset + ConstantLength->getSExtValue(), true));
    }
    return std::nullopt;
  };
  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) {
      // Get the fixed type size of "SI". Since the access range of a write
      // will be unioned, if "SI" doesn't have a fixed type size, we just set
      // the access range to empty.
      ConstantRangeList AccessRanges;
      if (auto TypeAccessRange =
              GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset))
        AccessRanges.insert(*TypeAccessRange);
      return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)};
    }
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (LI->isSimple()) {
      assert(&LI->getOperandUse(0) == ArgUse.U);
      // Get the fixed type size of "LI". Unlike a write, if "LI" doesn't have
      // a fixed type size, we conservatively treat it as a clobber with an
      // empty access range.
      if (auto TypeAccessRange =
              GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset))
        return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}};
    }
  } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
    if (!MemSet->isVolatile()) {
      ConstantRangeList AccessRanges;
      if (auto AccessRange =
              GetConstantIntRange(MemSet->getLength(), ArgUse.Offset))
        AccessRanges.insert(*AccessRange);
      return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
    }
  } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
    if (!MTI->isVolatile()) {
      if (&MTI->getOperandUse(0) == ArgUse.U) {
        ConstantRangeList AccessRanges;
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          AccessRanges.insert(*AccessRange);
        return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
      } else if (&MTI->getOperandUse(1) == ArgUse.U) {
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}};
      }
    }
  } else if (auto *CB = dyn_cast<CallBase>(I)) {
    if (CB->isArgOperand(ArgUse.U) &&
        !CB->isByValArgument(CB->getArgOperandNo(ArgUse.U))) {
      unsigned ArgNo = CB->getArgOperandNo(ArgUse.U);
      bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
      if (IsInitialize && ArgUse.Offset) {
        // The argument is a Write when the parameter is writeonly/readnone
        // and nocapture. Otherwise, it's a WriteWithSideEffect.
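        // A WriteWithSideEffect still records its initialized ranges, but it
        // also acts as a clobber: inferInitializes discards any ranges
        // inferred from successors when it encounters one.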
        auto Access = CB->onlyWritesMemory(ArgNo) && CB->doesNotCapture(ArgNo)
                          ? ArgumentAccessInfo::AccessType::Write
                          : ArgumentAccessInfo::AccessType::WriteWithSideEffect;
        ConstantRangeList AccessRanges;
        Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
        ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
        for (ConstantRange &CR : CBCRL)
          AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset,
                                            CR.getUpper() + *ArgUse.Offset));
        return {Access, AccessRanges};
      }
    }
  }
  // Other unrecognized instructions are considered as unknown.
  return {ArgumentAccessInfo::AccessType::Unknown, {}};
}

// Collect the uses of argument "A" in "F".
ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) {
  auto &DL = F.getParent()->getDataLayout();
  unsigned PointerSize =
      DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
  ArgumentUsesSummary Result;

  BasicBlock &EntryBB = F.getEntryBlock();
  SmallVector<ArgumentUse, 4> Worklist;
  for (Use &U : A.uses())
    Worklist.push_back({&U, 0});

  // Update "UsesPerBlock" with the block of "I" as key and "Info" as value.
  // Return true if the block of "I" has write accesses after updating.
  auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) {
    auto *BB = I->getParent();
    auto &BBInfo = Result.UsesPerBlock[BB];
    bool AlreadyVisitedInst = BBInfo.Insts.contains(I);
    auto &IInfo = BBInfo.Insts[I];

    // Instructions that have more than one use of the argument are considered
    // as clobbers.
    if (AlreadyVisitedInst) {
      IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}};
      BBInfo.HasUnknownAccess = true;
      return false;
    }

    IInfo = std::move(Info);
    BBInfo.HasUnknownAccess |=
        IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown;
    bool InfoHasWrites =
        (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
         IInfo.ArgAccessType ==
             ArgumentAccessInfo::AccessType::WriteWithSideEffect) &&
        !IInfo.AccessRanges.empty();
    BBInfo.HasWrites |= InfoHasWrites;
    return InfoHasWrites;
  };

  // No need for a visited set because we don't look through phis, so there are
  // no cycles.
  while (!Worklist.empty()) {
    ArgumentUse ArgUse = Worklist.pop_back_val();
    User *U = ArgUse.U->getUser();
    // Add GEP uses to worklist.
    // If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt.
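    // For example, "getelementptr i8, ptr %p, i64 4" extends a known offset
    // by 4, while a GEP with a variable index makes the offset unknown.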
    if (auto *GEP = dyn_cast<GEPOperator>(U)) {
      std::optional<int64_t> NewOffset = std::nullopt;
      if (ArgUse.Offset) {
        APInt Offset(PointerSize, 0);
        if (GEP->accumulateConstantOffset(DL, Offset))
          NewOffset = *ArgUse.Offset + Offset.getSExtValue();
      }
      for (Use &U : GEP->uses())
        Worklist.push_back({&U, NewOffset});
      continue;
    }

    auto *I = cast<Instruction>(U);
    bool HasWrite = UpdateUseInfo(I, getArgumentAccessInfo(I, ArgUse, DL));

    Result.HasAnyWrite |= HasWrite;

    if (HasWrite && I->getParent() != &EntryBB)
      Result.HasWriteOutsideEntryBB = true;
  }
  return Result;
}

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
determinePointerAccessAttrs(Argument *A,
                            const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  bool IsWrite = false;

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    if (IsWrite && IsRead)
      // No point in searching further.
      return Attribute::None;

    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*I);
      if (CB.isCallee(U)) {
        IsRead = true;
        // Note that indirect calls do not capture; see the comment in
        // CaptureTracking for context.
        continue;
      }

      // Given we've explicitly handled the callee operand above, what's left
      // must be a data operand (e.g. argument or operand bundle).
      const unsigned UseIndex = CB.getDataOperandNo(U);

      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus should
      // be handled like GEP or addrspacecast above.
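      // e.g. the result of llvm.ptrmask aliases its pointer operand, so uses
      // of the call's result must be scanned as if they were uses of the
      // argument itself.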
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              &CB, /*MustPreserveNullness=*/false)) {
        for (Use &UU : CB.uses())
          if (Visited.insert(&UU).second)
            Worklist.push_back(&UU);
      } else if (!CB.doesNotCapture(UseIndex)) {
        if (!CB.onlyReadsMemory())
          // If the callee can save a copy into other memory, then simply
          // scanning uses of the call is insufficient. We have no way
          // of tracking copies of the pointer through memory to see
          // if a reloaded copy is written to, thus we must give up.
          return Attribute::None;
        // Push users for processing once we finish this one.
        if (!I->getType()->isVoidTy())
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      }

      ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
      if (isNoModRef(ArgMR))
        continue;

      if (Function *F = CB.getCalledFunction())
        if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
            SCCNodes.count(F->getArg(UseIndex)))
          // This is an argument which is part of the speculative SCC. Note
          // that only operands corresponding to formal arguments of the callee
          // can participate in the speculation.
          break;

      // The accessors used on call site here do the right thing for calls and
      // invokes with operand bundles.
      if (CB.doesNotAccessMemory(UseIndex)) {
        /* nop */
      } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
        IsRead = true;
      } else if (!isRefSet(ArgMR) ||
                 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
        IsWrite = true;
      } else {
        return Attribute::None;
      }
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::Store:
      if (cast<StoreInst>(I)->getValueOperand() == *U)
        // untrackable capture
        return Attribute::None;

      // A volatile store has side effects beyond what writeonly can be relied
      // upon.
      if (cast<StoreInst>(I)->isVolatile())
        return Attribute::None;

      IsWrite = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  if (IsWrite && IsRead)
    return Attribute::None;
  else if (IsRead)
    return Attribute::ReadOnly;
  else if (IsWrite)
    return Attribute::WriteOnly;
  else
    return Attribute::ReadNone;
}

/// Deduce returned attributes for the SCC.
static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
                                     SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining if an argument is always returned.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
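    // (At most one parameter may carry 'returned', so finding one anywhere
    // settles the question for this function.)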
    if (F->getAttributes().hasAttrSomewhere(Attribute::Returned))
      continue;

    auto FindRetArg = [&]() -> Argument * {
      Argument *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          auto *RetVal =
              dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts());
          if (!RetVal || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Argument *RetArg = FindRetArg()) {
      RetArg->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed.insert(F);
    }
  }
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information loss
/// when attribute knowledge disappears with the inlined call.
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are guaranteed
  // to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/*AllowUndefOrPoison=*/false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the value
          // must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}

static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone ||
          R == Attribute::WriteOnly) &&
         "Must be an access attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attributes, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  // Remove conflicting writable attribute.
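  // 'writable' allows writes to the argument's memory to be introduced, which
  // contradicts readnone/readonly.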
  if (R == Attribute::ReadNone || R == Attribute::ReadOnly)
    A->removeAttr(Attribute::Writable);
  A->addAttr(R);
  if (R == Attribute::ReadOnly)
    ++NumReadOnlyArg;
  else if (R == Attribute::WriteOnly)
    ++NumWriteOnlyArg;
  else
    ++NumReadNoneArg;
  return true;
}

static bool inferInitializes(Argument &A, Function &F) {
  auto ArgumentUses = collectArgumentUsesPerBlock(A, F);
  // No write anywhere in the function; bail.
  if (!ArgumentUses.HasAnyWrite)
    return false;

  auto &UsesPerBlock = ArgumentUses.UsesPerBlock;
  BasicBlock &EntryBB = F.getEntryBlock();
  // A map to store the argument ranges initialized by a BasicBlock (including
  // its successors).
  DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
  // Visit the successors of block "BB" and the instructions in BB (post-order)
  // to get the argument ranges initialized by "BB" (including its successors).
  // The result will be cached in "Initialized".
  auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
    auto UPB = UsesPerBlock.find(BB);
    ConstantRangeList CRL;

    // Start with the intersection of the successors.
    // If this block has any clobbering use, we're going to clear out the
    // ranges at some point in this block anyway, so don't bother looking at
    // successors.
    if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) {
      bool HasAddedSuccessor = false;
      for (auto *Succ : successors(BB)) {
        if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) {
          if (HasAddedSuccessor) {
            CRL = CRL.intersectWith(SuccI->second);
          } else {
            CRL = SuccI->second;
            HasAddedSuccessor = true;
          }
        } else {
          CRL = ConstantRangeList();
          break;
        }
      }
    }

    if (UPB != UsesPerBlock.end()) {
      // Sort uses in this block by instruction order.
      SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
      append_range(Insts, UPB->second.Insts);
      sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS,
                     std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
        return LHS.first->comesBefore(RHS.first);
      });

      // Walk the instructions from the end of the block to the beginning,
      // updating the initialized ranges.
      for (auto &[_, Info] : reverse(Insts)) {
        if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown ||
            Info.ArgAccessType ==
                ArgumentAccessInfo::AccessType::WriteWithSideEffect)
          CRL = ConstantRangeList();
        if (!Info.AccessRanges.empty()) {
          if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
              Info.ArgAccessType ==
                  ArgumentAccessInfo::AccessType::WriteWithSideEffect) {
            CRL = CRL.unionWith(Info.AccessRanges);
          } else {
            assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
            for (const auto &ReadRange : Info.AccessRanges)
              CRL.subtract(ReadRange);
          }
        }
      }
    }
    return CRL;
  };

  ConstantRangeList EntryCRL;
  // If all write instructions are in the EntryBB, or if the EntryBB has
  // a clobbering use, we only need to look at EntryBB.
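  // In the clobbering case, VisitBlock ignores successor ranges anyway, so
  // scanning blocks other than the entry block cannot change the result.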
  bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB;
  if (!OnlyScanEntryBlock)
    if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
        EntryUPB != UsesPerBlock.end())
      OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess;
  if (OnlyScanEntryBlock) {
    EntryCRL = VisitBlock(&EntryBB);
    if (EntryCRL.empty())
      return false;
  } else {
    // Now we have to go through the CFG to get the initialized argument ranges
    // across blocks. With dominance and post-dominance, the ranges initialized
    // by a block include both accesses inside this block and accesses in its
    // (transitive) successors. So visit successors before predecessors with a
    // post-order walk of the blocks and memorize the results in "Initialized".
    for (const BasicBlock *BB : post_order(&F)) {
      ConstantRangeList CRL = VisitBlock(BB);
      if (!CRL.empty())
        Initialized[BB] = CRL;
    }

    auto EntryCRLI = Initialized.find(&EntryBB);
    if (EntryCRLI == Initialized.end())
      return false;

    EntryCRL = EntryCRLI->second;
  }

  assert(!EntryCRL.empty() &&
         "should have bailed already if EntryCRL is empty");

  if (A.hasAttribute(Attribute::Initializes)) {
    ConstantRangeList PreviousCRL =
        A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
    if (PreviousCRL == EntryCRL)
      return false;
    EntryCRL = EntryCRL.unionWith(PreviousCRL);
  }

  A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
                           EntryCRL.rangesRef()));

  return true;
}

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed,
                             bool SkipInitializes) {
  ArgumentGraph AG;

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (addArgumentAttrsFromCallsites(*F))
      Changed.insert(F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() &&
        F->getReturnType()->isVoidTy()) {
      for (Argument &A : F->args()) {
        if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
          A.addAttr(Attribute::getWithCaptureInfo(A.getContext(),
                                                  CaptureInfo::none()));
          ++NumNoCapture;
          Changed.insert(F);
        }
      }
      continue;
    }

    for (Argument &A : F->args()) {
      if (!A.getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      if (!A.hasNoCaptureAttr()) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&A, &Tracker);
        if (!Tracker.Captured) {
          if (Tracker.Uses.empty()) {
            // If it's trivially not captured, mark it nocapture now.
            A.addAttr(Attribute::getWithCaptureInfo(A.getContext(),
                                                    CaptureInfo::none()));
            ++NumNoCapture;
            Changed.insert(F);
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
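            // Looking up AG[&A] materializes the node and links it from the
            // synthetic root so that the SCC iteration below can reach it.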
            ArgumentGraphNode *Node = AG[&A];
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A.onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone/writeonly without doing
        // an SCC? Note that we don't allow any calls at all here, or else our
        // result will be dependent on the iteration order through the
        // functions in the SCC.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&A);
        Attribute::AttrKind R = determinePointerAccessAttrs(&A, Self);
        if (R != Attribute::None)
          if (addAccessAttr(&A, R))
            Changed.insert(F);
      }
      if (!SkipInitializes && !A.onlyReadsMemory()) {
        if (inferInitializes(A, *F))
          Changed.insert(F);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // e.g. "void f(int* x) { if (...) f(x); }"
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        A->addAttr(Attribute::getWithCaptureInfo(A->getContext(),
                                                 CaptureInfo::none()));
        ++NumNoCapture;
        Changed.insert(A->getParent());

        // Infer the access attributes given the new nocapture one.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(A);
        Attribute::AttrKind R = determinePointerAccessAttrs(A, Self);
        if (R != Attribute::None)
          addAccessAttr(A, R);
      }
      continue;
    }

    bool SCCCaptured = false;
    for (ArgumentGraphNode *Node : ArgumentSCC) {
      if (Node->Uses.empty() && !Node->Definition->hasNoCaptureAttr()) {
        SCCCaptured = true;
        break;
      }
    }
    if (SCCCaptured)
      continue;

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(I->Definition);
    }

    for (ArgumentGraphNode *N : ArgumentSCC) {
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
          continue;
        SCCCaptured = true;
        break;
      }
      if (SCCCaptured)
        break;
    }
    if (SCCCaptured)
      continue;

    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      A->addAttr(
          Attribute::getWithCaptureInfo(A->getContext(), CaptureInfo::none()));
      ++NumNoCapture;
      Changed.insert(A->getParent());
    }

    // We also want to compute readonly/readnone/writeonly. With a small number
    // of false negatives, we can assume that any pointer which is captured
    // isn't going to be provably readonly or readnone, since by definition
    // we can't analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, and the
    // pointer's lifetime ends before anything that writes to arbitrary memory.
    // Also, a readonly/readnone pointer may be returned, but returning a
    // pointer is capturing it.

    auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
      if (A == B)
        return A;
      if (A == Attribute::ReadNone)
        return B;
      if (B == Attribute::ReadNone)
        return A;
      return Attribute::None;
    };

    Attribute::AttrKind AccessAttr = Attribute::ReadNone;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
      AccessAttr = meetAccessAttr(AccessAttr, K);
      if (AccessAttr == Attribute::None)
        break;
    }

    if (AccessAttr != Attribute::None) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        if (addAccessAttr(A, AccessAttr))
          Changed.insert(A->getParent());
      }
    }
  }
}

/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(IncValue);
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        [[fallthrough]];
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, /*ReturnCaptures=*/false,
                             /*StoreCaptures=*/false))
      return false;
  }

  return true;
}

/// Deduce noalias attributes for the SCC.
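/// The attribute is applied only if every function in the SCC qualifies: a
/// malloc-like conclusion for one member may rest on calls into the others.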
static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return;
  }

  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    Changed.insert(F);
  }
}

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good.
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
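    // A cast preserves the (non-)nullness of its operand, and an inbounds GEP
    // cannot wrap a non-null base back to null, which is why only inbounds
    // GEPs are looked through below.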
1515 case Instruction::BitCast: 1516 case Instruction::AddrSpaceCast: 1517 FlowsToReturn.insert(RVI->getOperand(0)); 1518 continue; 1519 case Instruction::GetElementPtr: 1520 if (cast<GEPOperator>(RVI)->isInBounds()) { 1521 FlowsToReturn.insert(RVI->getOperand(0)); 1522 continue; 1523 } 1524 return false; 1525 case Instruction::Select: { 1526 SelectInst *SI = cast<SelectInst>(RVI); 1527 FlowsToReturn.insert(SI->getTrueValue()); 1528 FlowsToReturn.insert(SI->getFalseValue()); 1529 continue; 1530 } 1531 case Instruction::PHI: { 1532 PHINode *PN = cast<PHINode>(RVI); 1533 for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 1534 FlowsToReturn.insert(PN->getIncomingValue(i)); 1535 continue; 1536 } 1537 case Instruction::Call: 1538 case Instruction::Invoke: { 1539 CallBase &CB = cast<CallBase>(*RVI); 1540 Function *Callee = CB.getCalledFunction(); 1541 // A call to a node within the SCC is assumed to return null until 1542 // proven otherwise 1543 if (Callee && SCCNodes.count(Callee)) { 1544 Speculative = true; 1545 continue; 1546 } 1547 return false; 1548 } 1549 default: 1550 return false; // Unknown source, may be null 1551 }; 1552 llvm_unreachable("should have either continued or returned"); 1553 } 1554 1555 return true; 1556 } 1557 1558 /// Deduce nonnull attributes for the SCC. 1559 static void addNonNullAttrs(const SCCNodeSet &SCCNodes, 1560 SmallSet<Function *, 8> &Changed) { 1561 // Speculative that all functions in the SCC return only nonnull 1562 // pointers. We may refute this as we analyze functions. 1563 bool SCCReturnsNonNull = true; 1564 1565 // Check each function in turn, determining which functions return nonnull 1566 // pointers. 1567 for (Function *F : SCCNodes) { 1568 // Already nonnull. 1569 if (F->getAttributes().hasRetAttr(Attribute::NonNull)) 1570 continue; 1571 1572 // We can infer and propagate function attributes only when we know that the 1573 // definition we'll get at link time is *exactly* the definition we see now. 1574 // For more details, see GlobalValue::mayBeDerefined. 1575 if (!F->hasExactDefinition()) 1576 return; 1577 1578 // We annotate nonnull return values, which are only applicable to 1579 // pointer types. 1580 if (!F->getReturnType()->isPointerTy()) 1581 continue; 1582 1583 bool Speculative = false; 1584 if (isReturnNonNull(F, SCCNodes, Speculative)) { 1585 if (!Speculative) { 1586 // Mark the function eagerly since we may discover a function 1587 // which prevents us from speculating about the entire SCC 1588 LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName() 1589 << " as nonnull\n"); 1590 F->addRetAttr(Attribute::NonNull); 1591 ++NumNonNullReturn; 1592 Changed.insert(F); 1593 } 1594 continue; 1595 } 1596 // At least one function returns something which could be null, can't 1597 // speculate any more. 1598 SCCReturnsNonNull = false; 1599 } 1600 1601 if (SCCReturnsNonNull) { 1602 for (Function *F : SCCNodes) { 1603 if (F->getAttributes().hasRetAttr(Attribute::NonNull) || 1604 !F->getReturnType()->isPointerTy()) 1605 continue; 1606 1607 LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); 1608 F->addRetAttr(Attribute::NonNull); 1609 ++NumNonNullReturn; 1610 Changed.insert(F); 1611 } 1612 } 1613 } 1614 1615 /// Deduce noundef attributes for the SCC. 1616 static void addNoUndefAttrs(const SCCNodeSet &SCCNodes, 1617 SmallSet<Function *, 8> &Changed) { 1618 // Check each function in turn, determining which functions return noundef 1619 // values. 1620 for (Function *F : SCCNodes) { 1621 // Already noundef. 
    AttributeList Attrs = F->getAttributes();
    if (Attrs.hasRetAttr(Attribute::NoUndef))
      continue;

    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // MemorySanitizer assumes that the definition and declaration of a
    // function will be consistent. A function with the sanitize_memory
    // attribute should be skipped during inference.
    if (F->hasFnAttribute(Attribute::SanitizeMemory))
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    const DataLayout &DL = F->getDataLayout();
    if (all_of(*F, [&](BasicBlock &BB) {
          if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
            // TODO: perform context-sensitive analysis?
            Value *RetVal = Ret->getReturnValue();
            if (!isGuaranteedNotToBeUndefOrPoison(RetVal))
              return false;

            // We know the original return value is not poison now, but it
            // could still be converted to poison by another return attribute.
            // Try to explicitly re-prove the relevant attributes.
            if (Attrs.hasRetAttr(Attribute::NonNull) &&
                !isKnownNonZero(RetVal, DL))
              return false;

            if (MaybeAlign Align = Attrs.getRetAlignment())
              if (RetVal->getPointerAlignment(DL) < *Align)
                return false;

            Attribute Attr = Attrs.getRetAttr(Attribute::Range);
            if (Attr.isValid() &&
                !Attr.getRange().contains(
                    computeConstantRange(RetVal, /*ForSigned=*/false)))
              return false;
          }
          return true;
        })) {
      F->addRetAttr(Attribute::NoUndef);
      ++NumNoUndefReturn;
      Changed.insert(F);
    }
  }
}

namespace {

/// Collects a set of attribute inference requests and performs them all in
/// one go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// Once all the bodies have been verified, we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// The general intent of this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
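    /// For example, an interposable (e.g. weak) definition may be replaced at
    /// link time by a semantically different body, so attributes deduced only
    /// from the body visible here would not be safe to keep.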
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
                           SmallSet<Function *, 8> &Changed) {
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in the SCC and check the corresponding
  // attribute assumptions for each of them. Attributes that are invalid for
  // this SCC will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the F instructions scan to verify assumptions of the attribute.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove the attribute from further inference on any other functions
        // because its assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove the attribute from the rest of the current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return;

  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only attributes whose assumptions are
    // still valid: every function in the SCC was either explicitly skipped
    // from scan/inference, or verified to have no instructions that break the
    // attribute's assumptions. Hence we just go and force each surviving
    // attribute on all non-skipped functions.
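    // Note that SkipFunction is re-queried here: functions it skips were
    // never scanned, so the attribute must not be forced onto them.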
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed.insert(F);
      ID.SetAttribute(*F);
    }
}

struct SCCNodesResult {
  SCCNodeSet SCCNodes;
  bool HasUnknownCall;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks the non-convergent assumption if CB is a convergent call to a
  // function not in the SCC.
  return CB && CB->isConvergent() &&
         !SCCNodes.contains(CB->getCalledFunction());
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow(/* IncludePhaseOneUnwind */ true))
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC. This doesn't
      // invalidate our current working assumption that the SCC is no-throw;
      // we just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}

/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
  CallBase *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    return false;

  if (CB->hasFnAttr(Attribute::NoFree))
    return false;

  // Speculatively assume the callee is nofree if it is in the SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different from the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile accesses may synchronize.
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above on monotonic.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmove intrinsics are nosync.
  // NOTE: Only intrinsics with a volatile flag should be handled here. All
  // others should be marked in Intrinsics.td.
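  // For example, the volatile form
  //   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 true)
  // must be treated as potentially synchronizing, while the same call with
  // i1 false as the final operand is nosync.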
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume the callee is nosync if it is in the SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove the convergent function attribute when possible.
///
/// Changed functions are added to \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break the non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /* RequiresExactDefinition= */ false});
  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions.
///
/// Changed functions are added to \p Changed.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                                         SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer the nounwind attribute for all the functions in the
    // SCC if every callsite within the SCC is not throwing (except for calls
    // to functions within the SCC). Note that the nounwind attribute suffers
    // from derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break the non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer the nofree attribute for all the functions in the SCC
    // if every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that the
    // nofree attribute suffers from derefinement - results may change
    // depending on how functions are optimized. Thus it can be inferred only
    // from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break the non-deallocating assumption.
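        // (e.g. a call to free(), or any call whose callee is neither known
        // to be nofree nor a member of this SCC).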
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break the nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
                              SmallSet<Function *, 8> &Changed) {
  // Try to identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return;

  // If all of the calls in F are identifiable and are to norecurse functions,
  // F is norecurse. This check also catches self-recursion: F is not yet
  // marked norecurse, so a call from F to F fails the test below.
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F ||
            (!Callee->doesNotRecurse() &&
             !(Callee->isDeclaration() &&
               Callee->hasFnAttribute(Attribute::NoCallback))))
          // Function calls a potentially recursive function.
          return;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function
  // cannot recurse.
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  Changed.insert(F);
}

static bool instructionDoesNotReturn(Instruction &I) {
  if (auto *CB = dyn_cast<CallBase>(&I))
    return CB->hasFnAttr(Attribute::NoReturn);
  return false;
}

// A basic block can only return if it terminates with a ReturnInst and does
// not contain calls to noreturn functions.
static bool basicBlockCanReturn(BasicBlock &BB) {
  if (!isa<ReturnInst>(BB.getTerminator()))
    return false;
  return none_of(BB, instructionDoesNotReturn);
}

// FIXME: this doesn't handle recursion.
static bool canReturn(Function &F) {
  SmallVector<BasicBlock *, 16> Worklist;
  SmallPtrSet<BasicBlock *, 16> Visited;

  Visited.insert(&F.front());
  Worklist.push_back(&F.front());

  do {
    BasicBlock *BB = Worklist.pop_back_val();
    if (basicBlockCanReturn(*BB))
      return true;
    for (BasicBlock *Succ : successors(BB))
      if (Visited.insert(Succ).second)
        Worklist.push_back(Succ);
  } while (!Worklist.empty());

  return false;
}
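// Illustrative example (hand-written IR, not from a test): assuming @abort
// is declared noreturn, in
//   define void @f() {
//     call void @abort()
//     unreachable
//   }
// the sole block terminates in unreachable rather than a ReturnInst, so
// canReturn(@f) is false and @f itself can be marked noreturn.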
// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->doesNotReturn())
      continue;

    if (!canReturn(*F)) {
      F->setDoesNotReturn();
      Changed.insert(F);
    }
  }
}

static bool allPathsGoThroughCold(Function &F) {
  SmallDenseMap<BasicBlock *, bool, 16> ColdPaths;
  ColdPaths[&F.front()] = false;
  SmallVector<BasicBlock *> Jobs;
  Jobs.push_back(&F.front());

  while (!Jobs.empty()) {
    BasicBlock *BB = Jobs.pop_back_val();

    // If the block contains a cold callsite, this path through the CFG is
    // cold. Ignore whether the instructions actually are guaranteed to
    // transfer execution. Divergent behavior is considered unlikely.
    if (any_of(*BB, [](Instruction &I) {
          if (auto *CB = dyn_cast<CallBase>(&I))
            return CB->hasFnAttr(Attribute::Cold);
          return false;
        })) {
      ColdPaths[BB] = true;
      continue;
    }

    auto Succs = successors(BB);
    // We found a path that doesn't go through any cold callsite.
    if (Succs.empty())
      return false;

    // We didn't find a cold callsite in this BB, so check that all successors
    // contain a cold callsite (or that their successors do).
    // Potential TODO: We could use static branch hints to assume certain
    // successor paths are inherently cold, irrespective of whether they
    // contain a cold callsite.
    for (BasicBlock *Succ : Succs) {
      // Start with false; this is necessary so that we don't treat loops as
      // cold.
      auto [Iter, Inserted] = ColdPaths.try_emplace(Succ, false);
      if (!Inserted) {
        if (Iter->second)
          continue;
        return false;
      }
      Jobs.push_back(Succ);
    }
  }
  return true;
}

// Set the cold function attribute if possible.
static void addColdAttrs(const SCCNodeSet &SCCNodes,
                         SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->hasFnAttribute(Attribute::Cold) || F->hasFnAttribute(Attribute::Hot))
      continue;

    // Potential TODO: We could add the `cold` attribute to functions with
    // `coldcc`.
    if (allPathsGoThroughCold(*F)) {
      F->addFnAttr(Attribute::Cold);
      ++NumCold;
      Changed.insert(F);
      continue;
    }
  }
}

static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that
  // the definition we'll get at link time is *exactly* the definition we
  // see now. For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // A must-progress function without side effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // We can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
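  // (Instruction::willReturn is true for a call only if it carries the
  // willreturn attribute; most non-call instructions trivially return.)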
  return all_of(instructions(F), [](const Instruction &I) {
    return I.willReturn();
  });
}

// Set the willreturn function attribute if possible.
static void addWillReturn(const SCCNodeSet &SCCNodes,
                          SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    ++NumWillReturn;
    Changed.insert(F);
  }
}

static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  Res.HasUnknownCall = false;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
        F->isPresplitCoroutine()) {
      // Treat any function we're trying not to optimize as if it were an
      // indirect call and omit it from the node set used below.
      Res.HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!Res.HasUnknownCall) {
      for (Instruction &I : instructions(*F)) {
        if (auto *CB = dyn_cast<CallBase>(&I)) {
          if (!CB->getCalledFunction()) {
            Res.HasUnknownCall = true;
            break;
          }
        }
      }
    }
    Res.SCCNodes.insert(F);
  }
  return Res;
}

template <typename AARGetterT>
static SmallSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
                       bool ArgAttrsOnly) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return {};

  SmallSet<Function *, 8> Changed;
  if (ArgAttrsOnly) {
    // ArgAttrsOnly means to only infer attributes that may aid optimizations
    // on the *current* function. The "initializes" attribute aids
    // optimizations (like DSE) in the callers, so skip "initializes" here.
    addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
    return Changed;
  }

  addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
  addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
  addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
  inferConvergent(Nodes.SCCNodes, Changed);
  addNoReturnAttrs(Nodes.SCCNodes, Changed);
  addColdAttrs(Nodes.SCCNodes, Changed);
  addWillReturn(Nodes.SCCNodes, Changed);
  addNoUndefAttrs(Nodes.SCCNodes, Changed);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
  if (!Nodes.HasUnknownCall) {
    addNoAliasAttrs(Nodes.SCCNodes, Changed);
    addNonNullAttrs(Nodes.SCCNodes, Changed);
    inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
    addNoRecurseAttrs(Nodes.SCCNodes, Changed);
  }

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This handles the cases where one attribute on a signature implies
  // another, but for implementation reasons the inference rule for the latter
  // is missing (or simply less sophisticated).
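  // For example, one such implication: a non-convergent function that never
  // accesses memory cannot synchronize with other threads, so readnone
  // implies nosync.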
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(*F))
        Changed.insert(F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  // Skip non-recursive functions if requested.
  // Only infer argument attributes for non-recursive functions, because
  // it can affect optimization behavior in conjunction with noalias.
  bool ArgAttrsOnly = false;
  if (C.size() == 1 && SkipNonRecursive) {
    LazyCallGraph::Node &N = *C.begin();
    if (!N->lookup(N))
      ArgAttrsOnly = true;
  }

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  auto ChangedFunctions =
      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
  if (ChangedFunctions.empty())
    return PreservedAnalyses::all();

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions) {
    FAM.invalidate(*Changed, FuncPA);
    // Also invalidate any direct callers of changed functions since analyses
    // may care about attributes of direct callees. For example, MemorySSA
    // cares about whether or not a call's callee modifies memory and queries
    // that through function attributes.
    for (auto *U : Changed->users()) {
      if (auto *Call = dyn_cast<CallBase>(U)) {
        if (Call->getCalledFunction() == Changed)
          FAM.invalidate(*Call->getFunction(), FuncPA);
      }
    }
  }

  PreservedAnalyses PA;
  // We have not added or removed functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We already invalidated all relevant function analyses above.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}

void PostOrderFunctionAttrsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  if (SkipNonRecursive)
    OS << "<skip-non-recursive-function-attrs>";
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return !deriveAttrsInPostOrder(Functions, AARGetter, /*ArgAttrsOnly=*/false)
              .empty();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to
  // avoid the cost of building up a reversible post-order list. We assert
  // them here to make sure none of the invariants this relies on were
  // violated.
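  // (The caller in this file, deduceFunctionAttributeInRPO, queues exactly
  // the functions satisfying these preconditions: defined, internal linkage,
  // and not already norecurse.)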
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurse!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function
  // too as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function
  // but this function could be recursively (indirectly) called. Note that
  // this also detects if F is directly recursive, as F is not yet marked as
  // a norecurse function.
  for (auto &U : F.uses()) {
    auto *I = dyn_cast<Instruction>(U.getUser());
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->isCallee(&U) ||
        !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then
  // walk it in reverse. This is simpler than using the RPO iterator
  // infrastructure because we need to combine SCC detection and the PO walk
  // of the call graph. We can also cheat egregiously: because we're primarily
  // interested in synthesizing norecurse, we need only save the singleton
  // SCCs, as SCCs with multiple functions in them will clearly be recursive.

  SmallVector<Function *, 16> Worklist;
  CG.buildRefSCCs();
  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    for (LazyCallGraph::SCC &SCC : RC) {
      if (SCC.size() != 1)
        continue;
      Function &F = SCC.begin()->getFunction();
      if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())
        Worklist.push_back(&F);
    }
  }
  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist))
    Changed |= addNoRecurseAttrsTopDown(*F);

  return Changed;
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<LazyCallGraphAnalysis>();
  return PA;
}