//===- InlineFunction.cpp - Code to perform function inlining ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CtxProfAnalysis.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <deque>
#include <iterator>
#include <limits>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "inline-function"

using namespace llvm;
using namespace llvm::memprof;
using ProfileCount = Function::ProfileCount;

static cl::opt<bool>
    EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
                            cl::Hidden,
                            cl::desc("Convert noalias attributes to metadata during inlining."));

static cl::opt<bool>
    UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
                        cl::init(true),
                        cl::desc("Use the llvm.experimental.noalias.scope.decl "
                                 "intrinsic during inlining."));

// Disabled by default, because the added alignment assumptions may increase
// compile-time and block optimizations. This option is not suitable for use
// with frontends that emit comprehensive parameter alignment annotations.
static cl::opt<bool>
    PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Convert align attributes to assumptions during inlining."));

static cl::opt<unsigned> InlinerAttributeWindow(
    "max-inst-checked-for-throw-during-inlining", cl::Hidden,
    cl::desc("the maximum number of instructions analyzed for may throw during "
             "attribute inference in inlined body"),
    cl::init(4));

namespace {

/// A class for recording information about inlining a landing pad.
class LandingPadInliningInfo {
  /// Destination of the invoke's unwind.
  BasicBlock *OuterResumeDest;

  /// Destination for the callee's resume.
  BasicBlock *InnerResumeDest = nullptr;

  /// LandingPadInst associated with the invoke.
  LandingPadInst *CallerLPad = nullptr;

  /// PHI for EH values from landingpad insts.
  PHINode *InnerEHValuesPHI = nullptr;

  SmallVector<Value*, 8> UnwindDestPHIValues;

public:
  LandingPadInliningInfo(InvokeInst *II)
      : OuterResumeDest(II->getUnwindDest()) {
    // If there are PHI nodes in the unwind destination block, we need to keep
    // track of which values came into them from the invoke before removing
    // the edge from this block.
    BasicBlock *InvokeBB = II->getParent();
    BasicBlock::iterator I = OuterResumeDest->begin();
    for (; isa<PHINode>(I); ++I) {
      // Save the value to use for this edge.
      PHINode *PHI = cast<PHINode>(I);
      UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
    }

    CallerLPad = cast<LandingPadInst>(I);
  }

  /// The outer unwind destination is the target of
  /// unwind edges introduced for calls within the inlined function.
  BasicBlock *getOuterResumeDest() const {
    return OuterResumeDest;
  }

  BasicBlock *getInnerResumeDest();

  LandingPadInst *getLandingPadInst() const { return CallerLPad; }

  /// Forward the 'resume' instruction to the caller's landing pad block.
  /// When the landing pad block has only one predecessor, this is
  /// a simple branch. When there is more than one predecessor, we need to
  /// split the landing pad block after the landingpad instruction and jump
  /// to there.
  void forwardResume(ResumeInst *RI,
                     SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);

  /// Add incoming-PHI values to the unwind destination block for the given
  /// basic block, using the values for the original invoke's source block.
  void addIncomingPHIValuesFor(BasicBlock *BB) const {
    addIncomingPHIValuesForInto(BB, OuterResumeDest);
  }

  void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
    BasicBlock::iterator I = dest->begin();
    for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
      PHINode *phi = cast<PHINode>(I);
      phi->addIncoming(UnwindDestPHIValues[i], src);
    }
  }
};
} // end anonymous namespace

static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) {
  BasicBlock::iterator It = BB.getFirstNonPHIIt();
  while (It != BB.end()) {
    if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(It)) {
      if (IntrinsicCall->isEntry()) {
        return IntrinsicCall;
      }
    }
    It = std::next(It);
  }
  return nullptr;
}

/// Get or create a target for the branch from ResumeInsts.
BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
  if (InnerResumeDest) return InnerResumeDest;

  // Split the landing pad.
  BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
  InnerResumeDest =
      OuterResumeDest->splitBasicBlock(SplitPoint,
                                       OuterResumeDest->getName() + ".body");

  // The number of incoming edges we expect to the inner landing pad.
  const unsigned PHICapacity = 2;

  // Create corresponding new PHIs for all the PHIs in the outer landing pad.
  BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
  BasicBlock::iterator I = OuterResumeDest->begin();
  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    PHINode *OuterPHI = cast<PHINode>(I);
    PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
                                        OuterPHI->getName() + ".lpad-body");
    InnerPHI->insertBefore(InsertPoint);
    OuterPHI->replaceAllUsesWith(InnerPHI);
    InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
  }

  // Create a PHI for the exception values.
  InnerEHValuesPHI =
      PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body");
  InnerEHValuesPHI->insertBefore(InsertPoint);
  CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
  InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);

  // All done.
  return InnerResumeDest;
}

/// Forward the 'resume' instruction to the caller's landing pad block.
/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void LandingPadInliningInfo::forwardResume(
    ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
  BasicBlock *Dest = getInnerResumeDest();
  BasicBlock *Src = RI->getParent();

  BranchInst::Create(Dest, Src);

  // Update the PHIs in the destination. They were inserted in an order which
  // makes this work.
  addIncomingPHIValuesForInto(Src, Dest);

  InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
  RI->eraseFromParent();
}

/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
static Value *getParentPad(Value *EHPad) {
  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
    return FPI->getParentPad();
  return cast<CatchSwitchInst>(EHPad)->getParentPad();
}

using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
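// Values stored in this map follow the convention of getUnwindDestToken below:
// an EH pad instruction means "unwinds to that pad", ConstantTokenNone means
// "unwinds to the caller", and a null entry marks a pad for which no
// definitive answer has been found (yet).
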
/// Helper for getUnwindDestToken that does the descendant-ward part of
/// the search.
static Value *getUnwindDestTokenHelper(Instruction *EHPad,
                                       UnwindDestMemoTy &MemoMap) {
  SmallVector<Instruction *, 8> Worklist(1, EHPad);

  while (!Worklist.empty()) {
    Instruction *CurrentPad = Worklist.pop_back_val();
    // We only put pads on the worklist that aren't in the MemoMap. When
    // we find an unwind dest for a pad we may update its ancestors, but
    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
    // so they should never get updated while queued on the worklist.
    assert(!MemoMap.count(CurrentPad));
    Value *UnwindDestToken = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
      if (CatchSwitch->hasUnwindDest()) {
        UnwindDestToken = &*CatchSwitch->getUnwindDest()->getFirstNonPHIIt();
      } else {
        // Catchswitch doesn't have a 'nounwind' variant, and one might be
        // annotated as "unwinds to caller" when really it's nounwind (see
        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
        // parent's unwind dest from this. We can check its catchpads'
        // descendants, since they might include a cleanuppad with an
        // "unwinds to caller" cleanupret, which can be trusted.
        for (auto HI = CatchSwitch->handler_begin(),
                  HE = CatchSwitch->handler_end();
             HI != HE && !UnwindDestToken; ++HI) {
          BasicBlock *HandlerBlock = *HI;
          auto *CatchPad =
              cast<CatchPadInst>(&*HandlerBlock->getFirstNonPHIIt());
          for (User *Child : CatchPad->users()) {
            // Intentionally ignore invokes here -- since the catchswitch is
            // marked "unwind to caller", it would be a verifier error if it
            // contained an invoke which unwinds out of it, so any invoke we'd
            // encounter must unwind to some child of the catch.
            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
              continue;

            Instruction *ChildPad = cast<Instruction>(Child);
            auto Memo = MemoMap.find(ChildPad);
            if (Memo == MemoMap.end()) {
              // Haven't figured out this child pad yet; queue it.
              Worklist.push_back(ChildPad);
              continue;
            }
            // We've already checked this child, but might have found that
            // it offers no proof either way.
            Value *ChildUnwindDestToken = Memo->second;
            if (!ChildUnwindDestToken)
              continue;
            // We already know the child's unwind dest, which can either
            // be ConstantTokenNone to indicate unwind to caller, or can
            // be another child of the catchpad. Only the former indicates
            // the unwind dest of the catchswitch.
            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
              UnwindDestToken = ChildUnwindDestToken;
              break;
            }
            assert(getParentPad(ChildUnwindDestToken) == CatchPad);
          }
        }
      }
    } else {
      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
      for (User *U : CleanupPad->users()) {
        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
            UnwindDestToken = &*RetUnwindDest->getFirstNonPHIIt();
          else
            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
          break;
        }
        Value *ChildUnwindDestToken;
        if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
          ChildUnwindDestToken = &*Invoke->getUnwindDest()->getFirstNonPHIIt();
        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
          Instruction *ChildPad = cast<Instruction>(U);
          auto Memo = MemoMap.find(ChildPad);
          if (Memo == MemoMap.end()) {
            // Haven't resolved this child yet; queue it and keep searching.
            Worklist.push_back(ChildPad);
            continue;
          }
          // We've checked this child, but still need to ignore it if it
          // had no proof either way.
          ChildUnwindDestToken = Memo->second;
          if (!ChildUnwindDestToken)
            continue;
        } else {
          // Not a relevant user of the cleanuppad
          continue;
        }
        // In a well-formed program, the child/invoke must either unwind to
        // an(other) child of the cleanup, or exit the cleanup. In the
        // first case, continue searching.
        if (isa<Instruction>(ChildUnwindDestToken) &&
            getParentPad(ChildUnwindDestToken) == CleanupPad)
          continue;
        UnwindDestToken = ChildUnwindDestToken;
        break;
      }
    }
    // If we haven't found an unwind dest for CurrentPad, we may have queued its
    // children, so move on to the next in the worklist.
    if (!UnwindDestToken)
      continue;

    // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
    // any ancestors of CurrentPad up to but not including UnwindDestToken's
    // parent pad. Record this in the memo map, and check to see if the
    // original EHPad being queried is one of the ones exited.
    Value *UnwindParent;
    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
      UnwindParent = getParentPad(UnwindPad);
    else
      UnwindParent = nullptr;
    bool ExitedOriginalPad = false;
    for (Instruction *ExitedPad = CurrentPad;
         ExitedPad && ExitedPad != UnwindParent;
         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
      // Skip over catchpads since they just follow their catchswitches.
      if (isa<CatchPadInst>(ExitedPad))
        continue;
      MemoMap[ExitedPad] = UnwindDestToken;
      ExitedOriginalPad |= (ExitedPad == EHPad);
    }

    if (ExitedOriginalPad)
      return UnwindDestToken;

    // Continue the search.
  }

  // No definitive information is contained within this funclet.
  return nullptr;
}

/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
/// return that pad instruction. If it unwinds to caller, return
/// ConstantTokenNone. If it does not have a definitive unwind destination,
/// return nullptr.
///
/// This routine gets invoked for calls in funclets in inlinees when inlining
/// an invoke. Since many funclets don't have calls inside them, it's queried
/// on-demand rather than building a map of pads to unwind dests up front.
/// Determining a funclet's unwind dest may require recursively searching its
/// descendants, and also ancestors and cousins if the descendants don't provide
/// an answer. Since most funclets will have their unwind dest immediately
/// available as the unwind dest of a catchswitch or cleanupret, this routine
/// searches top-down from the given pad and then up. To avoid worst-case
/// quadratic run-time given that approach, it uses a memo map to avoid
/// re-processing funclet trees. The callers that rewrite the IR as they go
/// take advantage of this, for correctness, by checking/forcing rewritten
/// pads' entries to match the original callee view.
static Value *getUnwindDestToken(Instruction *EHPad,
                                 UnwindDestMemoTy &MemoMap) {
  // Catchpads unwind to the same place as their catchswitch;
  // redirect any queries on catchpads so the code below can
  // deal with just catchswitches and cleanuppads.
  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
    EHPad = CPI->getCatchSwitch();

  // Check if we've already determined the unwind dest for this pad.
  auto Memo = MemoMap.find(EHPad);
  if (Memo != MemoMap.end())
    return Memo->second;

  // Search EHPad and, if necessary, its descendants.
  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
  if (UnwindDestToken)
    return UnwindDestToken;

  // No information is available for this EHPad from itself or any of its
  // descendants. An unwind all the way out to a pad in the caller would
  // need also to agree with the unwind dest of the parent funclet, so
  // search up the chain to try to find a funclet with information. Put
  // null entries in the memo map to avoid re-processing as we go up.
  MemoMap[EHPad] = nullptr;
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 4> TempMemos;
  TempMemos.insert(EHPad);
#endif
  Instruction *LastUselessPad = EHPad;
  Value *AncestorToken;
  for (AncestorToken = getParentPad(EHPad);
       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
       AncestorToken = getParentPad(AncestorToken)) {
    // Skip over catchpads since they just follow their catchswitches.
    if (isa<CatchPadInst>(AncestorPad))
      continue;
    // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
    // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
    // call to getUnwindDestToken, that would mean that AncestorPad had no
    // information in itself, its descendants, or its ancestors. If that
    // were the case, then we should also have recorded the lack of information
    // for the descendant that we're coming from. So assert that we don't
    // find a null entry in the MemoMap for AncestorPad.
    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
    auto AncestorMemo = MemoMap.find(AncestorPad);
    if (AncestorMemo == MemoMap.end()) {
      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
    } else {
      UnwindDestToken = AncestorMemo->second;
    }
    if (UnwindDestToken)
      break;
    LastUselessPad = AncestorPad;
    MemoMap[LastUselessPad] = nullptr;
#ifndef NDEBUG
    TempMemos.insert(LastUselessPad);
#endif
  }

  // We know that getUnwindDestTokenHelper was called on LastUselessPad and
  // returned nullptr (and likewise for EHPad and any of its ancestors up to
  // LastUselessPad), so LastUselessPad has no information from below. Since
  // getUnwindDestTokenHelper must investigate all downward paths through
  // no-information nodes to prove that a node has no information like this,
  // and since any time it finds information it records it in the MemoMap for
  // not just the immediately-containing funclet but also any ancestors also
  // exited, it must be the case that, walking downward from LastUselessPad,
  // visiting just those nodes which have not been mapped to an unwind dest
  // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
  // they are just used to keep getUnwindDestTokenHelper from repeating work),
  // any node visited must have been exhaustively searched with no information
  // for it found.
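  // In short: every not-yet-mapped pad in the subtree below LastUselessPad can
  // safely share the answer we are about to return, so the walk below memoizes
  // UnwindDestToken for all of them.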
  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
  while (!Worklist.empty()) {
    Instruction *UselessPad = Worklist.pop_back_val();
    auto Memo = MemoMap.find(UselessPad);
    if (Memo != MemoMap.end() && Memo->second) {
      // Here the name 'UselessPad' is a bit of a misnomer, because we've found
      // that it is a funclet that does have information about unwinding to
      // a particular destination; its parent was a useless pad.
      // Since its parent has no information, the unwind edge must not escape
      // the parent, and must target a sibling of this pad. This local unwind
      // gives us no information about EHPad. Leave it and the subtree rooted
      // at it alone.
      assert(getParentPad(Memo->second) == getParentPad(UselessPad));
      continue;
    }
    // We know we don't have information for UselessPad. If it has an entry in
    // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
    // added on this invocation of getUnwindDestToken; if a previous invocation
    // recorded nullptr, it would have had to prove that the ancestors of
    // UselessPad, which include LastUselessPad, had no information, and that
    // in turn would have required proving that the descendants of
    // LastUselessPad, which include EHPad, have no information about
    // LastUselessPad, which would imply that EHPad was mapped to nullptr in
    // the MemoMap on that invocation, which isn't the case if we got here.
    assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
    // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
    // information that we'd be contradicting by making a map entry for it
    // (which is something that getUnwindDestTokenHelper must have proved for
    // us to get here). Just assert on its direct users here; the checks in
    // this downward walk at its descendants will verify that they don't have
    // any unwind edges that exit 'UselessPad' either (i.e. they either have no
    // unwind edges or unwind to a sibling).
    MemoMap[UselessPad] = UnwindDestToken;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
      assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
        auto *CatchPad = &*HandlerBlock->getFirstNonPHIIt();
        for (User *U : CatchPad->users()) {
          assert((!isa<InvokeInst>(U) ||
                  (getParentPad(&*cast<InvokeInst>(U)
                                     ->getUnwindDest()
                                     ->getFirstNonPHIIt()) == CatchPad)) &&
                 "Expected useless pad");
          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
            Worklist.push_back(cast<Instruction>(U));
        }
      }
    } else {
      assert(isa<CleanupPadInst>(UselessPad));
      for (User *U : UselessPad->users()) {
        assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
        assert(
            (!isa<InvokeInst>(U) ||
             (getParentPad(
                  &*cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHIIt()) ==
              UselessPad)) &&
            "Expected useless pad");
        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
          Worklist.push_back(cast<Instruction>(U));
      }
    }
  }

  return UnwindDestToken;
}

/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any such calls, and if so,
/// it rewrites them to be invokes that unwind to UnwindEdge and returns the
/// block containing the new invoke so the caller can add the corresponding
/// incoming values to the PHI nodes in the unwind destination.
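///
/// For example (illustrative IR), a throwing call in the inlined body such as
///   %r = call i32 @may_throw()
/// is rewritten by changeToInvokeAndSplitBasicBlock into
///   %r = invoke i32 @may_throw()
///           to label %bb.split unwind label %unwind.dest
/// where %unwind.dest is the UnwindEdge block supplied by the caller and
/// %bb.split continues with the rest of the original block.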
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
    BasicBlock *BB, BasicBlock *UnwindEdge,
    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
  for (Instruction &I : llvm::make_early_inc_range(*BB)) {
    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(&I);

    if (!CI || CI->doesNotThrow())
      continue;

    // We do not need to (and in fact, cannot) convert possibly throwing calls
    // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
    // invokes. The caller's "segment" of the deoptimization continuation
    // attached to the newly inlined @llvm.experimental_deoptimize
    // (resp. @llvm.experimental.guard) call should contain the exception
    // handling logic, if any.
    if (auto *F = CI->getCalledFunction())
      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
          F->getIntrinsicID() == Intrinsic::experimental_guard)
        continue;

    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
      // This call is nested inside a funclet. If that funclet has an unwind
      // destination within the inlinee, then unwinding out of this call would
      // be UB. Rewriting this call to an invoke which targets the inlined
      // invoke's unwind dest would give the call's parent funclet multiple
      // unwind destinations, which is something that subsequent EH table
      // generation can't handle and that the verifier rejects. So when we
      // see such a call, leave it as a call.
      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
      Value *UnwindDestToken =
          getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
        continue;
#ifndef NDEBUG
      Instruction *MemoKey;
      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
        MemoKey = CatchPad->getCatchSwitch();
      else
        MemoKey = FuncletPad;
      assert(FuncletUnwindMap->count(MemoKey) &&
             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
             "must get memoized to avoid confusing later searches");
#endif // NDEBUG
    }

    changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
    return BB;
  }
  return nullptr;
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                                    ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function, scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.
  LandingPadInliningInfo Invoke(II);

  // Get all of the inlined landing pad instructions.
  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
  for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
       I != E; ++I)
    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
      InlinedLPads.insert(II->getLandingPadInst());

  // Append the clauses from the outer landing pad instruction into the inlined
  // landing pad instructions.
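  // (An exception unwinding through an inlined landing pad must still be able
  // to reach the handlers the caller's landing pad declared, so the outer
  // clauses and cleanup flag are merged into each inlined pad.)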
  LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
  for (LandingPadInst *InlinedLPad : InlinedLPads) {
    unsigned OuterNum = OuterLPad->getNumClauses();
    InlinedLPad->reserveClauses(OuterNum);
    for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
    if (OuterLPad->isCleanup())
      InlinedLPad->setCleanup(true);
  }

  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (InlinedCodeInfo.ContainsCalls)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, Invoke.getOuterResumeDest()))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        Invoke.addIncomingPHIValuesFor(NewBB);

    // Forward any resumes that are remaining here.
    if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
      Invoke.forwardResume(RI, InlinedLPads);
  }

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                               ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *UnwindDest = II->getUnwindDest();
  Function *Caller = FirstNewBlock->getParent();

  assert(UnwindDest->getFirstNonPHIIt()->isEHPad() && "unexpected BasicBlock!");

  // If there are PHI nodes in the unwind destination block, we need to keep
  // track of which values came into them from the invoke before removing the
  // edge from this block.
  SmallVector<Value *, 8> UnwindDestPHIValues;
  BasicBlock *InvokeBB = II->getParent();
  for (PHINode &PHI : UnwindDest->phis()) {
    // Save the value to use for this edge.
    UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB));
  }

  // Add incoming-PHI values to the unwind destination block for the given basic
  // block, using the values for the original invoke's source block.
  auto UpdatePHINodes = [&](BasicBlock *Src) {
    BasicBlock::iterator I = UnwindDest->begin();
    for (Value *V : UnwindDestPHIValues) {
      PHINode *PHI = cast<PHINode>(I);
      PHI->addIncoming(V, Src);
      ++I;
    }
  };

  // This connects all the instructions which 'unwind to caller' to the invoke
  // destination.
  UnwindDestMemoTy FuncletUnwindMap;
  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
      if (CRI->unwindsToCaller()) {
        auto *CleanupPad = CRI->getCleanupPad();
        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI->getIterator());
        CRI->eraseFromParent();
        UpdatePHINodes(&*BB);
        // Finding a cleanupret with an unwind destination would confuse
        // subsequent calls to getUnwindDestToken, so map the cleanuppad
        // to short-circuit any such calls and recognize this as an "unwind
        // to caller" cleanup.
        assert(!FuncletUnwindMap.count(CleanupPad) ||
               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
        FuncletUnwindMap[CleanupPad] =
            ConstantTokenNone::get(Caller->getContext());
      }
    }

    BasicBlock::iterator I = BB->getFirstNonPHIIt();
    if (!I->isEHPad())
      continue;

    Instruction *Replacement = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
      if (CatchSwitch->unwindsToCaller()) {
        Value *UnwindDestToken;
        if (auto *ParentPad =
                dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
          // This catchswitch is nested inside another funclet. If that
          // funclet has an unwind destination within the inlinee, then
          // unwinding out of this catchswitch would be UB. Rewriting this
          // catchswitch to unwind to the inlined invoke's unwind dest would
          // give the parent funclet multiple unwind destinations, which is
          // something that subsequent EH table generation can't handle and
          // that the verifier rejects. So when we see such a call, leave it
          // as "unwind to caller".
          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
            continue;
        } else {
          // This catchswitch has no parent to inherit constraints from, and
          // none of its descendants can have an unwind edge that exits it and
          // targets another funclet in the inlinee. It may or may not have a
          // descendant that definitively has an unwind to caller. In either
          // case, we'll have to assume that any unwinds out of it may need to
          // be routed to the caller, so treat it as though it has a definitive
          // unwind to caller.
          UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
        }
        auto *NewCatchSwitch = CatchSwitchInst::Create(
            CatchSwitch->getParentPad(), UnwindDest,
            CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
            CatchSwitch->getIterator());
        for (BasicBlock *PadBB : CatchSwitch->handlers())
          NewCatchSwitch->addHandler(PadBB);
        // Propagate info for the old catchswitch over to the new one in
        // the unwind map. This also serves to short-circuit any subsequent
        // checks for the unwind dest of this catchswitch, which would get
        // confused if they found the outer handler in the callee.
        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
        Replacement = NewCatchSwitch;
      }
    } else if (!isa<FuncletPadInst>(I)) {
      llvm_unreachable("unexpected EHPad!");
    }

    if (Replacement) {
      Replacement->takeName(&*I);
      I->replaceAllUsesWith(Replacement);
      I->eraseFromParent();
      UpdatePHINodes(&*BB);
    }
  }

  if (InlinedCodeInfo.ContainsCalls)
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, UnwindDest, &FuncletUnwindMap))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        UpdatePHINodes(NewBB);

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  UnwindDest->removePredecessor(InvokeBB);
}

static bool haveCommonPrefix(MDNode *MIBStackContext,
                             MDNode *CallsiteStackContext) {
  assert(MIBStackContext->getNumOperands() > 0 &&
         CallsiteStackContext->getNumOperands() > 0);
  // Because of the context trimming performed during matching, the callsite
  // context could have more stack ids than the MIB. We match up to the end of
  // the shortest stack context.
  for (auto MIBStackIter = MIBStackContext->op_begin(),
            CallsiteStackIter = CallsiteStackContext->op_begin();
       MIBStackIter != MIBStackContext->op_end() &&
       CallsiteStackIter != CallsiteStackContext->op_end();
       MIBStackIter++, CallsiteStackIter++) {
    auto *Val1 = mdconst::dyn_extract<ConstantInt>(*MIBStackIter);
    auto *Val2 = mdconst::dyn_extract<ConstantInt>(*CallsiteStackIter);
    assert(Val1 && Val2);
    if (Val1->getZExtValue() != Val2->getZExtValue())
      return false;
  }
  return true;
}

static void removeMemProfMetadata(CallBase *Call) {
  Call->setMetadata(LLVMContext::MD_memprof, nullptr);
}

static void removeCallsiteMetadata(CallBase *Call) {
  Call->setMetadata(LLVMContext::MD_callsite, nullptr);
}

static void updateMemprofMetadata(CallBase *CI,
                                  const std::vector<Metadata *> &MIBList) {
  assert(!MIBList.empty());
  // Remove existing memprof, which will either be replaced or may not be needed
  // if we are able to use a single allocation type function attribute.
  removeMemProfMetadata(CI);
  CallStackTrie CallStack;
  for (Metadata *MIB : MIBList)
    CallStack.addCallStack(cast<MDNode>(MIB));
  bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
  assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
  if (!MemprofMDAttached)
    // If we used a function attribute remove the callsite metadata as well.
    removeCallsiteMetadata(CI);
}

// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
// the call that was inlined.
static void propagateMemProfHelper(const CallBase *OrigCall,
                                   CallBase *ClonedCall,
                                   MDNode *InlinedCallsiteMD) {
  MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite);
  MDNode *ClonedCallsiteMD = nullptr;
  // Check if the call originally had callsite metadata, and update it for the
  // new call in the inlined body.
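  // E.g. (illustrative): if the cloned call carried !callsite !{i64 123} and
  // the call being inlined carried !callsite !{i64 456}, the clone's context
  // becomes !{i64 123, i64 456}; only MIB entries whose stack context starts
  // with that longer context stay on the clone, the rest remain on the
  // out-of-line original call.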
  if (OrigCallsiteMD) {
    // The cloned call's context is now the concatenation of the original call's
    // callsite metadata and the callsite metadata on the call where it was
    // inlined.
    ClonedCallsiteMD = MDNode::concatenate(OrigCallsiteMD, InlinedCallsiteMD);
    ClonedCall->setMetadata(LLVMContext::MD_callsite, ClonedCallsiteMD);
  }

  // Update any memprof metadata on the cloned call.
  MDNode *OrigMemProfMD = ClonedCall->getMetadata(LLVMContext::MD_memprof);
  if (!OrigMemProfMD)
    return;
  // We currently expect that allocations with memprof metadata also have
  // callsite metadata for the allocation's part of the context.
  assert(OrigCallsiteMD);

  // New call's MIB list.
  std::vector<Metadata *> NewMIBList;

  // For each MIB metadata, check if its call stack context starts with the
  // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
  // the inlined body. If not, it stays on the out-of-line original call.
  for (auto &MIBOp : OrigMemProfMD->operands()) {
    MDNode *MIB = dyn_cast<MDNode>(MIBOp);
    // Stack is first operand of MIB.
    MDNode *StackMD = getMIBStackNode(MIB);
    assert(StackMD);
    // See if the new cloned callsite context matches this profiled context.
    if (haveCommonPrefix(StackMD, ClonedCallsiteMD))
      // Add it to the cloned call's MIB list.
      NewMIBList.push_back(MIB);
  }
  if (NewMIBList.empty()) {
    removeMemProfMetadata(ClonedCall);
    removeCallsiteMetadata(ClonedCall);
    return;
  }
  if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
    updateMemprofMetadata(ClonedCall, NewMIBList);
}

// Update memprof related metadata (!memprof and !callsite) based on the
// inlining of Callee into the callsite at CB. The updates include merging the
// inlined callee's callsite metadata with that of the inlined call,
// and moving the subset of any memprof contexts to the inlined callee
// allocations if they match the new inlined call stack.
static void
propagateMemProfMetadata(Function *Callee, CallBase &CB,
                         bool ContainsMemProfMetadata,
                         const ValueMap<const Value *, WeakTrackingVH> &VMap) {
  MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite);
  // Only need to update if the inlined callsite had callsite metadata, or if
  // there was any memprof metadata inlined.
  if (!CallsiteMD && !ContainsMemProfMetadata)
    return;

  // Propagate metadata onto the cloned calls in the inlined callee.
  for (const auto &Entry : VMap) {
    // See if this is a call that has been inlined and remapped, and not
    // simplified away in the process.
    auto *OrigCall = dyn_cast_or_null<CallBase>(Entry.first);
    auto *ClonedCall = dyn_cast_or_null<CallBase>(Entry.second);
    if (!OrigCall || !ClonedCall)
      continue;
    // If the inlined callsite did not have any callsite metadata, then it isn't
    // involved in any profiled call contexts, and we can remove any memprof
    // metadata on the cloned call.
    if (!CallsiteMD) {
      removeMemProfMetadata(ClonedCall);
      removeCallsiteMetadata(ClonedCall);
      continue;
    }
    propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD);
  }
}

/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
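///
/// For example (illustrative), if the call site carries
/// !llvm.mem.parallel_loop_access !0 because it sits in a loop marked
/// parallel, every cloned load or store from the callee body must be tagged
/// with !0 as well, or the loop would no longer be recognized as parallel.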
static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
                                      Function::iterator FEnd) {
  MDNode *MemParallelLoopAccess =
      CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
  MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias);
  if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
    return;

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // This metadata is only relevant for instructions that access memory.
      if (!I.mayReadOrWriteMemory())
        continue;

      if (MemParallelLoopAccess) {
        // TODO: This probably should not overwrite MemParallelLoopAccess.
        MemParallelLoopAccess = MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
            MemParallelLoopAccess);
        I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
                      MemParallelLoopAccess);
      }

      if (AccessGroup)
        I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
            I.getMetadata(LLVMContext::MD_access_group), AccessGroup));

      if (AliasScope)
        I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));

      if (NoAlias)
        I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_noalias), NoAlias));
    }
  }
}

/// Bundle operands of the inlined function must be added to inlined call sites.
static void PropagateOperandBundles(Function::iterator InlinedBB,
                                    Instruction *CallSiteEHPad) {
  for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) {
    CallBase *I = dyn_cast<CallBase>(&II);
    if (!I)
      continue;
    // Skip call sites which already have a "funclet" bundle.
    if (I->getOperandBundle(LLVMContext::OB_funclet))
      continue;
    // Skip call sites which are nounwind intrinsics (as long as they don't
    // lower into regular function calls in the course of IR transformations).
    auto *CalledFn =
        dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
    if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
        !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID()))
      continue;

    SmallVector<OperandBundleDef, 1> OpBundles;
    I->getOperandBundlesAsDefs(OpBundles);
    OpBundles.emplace_back("funclet", CallSiteEHPad);

    Instruction *NewInst = CallBase::Create(I, OpBundles, I->getIterator());
    NewInst->takeName(I);
    I->replaceAllUsesWith(NewInst);
    I->eraseFromParent();
  }
}

namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of the
/// metadata, putting the two versions in separate scope domains.
class ScopedAliasMetadataDeepCloner {
  using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
  SetVector<const MDNode *> MD;
  MetadataMap MDMap;
  void addRecursiveMetadataUses();

public:
  ScopedAliasMetadataDeepCloner(const Function *F);

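  // Intended usage: construct the cloner from the callee, call clone() once
  // after the callee body has been duplicated into the caller, then remap()
  // the newly inlined blocks so they refer to the cloned scopes rather than
  // the callee's originals.
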
  /// Create a new clone of the scoped alias metadata, which will be used by
  /// subsequent remap() calls.
  void clone();

  /// Remap instructions in the given range from the original to the cloned
  /// metadata.
  void remap(Function::iterator FStart, Function::iterator FEnd);
};
} // namespace

ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
    const Function *F) {
  for (const BasicBlock &BB : *F) {
    for (const Instruction &I : BB) {
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        MD.insert(M);
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        MD.insert(M);

      // We also need to clone the metadata in noalias intrinsics.
      if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        MD.insert(Decl->getScopeList());
    }
  }
  addRecursiveMetadataUses();
}

void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
  while (!Queue.empty()) {
    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
    for (const Metadata *Op : M->operands())
      if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
        if (MD.insert(OpMD))
          Queue.push_back(OpMD);
  }
}

void ScopedAliasMetadataDeepCloner::clone() {
  assert(MDMap.empty() && "clone() already called ?");

  SmallVector<TempMDTuple, 16> DummyNodes;
  for (const MDNode *I : MD) {
    DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), {}));
    MDMap[I].reset(DummyNodes.back().get());
  }

  // Create new metadata nodes to replace the dummy nodes, replacing old
  // metadata references with either a dummy node or an already-created new
  // node.
  SmallVector<Metadata *, 4> NewOps;
  for (const MDNode *I : MD) {
    for (const Metadata *Op : I->operands()) {
      if (const MDNode *M = dyn_cast<MDNode>(Op))
        NewOps.push_back(MDMap[M]);
      else
        NewOps.push_back(const_cast<Metadata *>(Op));
    }

    MDNode *NewM = MDNode::get(I->getContext(), NewOps);
    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
    assert(TempM->isTemporary() && "Expected temporary node");

    TempM->replaceAllUsesWith(NewM);
    NewOps.clear();
  }
}

void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
                                          Function::iterator FEnd) {
  if (MDMap.empty())
    return; // Nothing to do.

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // TODO: The null checks for the MDMap.lookup() results should no longer
      // be necessary.
      if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_alias_scope, MNew);

      if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_noalias, MNew);

      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
          Decl->setScopeList(MNew);
    }
  }
}

/// If the inlined function has noalias arguments,
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
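///
/// For example (illustrative), when inlining
///   define void @f(ptr noalias %p, ptr %q)
/// a fresh scope, say !1, is created for %p: cloned accesses derived from %p
/// are tagged !alias.scope !{!1}, while cloned accesses known not to be
/// derived from %p are tagged !noalias !{!1}, preserving the argument's
/// noalias guarantee after the call boundary disappears.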
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
                                  const DataLayout &DL, AAResults *CalleeAAR,
                                  ClonedCodeInfo &InlinedFunctionInfo) {
  if (!EnableNoAliasConversion)
    return;

  const Function *CalledFunc = CB.getCalledFunction();
  SmallVector<const Argument *, 4> NoAliasArgs;

  for (const Argument &Arg : CalledFunc->args())
    if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // To do a good job, if a noalias variable is captured, we need to know if
  // the capture point dominates the particular use we're considering.
  DominatorTree DT;
  DT.recalculate(const_cast<Function&>(*CalledFunc));

  // noalias indicates that pointer values based on the argument do not alias
  // pointer values which are not based on it. So we add a new "scope" for each
  // noalias function argument. Accesses using pointers based on that argument
  // become part of that alias scope, accesses using pointers not based on that
  // argument are tagged as noalias with that scope.

  DenseMap<const Argument *, MDNode *> NewScopes;
  MDBuilder MDB(CalledFunc->getContext());

  // Create a new scope domain for this function.
  MDNode *NewDomain =
      MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
  for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
    const Argument *A = NoAliasArgs[i];

    std::string Name = std::string(CalledFunc->getName());
    if (A->hasName()) {
      Name += ": %";
      Name += A->getName();
    } else {
      Name += ": argument ";
      Name += utostr(i);
    }

    // Note: We always create a new anonymous root here. This is true regardless
    // of the linkage of the callee because the aliasing "scope" is not just a
    // property of the callee, but also all control dependencies in the caller.
    MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
    NewScopes.insert(std::make_pair(A, NewScope));

    if (UseNoAliasIntrinsic) {
      // Introduce a llvm.experimental.noalias.scope.decl for the noalias
      // argument.
      MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
      auto *NoAliasDecl =
          IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
      // Ignore the result for now. The result will be used when the
      // llvm.noalias intrinsic is introduced.
      (void)NoAliasDecl;
    }
  }

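  // E.g. (illustrative), the declaration emitted above for a noalias argument
  // looks like
  //   call void @llvm.experimental.noalias.scope.decl(metadata !2)
  // where !2 is the single-element scope list created for that argument.
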
  // Iterate over all new instructions in the map; for all memory-access
  // instructions, add the alias scope metadata.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
      if (!VMI->second)
        continue;

      Instruction *NI = dyn_cast<Instruction>(VMI->second);
      if (!NI || InlinedFunctionInfo.isSimplified(I, NI))
        continue;

      bool IsArgMemOnlyCall = false, IsFuncCall = false;
      SmallVector<const Value *, 2> PtrArgs;

      if (const LoadInst *LI = dyn_cast<LoadInst>(I))
        PtrArgs.push_back(LI->getPointerOperand());
      else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
        PtrArgs.push_back(SI->getPointerOperand());
      else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
        PtrArgs.push_back(VAAI->getPointerOperand());
      else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
        PtrArgs.push_back(CXI->getPointerOperand());
      else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
        PtrArgs.push_back(RMWI->getPointerOperand());
      else if (const auto *Call = dyn_cast<CallBase>(I)) {
        // If we know that the call does not access memory, then we'll still
        // know that about the inlined clone of this call site, and we don't
        // need to add metadata.
        if (Call->doesNotAccessMemory())
          continue;

        IsFuncCall = true;
        if (CalleeAAR) {
          MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);

          // We'll retain this knowledge without additional metadata.
          if (ME.onlyAccessesInaccessibleMem())
            continue;

          if (ME.onlyAccessesArgPointees())
            IsArgMemOnlyCall = true;
        }

        for (Value *Arg : Call->args()) {
          // Only care about pointer arguments. If a noalias argument is
          // accessed through a non-pointer argument, it must be captured
          // first (e.g. via ptrtoint), and we protect against captures below.
          if (!Arg->getType()->isPointerTy())
            continue;

          PtrArgs.push_back(Arg);
        }
      }

      // If we found no pointers, then this instruction is not suitable for
      // pairing with an instruction to receive aliasing metadata. However, if
      // this is a call, we might just alias with none of the noalias arguments.
      if (PtrArgs.empty() && !IsFuncCall)
        continue;

      // It is possible that there is only one underlying object, but you
      // need to go through several PHIs to see it, and thus could be
      // repeated in the Objects list.
      SmallPtrSet<const Value *, 4> ObjSet;
      SmallVector<Metadata *, 4> Scopes, NoAliases;

      for (const Value *V : PtrArgs) {
        SmallVector<const Value *, 4> Objects;
        getUnderlyingObjects(V, Objects, /* LI = */ nullptr);

        for (const Value *O : Objects)
          ObjSet.insert(O);
      }

      // Figure out if we're derived from anything that is not a noalias
      // argument.
      bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
           UsesUnknownObject = false;
      for (const Value *V : ObjSet) {
        // Is this value a constant that cannot be derived from any pointer
        // value (we need to exclude constant expressions, for example, that
        // are formed from arithmetic on global symbols).
        bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
                             isa<ConstantPointerNull>(V) ||
                             isa<ConstantDataVector>(V) || isa<UndefValue>(V);
        if (IsNonPtrConst)
          continue;

        // If this is anything other than a noalias argument, then we cannot
        // completely describe the aliasing properties using alias.scope
        // metadata (and, thus, won't add any).
        if (const Argument *A = dyn_cast<Argument>(V)) {
          if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
            UsesAliasingPtr = true;
        } else {
          UsesAliasingPtr = true;
        }

        if (isEscapeSource(V)) {
          // An escape source can only alias with a noalias argument if it has
          // been captured beforehand.
          RequiresNoCaptureBefore = true;
        } else if (!isa<Argument>(V) && !isIdentifiedObject(V)) {
          // If this is neither an escape source, nor some identified object
          // (which cannot directly alias a noalias argument), nor some other
          // argument (which, by definition, also cannot alias a noalias
          // argument), conservatively do not make any assumptions.
          UsesUnknownObject = true;
        }
      }

      // Nothing we can do if the used underlying object cannot be reliably
      // determined.
      if (UsesUnknownObject)
        continue;

      // A function call can always get captured noalias pointers (via other
      // parameters, globals, etc.).
      if (IsFuncCall && !IsArgMemOnlyCall)
        RequiresNoCaptureBefore = true;

      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias sets, and add those for which:
      //   1. The noalias argument is not in the set of objects from which we
      //      definitely derive.
      //   2. The noalias argument has not yet been captured.
      // An arbitrary function that might load pointers could see captured
      // noalias arguments via other noalias arguments or globals, and so we
      // must always check for prior capture.
      for (const Argument *A : NoAliasArgs) {
        if (ObjSet.contains(A))
          continue; // May be based on a noalias argument.

        // It might be tempting to skip the PointerMayBeCapturedBefore check if
        // A->hasNoCaptureAttr() is true, but this is incorrect because
        // nocapture only guarantees that no copies outlive the function, not
        // that the value cannot be locally captured.
        if (!RequiresNoCaptureBefore ||
            !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false,
                                        /* StoreCaptures */ false, I, &DT))
          NoAliases.push_back(NewScopes[A]);
      }

      if (!NoAliases.empty())
        NI->setMetadata(LLVMContext::MD_noalias,
                        MDNode::concatenate(
                            NI->getMetadata(LLVMContext::MD_noalias),
                            MDNode::get(CalledFunc->getContext(), NoAliases)));

      // Next, we want to figure out all of the sets to which we might belong.
      // We might belong to a set if the noalias argument is in the set of
      // underlying objects. If there is some non-noalias argument in our list
      // of underlying objects, then we cannot add a scope because the fact
      // that some access does not alias with any set of our noalias arguments
      // cannot itself guarantee that it does not alias with this access
      // (because there is some pointer of unknown origin involved and the
      // other access might also depend on this pointer). We also cannot add
      // scopes to arbitrary functions unless we know they don't access any
      // non-parameter pointer-values.
      bool CanAddScopes = !UsesAliasingPtr;
      if (CanAddScopes && IsFuncCall)
        CanAddScopes = IsArgMemOnlyCall;

      if (CanAddScopes)
        for (const Argument *A : NoAliasArgs) {
          if (ObjSet.count(A))
            Scopes.push_back(NewScopes[A]);
        }

      if (!Scopes.empty())
        NI->setMetadata(
            LLVMContext::MD_alias_scope,
            MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
                                MDNode::get(CalledFunc->getContext(), Scopes)));
    }
  }
}

static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
                                                   ReturnInst *End) {
  assert(Begin->getParent() == End->getParent() &&
         "Expected to be in same basic block!");
  auto BeginIt = Begin->getIterator();
  assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
  return !llvm::isGuaranteedToTransferExecutionToSuccessor(
      ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
}

// Add attributes from CB params and Fn attributes that can always be propagated
// to the corresponding argument / inner callbases.
static void AddParamAndFnBasicAttributes(const CallBase &CB,
                                         ValueToValueMapTy &VMap,
                                         ClonedCodeInfo &InlinedFunctionInfo) {
  auto *CalledFunction = CB.getCalledFunction();
  auto &Context = CalledFunction->getContext();

  // Collect valid attributes for all params.
  SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
  bool HasAttrToPropagate = false;

  // Attributes we can only propagate if the exact parameter is forwarded.
  // We can propagate both poison generating and UB generating attributes
  // without any extra checks. The only attribute that is tricky to propagate
  // is `noundef` (skipped for now) as that can create new UB where previous
  // behavior was just using a poison value.
  static const Attribute::AttrKind ExactAttrsToPropagate[] = {
      Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
      Attribute::NonNull, Attribute::Alignment, Attribute::Range};

  for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
    ValidObjParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
    ValidExactParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
    // Access attributes can be propagated to any param with the same underlying
    // object as the argument.
    if (CB.paramHasAttr(I, Attribute::ReadNone))
      ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
    if (CB.paramHasAttr(I, Attribute::ReadOnly))
      ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);

    for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
      Attribute Attr = CB.getParamAttr(I, AK);
      if (Attr.isValid())
        ValidExactParamAttrs.back().addAttribute(Attr);
    }

    HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
    HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
  }

  // Won't be able to propagate anything.
  if (!HasAttrToPropagate)
    return;

  for (BasicBlock &BB : *CalledFunction) {
    for (Instruction &Ins : BB) {
      const auto *InnerCB = dyn_cast<CallBase>(&Ins);
      if (!InnerCB)
        continue;
      auto *NewInnerCB = dyn_cast_or_null<CallBase>(VMap.lookup(InnerCB));
      if (!NewInnerCB)
        continue;
      // The InnerCB might have been simplified during the inlining
      // process which can make propagation incorrect.
1422 if (InlinedFunctionInfo.isSimplified(InnerCB, NewInnerCB)) 1423 continue; 1424 1425 AttributeList AL = NewInnerCB->getAttributes(); 1426 for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) { 1427 // It's unsound or requires special handling to propagate 1428 // attributes to byval arguments. Even if CalledFunction 1429 // doesn't e.g. write to the argument (readonly), the call to 1430 // NewInnerCB may write to its by-value copy. 1431 if (NewInnerCB->paramHasAttr(I, Attribute::ByVal)) 1432 continue; 1433 1434 // Don't bother propagating attrs to constants. 1435 if (match(NewInnerCB->getArgOperand(I), 1436 llvm::PatternMatch::m_ImmConstant())) 1437 continue; 1438 1439 // Check if the underlying value for the parameter is an argument. 1440 const Argument *Arg = dyn_cast<Argument>(InnerCB->getArgOperand(I)); 1441 unsigned ArgNo; 1442 if (Arg) { 1443 ArgNo = Arg->getArgNo(); 1444 // For dereferenceable, dereferenceable_or_null, align, etc... 1445 // we don't want to propagate if the existing param has the same 1446 // attribute with "better" constraints. So remove from the 1447 // new AL if the region of the existing param is larger than 1448 // what we can propagate. 1449 AttrBuilder NewAB{ 1450 Context, AttributeSet::get(Context, ValidExactParamAttrs[ArgNo])}; 1451 if (AL.getParamDereferenceableBytes(I) > 1452 NewAB.getDereferenceableBytes()) 1453 NewAB.removeAttribute(Attribute::Dereferenceable); 1454 if (AL.getParamDereferenceableOrNullBytes(I) > 1455 NewAB.getDereferenceableOrNullBytes()) 1456 NewAB.removeAttribute(Attribute::DereferenceableOrNull); 1457 if (AL.getParamAlignment(I).valueOrOne() > 1458 NewAB.getAlignment().valueOrOne()) 1459 NewAB.removeAttribute(Attribute::Alignment); 1460 if (auto ExistingRange = AL.getParamRange(I)) { 1461 if (auto NewRange = NewAB.getRange()) { 1462 ConstantRange CombinedRange = 1463 ExistingRange->intersectWith(*NewRange); 1464 NewAB.removeAttribute(Attribute::Range); 1465 NewAB.addRangeAttr(CombinedRange); 1466 } 1467 } 1468 AL = AL.addParamAttributes(Context, I, NewAB); 1469 } else if (NewInnerCB->getArgOperand(I)->getType()->isPointerTy()) { 1470 // Check if the underlying value for the parameter is an argument. 1471 const Value *UnderlyingV = 1472 getUnderlyingObject(InnerCB->getArgOperand(I)); 1473 Arg = dyn_cast<Argument>(UnderlyingV); 1474 if (!Arg) 1475 continue; 1476 ArgNo = Arg->getArgNo(); 1477 } else { 1478 continue; 1479 } 1480 1481 // If so, propagate its access attributes. 1482 AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]); 1483 1484 // We can have conflicting attributes from the inner callsite and 1485 // to-be-inlined callsite. In that case, choose the most 1486 // restrictive. 1487 1488 // readonly + writeonly means we can never deref so make readnone. 
1489 if (AL.hasParamAttr(I, Attribute::ReadOnly) &&
1490 AL.hasParamAttr(I, Attribute::WriteOnly))
1491 AL = AL.addParamAttribute(Context, I, Attribute::ReadNone);
1492
1493 // If we have readnone, we need to clear readonly/writeonly.
1494 if (AL.hasParamAttr(I, Attribute::ReadNone)) {
1495 AL = AL.removeParamAttribute(Context, I, Attribute::ReadOnly);
1496 AL = AL.removeParamAttribute(Context, I, Attribute::WriteOnly);
1497 }
1498
1499 // Writable cannot exist in conjunction with readonly/readnone.
1500 if (AL.hasParamAttr(I, Attribute::ReadOnly) ||
1501 AL.hasParamAttr(I, Attribute::ReadNone))
1502 AL = AL.removeParamAttribute(Context, I, Attribute::Writable);
1503 }
1504 NewInnerCB->setAttributes(AL);
1505 }
1506 }
1507 }
1508
1509 // Only allow these whitelisted attributes to be propagated back to the
1510 // callee. This is because other attributes may only be valid on the call
1511 // itself, i.e. attributes such as signext and zeroext.
1512
1513 // Attributes that are always okay to propagate, because violating them is
1514 // immediate UB.
1515 static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
1516 AttrBuilder Valid(CB.getContext());
1517 if (auto DerefBytes = CB.getRetDereferenceableBytes())
1518 Valid.addDereferenceableAttr(DerefBytes);
1519 if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
1520 Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);
1521 if (CB.hasRetAttr(Attribute::NoAlias))
1522 Valid.addAttribute(Attribute::NoAlias);
1523 if (CB.hasRetAttr(Attribute::NoUndef))
1524 Valid.addAttribute(Attribute::NoUndef);
1525 return Valid;
1526 }
1527
1528 // Attributes that need additional checks as propagating them may change
1529 // behavior or cause new UB.
1530 static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
1531 AttrBuilder Valid(CB.getContext());
1532 if (CB.hasRetAttr(Attribute::NonNull))
1533 Valid.addAttribute(Attribute::NonNull);
1534 if (CB.hasRetAttr(Attribute::Alignment))
1535 Valid.addAlignmentAttr(CB.getRetAlign());
1536 if (std::optional<ConstantRange> Range = CB.getRange())
1537 Valid.addRangeAttr(*Range);
1538 return Valid;
1539 }
1540
1541 static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,
1542 ClonedCodeInfo &InlinedFunctionInfo) {
1543 AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
1544 AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
1545 if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
1546 return;
1547 auto *CalledFunction = CB.getCalledFunction();
1548 auto &Context = CalledFunction->getContext();
1549
1550 for (auto &BB : *CalledFunction) {
1551 auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
1552 if (!RI || !isa<CallBase>(RI->getOperand(0)))
1553 continue;
1554 auto *RetVal = cast<CallBase>(RI->getOperand(0));
1555 // Check that the cloned RetVal exists and is a call, otherwise we cannot
1556 // add the attributes on the cloned RetVal. Simplification during inlining
1557 // could have transformed the cloned instruction.
1558 auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
1559 if (!NewRetVal)
1560 continue;
1561
1562 // The RetVal might have been simplified during the inlining
1563 // process, which can make propagation incorrect.
1564 if (InlinedFunctionInfo.isSimplified(RetVal, NewRetVal))
1565 continue;
1566 // Backward propagation of attributes to the returned value may be incorrect
1567 // if it is control flow dependent.
1568 // Consider: 1569 // @callee { 1570 // %rv = call @foo() 1571 // %rv2 = call @bar() 1572 // if (%rv2 != null) 1573 // return %rv2 1574 // if (%rv == null) 1575 // exit() 1576 // return %rv 1577 // } 1578 // caller() { 1579 // %val = call nonnull @callee() 1580 // } 1581 // Here we cannot add the nonnull attribute on either foo or bar. So, we 1582 // limit the check to both RetVal and RI are in the same basic block and 1583 // there are no throwing/exiting instructions between these instructions. 1584 if (RI->getParent() != RetVal->getParent() || 1585 MayContainThrowingOrExitingCallAfterCB(RetVal, RI)) 1586 continue; 1587 // Add to the existing attributes of NewRetVal, i.e. the cloned call 1588 // instruction. 1589 // NB! When we have the same attribute already existing on NewRetVal, but 1590 // with a differing value, the AttributeList's merge API honours the already 1591 // existing attribute value (i.e. attributes such as dereferenceable, 1592 // dereferenceable_or_null etc). See AttrBuilder::merge for more details. 1593 AttributeList AL = NewRetVal->getAttributes(); 1594 if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes()) 1595 ValidUB.removeAttribute(Attribute::Dereferenceable); 1596 if (ValidUB.getDereferenceableOrNullBytes() < 1597 AL.getRetDereferenceableOrNullBytes()) 1598 ValidUB.removeAttribute(Attribute::DereferenceableOrNull); 1599 AttributeList NewAL = AL.addRetAttributes(Context, ValidUB); 1600 // Attributes that may generate poison returns are a bit tricky. If we 1601 // propagate them, other uses of the callsite might have their behavior 1602 // change or cause UB (if they have noundef) b.c of the new potential 1603 // poison. 1604 // Take the following three cases: 1605 // 1606 // 1) 1607 // define nonnull ptr @foo() { 1608 // %p = call ptr @bar() 1609 // call void @use(ptr %p) willreturn nounwind 1610 // ret ptr %p 1611 // } 1612 // 1613 // 2) 1614 // define noundef nonnull ptr @foo() { 1615 // %p = call ptr @bar() 1616 // call void @use(ptr %p) willreturn nounwind 1617 // ret ptr %p 1618 // } 1619 // 1620 // 3) 1621 // define nonnull ptr @foo() { 1622 // %p = call noundef ptr @bar() 1623 // ret ptr %p 1624 // } 1625 // 1626 // In case 1, we can't propagate nonnull because poison value in @use may 1627 // change behavior or trigger UB. 1628 // In case 2, we don't need to be concerned about propagating nonnull, as 1629 // any new poison at @use will trigger UB anyways. 1630 // In case 3, we can never propagate nonnull because it may create UB due to 1631 // the noundef on @bar. 1632 if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne()) 1633 ValidPG.removeAttribute(Attribute::Alignment); 1634 if (ValidPG.hasAttributes()) { 1635 Attribute CBRange = ValidPG.getAttribute(Attribute::Range); 1636 if (CBRange.isValid()) { 1637 Attribute NewRange = AL.getRetAttr(Attribute::Range); 1638 if (NewRange.isValid()) { 1639 ValidPG.addRangeAttr( 1640 CBRange.getRange().intersectWith(NewRange.getRange())); 1641 } 1642 } 1643 // Three checks. 1644 // If the callsite has `noundef`, then a poison due to violating the 1645 // return attribute will create UB anyways so we can always propagate. 1646 // Otherwise, if the return value (callee to be inlined) has `noundef`, we 1647 // can't propagate as a new poison return will cause UB. 1648 // Finally, check if the return value has no uses whose behavior may 1649 // change/may cause UB if we potentially return poison. 
At the moment this 1650 // is implemented overly conservatively with a single-use check. 1651 // TODO: Update the single-use check to iterate through uses and only bail 1652 // if we have a potentially dangerous use. 1653 1654 if (CB.hasRetAttr(Attribute::NoUndef) || 1655 (RetVal->hasOneUse() && !RetVal->hasRetAttr(Attribute::NoUndef))) 1656 NewAL = NewAL.addRetAttributes(Context, ValidPG); 1657 } 1658 NewRetVal->setAttributes(NewAL); 1659 } 1660 } 1661 1662 /// If the inlined function has non-byval align arguments, then 1663 /// add @llvm.assume-based alignment assumptions to preserve this information. 1664 static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { 1665 if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) 1666 return; 1667 1668 AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller()); 1669 auto &DL = CB.getDataLayout(); 1670 1671 // To avoid inserting redundant assumptions, we should check for assumptions 1672 // already in the caller. To do this, we might need a DT of the caller. 1673 DominatorTree DT; 1674 bool DTCalculated = false; 1675 1676 Function *CalledFunc = CB.getCalledFunction(); 1677 for (Argument &Arg : CalledFunc->args()) { 1678 if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() || 1679 Arg.hasNUses(0)) 1680 continue; 1681 MaybeAlign Alignment = Arg.getParamAlign(); 1682 if (!Alignment) 1683 continue; 1684 1685 if (!DTCalculated) { 1686 DT.recalculate(*CB.getCaller()); 1687 DTCalculated = true; 1688 } 1689 // If we can already prove the asserted alignment in the context of the 1690 // caller, then don't bother inserting the assumption. 1691 Value *ArgVal = CB.getArgOperand(Arg.getArgNo()); 1692 if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= *Alignment) 1693 continue; 1694 1695 CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption( 1696 DL, ArgVal, Alignment->value()); 1697 AC->registerAssumption(cast<AssumeInst>(NewAsmp)); 1698 } 1699 } 1700 1701 static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, 1702 Module *M, BasicBlock *InsertBlock, 1703 InlineFunctionInfo &IFI, 1704 Function *CalledFunc) { 1705 IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); 1706 1707 Value *Size = 1708 Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType)); 1709 1710 // Always generate a memcpy of alignment 1 here because we don't know 1711 // the alignment of the src pointer. Other optimizations can infer 1712 // better alignment. 1713 CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src, 1714 /*SrcAlign*/ Align(1), Size); 1715 1716 // The verifier requires that all calls of debug-info-bearing functions 1717 // from debug-info-bearing functions have a debug location (for inlining 1718 // purposes). Assign a dummy location to satisfy the constraint. 1719 if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram()) 1720 if (DISubprogram *SP = CalledFunc->getSubprogram()) 1721 CI->setDebugLoc(DILocation::get(SP->getContext(), 0, 0, SP)); 1722 } 1723 1724 /// When inlining a call site that has a byval argument, 1725 /// we have to make the implicit memcpy explicit by adding it. 
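/// Illustrative sketch (hypothetical IR; together with HandleByValArgumentInit
/// above): a call site such as
///   call void @callee(ptr byval(%struct.S) align 8 %src)
/// conceptually becomes, once inlined,
///   %src.copy = alloca %struct.S
///   call void @llvm.memcpy.p0.p0.i64(ptr %src.copy, ptr %src, i64 <store size of %struct.S>, i1 false)
/// with the memcpy emitted at alignment 1 as noted above, and uses of the
/// byval parameter in the inlined body rewritten to use %src.copy instead.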
1726 static Value *HandleByValArgument(Type *ByValType, Value *Arg, 1727 Instruction *TheCall, 1728 const Function *CalledFunc, 1729 InlineFunctionInfo &IFI, 1730 MaybeAlign ByValAlignment) { 1731 Function *Caller = TheCall->getFunction(); 1732 const DataLayout &DL = Caller->getDataLayout(); 1733 1734 // If the called function is readonly, then it could not mutate the caller's 1735 // copy of the byval'd memory. In this case, it is safe to elide the copy and 1736 // temporary. 1737 if (CalledFunc->onlyReadsMemory()) { 1738 // If the byval argument has a specified alignment that is greater than the 1739 // passed in pointer, then we either have to round up the input pointer or 1740 // give up on this transformation. 1741 if (ByValAlignment.valueOrOne() == 1) 1742 return Arg; 1743 1744 AssumptionCache *AC = 1745 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; 1746 1747 // If the pointer is already known to be sufficiently aligned, or if we can 1748 // round it up to a larger alignment, then we don't need a temporary. 1749 if (getOrEnforceKnownAlignment(Arg, *ByValAlignment, DL, TheCall, AC) >= 1750 *ByValAlignment) 1751 return Arg; 1752 1753 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad 1754 // for code quality, but rarely happens and is required for correctness. 1755 } 1756 1757 // Create the alloca. If we have DataLayout, use nice alignment. 1758 Align Alignment = DL.getPrefTypeAlign(ByValType); 1759 1760 // If the byval had an alignment specified, we *must* use at least that 1761 // alignment, as it is required by the byval argument (and uses of the 1762 // pointer inside the callee). 1763 if (ByValAlignment) 1764 Alignment = std::max(Alignment, *ByValAlignment); 1765 1766 AllocaInst *NewAlloca = 1767 new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(), 1768 nullptr, Alignment, Arg->getName()); 1769 NewAlloca->insertBefore(Caller->begin()->begin()); 1770 IFI.StaticAllocas.push_back(NewAlloca); 1771 1772 // Uses of the argument in the function should use our new alloca 1773 // instead. 1774 return NewAlloca; 1775 } 1776 1777 // Check whether this Value is used by a lifetime intrinsic. 1778 static bool isUsedByLifetimeMarker(Value *V) { 1779 for (User *U : V->users()) 1780 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) 1781 if (II->isLifetimeStartOrEnd()) 1782 return true; 1783 return false; 1784 } 1785 1786 // Check whether the given alloca already has 1787 // lifetime.start or lifetime.end intrinsics. 1788 static bool hasLifetimeMarkers(AllocaInst *AI) { 1789 Type *Ty = AI->getType(); 1790 Type *Int8PtrTy = 1791 PointerType::get(Ty->getContext(), Ty->getPointerAddressSpace()); 1792 if (Ty == Int8PtrTy) 1793 return isUsedByLifetimeMarker(AI); 1794 1795 // Do a scan to find all the casts to i8*. 1796 for (User *U : AI->users()) { 1797 if (U->getType() != Int8PtrTy) continue; 1798 if (U->stripPointerCasts() != AI) continue; 1799 if (isUsedByLifetimeMarker(U)) 1800 return true; 1801 } 1802 return false; 1803 } 1804 1805 /// Return the result of AI->isStaticAlloca() if AI were moved to the entry 1806 /// block. Allocas used in inalloca calls and allocas of dynamic array size 1807 /// cannot be static. 1808 static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) { 1809 return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca(); 1810 } 1811 1812 /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL 1813 /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache. 
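/// For illustration (hypothetical metadata): if the call being inlined sits at
///   !10 = !DILocation(line: 7, scope: !callerSP)
/// then a callee location
///   !20 = !DILocation(line: 42, scope: !calleeSP)
/// is rebuilt as
///   !DILocation(line: 42, scope: !calleeSP, inlinedAt: !10)
/// so the instruction is still attributed to the callee while recording where
/// it was inlined.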
1814 static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, 1815 LLVMContext &Ctx, 1816 DenseMap<const MDNode *, MDNode *> &IANodes) { 1817 auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes); 1818 return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(), 1819 OrigDL.getScope(), IA); 1820 } 1821 1822 /// Update inlined instructions' line numbers to 1823 /// to encode location where these instructions are inlined. 1824 static void fixupLineNumbers(Function *Fn, Function::iterator FI, 1825 Instruction *TheCall, bool CalleeHasDebugInfo) { 1826 const DebugLoc &TheCallDL = TheCall->getDebugLoc(); 1827 if (!TheCallDL) 1828 return; 1829 1830 auto &Ctx = Fn->getContext(); 1831 DILocation *InlinedAtNode = TheCallDL; 1832 1833 // Create a unique call site, not to be confused with any other call from the 1834 // same location. 1835 InlinedAtNode = DILocation::getDistinct( 1836 Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), 1837 InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); 1838 1839 // Cache the inlined-at nodes as they're built so they are reused, without 1840 // this every instruction's inlined-at chain would become distinct from each 1841 // other. 1842 DenseMap<const MDNode *, MDNode *> IANodes; 1843 1844 // Check if we are not generating inline line tables and want to use 1845 // the call site location instead. 1846 bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables"); 1847 1848 // Helper-util for updating the metadata attached to an instruction. 1849 auto UpdateInst = [&](Instruction &I) { 1850 // Loop metadata needs to be updated so that the start and end locs 1851 // reference inlined-at locations. 1852 auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, 1853 &IANodes](Metadata *MD) -> Metadata * { 1854 if (auto *Loc = dyn_cast_or_null<DILocation>(MD)) 1855 return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get(); 1856 return MD; 1857 }; 1858 updateLoopMetadataDebugLocations(I, updateLoopInfoLoc); 1859 1860 if (!NoInlineLineTables) 1861 if (DebugLoc DL = I.getDebugLoc()) { 1862 DebugLoc IDL = 1863 inlineDebugLoc(DL, InlinedAtNode, I.getContext(), IANodes); 1864 I.setDebugLoc(IDL); 1865 return; 1866 } 1867 1868 if (CalleeHasDebugInfo && !NoInlineLineTables) 1869 return; 1870 1871 // If the inlined instruction has no line number, or if inline info 1872 // is not being generated, make it look as if it originates from the call 1873 // location. This is important for ((__always_inline, __nodebug__)) 1874 // functions which must use caller location for all instructions in their 1875 // function body. 1876 1877 // Don't update static allocas, as they may get moved later. 1878 if (auto *AI = dyn_cast<AllocaInst>(&I)) 1879 if (allocaWouldBeStaticInEntry(AI)) 1880 return; 1881 1882 // Do not force a debug loc for pseudo probes, since they do not need to 1883 // be debuggable, and also they are expected to have a zero/null dwarf 1884 // discriminator at this point which could be violated otherwise. 1885 if (isa<PseudoProbeInst>(I)) 1886 return; 1887 1888 I.setDebugLoc(TheCallDL); 1889 }; 1890 1891 // Helper-util for updating debug-info records attached to instructions. 
1892 auto UpdateDVR = [&](DbgRecord *DVR) { 1893 assert(DVR->getDebugLoc() && "Debug Value must have debug loc"); 1894 if (NoInlineLineTables) { 1895 DVR->setDebugLoc(TheCallDL); 1896 return; 1897 } 1898 DebugLoc DL = DVR->getDebugLoc(); 1899 DebugLoc IDL = 1900 inlineDebugLoc(DL, InlinedAtNode, 1901 DVR->getMarker()->getParent()->getContext(), IANodes); 1902 DVR->setDebugLoc(IDL); 1903 }; 1904 1905 // Iterate over all instructions, updating metadata and debug-info records. 1906 for (; FI != Fn->end(); ++FI) { 1907 for (Instruction &I : *FI) { 1908 UpdateInst(I); 1909 for (DbgRecord &DVR : I.getDbgRecordRange()) { 1910 UpdateDVR(&DVR); 1911 } 1912 } 1913 1914 // Remove debug info intrinsics if we're not keeping inline info. 1915 if (NoInlineLineTables) { 1916 BasicBlock::iterator BI = FI->begin(); 1917 while (BI != FI->end()) { 1918 if (isa<DbgInfoIntrinsic>(BI)) { 1919 BI = BI->eraseFromParent(); 1920 continue; 1921 } else { 1922 BI->dropDbgRecords(); 1923 } 1924 ++BI; 1925 } 1926 } 1927 } 1928 } 1929 1930 #undef DEBUG_TYPE 1931 #define DEBUG_TYPE "assignment-tracking" 1932 /// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB. 1933 static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL, 1934 const CallBase &CB) { 1935 at::StorageToVarsMap EscapedLocals; 1936 SmallPtrSet<const Value *, 4> SeenBases; 1937 1938 LLVM_DEBUG( 1939 errs() << "# Finding caller local variables escaped by callee\n"); 1940 for (const Value *Arg : CB.args()) { 1941 LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n"); 1942 if (!Arg->getType()->isPointerTy()) { 1943 LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n"); 1944 continue; 1945 } 1946 1947 const Instruction *I = dyn_cast<Instruction>(Arg); 1948 if (!I) { 1949 LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n"); 1950 continue; 1951 } 1952 1953 // Walk back to the base storage. 1954 assert(Arg->getType()->isPtrOrPtrVectorTy()); 1955 APInt TmpOffset(DL.getIndexTypeSizeInBits(Arg->getType()), 0, false); 1956 const AllocaInst *Base = dyn_cast<AllocaInst>( 1957 Arg->stripAndAccumulateConstantOffsets(DL, TmpOffset, true)); 1958 if (!Base) { 1959 LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n"); 1960 continue; 1961 } 1962 1963 assert(Base); 1964 LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n"); 1965 // We only need to process each base address once - skip any duplicates. 1966 if (!SeenBases.insert(Base).second) 1967 continue; 1968 1969 // Find all local variables associated with the backing storage. 1970 auto CollectAssignsForStorage = [&](auto *DbgAssign) { 1971 // Skip variables from inlined functions - they are not local variables. 1972 if (DbgAssign->getDebugLoc().getInlinedAt()) 1973 return; 1974 LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n"); 1975 EscapedLocals[Base].insert(at::VarRecord(DbgAssign)); 1976 }; 1977 for_each(at::getAssignmentMarkers(Base), CollectAssignsForStorage); 1978 for_each(at::getDVRAssignmentMarkers(Base), CollectAssignsForStorage); 1979 } 1980 return EscapedLocals; 1981 } 1982 1983 static void trackInlinedStores(Function::iterator Start, Function::iterator End, 1984 const CallBase &CB) { 1985 LLVM_DEBUG(errs() << "trackInlinedStores into " 1986 << Start->getParent()->getName() << " from " 1987 << CB.getCalledFunction()->getName() << "\n"); 1988 const DataLayout &DL = CB.getDataLayout(); 1989 at::trackAssignments(Start, End, collectEscapedLocals(DL, CB), DL); 1990 } 1991 1992 /// Update inlined instructions' DIAssignID metadata. 
We need to do this 1993 /// otherwise a function inlined more than once into the same function 1994 /// will cause DIAssignID to be shared by many instructions. 1995 static void fixupAssignments(Function::iterator Start, Function::iterator End) { 1996 DenseMap<DIAssignID *, DIAssignID *> Map; 1997 // Loop over all the inlined instructions. If we find a DIAssignID 1998 // attachment or use, replace it with a new version. 1999 for (auto BBI = Start; BBI != End; ++BBI) { 2000 for (Instruction &I : *BBI) 2001 at::remapAssignID(Map, I); 2002 } 2003 } 2004 #undef DEBUG_TYPE 2005 #define DEBUG_TYPE "inline-function" 2006 2007 /// Update the block frequencies of the caller after a callee has been inlined. 2008 /// 2009 /// Each block cloned into the caller has its block frequency scaled by the 2010 /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of 2011 /// callee's entry block gets the same frequency as the callsite block and the 2012 /// relative frequencies of all cloned blocks remain the same after cloning. 2013 static void updateCallerBFI(BasicBlock *CallSiteBlock, 2014 const ValueToValueMapTy &VMap, 2015 BlockFrequencyInfo *CallerBFI, 2016 BlockFrequencyInfo *CalleeBFI, 2017 const BasicBlock &CalleeEntryBlock) { 2018 SmallPtrSet<BasicBlock *, 16> ClonedBBs; 2019 for (auto Entry : VMap) { 2020 if (!isa<BasicBlock>(Entry.first) || !Entry.second) 2021 continue; 2022 auto *OrigBB = cast<BasicBlock>(Entry.first); 2023 auto *ClonedBB = cast<BasicBlock>(Entry.second); 2024 BlockFrequency Freq = CalleeBFI->getBlockFreq(OrigBB); 2025 if (!ClonedBBs.insert(ClonedBB).second) { 2026 // Multiple blocks in the callee might get mapped to one cloned block in 2027 // the caller since we prune the callee as we clone it. When that happens, 2028 // we want to use the maximum among the original blocks' frequencies. 2029 BlockFrequency NewFreq = CallerBFI->getBlockFreq(ClonedBB); 2030 if (NewFreq > Freq) 2031 Freq = NewFreq; 2032 } 2033 CallerBFI->setBlockFreq(ClonedBB, Freq); 2034 } 2035 BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); 2036 CallerBFI->setBlockFreqAndScale( 2037 EntryClone, CallerBFI->getBlockFreq(CallSiteBlock), ClonedBBs); 2038 } 2039 2040 /// Update the branch metadata for cloned call instructions. 2041 static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, 2042 const ProfileCount &CalleeEntryCount, 2043 const CallBase &TheCall, ProfileSummaryInfo *PSI, 2044 BlockFrequencyInfo *CallerBFI) { 2045 if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) 2046 return; 2047 auto CallSiteCount = 2048 PSI ? PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt; 2049 int64_t CallCount = 2050 std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount()); 2051 updateProfileCallee(Callee, -CallCount, &VMap); 2052 } 2053 2054 void llvm::updateProfileCallee( 2055 Function *Callee, int64_t EntryDelta, 2056 const ValueMap<const Value *, WeakTrackingVH> *VMap) { 2057 auto CalleeCount = Callee->getEntryCount(); 2058 if (!CalleeCount) 2059 return; 2060 2061 const uint64_t PriorEntryCount = CalleeCount->getCount(); 2062 2063 // Since CallSiteCount is an estimate, it could exceed the original callee 2064 // count and has to be set to 0 so guard against underflow. 2065 const uint64_t NewEntryCount = 2066 (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount) 2067 ? 
0 2068 : PriorEntryCount + EntryDelta; 2069 2070 auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount, 2071 const uint64_t PriorEntryCount) { 2072 Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB); 2073 if (VPtr) 2074 scaleProfData(*VPtr, NewEntryCount, PriorEntryCount); 2075 }; 2076 2077 // During inlining ? 2078 if (VMap) { 2079 uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; 2080 for (auto Entry : *VMap) { 2081 if (isa<CallInst>(Entry.first)) 2082 if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) { 2083 CI->updateProfWeight(CloneEntryCount, PriorEntryCount); 2084 updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount); 2085 } 2086 2087 if (isa<InvokeInst>(Entry.first)) 2088 if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second)) { 2089 II->updateProfWeight(CloneEntryCount, PriorEntryCount); 2090 updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount); 2091 } 2092 } 2093 } 2094 2095 if (EntryDelta) { 2096 Callee->setEntryCount(NewEntryCount); 2097 2098 for (BasicBlock &BB : *Callee) 2099 // No need to update the callsite if it is pruned during inlining. 2100 if (!VMap || VMap->count(&BB)) 2101 for (Instruction &I : BB) { 2102 if (CallInst *CI = dyn_cast<CallInst>(&I)) { 2103 CI->updateProfWeight(NewEntryCount, PriorEntryCount); 2104 updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount); 2105 } 2106 if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) { 2107 II->updateProfWeight(NewEntryCount, PriorEntryCount); 2108 updateVTableProfWeight(II, NewEntryCount, PriorEntryCount); 2109 } 2110 } 2111 } 2112 } 2113 2114 /// An operand bundle "clang.arc.attachedcall" on a call indicates the call 2115 /// result is implicitly consumed by a call to retainRV or claimRV immediately 2116 /// after the call. This function inlines the retainRV/claimRV calls. 2117 /// 2118 /// There are three cases to consider: 2119 /// 2120 /// 1. If there is a call to autoreleaseRV that takes a pointer to the returned 2121 /// object in the callee return block, the autoreleaseRV call and the 2122 /// retainRV/claimRV call in the caller cancel out. If the call in the caller 2123 /// is a claimRV call, a call to objc_release is emitted. 2124 /// 2125 /// 2. If there is a call in the callee return block that doesn't have operand 2126 /// bundle "clang.arc.attachedcall", the operand bundle on the original call 2127 /// is transferred to the call in the callee. 2128 /// 2129 /// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is 2130 /// a retainRV call. 2131 static void 2132 inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, 2133 const SmallVectorImpl<ReturnInst *> &Returns) { 2134 assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function"); 2135 bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV, 2136 IsUnsafeClaimRV = !IsRetainRV; 2137 2138 for (auto *RI : Returns) { 2139 Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0)); 2140 bool InsertRetainCall = IsRetainRV; 2141 IRBuilder<> Builder(RI->getContext()); 2142 2143 // Walk backwards through the basic block looking for either a matching 2144 // autoreleaseRV call or an unannotated call. 2145 auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()), 2146 RI->getParent()->rend()); 2147 for (Instruction &I : llvm::make_early_inc_range(InstRange)) { 2148 // Ignore casts. 
2149 if (isa<CastInst>(I)) 2150 continue; 2151 2152 if (auto *II = dyn_cast<IntrinsicInst>(&I)) { 2153 if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue || 2154 !II->hasNUses(0) || 2155 objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd) 2156 break; 2157 2158 // If we've found a matching authoreleaseRV call: 2159 // - If claimRV is attached to the call, insert a call to objc_release 2160 // and erase the autoreleaseRV call. 2161 // - If retainRV is attached to the call, just erase the autoreleaseRV 2162 // call. 2163 if (IsUnsafeClaimRV) { 2164 Builder.SetInsertPoint(II); 2165 Builder.CreateIntrinsic(Intrinsic::objc_release, {}, RetOpnd); 2166 } 2167 II->eraseFromParent(); 2168 InsertRetainCall = false; 2169 break; 2170 } 2171 2172 auto *CI = dyn_cast<CallInst>(&I); 2173 2174 if (!CI) 2175 break; 2176 2177 if (objcarc::GetRCIdentityRoot(CI) != RetOpnd || 2178 objcarc::hasAttachedCallOpBundle(CI)) 2179 break; 2180 2181 // If we've found an unannotated call that defines RetOpnd, add a 2182 // "clang.arc.attachedcall" operand bundle. 2183 Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)}; 2184 OperandBundleDef OB("clang.arc.attachedcall", BundleArgs); 2185 auto *NewCall = CallBase::addOperandBundle( 2186 CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI->getIterator()); 2187 NewCall->copyMetadata(*CI); 2188 CI->replaceAllUsesWith(NewCall); 2189 CI->eraseFromParent(); 2190 InsertRetainCall = false; 2191 break; 2192 } 2193 2194 if (InsertRetainCall) { 2195 // The retainRV is attached to the call and we've failed to find a 2196 // matching autoreleaseRV or an annotated call in the callee. Emit a call 2197 // to objc_retain. 2198 Builder.SetInsertPoint(RI); 2199 Builder.CreateIntrinsic(Intrinsic::objc_retain, {}, RetOpnd); 2200 } 2201 } 2202 } 2203 2204 // In contextual profiling, when an inline succeeds, we want to remap the 2205 // indices of the callee into the index space of the caller. We can't just leave 2206 // them as-is because the same callee may appear in other places in this caller 2207 // (other callsites), and its (callee's) counters and sub-contextual profile 2208 // tree would be potentially different. 2209 // Not all BBs of the callee may survive the opportunistic DCE InlineFunction 2210 // does (same goes for callsites in the callee). 2211 // We will return a pair of vectors, one for basic block IDs and one for 2212 // callsites. For such a vector V, V[Idx] will be -1 if the callee 2213 // instrumentation with index Idx did not survive inlining, and a new value 2214 // otherwise. 2215 // This function will update the caller's instrumentation intrinsics 2216 // accordingly, mapping indices as described above. We also replace the "name" 2217 // operand because we use it to distinguish between "own" instrumentation and 2218 // "from callee" instrumentation when performing the traversal of the CFG of the 2219 // caller. We traverse depth-first from the callsite's BB and up to the point we 2220 // hit BBs owned by the caller. 2221 // The return values will be then used to update the contextual 2222 // profile. Note: we only update the "name" and "index" operands in the 2223 // instrumentation intrinsics, we leave the hash and total nr of indices as-is, 2224 // it's not worth updating those. 
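// For illustration (made-up numbers): a callee with 3 counters and 2 callsites
// where counter 0 and callsite 0 do not survive inlining, while the surviving
// counters are assigned caller indices 7 and 8 and the surviving callsite is
// assigned caller index 3, would yield {{-1, 7, 8}, {-1, 3}}.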
2225 static const std::pair<std::vector<int64_t>, std::vector<int64_t>> 2226 remapIndices(Function &Caller, BasicBlock *StartBB, 2227 PGOContextualProfile &CtxProf, uint32_t CalleeCounters, 2228 uint32_t CalleeCallsites) { 2229 // We'll allocate a new ID to imported callsite counters and callsites. We're 2230 // using -1 to indicate a counter we delete. Most likely the entry ID, for 2231 // example, will be deleted - we don't want 2 IDs in the same BB, and the 2232 // entry would have been cloned in the callsite's old BB. 2233 std::vector<int64_t> CalleeCounterMap; 2234 std::vector<int64_t> CalleeCallsiteMap; 2235 CalleeCounterMap.resize(CalleeCounters, -1); 2236 CalleeCallsiteMap.resize(CalleeCallsites, -1); 2237 2238 auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool { 2239 if (Ins.getNameValue() == &Caller) 2240 return false; 2241 const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); 2242 if (CalleeCounterMap[OldID] == -1) 2243 CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(Caller); 2244 const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]); 2245 2246 Ins.setNameValue(&Caller); 2247 Ins.setIndex(NewID); 2248 return true; 2249 }; 2250 2251 auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool { 2252 if (Ins.getNameValue() == &Caller) 2253 return false; 2254 const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); 2255 if (CalleeCallsiteMap[OldID] == -1) 2256 CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(Caller); 2257 const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]); 2258 2259 Ins.setNameValue(&Caller); 2260 Ins.setIndex(NewID); 2261 return true; 2262 }; 2263 2264 std::deque<BasicBlock *> Worklist; 2265 DenseSet<const BasicBlock *> Seen; 2266 // We will traverse the BBs starting from the callsite BB. The callsite BB 2267 // will have at least a BB ID - maybe its own, and in any case the one coming 2268 // from the cloned function's entry BB. The other BBs we'll start seeing from 2269 // there on may or may not have BB IDs. BBs with IDs belonging to our caller 2270 // are definitely not coming from the imported function and form a boundary 2271 // past which we don't need to traverse anymore. BBs may have no 2272 // instrumentation (because we originally inserted instrumentation as per 2273 // MST), in which case we'll traverse past them. An invariant we'll keep is 2274 // that a BB will have at most 1 BB ID. For example, in the callsite BB, we 2275 // will delete the callee BB's instrumentation. This doesn't result in 2276 // information loss: the entry BB of the callee will have the same count as 2277 // the callsite's BB. At the end of this traversal, all the callee's 2278 // instrumentation would be mapped into the caller's instrumentation index 2279 // space. Some of the callee's counters may be deleted (as mentioned, this 2280 // should result in no loss of information). 2281 Worklist.push_back(StartBB); 2282 while (!Worklist.empty()) { 2283 auto *BB = Worklist.front(); 2284 Worklist.pop_front(); 2285 bool Changed = false; 2286 auto *BBID = CtxProfAnalysis::getBBInstrumentation(*BB); 2287 if (BBID) { 2288 Changed |= RewriteInstrIfNeeded(*BBID); 2289 // this may be the entryblock from the inlined callee, coming into a BB 2290 // that didn't have instrumentation because of MST decisions. Let's make 2291 // sure it's placed accordingly. This is a noop elsewhere. 
2292 BBID->moveBefore(BB->getFirstInsertionPt()); 2293 } 2294 for (auto &I : llvm::make_early_inc_range(*BB)) { 2295 if (auto *Inc = dyn_cast<InstrProfIncrementInst>(&I)) { 2296 if (isa<InstrProfIncrementInstStep>(Inc)) { 2297 // Step instrumentation is used for select instructions. Inlining may 2298 // have propagated a constant resulting in the condition of the select 2299 // being resolved, case in which function cloning resolves the value 2300 // of the select, and elides the select instruction. If that is the 2301 // case, the step parameter of the instrumentation will reflect that. 2302 // We can delete the instrumentation in that case. 2303 if (isa<Constant>(Inc->getStep())) { 2304 assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode())); 2305 Inc->eraseFromParent(); 2306 } else { 2307 assert(isa_and_nonnull<SelectInst>(Inc->getNextNode())); 2308 RewriteInstrIfNeeded(*Inc); 2309 } 2310 } else if (Inc != BBID) { 2311 // If we're here it means that the BB had more than 1 IDs, presumably 2312 // some coming from the callee. We "made up our mind" to keep the 2313 // first one (which may or may not have been originally the caller's). 2314 // All the others are superfluous and we delete them. 2315 Inc->eraseFromParent(); 2316 Changed = true; 2317 } 2318 } else if (auto *CS = dyn_cast<InstrProfCallsite>(&I)) { 2319 Changed |= RewriteCallsiteInsIfNeeded(*CS); 2320 } 2321 } 2322 if (!BBID || Changed) 2323 for (auto *Succ : successors(BB)) 2324 if (Seen.insert(Succ).second) 2325 Worklist.push_back(Succ); 2326 } 2327 2328 assert( 2329 llvm::all_of(CalleeCounterMap, [&](const auto &V) { return V != 0; }) && 2330 "Counter index mapping should be either to -1 or to non-zero index, " 2331 "because the 0 " 2332 "index corresponds to the entry BB of the caller"); 2333 assert( 2334 llvm::all_of(CalleeCallsiteMap, [&](const auto &V) { return V != 0; }) && 2335 "Callsite index mapping should be either to -1 or to non-zero index, " 2336 "because there should have been at least a callsite - the inlined one " 2337 "- which would have had a 0 index."); 2338 2339 return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)}; 2340 } 2341 2342 // Inline. If successful, update the contextual profile (if a valid one is 2343 // given). 2344 // The contextual profile data is organized in trees, as follows: 2345 // - each node corresponds to a function 2346 // - the root of each tree corresponds to an "entrypoint" - e.g. 2347 // RPC handler for server side 2348 // - the path from the root to a node is a particular call path 2349 // - the counters stored in a node are counter values observed in that 2350 // particular call path ("context") 2351 // - the edges between nodes are annotated with callsite IDs. 2352 // 2353 // Updating the contextual profile after an inlining means, at a high level, 2354 // copying over the data of the callee, **intentionally without any value 2355 // scaling**, and copying over the callees of the inlined callee. 2356 llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, 2357 PGOContextualProfile &CtxProf, 2358 bool MergeAttributes, 2359 AAResults *CalleeAAR, 2360 bool InsertLifetime, 2361 Function *ForwardVarArgsTo) { 2362 if (!CtxProf) 2363 return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, 2364 ForwardVarArgsTo); 2365 2366 auto &Caller = *CB.getCaller(); 2367 auto &Callee = *CB.getCalledFunction(); 2368 auto *StartBB = CB.getParent(); 2369 2370 // Get some preliminary data about the callsite before it might get inlined. 
2371 // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to 2372 // get this data upfront and rely less on InlineFunction's behavior. 2373 const auto CalleeGUID = AssignGUIDPass::getGUID(Callee); 2374 auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB); 2375 const auto CallsiteID = 2376 static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue()); 2377 2378 const auto NumCalleeCounters = CtxProf.getNumCounters(Callee); 2379 const auto NumCalleeCallsites = CtxProf.getNumCallsites(Callee); 2380 2381 auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, 2382 ForwardVarArgsTo); 2383 if (!Ret.isSuccess()) 2384 return Ret; 2385 2386 // Inlining succeeded, we don't need the instrumentation of the inlined 2387 // callsite. 2388 CallsiteIDIns->eraseFromParent(); 2389 2390 // Assinging Maps and then capturing references into it in the lambda because 2391 // captured structured bindings are a C++20 extension. We do also need a 2392 // capture here, though. 2393 const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf, 2394 NumCalleeCounters, NumCalleeCallsites); 2395 const uint32_t NewCountersSize = CtxProf.getNumCounters(Caller); 2396 2397 auto Updater = [&](PGOCtxProfContext &Ctx) { 2398 assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller)); 2399 const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps; 2400 assert( 2401 (Ctx.counters().size() + 2402 llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) == 2403 NewCountersSize) && 2404 "The caller's counters size should have grown by the number of new " 2405 "distinct counters inherited from the inlined callee."); 2406 Ctx.resizeCounters(NewCountersSize); 2407 // If the callsite wasn't exercised in this context, the value of the 2408 // counters coming from it is 0 - which it is right now, after resizing them 2409 // - and so we're done. 2410 auto CSIt = Ctx.callsites().find(CallsiteID); 2411 if (CSIt == Ctx.callsites().end()) 2412 return; 2413 auto CalleeCtxIt = CSIt->second.find(CalleeGUID); 2414 // The callsite was exercised, but not with this callee (so presumably this 2415 // is an indirect callsite). Again, we're done here. 2416 if (CalleeCtxIt == CSIt->second.end()) 2417 return; 2418 2419 // Let's pull in the counter values and the subcontexts coming from the 2420 // inlined callee. 2421 auto &CalleeCtx = CalleeCtxIt->second; 2422 assert(CalleeCtx.guid() == CalleeGUID); 2423 2424 for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) { 2425 const int64_t NewIndex = CalleeCounterMap[I]; 2426 if (NewIndex >= 0) { 2427 assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 " 2428 "index, that's the caller's entry BB"); 2429 Ctx.counters()[NewIndex] = CalleeCtx.counters()[I]; 2430 } 2431 } 2432 for (auto &[I, OtherSet] : CalleeCtx.callsites()) { 2433 const int64_t NewCSIdx = CalleeCallsiteMap[I]; 2434 if (NewCSIdx >= 0) { 2435 assert(NewCSIdx != 0 && 2436 "callsite index mapping shouldn't happen to a 0 index, the " 2437 "caller must've had at least one callsite (with such an index)"); 2438 Ctx.ingestAllContexts(NewCSIdx, std::move(OtherSet)); 2439 } 2440 } 2441 // We know the traversal is preorder, so it wouldn't have yet looked at the 2442 // sub-contexts of this context that it's currently visiting. Meaning, the 2443 // erase below invalidates no iterators. 
2444 auto Deleted = Ctx.callsites().erase(CallsiteID); 2445 assert(Deleted); 2446 (void)Deleted; 2447 }; 2448 CtxProf.update(Updater, Caller); 2449 return Ret; 2450 } 2451 2452 /// This function inlines the called function into the basic block of the 2453 /// caller. This returns false if it is not possible to inline this call. 2454 /// The program is still in a well defined state if this occurs though. 2455 /// 2456 /// Note that this only does one level of inlining. For example, if the 2457 /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now 2458 /// exists in the instruction stream. Similarly this will inline a recursive 2459 /// function by one level. 2460 llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, 2461 bool MergeAttributes, 2462 AAResults *CalleeAAR, 2463 bool InsertLifetime, 2464 Function *ForwardVarArgsTo) { 2465 assert(CB.getParent() && CB.getFunction() && "Instruction not in function!"); 2466 2467 // FIXME: we don't inline callbr yet. 2468 if (isa<CallBrInst>(CB)) 2469 return InlineResult::failure("We don't inline callbr yet."); 2470 2471 // If IFI has any state in it, zap it before we fill it in. 2472 IFI.reset(); 2473 2474 Function *CalledFunc = CB.getCalledFunction(); 2475 if (!CalledFunc || // Can't inline external function or indirect 2476 CalledFunc->isDeclaration()) // call! 2477 return InlineResult::failure("external or indirect"); 2478 2479 // The inliner does not know how to inline through calls with operand bundles 2480 // in general ... 2481 Value *ConvergenceControlToken = nullptr; 2482 if (CB.hasOperandBundles()) { 2483 for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { 2484 auto OBUse = CB.getOperandBundleAt(i); 2485 uint32_t Tag = OBUse.getTagID(); 2486 // ... but it knows how to inline through "deopt" operand bundles ... 2487 if (Tag == LLVMContext::OB_deopt) 2488 continue; 2489 // ... and "funclet" operand bundles. 2490 if (Tag == LLVMContext::OB_funclet) 2491 continue; 2492 if (Tag == LLVMContext::OB_clang_arc_attachedcall) 2493 continue; 2494 if (Tag == LLVMContext::OB_kcfi) 2495 continue; 2496 if (Tag == LLVMContext::OB_convergencectrl) { 2497 ConvergenceControlToken = OBUse.Inputs[0].get(); 2498 continue; 2499 } 2500 2501 return InlineResult::failure("unsupported operand bundle"); 2502 } 2503 } 2504 2505 // FIXME: The check below is redundant and incomplete. According to spec, if a 2506 // convergent call is missing a token, then the caller is using uncontrolled 2507 // convergence. If the callee has an entry intrinsic, then the callee is using 2508 // controlled convergence, and the call cannot be inlined. A proper 2509 // implemenation of this check requires a whole new analysis that identifies 2510 // convergence in every function. For now, we skip that and just do this one 2511 // cursory check. The underlying assumption is that in a compiler flow that 2512 // fully implements convergence control tokens, there is no mixing of 2513 // controlled and uncontrolled convergent operations in the whole program. 2514 if (CB.isConvergent()) { 2515 if (!ConvergenceControlToken && 2516 getConvergenceEntry(CalledFunc->getEntryBlock())) { 2517 return InlineResult::failure( 2518 "convergent call needs convergencectrl operand"); 2519 } 2520 } 2521 2522 // If the call to the callee cannot throw, set the 'nounwind' flag on any 2523 // calls that we inline. 
2524 bool MarkNoUnwind = CB.doesNotThrow(); 2525 2526 BasicBlock *OrigBB = CB.getParent(); 2527 Function *Caller = OrigBB->getParent(); 2528 2529 // GC poses two hazards to inlining, which only occur when the callee has GC: 2530 // 1. If the caller has no GC, then the callee's GC must be propagated to the 2531 // caller. 2532 // 2. If the caller has a differing GC, it is invalid to inline. 2533 if (CalledFunc->hasGC()) { 2534 if (!Caller->hasGC()) 2535 Caller->setGC(CalledFunc->getGC()); 2536 else if (CalledFunc->getGC() != Caller->getGC()) 2537 return InlineResult::failure("incompatible GC"); 2538 } 2539 2540 // Get the personality function from the callee if it contains a landing pad. 2541 Constant *CalledPersonality = 2542 CalledFunc->hasPersonalityFn() 2543 ? CalledFunc->getPersonalityFn()->stripPointerCasts() 2544 : nullptr; 2545 2546 // Find the personality function used by the landing pads of the caller. If it 2547 // exists, then check to see that it matches the personality function used in 2548 // the callee. 2549 Constant *CallerPersonality = 2550 Caller->hasPersonalityFn() 2551 ? Caller->getPersonalityFn()->stripPointerCasts() 2552 : nullptr; 2553 if (CalledPersonality) { 2554 if (!CallerPersonality) 2555 Caller->setPersonalityFn(CalledPersonality); 2556 // If the personality functions match, then we can perform the 2557 // inlining. Otherwise, we can't inline. 2558 // TODO: This isn't 100% true. Some personality functions are proper 2559 // supersets of others and can be used in place of the other. 2560 else if (CalledPersonality != CallerPersonality) 2561 return InlineResult::failure("incompatible personality"); 2562 } 2563 2564 // We need to figure out which funclet the callsite was in so that we may 2565 // properly nest the callee. 2566 Instruction *CallSiteEHPad = nullptr; 2567 if (CallerPersonality) { 2568 EHPersonality Personality = classifyEHPersonality(CallerPersonality); 2569 if (isScopedEHPersonality(Personality)) { 2570 std::optional<OperandBundleUse> ParentFunclet = 2571 CB.getOperandBundle(LLVMContext::OB_funclet); 2572 if (ParentFunclet) 2573 CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); 2574 2575 // OK, the inlining site is legal. What about the target function? 2576 2577 if (CallSiteEHPad) { 2578 if (Personality == EHPersonality::MSVC_CXX) { 2579 // The MSVC personality cannot tolerate catches getting inlined into 2580 // cleanup funclets. 2581 if (isa<CleanupPadInst>(CallSiteEHPad)) { 2582 // Ok, the call site is within a cleanuppad. Let's check the callee 2583 // for catchpads. 2584 for (const BasicBlock &CalledBB : *CalledFunc) { 2585 if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHIIt())) 2586 return InlineResult::failure("catch in cleanup funclet"); 2587 } 2588 } 2589 } else if (isAsynchronousEHPersonality(Personality)) { 2590 // SEH is even less tolerant, there may not be any sort of exceptional 2591 // funclet in the callee. 2592 for (const BasicBlock &CalledBB : *CalledFunc) { 2593 if (CalledBB.isEHPad()) 2594 return InlineResult::failure("SEH in cleanup funclet"); 2595 } 2596 } 2597 } 2598 } 2599 } 2600 2601 // Determine if we are dealing with a call in an EHPad which does not unwind 2602 // to caller. 
2603 bool EHPadForCallUnwindsLocally = false;
2604 if (CallSiteEHPad && isa<CallInst>(CB)) {
2605 UnwindDestMemoTy FuncletUnwindMap;
2606 Value *CallSiteUnwindDestToken =
2607 getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
2608
2609 EHPadForCallUnwindsLocally =
2610 CallSiteUnwindDestToken &&
2611 !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
2612 }
2613
2614 // Get an iterator to the last basic block in the function, which will have
2615 // the new function inlined after it.
2616 Function::iterator LastBlock = --Caller->end();
2617
2618 // Make sure to capture all of the return instructions from the cloned
2619 // function.
2620 SmallVector<ReturnInst*, 8> Returns;
2621 ClonedCodeInfo InlinedFunctionInfo;
2622 Function::iterator FirstNewBlock;
2623
2624 { // Scope to destroy VMap after cloning.
2625 ValueToValueMapTy VMap;
2626 struct ByValInit {
2627 Value *Dst;
2628 Value *Src;
2629 Type *Ty;
2630 };
2631 // Keep a list of (dst, src, type) tuples to emit byval initializations.
2632 SmallVector<ByValInit, 4> ByValInits;
2633
2634 // When inlining a function that contains noalias scope metadata,
2635 // this metadata needs to be cloned so that the inlined blocks
2636 // have different "unique scopes" at every call site.
2637 // Track the metadata that must be cloned. Do this before other changes to
2638 // the function, so that we do not get in trouble when inlining caller ==
2639 // callee.
2640 ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
2641
2642 auto &DL = Caller->getDataLayout();
2643
2644 // Calculate the vector of arguments to pass into the function cloner, which
2645 // matches up the formal to the actual argument values.
2646 auto AI = CB.arg_begin();
2647 unsigned ArgNo = 0;
2648 for (Function::arg_iterator I = CalledFunc->arg_begin(),
2649 E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2650 Value *ActualArg = *AI;
2651
2652 // When byval arguments are actually inlined, we need to make the copy implied
2653 // by them explicit. However, we don't do this if the callee is readonly
2654 // or readnone, because the copy would be unneeded: the callee doesn't
2655 // modify the struct.
2656 if (CB.isByValArgument(ArgNo)) {
2657 ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg,
2658 &CB, CalledFunc, IFI,
2659 CalledFunc->getParamAlign(ArgNo));
2660 if (ActualArg != *AI)
2661 ByValInits.push_back(
2662 {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
2663 }
2664
2665 VMap[&*I] = ActualArg;
2666 }
2667
2668 // TODO: Remove this when users have been updated to the assume bundles.
2669 // Add alignment assumptions if necessary. We do this before the inlined
2670 // instructions are actually cloned into the caller so that we can easily
2671 // check what will be known at the start of the inlined code.
2672 AddAlignmentAssumptions(CB, IFI);
2673
2674 AssumptionCache *AC =
2675 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2676
2677 // Preserve all attributes on the call and its parameters.
2678 salvageKnowledge(&CB, AC);
2679
2680 // We want the inliner to prune the code as it copies. We would LOVE to
2681 // have no dead or constant instructions leftover after inlining occurs
2682 // (which can happen, e.g., because an argument was constant), but we'll be
2683 // happy with whatever the cloner can do.
2684 CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
2685 /*ModuleLevelChanges=*/false, Returns, ".i",
2686 &InlinedFunctionInfo);
2687 // Remember the first block that is newly cloned over.
2688 FirstNewBlock = LastBlock; ++FirstNewBlock;
2689
2690 // Insert retainRV/claimRV runtime calls.
2691 objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB);
2692 if (RVCallKind != objcarc::ARCInstKind::None)
2693 inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2694
2695 // Update caller/callee profiles only when requested. For sample loader
2696 // inlining, the context-sensitive inlinee profile doesn't need to be
2697 // subtracted from callee profile, and the inlined clone also doesn't need
2698 // to be scaled based on call site count.
2699 if (IFI.UpdateProfile) {
2700 if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2701 // Update the BFI of blocks cloned into the caller.
2702 updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
2703 CalledFunc->front());
2704
2705 if (auto Profile = CalledFunc->getEntryCount())
2706 updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI,
2707 IFI.CallerBFI);
2708 }
2709
2710 // Inject byval arguments initialization.
2711 for (ByValInit &Init : ByValInits)
2712 HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
2713 &*FirstNewBlock, IFI, CalledFunc);
2714
2715 std::optional<OperandBundleUse> ParentDeopt =
2716 CB.getOperandBundle(LLVMContext::OB_deopt);
2717 if (ParentDeopt) {
2718 SmallVector<OperandBundleDef, 2> OpDefs;
2719
2720 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
2721 CallBase *ICS = dyn_cast_or_null<CallBase>(VH);
2722 if (!ICS)
2723 continue; // instruction was DCE'd or RAUW'ed to undef
2724
2725 OpDefs.clear();
2726
2727 OpDefs.reserve(ICS->getNumOperandBundles());
2728
2729 for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
2730 ++COBi) {
2731 auto ChildOB = ICS->getOperandBundleAt(COBi);
2732 if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
2733 // If the inlined call has other operand bundles, let them be.
2734 OpDefs.emplace_back(ChildOB);
2735 continue;
2736 }
2737
2738 // It may be useful to separate this logic (of handling operand
2739 // bundles) out to a separate "policy" component if this gets crowded.
2740 // Prepend the parent's deoptimization continuation to the newly
2741 // inlined call's deoptimization continuation.
2742 std::vector<Value *> MergedDeoptArgs;
2743 MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
2744 ChildOB.Inputs.size());
2745
2746 llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs);
2747 llvm::append_range(MergedDeoptArgs, ChildOB.Inputs);
2748
2749 OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
2750 }
2751
2752 Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS->getIterator());
2753
2754 // Note: the RAUW does the appropriate fixup in VMap, so we need to do
2755 // this even if the call returns void.
2756 ICS->replaceAllUsesWith(NewI);
2757
2758 VH = nullptr;
2759 ICS->eraseFromParent();
2760 }
2761 }
2762
2763 // For 'nodebug' functions, the associated DISubprogram is always null.
2764 // Conservatively avoid propagating the callsite debug location to
2765 // instructions inlined from a function whose DISubprogram is not null.
2766 fixupLineNumbers(Caller, FirstNewBlock, &CB,
2767 CalledFunc->getSubprogram() != nullptr);
2768
2769 if (isAssignmentTrackingEnabled(*Caller->getParent())) {
2770 // Interpret inlined stores to caller-local variables as assignments.
2771 trackInlinedStores(FirstNewBlock, Caller->end(), CB);
2772
2773 // Update DIAssignID metadata attachments and uses so that they are
2774 // unique to this inlined instance.
      fixupAssignments(FirstNewBlock, Caller->end());
    }

    // Now clone the inlined noalias scope metadata.
    SAMetadataCloner.clone();
    SAMetadataCloner.remap(FirstNewBlock, Caller->end());

    // Add noalias metadata if necessary.
    AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);

    // Clone return attributes on the callsite into the calls within the
    // inlined function which feed into its return value.
    AddReturnAttributes(CB, VMap, InlinedFunctionInfo);

    // Clone attributes on the params of the callsite to calls within the
    // inlined function which use the same param.
    AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);

    propagateMemProfMetadata(CalledFunc, CB,
                             InlinedFunctionInfo.ContainsMemProfMetadata, VMap);

    // Propagate metadata on the callsite if necessary.
    PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());

    // Register any cloned assumptions.
    if (IFI.GetAssumptionCache)
      for (BasicBlock &NewBlock :
           make_range(FirstNewBlock->getIterator(), Caller->end()))
        for (Instruction &I : NewBlock)
          if (auto *II = dyn_cast<AssumeInst>(&I))
            IFI.GetAssumptionCache(*Caller).registerAssumption(II);
  }

  if (ConvergenceControlToken) {
    IntrinsicInst *IntrinsicCall = getConvergenceEntry(*FirstNewBlock);
    if (IntrinsicCall) {
      IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken);
      IntrinsicCall->eraseFromParent();
    }
  }

  // If there are any alloca instructions in the block that used to be the
  // entry block for the callee, move them to the entry block of the caller.
  // First calculate which instruction they should be inserted before. We
  // insert the instructions at the end of the current alloca list.
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (!AI) continue;

      // If the alloca is now dead, remove it. This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!allocaWouldBeStaticInEntry(AI))
        continue;

      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);

      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             !cast<AllocaInst>(I)->use_empty() &&
             allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block. Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
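      // Note: setting the tail bit on the end iterator affects how debug
      // records sitting at the splice boundary are carried over (see the
      // head/tail bits on BasicBlock::iterator); the alloca run itself is
      // moved by the splice below.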
      I.setTailBit(true);
      Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock,
                                     AI->getIterator(), I);
    }
  }

  SmallVector<Value *, 4> VarArgsToForward;
  SmallVector<AttributeSet, 4> VarArgsAttrs;
  for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
       i < CB.arg_size(); i++) {
    VarArgsToForward.push_back(CB.getArgOperand(i));
    VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i));
  }

  bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
  if (InlinedFunctionInfo.ContainsCalls) {
    CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
    if (CallInst *CI = dyn_cast<CallInst>(&CB))
      CallSiteTailKind = CI->getTailCallKind();

    // For inlining purposes, the "notail" marker is the same as no marker.
    if (CallSiteTailKind == CallInst::TCK_NoTail)
      CallSiteTailKind = CallInst::TCK_None;

    for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
         ++BB) {
      for (Instruction &I : llvm::make_early_inc_range(*BB)) {
        CallInst *CI = dyn_cast<CallInst>(&I);
        if (!CI)
          continue;

        // Forward varargs from inlined call site to calls to the
        // ForwardVarArgsTo function, if requested, and to musttail calls.
        if (!VarArgsToForward.empty() &&
            ((ForwardVarArgsTo &&
              CI->getCalledFunction() == ForwardVarArgsTo) ||
             CI->isMustTailCall())) {
          // Collect attributes for non-vararg parameters.
          AttributeList Attrs = CI->getAttributes();
          SmallVector<AttributeSet, 8> ArgAttrs;
          if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
            for (unsigned ArgNo = 0;
                 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
              ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
          }

          // Add VarArg attributes.
          ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
          Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(),
                                     Attrs.getRetAttrs(), ArgAttrs);
          // Add VarArgs to existing parameters.
          SmallVector<Value *, 6> Params(CI->args());
          Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
          CallInst *NewCI = CallInst::Create(
              CI->getFunctionType(), CI->getCalledOperand(), Params, "",
              CI->getIterator());
          NewCI->setDebugLoc(CI->getDebugLoc());
          NewCI->setAttributes(Attrs);
          NewCI->setCallingConv(CI->getCallingConv());
          CI->replaceAllUsesWith(NewCI);
          CI->eraseFromParent();
          CI = NewCI;
        }

        if (Function *F = CI->getCalledFunction())
          InlinedDeoptimizeCalls |=
              F->getIntrinsicID() == Intrinsic::experimental_deoptimize;

        // We need to reduce the strength of any inlined tail calls. For
        // musttail, we have to avoid introducing potential unbounded stack
        // growth. For example, if functions 'f' and 'g' are mutually recursive
        // with musttail, we can inline 'g' into 'f' so long as we preserve
        // musttail on the cloned call to 'f'. If either the inlined call site
        // or the cloned call site is *not* musttail, the program already has
        // one frame of stack growth, so it's safe to remove musttail. Here is
        // a table of example transformations:
        //
        //    f -> musttail g -> musttail f  ==>  f -> musttail f
        //    f -> musttail g ->     tail f  ==>  f ->     tail f
        //    f ->          g -> musttail f  ==>  f ->          f
        //    f ->          g ->     tail f  ==>  f ->          f
        //
        // Inlined notail calls should remain notail calls.
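        // Taking std::min implements the table above: with the ordering
        // None < Tail < MustTail, the cloned call stays musttail only if both
        // the call site and the original call were musttail (the "notail"
        // case is handled separately).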
        CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
        if (ChildTCK != CallInst::TCK_NoTail)
          ChildTCK = std::min(CallSiteTailKind, ChildTCK);
        CI->setTailCallKind(ChildTCK);
        InlinedMustTailCalls |= CI->isMustTailCall();

        // Call sites inlined through a 'nounwind' call site should be
        // 'nounwind' as well. However, avoid marking call sites explicitly
        // where possible. This helps expose more opportunities for CSE after
        // inlining, commonly when the callee is an intrinsic.
        if (MarkNoUnwind && !CI->doesNotThrow())
          CI->setDoesNotThrow();
      }
    }
  }

  // Leave lifetime markers for the static allocas, scoping them to the
  // function we just inlined.
  // We need to insert lifetime intrinsics even at O0 to avoid invalid
  // access caused by multithreaded coroutines. The check
  // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
  if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
      !IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
    for (AllocaInst *AI : IFI.StaticAllocas) {
      // Don't mark swifterror allocas. They can't have bitcast uses.
      if (AI->isSwiftError())
        continue;

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      // Try to determine the size of the allocation.
      ConstantInt *AllocaSize = nullptr;
      if (ConstantInt *AIArraySize =
              dyn_cast<ConstantInt>(AI->getArraySize())) {
        auto &DL = Caller->getDataLayout();
        Type *AllocaType = AI->getAllocatedType();
        TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
        uint64_t AllocaArraySize = AIArraySize->getLimitedValue();

        // Don't add markers for zero-sized allocas.
        if (AllocaArraySize == 0)
          continue;

        // Check that array size doesn't saturate uint64_t and doesn't
        // overflow when it's multiplied by type size.
        if (!AllocaTypeSize.isScalable() &&
            AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
            std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
                AllocaTypeSize.getFixedValue()) {
          AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
                                        AllocaArraySize * AllocaTypeSize);
        }
      }

      builder.CreateLifetimeStart(AI, AllocaSize);
      for (ReturnInst *RI : Returns) {
        // Don't insert llvm.lifetime.end calls between a musttail or
        // deoptimize call and a return. The return kills all local allocas.
        if (InlinedMustTailCalls &&
            RI->getParent()->getTerminatingMustTailCall())
          continue;
        if (InlinedDeoptimizeCalls &&
            RI->getParent()->getTerminatingDeoptimizeCall())
          continue;
        IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the
  // inlined code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
                             .CreateStackSave("savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
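    // Every return path out of the inlined body needs to restore the stack
    // pointer saved above so that dynamic allocas do not outlive the inlined
    // call in the caller's frame.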
    for (ReturnInst *RI : Returns) {
      // Don't insert llvm.stackrestore calls between a musttail or deoptimize
      // call and a return. The return will restore the stack pointer.
      if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
        continue;
      if (InlinedDeoptimizeCalls &&
          RI->getParent()->getTerminatingDeoptimizeCall())
        continue;
      IRBuilder<>(RI).CreateStackRestore(SavedPtr);
    }
  }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any call instructions into invoke instructions. This is sensitive to which
  // funclet pads were top-level in the inlinee, so must be done before
  // rewriting the "parent pad" links.
  if (auto *II = dyn_cast<InvokeInst>(&CB)) {
    BasicBlock *UnwindDest = II->getUnwindDest();
    BasicBlock::iterator FirstNonPHI = UnwindDest->getFirstNonPHIIt();
    if (isa<LandingPadInst>(FirstNonPHI)) {
      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
    } else {
      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
    }
  }

  // Update the lexical scopes of the new funclets and callsites.
  // Anything that had 'none' as its parent is now nested inside the callsite's
  // EHPad.
  if (CallSiteEHPad) {
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB) {
      // Add bundle operands to inlined call sites.
      PropagateOperandBundles(BB, CallSiteEHPad);

      // It is problematic if the inlinee has a cleanupret which unwinds to
      // the caller and we inline it at a call site which itself doesn't
      // unwind but sits inside an EH pad that unwinds locally. Such an edge
      // must be dynamically unreachable, so we replace the cleanupret with
      // unreachable.
      if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
        if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
          changeToUnreachable(CleanupRet);

      BasicBlock::iterator I = BB->getFirstNonPHIIt();
      if (!I->isEHPad())
        continue;

      if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
        if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
          CatchSwitch->setParentPad(CallSiteEHPad);
      } else {
        auto *FPI = cast<FuncletPadInst>(I);
        if (isa<ConstantTokenNone>(FPI->getParentPad()))
          FPI->setParentPad(CallSiteEHPad);
      }
    }
  }

  if (InlinedDeoptimizeCalls) {
    // We need to at least remove the deoptimizing returns from the Return set,
    // so that the control flow from those returns does not get merged into the
    // caller (but terminate it instead). If the caller's return type does not
    // match the callee's return type, we also need to change the return type
    // of the intrinsic.
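    // If the types already match, dropping the deoptimizing returns is enough;
    // otherwise each deoptimize call is recreated below against a declaration
    // of @llvm.experimental.deoptimize returning the caller's type.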
    if (Caller->getReturnType() == CB.getType()) {
      llvm::erase_if(Returns, [](ReturnInst *RI) {
        return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
      });
    } else {
      SmallVector<ReturnInst *, 8> NormalReturns;
      Function *NewDeoptIntrinsic = Intrinsic::getOrInsertDeclaration(
          Caller->getParent(), Intrinsic::experimental_deoptimize,
          {Caller->getReturnType()});

      for (ReturnInst *RI : Returns) {
        CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
        if (!DeoptCall) {
          NormalReturns.push_back(RI);
          continue;
        }

        // The calling convention on the deoptimize call itself may be bogus,
        // since the code we're inlining may have undefined behavior (and may
        // never actually execute at runtime); but all
        // @llvm.experimental.deoptimize declarations have to have the same
        // calling convention in a well-formed module.
        auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
        NewDeoptIntrinsic->setCallingConv(CallingConv);
        auto *CurBB = RI->getParent();
        RI->eraseFromParent();

        SmallVector<Value *, 4> CallArgs(DeoptCall->args());

        SmallVector<OperandBundleDef, 1> OpBundles;
        DeoptCall->getOperandBundlesAsDefs(OpBundles);
        auto DeoptAttributes = DeoptCall->getAttributes();
        DeoptCall->eraseFromParent();
        assert(!OpBundles.empty() &&
               "Expected at least the deopt operand bundle");

        IRBuilder<> Builder(CurBB);
        CallInst *NewDeoptCall =
            Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
        NewDeoptCall->setCallingConv(CallingConv);
        NewDeoptCall->setAttributes(DeoptAttributes);
        if (NewDeoptCall->getType()->isVoidTy())
          Builder.CreateRetVoid();
        else
          Builder.CreateRet(NewDeoptCall);
        // Since the ret type is changed, remove the incompatible attributes.
        NewDeoptCall->removeRetAttrs(AttributeFuncs::typeIncompatible(
            NewDeoptCall->getType(), NewDeoptCall->getRetAttributes()));
      }

      // Leave behind the normal returns so we can merge control flow.
      std::swap(Returns, NormalReturns);
    }
  }

  // Handle any inlined musttail call sites. In order for a new call site to be
  // musttail, the source of the clone and the inlined call site must have been
  // musttail. Therefore it's safe to return without merging control into the
  // phi below.
  if (InlinedMustTailCalls) {
    // Check if we need to bitcast the result of any musttail calls.
    Type *NewRetTy = Caller->getReturnType();
    bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;

    // Handle the returns preceded by musttail calls separately.
    SmallVector<ReturnInst *, 8> NormalReturns;
    for (ReturnInst *RI : Returns) {
      CallInst *ReturnedMustTail =
          RI->getParent()->getTerminatingMustTailCall();
      if (!ReturnedMustTail) {
        NormalReturns.push_back(RI);
        continue;
      }
      if (!NeedBitCast)
        continue;

      // Delete the old return and any preceding bitcast.
      BasicBlock *CurBB = RI->getParent();
      auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
      RI->eraseFromParent();
      if (OldCast)
        OldCast->eraseFromParent();

      // Insert a new bitcast and return with the right type.
      IRBuilder<> Builder(CurBB);
      Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
    }

    // Leave behind the normal returns so we can merge control flow.
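    // Returns preceded by musttail calls were handled above (or deliberately
    // left untouched) and are not merged into the caller's control flow below.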
    std::swap(Returns, NormalReturns);
  }

  // Now that all of the transforms on the inlined code have taken place but
  // before we splice the inlined code into the CFG and lose track of which
  // blocks were actually inlined, collect the call sites. We only do this if
  // call graph updates weren't requested, as those provide value handle based
  // tracking of inlined call sites instead. Calls to intrinsics are not
  // collected because they are not inlineable.
  if (InlinedFunctionInfo.ContainsCalls) {
    // Collect the raw call sites that were inlined.
    for (BasicBlock &NewBB :
         make_range(FirstNewBlock->getIterator(), Caller->end()))
      for (Instruction &I : NewBB)
        if (auto *CB = dyn_cast<CallBase>(&I))
          if (!(CB->getCalledFunction() &&
                CB->getCalledFunction()->isIntrinsic()))
            IFI.InlinedCallSites.push_back(CB);
  }

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(),
                   FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->back().eraseFromParent();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      BranchInst *NewBr =
          BranchInst::Create(II->getNormalDest(), CB.getIterator());
      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
    }

    // If the return instruction returned a value, replace uses of the call
    // with uses of the returned value.
    if (!CB.use_empty()) {
      ReturnInst *R = Returns[0];
      if (&CB == R->getReturnValue())
        CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
      else
        CB.replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    CB.eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    if (MergeAttributes)
      AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);

    // We are now done with the inlining.
    return InlineResult::success();
  }

  // Otherwise, we have the normal case of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks. How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  BranchInst *CreatedBranchToNormalDest = nullptr;
  if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {

    // Add an unconditional branch to make this look like the CallInst case...
    CreatedBranchToNormalDest =
        BranchInst::Create(II->getNormalDest(), CB.getIterator());

    // Split the basic block. This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and makes the invoke case more
    // symmetric to the call case.
    AfterCallBB =
        OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
                                CalledFunc->getName() + ".exit");

  } else { // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(),
                                          CalledFunc->getName() + ".exit");
  }

  if (IFI.CallerBFI) {
    // Copy original BB's block frequency to AfterCallBB.
    IFI.CallerBFI->setBlockFreq(AfterCallBB,
                                IFI.CallerBFI->getBlockFreq(OrigBB));
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  Instruction *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, &*FirstNewBlock);

  // Now that the function is correct, make it a little bit nicer. In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock,
                 Caller->end());

  // Handle all of the return instructions that we just cloned in, and
  // eliminate any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = nullptr;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!CB.use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), CB.getName());
      PHI->insertBefore(AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as its operand.
      CB.replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (ReturnInst *RI : Returns) {
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }

    // Add a branch to the merge point and remove return instructions.
    DebugLoc Loc;
    for (ReturnInst *RI : Returns) {
      BranchInst *BI = BranchInst::Create(AfterCallBB, RI->getIterator());
      Loc = RI->getDebugLoc();
      BI->setDebugLoc(Loc);
      RI->eraseFromParent();
    }
    // We need to set the debug location to *somewhere* inside the
    // inlined function. The line number may be nonsensical, but the
    // instruction will at least be associated with the right
    // function.
    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Loc);
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!CB.use_empty()) {
      if (&CB == Returns[0]->getReturnValue())
        CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
      else
        CB.replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->splice(AfterCallBB->begin(), ReturnBB);

    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());

    // Delete the return instruction and the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!CB.use_empty()) {
    // No returns, but something is using the return value of the call. Just
    // nuke the result.
    CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  CB.eraseFromParent();

  // If we inlined any musttail calls and the original return is now
  // unreachable, delete it. It can only contain a bitcast and ret.
  if (InlinedMustTailCalls && pred_empty(AfterCallBB))
    AfterCallBB->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the callee entry block into the calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
  OrigBB->splice(Br->getIterator(), CalleeEntry);

  // Remove the unconditional branch.
  Br->eraseFromParent();

  // Now we can remove the CalleeEntry block, which is now empty.
  CalleeEntry->eraseFromParent();

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef). If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    AssumptionCache *AC =
        IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
    auto &DL = Caller->getDataLayout();
    if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }

  if (MergeAttributes)
    AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);

  return InlineResult::success();
}