1 //===- InlineFunction.cpp - Code to perform function inlining -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements inlining of a function into a call site, resolving 11 // parameters and the return value as appropriate. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Utils/Cloning.h" 16 #include "llvm/ADT/SmallSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/SetVector.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/Analysis/AliasAnalysis.h" 21 #include "llvm/Analysis/AssumptionCache.h" 22 #include "llvm/Analysis/CallGraph.h" 23 #include "llvm/Analysis/CaptureTracking.h" 24 #include "llvm/Analysis/InstructionSimplify.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/Attributes.h" 27 #include "llvm/IR/CallSite.h" 28 #include "llvm/IR/CFG.h" 29 #include "llvm/IR/Constants.h" 30 #include "llvm/IR/DataLayout.h" 31 #include "llvm/IR/DebugInfo.h" 32 #include "llvm/IR/DerivedTypes.h" 33 #include "llvm/IR/DIBuilder.h" 34 #include "llvm/IR/Dominators.h" 35 #include "llvm/IR/IRBuilder.h" 36 #include "llvm/IR/Instructions.h" 37 #include "llvm/IR/IntrinsicInst.h" 38 #include "llvm/IR/Intrinsics.h" 39 #include "llvm/IR/MDBuilder.h" 40 #include "llvm/IR/Module.h" 41 #include "llvm/Transforms/Utils/Local.h" 42 #include "llvm/Support/CommandLine.h" 43 #include <algorithm> 44 45 using namespace llvm; 46 47 static cl::opt<bool> 48 EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), 49 cl::Hidden, 50 cl::desc("Convert noalias attributes to metadata during inlining.")); 51 52 static cl::opt<bool> 53 PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", 54 cl::init(true), cl::Hidden, 55 cl::desc("Convert align attributes to assumptions during inlining.")); 56 57 bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, 58 AAResults *CalleeAAR, bool InsertLifetime) { 59 return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime); 60 } 61 bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, 62 AAResults *CalleeAAR, bool InsertLifetime) { 63 return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); 64 } 65 66 namespace { 67 /// A class for recording information about inlining a landing pad. 68 class LandingPadInliningInfo { 69 BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. 70 BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. 71 LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. 72 PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts. 73 SmallVector<Value*, 8> UnwindDestPHIValues; 74 75 public: 76 LandingPadInliningInfo(InvokeInst *II) 77 : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), 78 CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { 79 // If there are PHI nodes in the unwind destination block, we need to keep 80 // track of which values came into them from the invoke before removing 81 // the edge from this block. 82 llvm::BasicBlock *InvokeBB = II->getParent(); 83 BasicBlock::iterator I = OuterResumeDest->begin(); 84 for (; isa<PHINode>(I); ++I) { 85 // Save the value to use for this edge. 86 PHINode *PHI = cast<PHINode>(I); 87 UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); 88 } 89 90 CallerLPad = cast<LandingPadInst>(I); 91 } 92 93 /// The outer unwind destination is the target of 94 /// unwind edges introduced for calls within the inlined function. 95 BasicBlock *getOuterResumeDest() const { 96 return OuterResumeDest; 97 } 98 99 BasicBlock *getInnerResumeDest(); 100 101 LandingPadInst *getLandingPadInst() const { return CallerLPad; } 102 103 /// Forward the 'resume' instruction to the caller's landing pad block. 104 /// When the landing pad block has only one predecessor, this is 105 /// a simple branch. When there is more than one predecessor, we need to 106 /// split the landing pad block after the landingpad instruction and jump 107 /// to there. 108 void forwardResume(ResumeInst *RI, 109 SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); 110 111 /// Add incoming-PHI values to the unwind destination block for the given 112 /// basic block, using the values for the original invoke's source block. 113 void addIncomingPHIValuesFor(BasicBlock *BB) const { 114 addIncomingPHIValuesForInto(BB, OuterResumeDest); 115 } 116 117 void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { 118 BasicBlock::iterator I = dest->begin(); 119 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { 120 PHINode *phi = cast<PHINode>(I); 121 phi->addIncoming(UnwindDestPHIValues[i], src); 122 } 123 } 124 }; 125 } // anonymous namespace 126 127 /// Get or create a target for the branch from ResumeInsts. 128 BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { 129 if (InnerResumeDest) return InnerResumeDest; 130 131 // Split the landing pad. 132 BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); 133 InnerResumeDest = 134 OuterResumeDest->splitBasicBlock(SplitPoint, 135 OuterResumeDest->getName() + ".body"); 136 137 // The number of incoming edges we expect to the inner landing pad. 138 const unsigned PHICapacity = 2; 139 140 // Create corresponding new PHIs for all the PHIs in the outer landing pad. 141 Instruction *InsertPoint = &InnerResumeDest->front(); 142 BasicBlock::iterator I = OuterResumeDest->begin(); 143 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { 144 PHINode *OuterPHI = cast<PHINode>(I); 145 PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, 146 OuterPHI->getName() + ".lpad-body", 147 InsertPoint); 148 OuterPHI->replaceAllUsesWith(InnerPHI); 149 InnerPHI->addIncoming(OuterPHI, OuterResumeDest); 150 } 151 152 // Create a PHI for the exception values. 153 InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, 154 "eh.lpad-body", InsertPoint); 155 CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); 156 InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); 157 158 // All done. 159 return InnerResumeDest; 160 } 161 162 /// Forward the 'resume' instruction to the caller's landing pad block. 163 /// When the landing pad block has only one predecessor, this is a simple 164 /// branch. When there is more than one predecessor, we need to split the 165 /// landing pad block after the landingpad instruction and jump to there. 166 void LandingPadInliningInfo::forwardResume( 167 ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { 168 BasicBlock *Dest = getInnerResumeDest(); 169 BasicBlock *Src = RI->getParent(); 170 171 BranchInst::Create(Dest, Src); 172 173 // Update the PHIs in the destination. They were inserted in an order which 174 // makes this work. 175 addIncomingPHIValuesForInto(Src, Dest); 176 177 InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); 178 RI->eraseFromParent(); 179 } 180 181 /// When we inline a basic block into an invoke, 182 /// we have to turn all of the calls that can throw into invokes. 183 /// This function analyze BB to see if there are any calls, and if so, 184 /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI 185 /// nodes in that block with the values specified in InvokeDestPHIValues. 186 static BasicBlock * 187 HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { 188 for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { 189 Instruction *I = &*BBI++; 190 191 // We only need to check for function calls: inlined invoke 192 // instructions require no special handling. 193 CallInst *CI = dyn_cast<CallInst>(I); 194 195 // If this call cannot unwind, don't convert it to an invoke. 196 // Inline asm calls cannot throw. 197 if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue())) 198 continue; 199 200 // Convert this function call into an invoke instruction. First, split the 201 // basic block. 202 BasicBlock *Split = 203 BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc"); 204 205 // Delete the unconditional branch inserted by splitBasicBlock 206 BB->getInstList().pop_back(); 207 208 // Create the new invoke instruction. 209 ImmutableCallSite CS(CI); 210 SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); 211 InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, 212 InvokeArgs, CI->getName(), BB); 213 II->setDebugLoc(CI->getDebugLoc()); 214 II->setCallingConv(CI->getCallingConv()); 215 II->setAttributes(CI->getAttributes()); 216 217 // Make sure that anything using the call now uses the invoke! This also 218 // updates the CallGraph if present, because it uses a WeakVH. 219 CI->replaceAllUsesWith(II); 220 221 // Delete the original call 222 Split->getInstList().pop_front(); 223 return BB; 224 } 225 return nullptr; 226 } 227 228 /// If we inlined an invoke site, we need to convert calls 229 /// in the body of the inlined function into invokes. 230 /// 231 /// II is the invoke instruction being inlined. FirstNewBlock is the first 232 /// block of the inlined code (the last block is the end of the function), 233 /// and InlineCodeInfo is information about the code that got inlined. 234 static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, 235 ClonedCodeInfo &InlinedCodeInfo) { 236 BasicBlock *InvokeDest = II->getUnwindDest(); 237 238 Function *Caller = FirstNewBlock->getParent(); 239 240 // The inlined code is currently at the end of the function, scan from the 241 // start of the inlined code to its end, checking for stuff we need to 242 // rewrite. 243 LandingPadInliningInfo Invoke(II); 244 245 // Get all of the inlined landing pad instructions. 246 SmallPtrSet<LandingPadInst*, 16> InlinedLPads; 247 for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); 248 I != E; ++I) 249 if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) 250 InlinedLPads.insert(II->getLandingPadInst()); 251 252 // Append the clauses from the outer landing pad instruction into the inlined 253 // landing pad instructions. 254 LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); 255 for (LandingPadInst *InlinedLPad : InlinedLPads) { 256 unsigned OuterNum = OuterLPad->getNumClauses(); 257 InlinedLPad->reserveClauses(OuterNum); 258 for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) 259 InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); 260 if (OuterLPad->isCleanup()) 261 InlinedLPad->setCleanup(true); 262 } 263 264 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); 265 BB != E; ++BB) { 266 if (InlinedCodeInfo.ContainsCalls) 267 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( 268 &*BB, Invoke.getOuterResumeDest())) 269 // Update any PHI nodes in the exceptional block to indicate that there 270 // is now a new entry in them. 271 Invoke.addIncomingPHIValuesFor(NewBB); 272 273 // Forward any resumes that are remaining here. 274 if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) 275 Invoke.forwardResume(RI, InlinedLPads); 276 } 277 278 // Now that everything is happy, we have one final detail. The PHI nodes in 279 // the exception destination block still have entries due to the original 280 // invoke instruction. Eliminate these entries (which might even delete the 281 // PHI node) now. 282 InvokeDest->removePredecessor(II->getParent()); 283 } 284 285 /// If we inlined an invoke site, we need to convert calls 286 /// in the body of the inlined function into invokes. 287 /// 288 /// II is the invoke instruction being inlined. FirstNewBlock is the first 289 /// block of the inlined code (the last block is the end of the function), 290 /// and InlineCodeInfo is information about the code that got inlined. 291 static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, 292 ClonedCodeInfo &InlinedCodeInfo) { 293 BasicBlock *UnwindDest = II->getUnwindDest(); 294 Function *Caller = FirstNewBlock->getParent(); 295 296 assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); 297 298 // If there are PHI nodes in the unwind destination block, we need to keep 299 // track of which values came into them from the invoke before removing the 300 // edge from this block. 301 SmallVector<Value *, 8> UnwindDestPHIValues; 302 llvm::BasicBlock *InvokeBB = II->getParent(); 303 for (Instruction &I : *UnwindDest) { 304 // Save the value to use for this edge. 305 PHINode *PHI = dyn_cast<PHINode>(&I); 306 if (!PHI) 307 break; 308 UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); 309 } 310 311 // Add incoming-PHI values to the unwind destination block for the given basic 312 // block, using the values for the original invoke's source block. 313 auto UpdatePHINodes = [&](BasicBlock *Src) { 314 BasicBlock::iterator I = UnwindDest->begin(); 315 for (Value *V : UnwindDestPHIValues) { 316 PHINode *PHI = cast<PHINode>(I); 317 PHI->addIncoming(V, Src); 318 ++I; 319 } 320 }; 321 322 // Forward EH terminator instructions to the caller's invoke destination. 323 // This is as simple as connect all the instructions which 'unwind to caller' 324 // to the invoke destination. 325 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); 326 BB != E; ++BB) { 327 Instruction *I = BB->getFirstNonPHI(); 328 if (I->isEHPad()) { 329 if (auto *CEPI = dyn_cast<CatchEndPadInst>(I)) { 330 if (CEPI->unwindsToCaller()) { 331 CatchEndPadInst::Create(CEPI->getContext(), UnwindDest, CEPI); 332 CEPI->eraseFromParent(); 333 UpdatePHINodes(&*BB); 334 } 335 } else if (auto *CEPI = dyn_cast<CleanupEndPadInst>(I)) { 336 if (CEPI->unwindsToCaller()) { 337 CleanupEndPadInst::Create(CEPI->getCleanupPad(), UnwindDest, CEPI); 338 CEPI->eraseFromParent(); 339 UpdatePHINodes(&*BB); 340 } 341 } else if (auto *TPI = dyn_cast<TerminatePadInst>(I)) { 342 if (TPI->unwindsToCaller()) { 343 SmallVector<Value *, 3> TerminatePadArgs; 344 for (Value *Operand : TPI->operands()) 345 TerminatePadArgs.push_back(Operand); 346 TerminatePadInst::Create(TPI->getContext(), UnwindDest, TPI); 347 TPI->eraseFromParent(); 348 UpdatePHINodes(&*BB); 349 } 350 } else { 351 assert(isa<CatchPadInst>(I) || isa<CleanupPadInst>(I)); 352 } 353 } 354 355 if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { 356 if (CRI->unwindsToCaller()) { 357 CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI); 358 CRI->eraseFromParent(); 359 UpdatePHINodes(&*BB); 360 } 361 } 362 } 363 364 if (InlinedCodeInfo.ContainsCalls) 365 for (Function::iterator BB = FirstNewBlock->getIterator(), 366 E = Caller->end(); 367 BB != E; ++BB) 368 if (BasicBlock *NewBB = 369 HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest)) 370 // Update any PHI nodes in the exceptional block to indicate that there 371 // is now a new entry in them. 372 UpdatePHINodes(NewBB); 373 374 // Now that everything is happy, we have one final detail. The PHI nodes in 375 // the exception destination block still have entries due to the original 376 // invoke instruction. Eliminate these entries (which might even delete the 377 // PHI node) now. 378 UnwindDest->removePredecessor(InvokeBB); 379 } 380 381 /// When inlining a function that contains noalias scope metadata, 382 /// this metadata needs to be cloned so that the inlined blocks 383 /// have different "unqiue scopes" at every call site. Were this not done, then 384 /// aliasing scopes from a function inlined into a caller multiple times could 385 /// not be differentiated (and this would lead to miscompiles because the 386 /// non-aliasing property communicated by the metadata could have 387 /// call-site-specific control dependencies). 388 static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { 389 const Function *CalledFunc = CS.getCalledFunction(); 390 SetVector<const MDNode *> MD; 391 392 // Note: We could only clone the metadata if it is already used in the 393 // caller. I'm omitting that check here because it might confuse 394 // inter-procedural alias analysis passes. We can revisit this if it becomes 395 // an efficiency or overhead problem. 396 397 for (Function::const_iterator I = CalledFunc->begin(), IE = CalledFunc->end(); 398 I != IE; ++I) 399 for (BasicBlock::const_iterator J = I->begin(), JE = I->end(); J != JE; ++J) { 400 if (const MDNode *M = J->getMetadata(LLVMContext::MD_alias_scope)) 401 MD.insert(M); 402 if (const MDNode *M = J->getMetadata(LLVMContext::MD_noalias)) 403 MD.insert(M); 404 } 405 406 if (MD.empty()) 407 return; 408 409 // Walk the existing metadata, adding the complete (perhaps cyclic) chain to 410 // the set. 411 SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); 412 while (!Queue.empty()) { 413 const MDNode *M = cast<MDNode>(Queue.pop_back_val()); 414 for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i) 415 if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i))) 416 if (MD.insert(M1)) 417 Queue.push_back(M1); 418 } 419 420 // Now we have a complete set of all metadata in the chains used to specify 421 // the noalias scopes and the lists of those scopes. 422 SmallVector<TempMDTuple, 16> DummyNodes; 423 DenseMap<const MDNode *, TrackingMDNodeRef> MDMap; 424 for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end(); 425 I != IE; ++I) { 426 DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); 427 MDMap[*I].reset(DummyNodes.back().get()); 428 } 429 430 // Create new metadata nodes to replace the dummy nodes, replacing old 431 // metadata references with either a dummy node or an already-created new 432 // node. 433 for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end(); 434 I != IE; ++I) { 435 SmallVector<Metadata *, 4> NewOps; 436 for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) { 437 const Metadata *V = (*I)->getOperand(i); 438 if (const MDNode *M = dyn_cast<MDNode>(V)) 439 NewOps.push_back(MDMap[M]); 440 else 441 NewOps.push_back(const_cast<Metadata *>(V)); 442 } 443 444 MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); 445 MDTuple *TempM = cast<MDTuple>(MDMap[*I]); 446 assert(TempM->isTemporary() && "Expected temporary node"); 447 448 TempM->replaceAllUsesWith(NewM); 449 } 450 451 // Now replace the metadata in the new inlined instructions with the 452 // repacements from the map. 453 for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); 454 VMI != VMIE; ++VMI) { 455 if (!VMI->second) 456 continue; 457 458 Instruction *NI = dyn_cast<Instruction>(VMI->second); 459 if (!NI) 460 continue; 461 462 if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { 463 MDNode *NewMD = MDMap[M]; 464 // If the call site also had alias scope metadata (a list of scopes to 465 // which instructions inside it might belong), propagate those scopes to 466 // the inlined instructions. 467 if (MDNode *CSM = 468 CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) 469 NewMD = MDNode::concatenate(NewMD, CSM); 470 NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); 471 } else if (NI->mayReadOrWriteMemory()) { 472 if (MDNode *M = 473 CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) 474 NI->setMetadata(LLVMContext::MD_alias_scope, M); 475 } 476 477 if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) { 478 MDNode *NewMD = MDMap[M]; 479 // If the call site also had noalias metadata (a list of scopes with 480 // which instructions inside it don't alias), propagate those scopes to 481 // the inlined instructions. 482 if (MDNode *CSM = 483 CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) 484 NewMD = MDNode::concatenate(NewMD, CSM); 485 NI->setMetadata(LLVMContext::MD_noalias, NewMD); 486 } else if (NI->mayReadOrWriteMemory()) { 487 if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) 488 NI->setMetadata(LLVMContext::MD_noalias, M); 489 } 490 } 491 } 492 493 /// If the inlined function has noalias arguments, 494 /// then add new alias scopes for each noalias argument, tag the mapped noalias 495 /// parameters with noalias metadata specifying the new scope, and tag all 496 /// non-derived loads, stores and memory intrinsics with the new alias scopes. 497 static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, 498 const DataLayout &DL, AAResults *CalleeAAR) { 499 if (!EnableNoAliasConversion) 500 return; 501 502 const Function *CalledFunc = CS.getCalledFunction(); 503 SmallVector<const Argument *, 4> NoAliasArgs; 504 505 for (const Argument &I : CalledFunc->args()) { 506 if (I.hasNoAliasAttr() && !I.hasNUses(0)) 507 NoAliasArgs.push_back(&I); 508 } 509 510 if (NoAliasArgs.empty()) 511 return; 512 513 // To do a good job, if a noalias variable is captured, we need to know if 514 // the capture point dominates the particular use we're considering. 515 DominatorTree DT; 516 DT.recalculate(const_cast<Function&>(*CalledFunc)); 517 518 // noalias indicates that pointer values based on the argument do not alias 519 // pointer values which are not based on it. So we add a new "scope" for each 520 // noalias function argument. Accesses using pointers based on that argument 521 // become part of that alias scope, accesses using pointers not based on that 522 // argument are tagged as noalias with that scope. 523 524 DenseMap<const Argument *, MDNode *> NewScopes; 525 MDBuilder MDB(CalledFunc->getContext()); 526 527 // Create a new scope domain for this function. 528 MDNode *NewDomain = 529 MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); 530 for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { 531 const Argument *A = NoAliasArgs[i]; 532 533 std::string Name = CalledFunc->getName(); 534 if (A->hasName()) { 535 Name += ": %"; 536 Name += A->getName(); 537 } else { 538 Name += ": argument "; 539 Name += utostr(i); 540 } 541 542 // Note: We always create a new anonymous root here. This is true regardless 543 // of the linkage of the callee because the aliasing "scope" is not just a 544 // property of the callee, but also all control dependencies in the caller. 545 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); 546 NewScopes.insert(std::make_pair(A, NewScope)); 547 } 548 549 // Iterate over all new instructions in the map; for all memory-access 550 // instructions, add the alias scope metadata. 551 for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); 552 VMI != VMIE; ++VMI) { 553 if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) { 554 if (!VMI->second) 555 continue; 556 557 Instruction *NI = dyn_cast<Instruction>(VMI->second); 558 if (!NI) 559 continue; 560 561 bool IsArgMemOnlyCall = false, IsFuncCall = false; 562 SmallVector<const Value *, 2> PtrArgs; 563 564 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 565 PtrArgs.push_back(LI->getPointerOperand()); 566 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 567 PtrArgs.push_back(SI->getPointerOperand()); 568 else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) 569 PtrArgs.push_back(VAAI->getPointerOperand()); 570 else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) 571 PtrArgs.push_back(CXI->getPointerOperand()); 572 else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) 573 PtrArgs.push_back(RMWI->getPointerOperand()); 574 else if (ImmutableCallSite ICS = ImmutableCallSite(I)) { 575 // If we know that the call does not access memory, then we'll still 576 // know that about the inlined clone of this call site, and we don't 577 // need to add metadata. 578 if (ICS.doesNotAccessMemory()) 579 continue; 580 581 IsFuncCall = true; 582 if (CalleeAAR) { 583 FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS); 584 if (MRB == FMRB_OnlyAccessesArgumentPointees || 585 MRB == FMRB_OnlyReadsArgumentPointees) 586 IsArgMemOnlyCall = true; 587 } 588 589 for (ImmutableCallSite::arg_iterator AI = ICS.arg_begin(), 590 AE = ICS.arg_end(); AI != AE; ++AI) { 591 // We need to check the underlying objects of all arguments, not just 592 // the pointer arguments, because we might be passing pointers as 593 // integers, etc. 594 // However, if we know that the call only accesses pointer arguments, 595 // then we only need to check the pointer arguments. 596 if (IsArgMemOnlyCall && !(*AI)->getType()->isPointerTy()) 597 continue; 598 599 PtrArgs.push_back(*AI); 600 } 601 } 602 603 // If we found no pointers, then this instruction is not suitable for 604 // pairing with an instruction to receive aliasing metadata. 605 // However, if this is a call, this we might just alias with none of the 606 // noalias arguments. 607 if (PtrArgs.empty() && !IsFuncCall) 608 continue; 609 610 // It is possible that there is only one underlying object, but you 611 // need to go through several PHIs to see it, and thus could be 612 // repeated in the Objects list. 613 SmallPtrSet<const Value *, 4> ObjSet; 614 SmallVector<Metadata *, 4> Scopes, NoAliases; 615 616 SmallSetVector<const Argument *, 4> NAPtrArgs; 617 for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) { 618 SmallVector<Value *, 4> Objects; 619 GetUnderlyingObjects(const_cast<Value*>(PtrArgs[i]), 620 Objects, DL, /* LI = */ nullptr); 621 622 for (Value *O : Objects) 623 ObjSet.insert(O); 624 } 625 626 // Figure out if we're derived from anything that is not a noalias 627 // argument. 628 bool CanDeriveViaCapture = false, UsesAliasingPtr = false; 629 for (const Value *V : ObjSet) { 630 // Is this value a constant that cannot be derived from any pointer 631 // value (we need to exclude constant expressions, for example, that 632 // are formed from arithmetic on global symbols). 633 bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) || 634 isa<ConstantPointerNull>(V) || 635 isa<ConstantDataVector>(V) || isa<UndefValue>(V); 636 if (IsNonPtrConst) 637 continue; 638 639 // If this is anything other than a noalias argument, then we cannot 640 // completely describe the aliasing properties using alias.scope 641 // metadata (and, thus, won't add any). 642 if (const Argument *A = dyn_cast<Argument>(V)) { 643 if (!A->hasNoAliasAttr()) 644 UsesAliasingPtr = true; 645 } else { 646 UsesAliasingPtr = true; 647 } 648 649 // If this is not some identified function-local object (which cannot 650 // directly alias a noalias argument), or some other argument (which, 651 // by definition, also cannot alias a noalias argument), then we could 652 // alias a noalias argument that has been captured). 653 if (!isa<Argument>(V) && 654 !isIdentifiedFunctionLocal(const_cast<Value*>(V))) 655 CanDeriveViaCapture = true; 656 } 657 658 // A function call can always get captured noalias pointers (via other 659 // parameters, globals, etc.). 660 if (IsFuncCall && !IsArgMemOnlyCall) 661 CanDeriveViaCapture = true; 662 663 // First, we want to figure out all of the sets with which we definitely 664 // don't alias. Iterate over all noalias set, and add those for which: 665 // 1. The noalias argument is not in the set of objects from which we 666 // definitely derive. 667 // 2. The noalias argument has not yet been captured. 668 // An arbitrary function that might load pointers could see captured 669 // noalias arguments via other noalias arguments or globals, and so we 670 // must always check for prior capture. 671 for (const Argument *A : NoAliasArgs) { 672 if (!ObjSet.count(A) && (!CanDeriveViaCapture || 673 // It might be tempting to skip the 674 // PointerMayBeCapturedBefore check if 675 // A->hasNoCaptureAttr() is true, but this is 676 // incorrect because nocapture only guarantees 677 // that no copies outlive the function, not 678 // that the value cannot be locally captured. 679 !PointerMayBeCapturedBefore(A, 680 /* ReturnCaptures */ false, 681 /* StoreCaptures */ false, I, &DT))) 682 NoAliases.push_back(NewScopes[A]); 683 } 684 685 if (!NoAliases.empty()) 686 NI->setMetadata(LLVMContext::MD_noalias, 687 MDNode::concatenate( 688 NI->getMetadata(LLVMContext::MD_noalias), 689 MDNode::get(CalledFunc->getContext(), NoAliases))); 690 691 // Next, we want to figure out all of the sets to which we might belong. 692 // We might belong to a set if the noalias argument is in the set of 693 // underlying objects. If there is some non-noalias argument in our list 694 // of underlying objects, then we cannot add a scope because the fact 695 // that some access does not alias with any set of our noalias arguments 696 // cannot itself guarantee that it does not alias with this access 697 // (because there is some pointer of unknown origin involved and the 698 // other access might also depend on this pointer). We also cannot add 699 // scopes to arbitrary functions unless we know they don't access any 700 // non-parameter pointer-values. 701 bool CanAddScopes = !UsesAliasingPtr; 702 if (CanAddScopes && IsFuncCall) 703 CanAddScopes = IsArgMemOnlyCall; 704 705 if (CanAddScopes) 706 for (const Argument *A : NoAliasArgs) { 707 if (ObjSet.count(A)) 708 Scopes.push_back(NewScopes[A]); 709 } 710 711 if (!Scopes.empty()) 712 NI->setMetadata( 713 LLVMContext::MD_alias_scope, 714 MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), 715 MDNode::get(CalledFunc->getContext(), Scopes))); 716 } 717 } 718 } 719 720 /// If the inlined function has non-byval align arguments, then 721 /// add @llvm.assume-based alignment assumptions to preserve this information. 722 static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { 723 if (!PreserveAlignmentAssumptions) 724 return; 725 auto &DL = CS.getCaller()->getParent()->getDataLayout(); 726 727 // To avoid inserting redundant assumptions, we should check for assumptions 728 // already in the caller. To do this, we might need a DT of the caller. 729 DominatorTree DT; 730 bool DTCalculated = false; 731 732 Function *CalledFunc = CS.getCalledFunction(); 733 for (Function::arg_iterator I = CalledFunc->arg_begin(), 734 E = CalledFunc->arg_end(); 735 I != E; ++I) { 736 unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0; 737 if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) { 738 if (!DTCalculated) { 739 DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent() 740 ->getParent())); 741 DTCalculated = true; 742 } 743 744 // If we can already prove the asserted alignment in the context of the 745 // caller, then don't bother inserting the assumption. 746 Value *Arg = CS.getArgument(I->getArgNo()); 747 if (getKnownAlignment(Arg, DL, CS.getInstruction(), 748 &IFI.ACT->getAssumptionCache(*CS.getCaller()), 749 &DT) >= Align) 750 continue; 751 752 IRBuilder<>(CS.getInstruction()) 753 .CreateAlignmentAssumption(DL, Arg, Align); 754 } 755 } 756 } 757 758 /// Once we have cloned code over from a callee into the caller, 759 /// update the specified callgraph to reflect the changes we made. 760 /// Note that it's possible that not all code was copied over, so only 761 /// some edges of the callgraph may remain. 762 static void UpdateCallGraphAfterInlining(CallSite CS, 763 Function::iterator FirstNewBlock, 764 ValueToValueMapTy &VMap, 765 InlineFunctionInfo &IFI) { 766 CallGraph &CG = *IFI.CG; 767 const Function *Caller = CS.getInstruction()->getParent()->getParent(); 768 const Function *Callee = CS.getCalledFunction(); 769 CallGraphNode *CalleeNode = CG[Callee]; 770 CallGraphNode *CallerNode = CG[Caller]; 771 772 // Since we inlined some uninlined call sites in the callee into the caller, 773 // add edges from the caller to all of the callees of the callee. 774 CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); 775 776 // Consider the case where CalleeNode == CallerNode. 777 CallGraphNode::CalledFunctionsVector CallCache; 778 if (CalleeNode == CallerNode) { 779 CallCache.assign(I, E); 780 I = CallCache.begin(); 781 E = CallCache.end(); 782 } 783 784 for (; I != E; ++I) { 785 const Value *OrigCall = I->first; 786 787 ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); 788 // Only copy the edge if the call was inlined! 789 if (VMI == VMap.end() || VMI->second == nullptr) 790 continue; 791 792 // If the call was inlined, but then constant folded, there is no edge to 793 // add. Check for this case. 794 Instruction *NewCall = dyn_cast<Instruction>(VMI->second); 795 if (!NewCall) 796 continue; 797 798 // We do not treat intrinsic calls like real function calls because we 799 // expect them to become inline code; do not add an edge for an intrinsic. 800 CallSite CS = CallSite(NewCall); 801 if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic()) 802 continue; 803 804 // Remember that this call site got inlined for the client of 805 // InlineFunction. 806 IFI.InlinedCalls.push_back(NewCall); 807 808 // It's possible that inlining the callsite will cause it to go from an 809 // indirect to a direct call by resolving a function pointer. If this 810 // happens, set the callee of the new call site to a more precise 811 // destination. This can also happen if the call graph node of the caller 812 // was just unnecessarily imprecise. 813 if (!I->second->getFunction()) 814 if (Function *F = CallSite(NewCall).getCalledFunction()) { 815 // Indirect call site resolved to direct call. 816 CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); 817 818 continue; 819 } 820 821 CallerNode->addCalledFunction(CallSite(NewCall), I->second); 822 } 823 824 // Update the call graph by deleting the edge from Callee to Caller. We must 825 // do this after the loop above in case Caller and Callee are the same. 826 CallerNode->removeCallEdgeFor(CS); 827 } 828 829 static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, 830 BasicBlock *InsertBlock, 831 InlineFunctionInfo &IFI) { 832 Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); 833 IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); 834 835 Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); 836 837 // Always generate a memcpy of alignment 1 here because we don't know 838 // the alignment of the src pointer. Other optimizations can infer 839 // better alignment. 840 Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); 841 } 842 843 /// When inlining a call site that has a byval argument, 844 /// we have to make the implicit memcpy explicit by adding it. 845 static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, 846 const Function *CalledFunc, 847 InlineFunctionInfo &IFI, 848 unsigned ByValAlignment) { 849 PointerType *ArgTy = cast<PointerType>(Arg->getType()); 850 Type *AggTy = ArgTy->getElementType(); 851 852 Function *Caller = TheCall->getParent()->getParent(); 853 854 // If the called function is readonly, then it could not mutate the caller's 855 // copy of the byval'd memory. In this case, it is safe to elide the copy and 856 // temporary. 857 if (CalledFunc->onlyReadsMemory()) { 858 // If the byval argument has a specified alignment that is greater than the 859 // passed in pointer, then we either have to round up the input pointer or 860 // give up on this transformation. 861 if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. 862 return Arg; 863 864 const DataLayout &DL = Caller->getParent()->getDataLayout(); 865 866 // If the pointer is already known to be sufficiently aligned, or if we can 867 // round it up to a larger alignment, then we don't need a temporary. 868 if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, 869 &IFI.ACT->getAssumptionCache(*Caller)) >= 870 ByValAlignment) 871 return Arg; 872 873 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad 874 // for code quality, but rarely happens and is required for correctness. 875 } 876 877 // Create the alloca. If we have DataLayout, use nice alignment. 878 unsigned Align = 879 Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); 880 881 // If the byval had an alignment specified, we *must* use at least that 882 // alignment, as it is required by the byval argument (and uses of the 883 // pointer inside the callee). 884 Align = std::max(Align, ByValAlignment); 885 886 Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), 887 &*Caller->begin()->begin()); 888 IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); 889 890 // Uses of the argument in the function should use our new alloca 891 // instead. 892 return NewAlloca; 893 } 894 895 // Check whether this Value is used by a lifetime intrinsic. 896 static bool isUsedByLifetimeMarker(Value *V) { 897 for (User *U : V->users()) { 898 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { 899 switch (II->getIntrinsicID()) { 900 default: break; 901 case Intrinsic::lifetime_start: 902 case Intrinsic::lifetime_end: 903 return true; 904 } 905 } 906 } 907 return false; 908 } 909 910 // Check whether the given alloca already has 911 // lifetime.start or lifetime.end intrinsics. 912 static bool hasLifetimeMarkers(AllocaInst *AI) { 913 Type *Ty = AI->getType(); 914 Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), 915 Ty->getPointerAddressSpace()); 916 if (Ty == Int8PtrTy) 917 return isUsedByLifetimeMarker(AI); 918 919 // Do a scan to find all the casts to i8*. 920 for (User *U : AI->users()) { 921 if (U->getType() != Int8PtrTy) continue; 922 if (U->stripPointerCasts() != AI) continue; 923 if (isUsedByLifetimeMarker(U)) 924 return true; 925 } 926 return false; 927 } 928 929 /// Rebuild the entire inlined-at chain for this instruction so that the top of 930 /// the chain now is inlined-at the new call site. 931 static DebugLoc 932 updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, 933 DenseMap<const DILocation *, DILocation *> &IANodes) { 934 SmallVector<DILocation *, 3> InlinedAtLocations; 935 DILocation *Last = InlinedAtNode; 936 DILocation *CurInlinedAt = DL; 937 938 // Gather all the inlined-at nodes 939 while (DILocation *IA = CurInlinedAt->getInlinedAt()) { 940 // Skip any we've already built nodes for 941 if (DILocation *Found = IANodes[IA]) { 942 Last = Found; 943 break; 944 } 945 946 InlinedAtLocations.push_back(IA); 947 CurInlinedAt = IA; 948 } 949 950 // Starting from the top, rebuild the nodes to point to the new inlined-at 951 // location (then rebuilding the rest of the chain behind it) and update the 952 // map of already-constructed inlined-at nodes. 953 for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(), 954 InlinedAtLocations.rend())) { 955 Last = IANodes[MD] = DILocation::getDistinct( 956 Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); 957 } 958 959 // And finally create the normal location for this instruction, referring to 960 // the new inlined-at chain. 961 return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); 962 } 963 964 /// Update inlined instructions' line numbers to 965 /// to encode location where these instructions are inlined. 966 static void fixupLineNumbers(Function *Fn, Function::iterator FI, 967 Instruction *TheCall) { 968 DebugLoc TheCallDL = TheCall->getDebugLoc(); 969 if (!TheCallDL) 970 return; 971 972 auto &Ctx = Fn->getContext(); 973 DILocation *InlinedAtNode = TheCallDL; 974 975 // Create a unique call site, not to be confused with any other call from the 976 // same location. 977 InlinedAtNode = DILocation::getDistinct( 978 Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), 979 InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); 980 981 // Cache the inlined-at nodes as they're built so they are reused, without 982 // this every instruction's inlined-at chain would become distinct from each 983 // other. 984 DenseMap<const DILocation *, DILocation *> IANodes; 985 986 for (; FI != Fn->end(); ++FI) { 987 for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); 988 BI != BE; ++BI) { 989 DebugLoc DL = BI->getDebugLoc(); 990 if (!DL) { 991 // If the inlined instruction has no line number, make it look as if it 992 // originates from the call location. This is important for 993 // ((__always_inline__, __nodebug__)) functions which must use caller 994 // location for all instructions in their function body. 995 996 // Don't update static allocas, as they may get moved later. 997 if (auto *AI = dyn_cast<AllocaInst>(BI)) 998 if (isa<Constant>(AI->getArraySize())) 999 continue; 1000 1001 BI->setDebugLoc(TheCallDL); 1002 } else { 1003 BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); 1004 } 1005 } 1006 } 1007 } 1008 1009 /// This function inlines the called function into the basic block of the 1010 /// caller. This returns false if it is not possible to inline this call. 1011 /// The program is still in a well defined state if this occurs though. 1012 /// 1013 /// Note that this only does one level of inlining. For example, if the 1014 /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now 1015 /// exists in the instruction stream. Similarly this will inline a recursive 1016 /// function by one level. 1017 bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, 1018 AAResults *CalleeAAR, bool InsertLifetime) { 1019 Instruction *TheCall = CS.getInstruction(); 1020 assert(TheCall->getParent() && TheCall->getParent()->getParent() && 1021 "Instruction not in function!"); 1022 1023 // If IFI has any state in it, zap it before we fill it in. 1024 IFI.reset(); 1025 1026 const Function *CalledFunc = CS.getCalledFunction(); 1027 if (!CalledFunc || // Can't inline external function or indirect 1028 CalledFunc->isDeclaration() || // call, or call to a vararg function! 1029 CalledFunc->getFunctionType()->isVarArg()) return false; 1030 1031 // If the call to the callee cannot throw, set the 'nounwind' flag on any 1032 // calls that we inline. 1033 bool MarkNoUnwind = CS.doesNotThrow(); 1034 1035 BasicBlock *OrigBB = TheCall->getParent(); 1036 Function *Caller = OrigBB->getParent(); 1037 1038 // GC poses two hazards to inlining, which only occur when the callee has GC: 1039 // 1. If the caller has no GC, then the callee's GC must be propagated to the 1040 // caller. 1041 // 2. If the caller has a differing GC, it is invalid to inline. 1042 if (CalledFunc->hasGC()) { 1043 if (!Caller->hasGC()) 1044 Caller->setGC(CalledFunc->getGC()); 1045 else if (CalledFunc->getGC() != Caller->getGC()) 1046 return false; 1047 } 1048 1049 // Get the personality function from the callee if it contains a landing pad. 1050 Constant *CalledPersonality = 1051 CalledFunc->hasPersonalityFn() ? CalledFunc->getPersonalityFn() : nullptr; 1052 1053 // Find the personality function used by the landing pads of the caller. If it 1054 // exists, then check to see that it matches the personality function used in 1055 // the callee. 1056 Constant *CallerPersonality = 1057 Caller->hasPersonalityFn() ? Caller->getPersonalityFn() : nullptr; 1058 if (CalledPersonality) { 1059 if (!CallerPersonality) 1060 Caller->setPersonalityFn(CalledPersonality); 1061 // If the personality functions match, then we can perform the 1062 // inlining. Otherwise, we can't inline. 1063 // TODO: This isn't 100% true. Some personality functions are proper 1064 // supersets of others and can be used in place of the other. 1065 else if (CalledPersonality != CallerPersonality) 1066 return false; 1067 } 1068 1069 // Get an iterator to the last basic block in the function, which will have 1070 // the new function inlined after it. 1071 Function::iterator LastBlock = --Caller->end(); 1072 1073 // Make sure to capture all of the return instructions from the cloned 1074 // function. 1075 SmallVector<ReturnInst*, 8> Returns; 1076 ClonedCodeInfo InlinedFunctionInfo; 1077 Function::iterator FirstNewBlock; 1078 1079 { // Scope to destroy VMap after cloning. 1080 ValueToValueMapTy VMap; 1081 // Keep a list of pair (dst, src) to emit byval initializations. 1082 SmallVector<std::pair<Value*, Value*>, 4> ByValInit; 1083 1084 auto &DL = Caller->getParent()->getDataLayout(); 1085 1086 assert(CalledFunc->arg_size() == CS.arg_size() && 1087 "No varargs calls can be inlined!"); 1088 1089 // Calculate the vector of arguments to pass into the function cloner, which 1090 // matches up the formal to the actual argument values. 1091 CallSite::arg_iterator AI = CS.arg_begin(); 1092 unsigned ArgNo = 0; 1093 for (Function::const_arg_iterator I = CalledFunc->arg_begin(), 1094 E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { 1095 Value *ActualArg = *AI; 1096 1097 // When byval arguments actually inlined, we need to make the copy implied 1098 // by them explicit. However, we don't do this if the callee is readonly 1099 // or readnone, because the copy would be unneeded: the callee doesn't 1100 // modify the struct. 1101 if (CS.isByValArgument(ArgNo)) { 1102 ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, 1103 CalledFunc->getParamAlignment(ArgNo+1)); 1104 if (ActualArg != *AI) 1105 ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); 1106 } 1107 1108 VMap[&*I] = ActualArg; 1109 } 1110 1111 // Add alignment assumptions if necessary. We do this before the inlined 1112 // instructions are actually cloned into the caller so that we can easily 1113 // check what will be known at the start of the inlined code. 1114 AddAlignmentAssumptions(CS, IFI); 1115 1116 // We want the inliner to prune the code as it copies. We would LOVE to 1117 // have no dead or constant instructions leftover after inlining occurs 1118 // (which can happen, e.g., because an argument was constant), but we'll be 1119 // happy with whatever the cloner can do. 1120 CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, 1121 /*ModuleLevelChanges=*/false, Returns, ".i", 1122 &InlinedFunctionInfo, TheCall); 1123 1124 // Remember the first block that is newly cloned over. 1125 FirstNewBlock = LastBlock; ++FirstNewBlock; 1126 1127 // Inject byval arguments initialization. 1128 for (std::pair<Value*, Value*> &Init : ByValInit) 1129 HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), 1130 &*FirstNewBlock, IFI); 1131 1132 // Update the callgraph if requested. 1133 if (IFI.CG) 1134 UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); 1135 1136 // Update inlined instructions' line number information. 1137 fixupLineNumbers(Caller, FirstNewBlock, TheCall); 1138 1139 // Clone existing noalias metadata if necessary. 1140 CloneAliasScopeMetadata(CS, VMap); 1141 1142 // Add noalias metadata if necessary. 1143 AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); 1144 1145 // FIXME: We could register any cloned assumptions instead of clearing the 1146 // whole function's cache. 1147 if (IFI.ACT) 1148 IFI.ACT->getAssumptionCache(*Caller).clear(); 1149 } 1150 1151 // If there are any alloca instructions in the block that used to be the entry 1152 // block for the callee, move them to the entry block of the caller. First 1153 // calculate which instruction they should be inserted before. We insert the 1154 // instructions at the end of the current alloca list. 1155 { 1156 BasicBlock::iterator InsertPoint = Caller->begin()->begin(); 1157 for (BasicBlock::iterator I = FirstNewBlock->begin(), 1158 E = FirstNewBlock->end(); I != E; ) { 1159 AllocaInst *AI = dyn_cast<AllocaInst>(I++); 1160 if (!AI) continue; 1161 1162 // If the alloca is now dead, remove it. This often occurs due to code 1163 // specialization. 1164 if (AI->use_empty()) { 1165 AI->eraseFromParent(); 1166 continue; 1167 } 1168 1169 if (!isa<Constant>(AI->getArraySize())) 1170 continue; 1171 1172 // Keep track of the static allocas that we inline into the caller. 1173 IFI.StaticAllocas.push_back(AI); 1174 1175 // Scan for the block of allocas that we can move over, and move them 1176 // all at once. 1177 while (isa<AllocaInst>(I) && 1178 isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { 1179 IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); 1180 ++I; 1181 } 1182 1183 // Transfer all of the allocas over in a block. Using splice means 1184 // that the instructions aren't removed from the symbol table, then 1185 // reinserted. 1186 Caller->getEntryBlock().getInstList().splice( 1187 InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); 1188 } 1189 // Move any dbg.declares describing the allocas into the entry basic block. 1190 DIBuilder DIB(*Caller->getParent()); 1191 for (auto &AI : IFI.StaticAllocas) 1192 replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); 1193 } 1194 1195 bool InlinedMustTailCalls = false; 1196 if (InlinedFunctionInfo.ContainsCalls) { 1197 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; 1198 if (CallInst *CI = dyn_cast<CallInst>(TheCall)) 1199 CallSiteTailKind = CI->getTailCallKind(); 1200 1201 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; 1202 ++BB) { 1203 for (Instruction &I : *BB) { 1204 CallInst *CI = dyn_cast<CallInst>(&I); 1205 if (!CI) 1206 continue; 1207 1208 // We need to reduce the strength of any inlined tail calls. For 1209 // musttail, we have to avoid introducing potential unbounded stack 1210 // growth. For example, if functions 'f' and 'g' are mutually recursive 1211 // with musttail, we can inline 'g' into 'f' so long as we preserve 1212 // musttail on the cloned call to 'f'. If either the inlined call site 1213 // or the cloned call site is *not* musttail, the program already has 1214 // one frame of stack growth, so it's safe to remove musttail. Here is 1215 // a table of example transformations: 1216 // 1217 // f -> musttail g -> musttail f ==> f -> musttail f 1218 // f -> musttail g -> tail f ==> f -> tail f 1219 // f -> g -> musttail f ==> f -> f 1220 // f -> g -> tail f ==> f -> f 1221 CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); 1222 ChildTCK = std::min(CallSiteTailKind, ChildTCK); 1223 CI->setTailCallKind(ChildTCK); 1224 InlinedMustTailCalls |= CI->isMustTailCall(); 1225 1226 // Calls inlined through a 'nounwind' call site should be marked 1227 // 'nounwind'. 1228 if (MarkNoUnwind) 1229 CI->setDoesNotThrow(); 1230 } 1231 } 1232 } 1233 1234 // Leave lifetime markers for the static alloca's, scoping them to the 1235 // function we just inlined. 1236 if (InsertLifetime && !IFI.StaticAllocas.empty()) { 1237 IRBuilder<> builder(&FirstNewBlock->front()); 1238 for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { 1239 AllocaInst *AI = IFI.StaticAllocas[ai]; 1240 1241 // If the alloca is already scoped to something smaller than the whole 1242 // function then there's no need to add redundant, less accurate markers. 1243 if (hasLifetimeMarkers(AI)) 1244 continue; 1245 1246 // Try to determine the size of the allocation. 1247 ConstantInt *AllocaSize = nullptr; 1248 if (ConstantInt *AIArraySize = 1249 dyn_cast<ConstantInt>(AI->getArraySize())) { 1250 auto &DL = Caller->getParent()->getDataLayout(); 1251 Type *AllocaType = AI->getAllocatedType(); 1252 uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); 1253 uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); 1254 1255 // Don't add markers for zero-sized allocas. 1256 if (AllocaArraySize == 0) 1257 continue; 1258 1259 // Check that array size doesn't saturate uint64_t and doesn't 1260 // overflow when it's multiplied by type size. 1261 if (AllocaArraySize != ~0ULL && 1262 UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { 1263 AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), 1264 AllocaArraySize * AllocaTypeSize); 1265 } 1266 } 1267 1268 builder.CreateLifetimeStart(AI, AllocaSize); 1269 for (ReturnInst *RI : Returns) { 1270 // Don't insert llvm.lifetime.end calls between a musttail call and a 1271 // return. The return kills all local allocas. 1272 if (InlinedMustTailCalls && 1273 RI->getParent()->getTerminatingMustTailCall()) 1274 continue; 1275 IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); 1276 } 1277 } 1278 } 1279 1280 // If the inlined code contained dynamic alloca instructions, wrap the inlined 1281 // code with llvm.stacksave/llvm.stackrestore intrinsics. 1282 if (InlinedFunctionInfo.ContainsDynamicAllocas) { 1283 Module *M = Caller->getParent(); 1284 // Get the two intrinsics we care about. 1285 Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); 1286 Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); 1287 1288 // Insert the llvm.stacksave. 1289 CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) 1290 .CreateCall(StackSave, {}, "savedstack"); 1291 1292 // Insert a call to llvm.stackrestore before any return instructions in the 1293 // inlined function. 1294 for (ReturnInst *RI : Returns) { 1295 // Don't insert llvm.stackrestore calls between a musttail call and a 1296 // return. The return will restore the stack pointer. 1297 if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) 1298 continue; 1299 IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); 1300 } 1301 } 1302 1303 // If we are inlining for an invoke instruction, we must make sure to rewrite 1304 // any call instructions into invoke instructions. 1305 if (auto *II = dyn_cast<InvokeInst>(TheCall)) { 1306 BasicBlock *UnwindDest = II->getUnwindDest(); 1307 Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); 1308 if (isa<LandingPadInst>(FirstNonPHI)) { 1309 HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); 1310 } else { 1311 HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); 1312 } 1313 } 1314 1315 // Handle any inlined musttail call sites. In order for a new call site to be 1316 // musttail, the source of the clone and the inlined call site must have been 1317 // musttail. Therefore it's safe to return without merging control into the 1318 // phi below. 1319 if (InlinedMustTailCalls) { 1320 // Check if we need to bitcast the result of any musttail calls. 1321 Type *NewRetTy = Caller->getReturnType(); 1322 bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; 1323 1324 // Handle the returns preceded by musttail calls separately. 1325 SmallVector<ReturnInst *, 8> NormalReturns; 1326 for (ReturnInst *RI : Returns) { 1327 CallInst *ReturnedMustTail = 1328 RI->getParent()->getTerminatingMustTailCall(); 1329 if (!ReturnedMustTail) { 1330 NormalReturns.push_back(RI); 1331 continue; 1332 } 1333 if (!NeedBitCast) 1334 continue; 1335 1336 // Delete the old return and any preceding bitcast. 1337 BasicBlock *CurBB = RI->getParent(); 1338 auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); 1339 RI->eraseFromParent(); 1340 if (OldCast) 1341 OldCast->eraseFromParent(); 1342 1343 // Insert a new bitcast and return with the right type. 1344 IRBuilder<> Builder(CurBB); 1345 Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); 1346 } 1347 1348 // Leave behind the normal returns so we can merge control flow. 1349 std::swap(Returns, NormalReturns); 1350 } 1351 1352 // If we cloned in _exactly one_ basic block, and if that block ends in a 1353 // return instruction, we splice the body of the inlined callee directly into 1354 // the calling basic block. 1355 if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { 1356 // Move all of the instructions right before the call. 1357 OrigBB->getInstList().splice(TheCall->getIterator(), 1358 FirstNewBlock->getInstList(), 1359 FirstNewBlock->begin(), FirstNewBlock->end()); 1360 // Remove the cloned basic block. 1361 Caller->getBasicBlockList().pop_back(); 1362 1363 // If the call site was an invoke instruction, add a branch to the normal 1364 // destination. 1365 if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { 1366 BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); 1367 NewBr->setDebugLoc(Returns[0]->getDebugLoc()); 1368 } 1369 1370 // If the return instruction returned a value, replace uses of the call with 1371 // uses of the returned value. 1372 if (!TheCall->use_empty()) { 1373 ReturnInst *R = Returns[0]; 1374 if (TheCall == R->getReturnValue()) 1375 TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); 1376 else 1377 TheCall->replaceAllUsesWith(R->getReturnValue()); 1378 } 1379 // Since we are now done with the Call/Invoke, we can delete it. 1380 TheCall->eraseFromParent(); 1381 1382 // Since we are now done with the return instruction, delete it also. 1383 Returns[0]->eraseFromParent(); 1384 1385 // We are now done with the inlining. 1386 return true; 1387 } 1388 1389 // Otherwise, we have the normal case, of more than one block to inline or 1390 // multiple return sites. 1391 1392 // We want to clone the entire callee function into the hole between the 1393 // "starter" and "ender" blocks. How we accomplish this depends on whether 1394 // this is an invoke instruction or a call instruction. 1395 BasicBlock *AfterCallBB; 1396 BranchInst *CreatedBranchToNormalDest = nullptr; 1397 if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { 1398 1399 // Add an unconditional branch to make this look like the CallInst case... 1400 CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); 1401 1402 // Split the basic block. This guarantees that no PHI nodes will have to be 1403 // updated due to new incoming edges, and make the invoke case more 1404 // symmetric to the call case. 1405 AfterCallBB = 1406 OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), 1407 CalledFunc->getName() + ".exit"); 1408 1409 } else { // It's a call 1410 // If this is a call instruction, we need to split the basic block that 1411 // the call lives in. 1412 // 1413 AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), 1414 CalledFunc->getName() + ".exit"); 1415 } 1416 1417 // Change the branch that used to go to AfterCallBB to branch to the first 1418 // basic block of the inlined function. 1419 // 1420 TerminatorInst *Br = OrigBB->getTerminator(); 1421 assert(Br && Br->getOpcode() == Instruction::Br && 1422 "splitBasicBlock broken!"); 1423 Br->setOperand(0, &*FirstNewBlock); 1424 1425 // Now that the function is correct, make it a little bit nicer. In 1426 // particular, move the basic blocks inserted from the end of the function 1427 // into the space made by splitting the source basic block. 1428 Caller->getBasicBlockList().splice(AfterCallBB->getIterator(), 1429 Caller->getBasicBlockList(), FirstNewBlock, 1430 Caller->end()); 1431 1432 // Handle all of the return instructions that we just cloned in, and eliminate 1433 // any users of the original call/invoke instruction. 1434 Type *RTy = CalledFunc->getReturnType(); 1435 1436 PHINode *PHI = nullptr; 1437 if (Returns.size() > 1) { 1438 // The PHI node should go at the front of the new basic block to merge all 1439 // possible incoming values. 1440 if (!TheCall->use_empty()) { 1441 PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), 1442 &AfterCallBB->front()); 1443 // Anything that used the result of the function call should now use the 1444 // PHI node as their operand. 1445 TheCall->replaceAllUsesWith(PHI); 1446 } 1447 1448 // Loop over all of the return instructions adding entries to the PHI node 1449 // as appropriate. 1450 if (PHI) { 1451 for (unsigned i = 0, e = Returns.size(); i != e; ++i) { 1452 ReturnInst *RI = Returns[i]; 1453 assert(RI->getReturnValue()->getType() == PHI->getType() && 1454 "Ret value not consistent in function!"); 1455 PHI->addIncoming(RI->getReturnValue(), RI->getParent()); 1456 } 1457 } 1458 1459 // Add a branch to the merge points and remove return instructions. 1460 DebugLoc Loc; 1461 for (unsigned i = 0, e = Returns.size(); i != e; ++i) { 1462 ReturnInst *RI = Returns[i]; 1463 BranchInst* BI = BranchInst::Create(AfterCallBB, RI); 1464 Loc = RI->getDebugLoc(); 1465 BI->setDebugLoc(Loc); 1466 RI->eraseFromParent(); 1467 } 1468 // We need to set the debug location to *somewhere* inside the 1469 // inlined function. The line number may be nonsensical, but the 1470 // instruction will at least be associated with the right 1471 // function. 1472 if (CreatedBranchToNormalDest) 1473 CreatedBranchToNormalDest->setDebugLoc(Loc); 1474 } else if (!Returns.empty()) { 1475 // Otherwise, if there is exactly one return value, just replace anything 1476 // using the return value of the call with the computed value. 1477 if (!TheCall->use_empty()) { 1478 if (TheCall == Returns[0]->getReturnValue()) 1479 TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); 1480 else 1481 TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); 1482 } 1483 1484 // Update PHI nodes that use the ReturnBB to use the AfterCallBB. 1485 BasicBlock *ReturnBB = Returns[0]->getParent(); 1486 ReturnBB->replaceAllUsesWith(AfterCallBB); 1487 1488 // Splice the code from the return block into the block that it will return 1489 // to, which contains the code that was after the call. 1490 AfterCallBB->getInstList().splice(AfterCallBB->begin(), 1491 ReturnBB->getInstList()); 1492 1493 if (CreatedBranchToNormalDest) 1494 CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); 1495 1496 // Delete the return instruction now and empty ReturnBB now. 1497 Returns[0]->eraseFromParent(); 1498 ReturnBB->eraseFromParent(); 1499 } else if (!TheCall->use_empty()) { 1500 // No returns, but something is using the return value of the call. Just 1501 // nuke the result. 1502 TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); 1503 } 1504 1505 // Since we are now done with the Call/Invoke, we can delete it. 1506 TheCall->eraseFromParent(); 1507 1508 // If we inlined any musttail calls and the original return is now 1509 // unreachable, delete it. It can only contain a bitcast and ret. 1510 if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) 1511 AfterCallBB->eraseFromParent(); 1512 1513 // We should always be able to fold the entry block of the function into the 1514 // single predecessor of the block... 1515 assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); 1516 BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); 1517 1518 // Splice the code entry block into calling block, right before the 1519 // unconditional branch. 1520 CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes 1521 OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); 1522 1523 // Remove the unconditional branch. 1524 OrigBB->getInstList().erase(Br); 1525 1526 // Now we can remove the CalleeEntry block, which is now empty. 1527 Caller->getBasicBlockList().erase(CalleeEntry); 1528 1529 // If we inserted a phi node, check to see if it has a single value (e.g. all 1530 // the entries are the same or undef). If so, remove the PHI so it doesn't 1531 // block other optimizations. 1532 if (PHI) { 1533 auto &DL = Caller->getParent()->getDataLayout(); 1534 if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, 1535 &IFI.ACT->getAssumptionCache(*Caller))) { 1536 PHI->replaceAllUsesWith(V); 1537 PHI->eraseFromParent(); 1538 } 1539 } 1540 1541 return true; 1542 } 1543