1 //===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This pass builds the coroutine frame and outlines resume and destroy parts
9 // of the coroutine into separate functions.
10 //
11 // We present a coroutine to LLVM as an ordinary function with suspension
12 // points marked up with intrinsics. We let the optimizer party on the coroutine
13 // as a single function for as long as possible. Shortly before the coroutine is
14 // eligible to be inlined into its callers, we split up the coroutine into parts
15 // corresponding to the initial, resume and destroy invocations of the coroutine,
16 // add them to the current SCC and restart the IPO pipeline to optimize the
17 // coroutine subfunctions we extracted before proceeding to the caller of the
18 // coroutine.
19 //===----------------------------------------------------------------------===//
20 
21 #include "llvm/Transforms/Coroutines/CoroSplit.h"
22 #include "CoroInstr.h"
23 #include "CoroInternal.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/PriorityWorklist.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/ADT/Twine.h"
30 #include "llvm/Analysis/CFG.h"
31 #include "llvm/Analysis/CallGraph.h"
32 #include "llvm/Analysis/ConstantFolding.h"
33 #include "llvm/Analysis/LazyCallGraph.h"
34 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
35 #include "llvm/Analysis/TargetTransformInfo.h"
36 #include "llvm/BinaryFormat/Dwarf.h"
37 #include "llvm/IR/Argument.h"
38 #include "llvm/IR/Attributes.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/CFG.h"
41 #include "llvm/IR/CallingConv.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DataLayout.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/GlobalValue.h"
48 #include "llvm/IR/GlobalVariable.h"
49 #include "llvm/IR/IRBuilder.h"
50 #include "llvm/IR/InstIterator.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Module.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/IR/Verifier.h"
60 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/PrettyStackTrace.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include "llvm/Transforms/Scalar.h"
65 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
66 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
67 #include "llvm/Transforms/Utils/Cloning.h"
68 #include "llvm/Transforms/Utils/Local.h"
69 #include "llvm/Transforms/Utils/ValueMapper.h"
70 #include <cassert>
71 #include <cstddef>
72 #include <cstdint>
73 #include <initializer_list>
74 #include <iterator>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "coro-split"
79 
80 namespace {
81 
82 /// A little helper class for building the cloned coroutine functions
83 /// (resume, destroy, and continuation variants).
84 class CoroCloner {
85 public:
86 enum class Kind {
87 /// The shared resume function for a switch lowering.
88 SwitchResume,
89 /// The shared unwind function for a switch lowering.
90 SwitchUnwind,
91 
92 /// The shared cleanup function for a switch lowering.
93 SwitchCleanup,
94 
95 /// An individual continuation function.
96 Continuation,
97 
98 /// An async resume function.
99 Async,
100 };
101 
102 private:
103 Function &OrigF;
104 Function *NewF;
105 const Twine &Suffix;
106 coro::Shape &Shape;
107 Kind FKind;
108 ValueToValueMapTy VMap;
109 IRBuilder<> Builder;
110 Value *NewFramePtr = nullptr;
111 
112 /// The active suspend instruction; meaningful only for continuation and async
113 /// ABIs.
114 AnyCoroSuspendInst *ActiveSuspend = nullptr;
115 
116 TargetTransformInfo &TTI;
117 
118 public:
119 /// Create a cloner for a switch lowering.
120 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
121 Kind FKind, TargetTransformInfo &TTI)
122 : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), FKind(FKind),
123 Builder(OrigF.getContext()), TTI(TTI) {
124 assert(Shape.ABI == coro::ABI::Switch);
125 }
126 
127 /// Create a cloner for a continuation lowering.
128 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
129 Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
130 TargetTransformInfo &TTI)
131 : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
132 FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
133 Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend), TTI(TTI) {
134 assert(Shape.ABI == coro::ABI::Retcon ||
135 Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
136 assert(NewF && "need existing function for continuation");
137 assert(ActiveSuspend && "need active suspend point for continuation");
138 }
139 
140 Function *getFunction() const {
141 assert(NewF != nullptr && "declaration not yet set");
142 return NewF;
143 }
144 
145 void create();
146 
147 private:
148 bool isSwitchDestroyFunction() {
149 switch (FKind) {
150 case Kind::Async:
151 case Kind::Continuation:
152 case Kind::SwitchResume:
153 return false;
154 case Kind::SwitchUnwind:
155 case Kind::SwitchCleanup:
156 return true;
157 }
158 llvm_unreachable("Unknown CoroCloner::Kind enum");
159 }
160 
161 void replaceEntryBlock();
162 Value *deriveNewFramePointer();
163 void replaceRetconOrAsyncSuspendUses();
164 void replaceCoroSuspends();
165 void replaceCoroEnds();
166 void replaceSwiftErrorOps();
167 void salvageDebugInfo();
168 void handleFinalSuspend();
169 };
170 
171 } // end anonymous namespace
172 
173 // FIXME:
174 // Lower the intrinsic in the CoroEarly phase if the coroutine frame doesn't
175 // escape and it is known that other transformations, for example sanitizers,
176 // won't lead to incorrect code.
177 static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
178 coro::Shape &Shape) {
179 auto Wrapper = CB->getWrapperFunction();
180 auto Awaiter = CB->getAwaiter();
181 auto FramePtr = CB->getFrame();
182 
183 Builder.SetInsertPoint(CB);
184 
185 CallBase *NewCall = nullptr;
186 // await_suspend has only 2 parameters, awaiter and handle.
187 // Copy parameter attributes from the intrinsic call, but remove the last,
188 // because the last parameter now becomes the function that is being called.
189 AttributeList NewAttributes = 190 CB->getAttributes().removeParamAttributes(CB->getContext(), 2); 191 192 if (auto Invoke = dyn_cast<InvokeInst>(CB)) { 193 auto WrapperInvoke = 194 Builder.CreateInvoke(Wrapper, Invoke->getNormalDest(), 195 Invoke->getUnwindDest(), {Awaiter, FramePtr}); 196 197 WrapperInvoke->setCallingConv(Invoke->getCallingConv()); 198 std::copy(Invoke->bundle_op_info_begin(), Invoke->bundle_op_info_end(), 199 WrapperInvoke->bundle_op_info_begin()); 200 WrapperInvoke->setAttributes(NewAttributes); 201 WrapperInvoke->setDebugLoc(Invoke->getDebugLoc()); 202 NewCall = WrapperInvoke; 203 } else if (auto Call = dyn_cast<CallInst>(CB)) { 204 auto WrapperCall = Builder.CreateCall(Wrapper, {Awaiter, FramePtr}); 205 206 WrapperCall->setAttributes(NewAttributes); 207 WrapperCall->setDebugLoc(Call->getDebugLoc()); 208 NewCall = WrapperCall; 209 } else { 210 llvm_unreachable("Unexpected coro_await_suspend invocation method"); 211 } 212 213 if (CB->getCalledFunction()->getIntrinsicID() == 214 Intrinsic::coro_await_suspend_handle) { 215 // Follow the lowered await_suspend call above with a lowered resume call 216 // to the returned coroutine. 217 if (auto *Invoke = dyn_cast<InvokeInst>(CB)) { 218 // If the await_suspend call is an invoke, we continue in the next block. 219 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt()); 220 } 221 222 coro::LowererBase LB(*Wrapper->getParent()); 223 auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex, 224 &*Builder.GetInsertPoint()); 225 226 LLVMContext &Ctx = Builder.getContext(); 227 FunctionType *ResumeTy = FunctionType::get( 228 Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false); 229 auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall}); 230 ResumeCall->setCallingConv(CallingConv::Fast); 231 232 // We can't insert the 'ret' instruction and adjust the cc until the 233 // function has been split, so remember this for later. 234 Shape.SymmetricTransfers.push_back(ResumeCall); 235 236 NewCall = ResumeCall; 237 } 238 239 CB->replaceAllUsesWith(NewCall); 240 CB->eraseFromParent(); 241 } 242 243 static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) { 244 IRBuilder<> Builder(F.getContext()); 245 for (auto *AWS : Shape.CoroAwaitSuspends) 246 lowerAwaitSuspend(Builder, AWS, Shape); 247 } 248 249 static void maybeFreeRetconStorage(IRBuilder<> &Builder, 250 const coro::Shape &Shape, Value *FramePtr, 251 CallGraph *CG) { 252 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); 253 if (Shape.RetconLowering.IsFrameInlineInStorage) 254 return; 255 256 Shape.emitDealloc(Builder, FramePtr, CG); 257 } 258 259 /// Replace an llvm.coro.end.async. 260 /// Will inline the must tail call function call if there is one. 261 /// \returns true if cleanup of the coro.end block is needed, false otherwise. 262 static bool replaceCoroEndAsync(AnyCoroEndInst *End) { 263 IRBuilder<> Builder(End); 264 265 auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End); 266 if (!EndAsync) { 267 Builder.CreateRetVoid(); 268 return true /*needs cleanup of coro.end block*/; 269 } 270 271 auto *MustTailCallFunc = EndAsync->getMustTailCallFunction(); 272 if (!MustTailCallFunc) { 273 Builder.CreateRetVoid(); 274 return true /*needs cleanup of coro.end block*/; 275 } 276 277 // Move the must tail call from the predecessor block into the end block. 
278 auto *CoroEndBlock = End->getParent();
279 auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
280 assert(MustTailCallFuncBlock && "Must have a single predecessor block");
281 auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
282 auto *MustTailCall = cast<CallInst>(&*std::prev(It));
283 CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock,
284 MustTailCall->getIterator());
285 
286 // Insert the return instruction.
287 Builder.SetInsertPoint(End);
288 Builder.CreateRetVoid();
289 InlineFunctionInfo FnInfo;
290 
291 // Remove the rest of the block by splitting it into an unreachable block.
292 auto *BB = End->getParent();
293 BB->splitBasicBlock(End);
294 BB->getTerminator()->eraseFromParent();
295 
296 auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
297 assert(InlineRes.isSuccess() && "Expected inlining to succeed");
298 (void)InlineRes;
299 
300 // We have cleaned up the coro.end block above.
301 return false;
302 }
303 
304 /// Replace a non-unwind call to llvm.coro.end.
305 static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
306 const coro::Shape &Shape, Value *FramePtr,
307 bool InResume, CallGraph *CG) {
308 // Start inserting right before the coro.end.
309 IRBuilder<> Builder(End);
310 
311 // Create the return instruction.
312 switch (Shape.ABI) {
313 // The cloned functions in switch-lowering always return void.
314 case coro::ABI::Switch:
315 assert(!cast<CoroEndInst>(End)->hasResults() &&
316 "switch coroutine should not return any values");
317 // coro.end doesn't immediately end the coroutine in the main function
318 // in this lowering, because we need to deallocate the coroutine.
319 if (!InResume)
320 return;
321 Builder.CreateRetVoid();
322 break;
323 
324 // In async lowering this returns.
325 case coro::ABI::Async: {
326 bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
327 if (!CoroEndBlockNeedsCleanup)
328 return;
329 break;
330 }
331 
332 // In unique continuation lowering, the continuations always return void.
333 // But we may have implicitly allocated storage.
334 case coro::ABI::RetconOnce: {
335 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
336 auto *CoroEnd = cast<CoroEndInst>(End);
337 auto *RetTy = Shape.getResumeFunctionType()->getReturnType();
338 
339 if (!CoroEnd->hasResults()) {
340 assert(RetTy->isVoidTy());
341 Builder.CreateRetVoid();
342 break;
343 }
344 
345 auto *CoroResults = CoroEnd->getResults();
346 unsigned NumReturns = CoroResults->numReturns();
347 
348 if (auto *RetStructTy = dyn_cast<StructType>(RetTy)) {
349 assert(RetStructTy->getNumElements() == NumReturns &&
350 "number of returns should match the resume function signature");
351 Value *ReturnValue = UndefValue::get(RetStructTy);
352 unsigned Idx = 0;
353 for (Value *RetValEl : CoroResults->return_values())
354 ReturnValue = Builder.CreateInsertValue(ReturnValue, RetValEl, Idx++);
355 Builder.CreateRet(ReturnValue);
356 } else if (NumReturns == 0) {
357 assert(RetTy->isVoidTy());
358 Builder.CreateRetVoid();
359 } else {
360 assert(NumReturns == 1);
361 Builder.CreateRet(*CoroResults->retval_begin());
362 }
363 CoroResults->replaceAllUsesWith(
364 ConstantTokenNone::get(CoroResults->getContext()));
365 CoroResults->eraseFromParent();
366 break;
367 }
368 
369 // In non-unique continuation lowering, we signal completion by returning
370 // a null continuation.
371 case coro::ABI::Retcon: {
372 assert(!cast<CoroEndInst>(End)->hasResults() &&
373 "retcon coroutine should not return any values");
374 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
375 auto RetTy = Shape.getResumeFunctionType()->getReturnType();
376 auto RetStructTy = dyn_cast<StructType>(RetTy);
377 PointerType *ContinuationTy =
378 cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);
379 
380 Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
381 if (RetStructTy) {
382 ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
383 ReturnValue, 0);
384 }
385 Builder.CreateRet(ReturnValue);
386 break;
387 }
388 }
389 
390 // Remove the rest of the block by splitting it into an unreachable block.
391 auto *BB = End->getParent();
392 BB->splitBasicBlock(End);
393 BB->getTerminator()->eraseFromParent();
394 }
395 
396 // Mark a coroutine as done, which implies that the coroutine is finished and
397 // never gets resumed.
398 //
399 // In the switch-resumed ABI, the done state is represented by storing zero in
400 // ResumeFnAddr.
401 //
402 // NOTE: We cannot omit the argument `FramePtr`. It is necessary because the
403 // pointer to the frame in the split function is not stored in `Shape`.
404 static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
405 Value *FramePtr) {
406 assert(
407 Shape.ABI == coro::ABI::Switch &&
408 "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
409 auto *GepIndex = Builder.CreateStructGEP(
410 Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
411 "ResumeFn.addr");
412 auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
413 Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
414 Builder.CreateStore(NullPtr, GepIndex);
415 
416 // If the coroutine doesn't have an unwind coro.end, we can omit the store of
417 // the index for the final suspend point, since we can infer that the
418 // coroutine is suspended at the final suspend point from the nullness of
419 // ResumeFnAddr. However, we can't skip it if the coroutine has an unwind
420 // coro.end: a coroutine that reaches an unwind coro.end is considered
421 // suspended at the final suspend point (its ResumeFnAddr is null) even
422 // though it hasn't actually completed yet. We need the IndexVal for the
423 // final suspend point to keep the states distinct.
424 if (Shape.SwitchLowering.HasUnwindCoroEnd &&
425 Shape.SwitchLowering.HasFinalSuspend) {
426 assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
427 "The final suspend should only live in the last position of "
428 "CoroSuspends.");
429 ConstantInt *IndexVal = Shape.getIndex(Shape.CoroSuspends.size() - 1);
430 auto *FinalIndex = Builder.CreateStructGEP(
431 Shape.FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
432 
433 Builder.CreateStore(IndexVal, FinalIndex);
434 }
435 }
436 
437 /// Replace an unwind call to llvm.coro.end.
438 static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
439 Value *FramePtr, bool InResume,
440 CallGraph *CG) {
441 IRBuilder<> Builder(End);
442 
443 switch (Shape.ABI) {
444 // In switch-lowering, this does nothing in the main function.
445 case coro::ABI::Switch: {
446 // In the C++ specification, the coroutine should be marked as done
447 // if promise.unhandled_exception() throws. The frontend will
448 // call coro.end(true) along this path.
449 // 450 // FIXME: We should refactor this once there is other language 451 // which uses Switch-Resumed style other than C++. 452 markCoroutineAsDone(Builder, Shape, FramePtr); 453 if (!InResume) 454 return; 455 break; 456 } 457 // In async lowering this does nothing. 458 case coro::ABI::Async: 459 break; 460 // In continuation-lowering, this frees the continuation storage. 461 case coro::ABI::Retcon: 462 case coro::ABI::RetconOnce: 463 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); 464 break; 465 } 466 467 // If coro.end has an associated bundle, add cleanupret instruction. 468 if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) { 469 auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]); 470 auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr); 471 End->getParent()->splitBasicBlock(End); 472 CleanupRet->getParent()->getTerminator()->eraseFromParent(); 473 } 474 } 475 476 static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, 477 Value *FramePtr, bool InResume, CallGraph *CG) { 478 if (End->isUnwind()) 479 replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); 480 else 481 replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); 482 483 auto &Context = End->getContext(); 484 End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context) 485 : ConstantInt::getFalse(Context)); 486 End->eraseFromParent(); 487 } 488 489 // In the resume function, we remove the last case (when coro::Shape is built, 490 // the final suspend point (if present) is always the last element of 491 // CoroSuspends array) since it is an undefined behavior to resume a coroutine 492 // suspended at the final suspend point. 493 // In the destroy function, if it isn't possible that the ResumeFnAddr is NULL 494 // and the coroutine doesn't suspend at the final suspend point actually (this 495 // is possible since the coroutine is considered suspended at the final suspend 496 // point if promise.unhandled_exception() exits via an exception), we can 497 // remove the last case. 498 void CoroCloner::handleFinalSuspend() { 499 assert(Shape.ABI == coro::ABI::Switch && 500 Shape.SwitchLowering.HasFinalSuspend); 501 502 if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd) 503 return; 504 505 auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]); 506 auto FinalCaseIt = std::prev(Switch->case_end()); 507 BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); 508 Switch->removeCase(FinalCaseIt); 509 if (isSwitchDestroyFunction()) { 510 BasicBlock *OldSwitchBB = Switch->getParent(); 511 auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch"); 512 Builder.SetInsertPoint(OldSwitchBB->getTerminator()); 513 514 if (NewF->isCoroOnlyDestroyWhenComplete()) { 515 // When the coroutine can only be destroyed when complete, we don't need 516 // to generate code for other cases. 
517 Builder.CreateBr(ResumeBB); 518 } else { 519 auto *GepIndex = Builder.CreateStructGEP( 520 Shape.FrameTy, NewFramePtr, coro::Shape::SwitchFieldIndex::Resume, 521 "ResumeFn.addr"); 522 auto *Load = 523 Builder.CreateLoad(Shape.getSwitchResumePointerType(), GepIndex); 524 auto *Cond = Builder.CreateIsNull(Load); 525 Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); 526 } 527 OldSwitchBB->getTerminator()->eraseFromParent(); 528 } 529 } 530 531 static FunctionType * 532 getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) { 533 auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend); 534 auto *StructTy = cast<StructType>(AsyncSuspend->getType()); 535 auto &Context = Suspend->getParent()->getParent()->getContext(); 536 auto *VoidTy = Type::getVoidTy(Context); 537 return FunctionType::get(VoidTy, StructTy->elements(), false); 538 } 539 540 static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, 541 const Twine &Suffix, 542 Module::iterator InsertBefore, 543 AnyCoroSuspendInst *ActiveSuspend) { 544 Module *M = OrigF.getParent(); 545 auto *FnTy = (Shape.ABI != coro::ABI::Async) 546 ? Shape.getResumeFunctionType() 547 : getFunctionTypeFromAsyncSuspend(ActiveSuspend); 548 549 Function *NewF = 550 Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, 551 OrigF.getName() + Suffix); 552 553 M->getFunctionList().insert(InsertBefore, NewF); 554 555 return NewF; 556 } 557 558 /// Replace uses of the active llvm.coro.suspend.retcon/async call with the 559 /// arguments to the continuation function. 560 /// 561 /// This assumes that the builder has a meaningful insertion point. 562 void CoroCloner::replaceRetconOrAsyncSuspendUses() { 563 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || 564 Shape.ABI == coro::ABI::Async); 565 566 auto NewS = VMap[ActiveSuspend]; 567 if (NewS->use_empty()) 568 return; 569 570 // Copy out all the continuation arguments after the buffer pointer into 571 // an easily-indexed data structure for convenience. 572 SmallVector<Value *, 8> Args; 573 // The async ABI includes all arguments -- including the first argument. 574 bool IsAsyncABI = Shape.ABI == coro::ABI::Async; 575 for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()), 576 E = NewF->arg_end(); 577 I != E; ++I) 578 Args.push_back(&*I); 579 580 // If the suspend returns a single scalar value, we can just do a simple 581 // replacement. 582 if (!isa<StructType>(NewS->getType())) { 583 assert(Args.size() == 1); 584 NewS->replaceAllUsesWith(Args.front()); 585 return; 586 } 587 588 // Try to peephole extracts of an aggregate return. 589 for (Use &U : llvm::make_early_inc_range(NewS->uses())) { 590 auto *EVI = dyn_cast<ExtractValueInst>(U.getUser()); 591 if (!EVI || EVI->getNumIndices() != 1) 592 continue; 593 594 EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]); 595 EVI->eraseFromParent(); 596 } 597 598 // If we have no remaining uses, we're done. 599 if (NewS->use_empty()) 600 return; 601 602 // Otherwise, we need to create an aggregate. 603 Value *Agg = PoisonValue::get(NewS->getType()); 604 for (size_t I = 0, E = Args.size(); I != E; ++I) 605 Agg = Builder.CreateInsertValue(Agg, Args[I], I); 606 607 NewS->replaceAllUsesWith(Agg); 608 } 609 610 void CoroCloner::replaceCoroSuspends() { 611 Value *SuspendResult; 612 613 switch (Shape.ABI) { 614 // In switch lowering, replace coro.suspend with the appropriate value 615 // for the type of function we're extracting. 
616 // Replacing coro.suspend with (0) will result in control flow proceeding to 617 // a resume label associated with a suspend point, replacing it with (1) will 618 // result in control flow proceeding to a cleanup label associated with this 619 // suspend point. 620 case coro::ABI::Switch: 621 SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0); 622 break; 623 624 // In async lowering there are no uses of the result. 625 case coro::ABI::Async: 626 return; 627 628 // In returned-continuation lowering, the arguments from earlier 629 // continuations are theoretically arbitrary, and they should have been 630 // spilled. 631 case coro::ABI::RetconOnce: 632 case coro::ABI::Retcon: 633 return; 634 } 635 636 for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) { 637 // The active suspend was handled earlier. 638 if (CS == ActiveSuspend) 639 continue; 640 641 auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]); 642 MappedCS->replaceAllUsesWith(SuspendResult); 643 MappedCS->eraseFromParent(); 644 } 645 } 646 647 void CoroCloner::replaceCoroEnds() { 648 for (AnyCoroEndInst *CE : Shape.CoroEnds) { 649 // We use a null call graph because there's no call graph node for 650 // the cloned function yet. We'll just be rebuilding that later. 651 auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]); 652 replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr); 653 } 654 } 655 656 static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, 657 ValueToValueMapTy *VMap) { 658 if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty()) 659 return; 660 Value *CachedSlot = nullptr; 661 auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { 662 if (CachedSlot) 663 return CachedSlot; 664 665 // Check if the function has a swifterror argument. 666 for (auto &Arg : F.args()) { 667 if (Arg.isSwiftError()) { 668 CachedSlot = &Arg; 669 return &Arg; 670 } 671 } 672 673 // Create a swifterror alloca. 674 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); 675 auto Alloca = Builder.CreateAlloca(ValueTy); 676 Alloca->setSwiftError(true); 677 678 CachedSlot = Alloca; 679 return Alloca; 680 }; 681 682 for (CallInst *Op : Shape.SwiftErrorOps) { 683 auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op; 684 IRBuilder<> Builder(MappedOp); 685 686 // If there are no arguments, this is a 'get' operation. 687 Value *MappedResult; 688 if (Op->arg_empty()) { 689 auto ValueTy = Op->getType(); 690 auto Slot = getSwiftErrorSlot(ValueTy); 691 MappedResult = Builder.CreateLoad(ValueTy, Slot); 692 } else { 693 assert(Op->arg_size() == 1); 694 auto Value = MappedOp->getArgOperand(0); 695 auto ValueTy = Value->getType(); 696 auto Slot = getSwiftErrorSlot(ValueTy); 697 Builder.CreateStore(Value, Slot); 698 MappedResult = Slot; 699 } 700 701 MappedOp->replaceAllUsesWith(MappedResult); 702 MappedOp->eraseFromParent(); 703 } 704 705 // If we're updating the original function, we've invalidated SwiftErrorOps. 706 if (VMap == nullptr) { 707 Shape.SwiftErrorOps.clear(); 708 } 709 } 710 711 /// Returns all DbgVariableIntrinsic in F. 
712 static std::pair<SmallVector<DbgVariableIntrinsic *, 8>, 713 SmallVector<DbgVariableRecord *>> 714 collectDbgVariableIntrinsics(Function &F) { 715 SmallVector<DbgVariableIntrinsic *, 8> Intrinsics; 716 SmallVector<DbgVariableRecord *> DbgVariableRecords; 717 for (auto &I : instructions(F)) { 718 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) 719 DbgVariableRecords.push_back(&DVR); 720 if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) 721 Intrinsics.push_back(DVI); 722 } 723 return {Intrinsics, DbgVariableRecords}; 724 } 725 726 void CoroCloner::replaceSwiftErrorOps() { 727 ::replaceSwiftErrorOps(*NewF, Shape, &VMap); 728 } 729 730 void CoroCloner::salvageDebugInfo() { 731 auto [Worklist, DbgVariableRecords] = collectDbgVariableIntrinsics(*NewF); 732 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap; 733 734 // Only 64-bit ABIs have a register we can refer to with the entry value. 735 bool UseEntryValue = 736 llvm::Triple(OrigF.getParent()->getTargetTriple()).isArch64Bit(); 737 for (DbgVariableIntrinsic *DVI : Worklist) 738 coro::salvageDebugInfo(ArgToAllocaMap, *DVI, Shape.OptimizeFrame, 739 UseEntryValue); 740 for (DbgVariableRecord *DVR : DbgVariableRecords) 741 coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame, 742 UseEntryValue); 743 744 // Remove all salvaged dbg.declare intrinsics that became 745 // either unreachable or stale due to the CoroSplit transformation. 746 DominatorTree DomTree(*NewF); 747 auto IsUnreachableBlock = [&](BasicBlock *BB) { 748 return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr, 749 &DomTree); 750 }; 751 auto RemoveOne = [&](auto *DVI) { 752 if (IsUnreachableBlock(DVI->getParent())) 753 DVI->eraseFromParent(); 754 else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) { 755 // Count all non-debuginfo uses in reachable blocks. 756 unsigned Uses = 0; 757 for (auto *User : DVI->getVariableLocationOp(0)->users()) 758 if (auto *I = dyn_cast<Instruction>(User)) 759 if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent())) 760 ++Uses; 761 if (!Uses) 762 DVI->eraseFromParent(); 763 } 764 }; 765 for_each(Worklist, RemoveOne); 766 for_each(DbgVariableRecords, RemoveOne); 767 } 768 769 void CoroCloner::replaceEntryBlock() { 770 // In the original function, the AllocaSpillBlock is a block immediately 771 // following the allocation of the frame object which defines GEPs for 772 // all the allocas that have been moved into the frame, and it ends by 773 // branching to the original beginning of the coroutine. Make this 774 // the entry block of the cloned function. 775 auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]); 776 auto *OldEntry = &NewF->getEntryBlock(); 777 Entry->setName("entry" + Suffix); 778 Entry->moveBefore(OldEntry); 779 Entry->getTerminator()->eraseFromParent(); 780 781 // Clear all predecessors of the new entry block. There should be 782 // exactly one predecessor, which we created when splitting out 783 // AllocaSpillBlock to begin with. 784 assert(Entry->hasOneUse()); 785 auto BranchToEntry = cast<BranchInst>(Entry->user_back()); 786 assert(BranchToEntry->isUnconditional()); 787 Builder.SetInsertPoint(BranchToEntry); 788 Builder.CreateUnreachable(); 789 BranchToEntry->eraseFromParent(); 790 791 // Branch from the entry to the appropriate place. 792 Builder.SetInsertPoint(Entry); 793 switch (Shape.ABI) { 794 case coro::ABI::Switch: { 795 // In switch-lowering, we built a resume-entry block in the original 796 // function. Make the entry block branch to this. 
797 auto *SwitchBB = 798 cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]); 799 Builder.CreateBr(SwitchBB); 800 break; 801 } 802 case coro::ABI::Async: 803 case coro::ABI::Retcon: 804 case coro::ABI::RetconOnce: { 805 // In continuation ABIs, we want to branch to immediately after the 806 // active suspend point. Earlier phases will have put the suspend in its 807 // own basic block, so just thread our jump directly to its successor. 808 assert((Shape.ABI == coro::ABI::Async && 809 isa<CoroSuspendAsyncInst>(ActiveSuspend)) || 810 ((Shape.ABI == coro::ABI::Retcon || 811 Shape.ABI == coro::ABI::RetconOnce) && 812 isa<CoroSuspendRetconInst>(ActiveSuspend))); 813 auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]); 814 auto Branch = cast<BranchInst>(MappedCS->getNextNode()); 815 assert(Branch->isUnconditional()); 816 Builder.CreateBr(Branch->getSuccessor(0)); 817 break; 818 } 819 } 820 821 // Any static alloca that's still being used but not reachable from the new 822 // entry needs to be moved to the new entry. 823 Function *F = OldEntry->getParent(); 824 DominatorTree DT{*F}; 825 for (Instruction &I : llvm::make_early_inc_range(instructions(F))) { 826 auto *Alloca = dyn_cast<AllocaInst>(&I); 827 if (!Alloca || I.use_empty()) 828 continue; 829 if (DT.isReachableFromEntry(I.getParent()) || 830 !isa<ConstantInt>(Alloca->getArraySize())) 831 continue; 832 I.moveBefore(*Entry, Entry->getFirstInsertionPt()); 833 } 834 } 835 836 /// Derive the value of the new frame pointer. 837 Value *CoroCloner::deriveNewFramePointer() { 838 // Builder should be inserting to the front of the new entry block. 839 840 switch (Shape.ABI) { 841 // In switch-lowering, the argument is the frame pointer. 842 case coro::ABI::Switch: 843 return &*NewF->arg_begin(); 844 // In async-lowering, one of the arguments is an async context as determined 845 // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of 846 // the resume function from the async context projection function associated 847 // with the active suspend. The frame is located as a tail to the async 848 // context header. 849 case coro::ABI::Async: { 850 auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend); 851 auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff; 852 auto *CalleeContext = NewF->getArg(ContextIdx); 853 auto *ProjectionFunc = 854 ActiveAsyncSuspend->getAsyncContextProjectionFunction(); 855 auto DbgLoc = 856 cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc(); 857 // Calling i8* (i8*) 858 auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(), 859 ProjectionFunc, CalleeContext); 860 CallerContext->setCallingConv(ProjectionFunc->getCallingConv()); 861 CallerContext->setDebugLoc(DbgLoc); 862 // The frame is located after the async_context header. 863 auto &Context = Builder.getContext(); 864 auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32( 865 Type::getInt8Ty(Context), CallerContext, 866 Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr"); 867 // Inline the projection function. 868 InlineFunctionInfo InlineInfo; 869 auto InlineRes = InlineFunction(*CallerContext, InlineInfo); 870 assert(InlineRes.isSuccess()); 871 (void)InlineRes; 872 return FramePtrAddr; 873 } 874 // In continuation-lowering, the argument is the opaque storage. 
875 case coro::ABI::Retcon: 876 case coro::ABI::RetconOnce: { 877 Argument *NewStorage = &*NewF->arg_begin(); 878 auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext()); 879 880 // If the storage is inline, just bitcast to the storage to the frame type. 881 if (Shape.RetconLowering.IsFrameInlineInStorage) 882 return NewStorage; 883 884 // Otherwise, load the real frame from the opaque storage. 885 return Builder.CreateLoad(FramePtrTy, NewStorage); 886 } 887 } 888 llvm_unreachable("bad ABI"); 889 } 890 891 static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context, 892 unsigned ParamIndex, uint64_t Size, 893 Align Alignment, bool NoAlias) { 894 AttrBuilder ParamAttrs(Context); 895 ParamAttrs.addAttribute(Attribute::NonNull); 896 ParamAttrs.addAttribute(Attribute::NoUndef); 897 898 if (NoAlias) 899 ParamAttrs.addAttribute(Attribute::NoAlias); 900 901 ParamAttrs.addAlignmentAttr(Alignment); 902 ParamAttrs.addDereferenceableAttr(Size); 903 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); 904 } 905 906 static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context, 907 unsigned ParamIndex) { 908 AttrBuilder ParamAttrs(Context); 909 ParamAttrs.addAttribute(Attribute::SwiftAsync); 910 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); 911 } 912 913 static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context, 914 unsigned ParamIndex) { 915 AttrBuilder ParamAttrs(Context); 916 ParamAttrs.addAttribute(Attribute::SwiftSelf); 917 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); 918 } 919 920 /// Clone the body of the original function into a resume function of 921 /// some sort. 922 void CoroCloner::create() { 923 // Create the new function if we don't already have one. 924 if (!NewF) { 925 NewF = createCloneDeclaration(OrigF, Shape, Suffix, 926 OrigF.getParent()->end(), ActiveSuspend); 927 } 928 929 // Replace all args with dummy instructions. If an argument is the old frame 930 // pointer, the dummy will be replaced by the new frame pointer once it is 931 // computed below. Uses of all other arguments should have already been 932 // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine 933 // frame. 934 SmallVector<Instruction *> DummyArgs; 935 for (Argument &A : OrigF.args()) { 936 DummyArgs.push_back(new FreezeInst(PoisonValue::get(A.getType()))); 937 VMap[&A] = DummyArgs.back(); 938 } 939 940 SmallVector<ReturnInst *, 4> Returns; 941 942 // Ignore attempts to change certain attributes of the function. 943 // TODO: maybe there should be a way to suppress this during cloning? 944 auto savedVisibility = NewF->getVisibility(); 945 auto savedUnnamedAddr = NewF->getUnnamedAddr(); 946 auto savedDLLStorageClass = NewF->getDLLStorageClass(); 947 948 // NewF's linkage (which CloneFunctionInto does *not* change) might not 949 // be compatible with the visibility of OrigF (which it *does* change), 950 // so protect against that. 951 auto savedLinkage = NewF->getLinkage(); 952 NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); 953 954 CloneFunctionInto(NewF, &OrigF, VMap, 955 CloneFunctionChangeType::LocalChangesOnly, Returns); 956 957 auto &Context = NewF->getContext(); 958 959 // For async functions / continuations, adjust the scope line of the 960 // clone to the line number of the suspend point. However, only 961 // adjust the scope line when the files are the same. This ensures 962 // line number and file name belong together. 
The scope line is
963 // associated with all pre-prologue instructions. This avoids a jump
964 // in the linetable from the function declaration to the suspend point.
965 if (DISubprogram *SP = NewF->getSubprogram()) {
966 assert(SP != OrigF.getSubprogram() && SP->isDistinct());
967 if (ActiveSuspend)
968 if (auto DL = ActiveSuspend->getDebugLoc())
969 if (SP->getFile() == DL->getFile())
970 SP->setScopeLine(DL->getLine());
971 // Update the linkage name to reflect the modified symbol name. It
972 // is necessary to update the linkage name in Swift, since the
973 // mangling changes for resume functions. It might also be the
974 // right thing to do in C++, but due to a limitation in LLVM's
975 // AsmPrinter we can only do this if the function doesn't have an
976 // abstract specification, since the DWARF backend expects the
977 // abstract specification to contain the linkage name and asserts
978 // that they are identical.
979 if (SP->getUnit() &&
980 SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) {
981 SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
982 if (auto *Decl = SP->getDeclaration()) {
983 auto *NewDecl = DISubprogram::get(
984 Decl->getContext(), Decl->getScope(), Decl->getName(),
985 NewF->getName(), Decl->getFile(), Decl->getLine(), Decl->getType(),
986 Decl->getScopeLine(), Decl->getContainingType(),
987 Decl->getVirtualIndex(), Decl->getThisAdjustment(),
988 Decl->getFlags(), Decl->getSPFlags(), Decl->getUnit(),
989 Decl->getTemplateParams(), nullptr, Decl->getRetainedNodes(),
990 Decl->getThrownTypes(), Decl->getAnnotations(),
991 Decl->getTargetFuncName());
992 SP->replaceDeclaration(NewDecl);
993 }
994 }
995 }
996 
997 NewF->setLinkage(savedLinkage);
998 NewF->setVisibility(savedVisibility);
999 NewF->setUnnamedAddr(savedUnnamedAddr);
1000 NewF->setDLLStorageClass(savedDLLStorageClass);
1001 // The function sanitizer metadata needs to match the signature of the
1002 // function it is being attached to. However, this does not hold for the
1003 // split functions here, so remove the metadata for them.
1004 if (Shape.ABI == coro::ABI::Switch &&
1005 NewF->hasMetadata(LLVMContext::MD_func_sanitize))
1006 NewF->eraseMetadata(LLVMContext::MD_func_sanitize);
1007 
1008 // Replace the attributes of the new function:
1009 auto OrigAttrs = NewF->getAttributes();
1010 auto NewAttrs = AttributeList();
1011 
1012 switch (Shape.ABI) {
1013 case coro::ABI::Switch:
1014 // Bootstrap attributes by copying function attributes from the
1015 // original function. This should include optimization settings and so on.
1016 NewAttrs = NewAttrs.addFnAttributes(
1017 Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));
1018 
1019 addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
1020 Shape.FrameAlign, /*NoAlias=*/false);
1021 break;
1022 case coro::ABI::Async: {
1023 auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
1024 if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
1025 Attribute::SwiftAsync)) {
1026 uint32_t ArgAttributeIndices =
1027 ActiveAsyncSuspend->getStorageArgumentIndex();
1028 auto ContextArgIndex = ArgAttributeIndices & 0xff;
1029 addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);
1030 
1031 // `swiftasync` must precede `swiftself`, so 0 is not a valid index for
1032 // `swiftself`.
1033 auto SwiftSelfIndex = ArgAttributeIndices >> 8;
1034 if (SwiftSelfIndex)
1035 addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
1036 }
1037 
1038 // Transfer the original function's attributes.
1039 auto FnAttrs = OrigF.getAttributes().getFnAttrs(); 1040 NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs)); 1041 break; 1042 } 1043 case coro::ABI::Retcon: 1044 case coro::ABI::RetconOnce: 1045 // If we have a continuation prototype, just use its attributes, 1046 // full-stop. 1047 NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); 1048 1049 /// FIXME: Is it really good to add the NoAlias attribute? 1050 addFramePointerAttrs(NewAttrs, Context, 0, 1051 Shape.getRetconCoroId()->getStorageSize(), 1052 Shape.getRetconCoroId()->getStorageAlignment(), 1053 /*NoAlias=*/true); 1054 1055 break; 1056 } 1057 1058 switch (Shape.ABI) { 1059 // In these ABIs, the cloned functions always return 'void', and the 1060 // existing return sites are meaningless. Note that for unique 1061 // continuations, this includes the returns associated with suspends; 1062 // this is fine because we can't suspend twice. 1063 case coro::ABI::Switch: 1064 case coro::ABI::RetconOnce: 1065 // Remove old returns. 1066 for (ReturnInst *Return : Returns) 1067 changeToUnreachable(Return); 1068 break; 1069 1070 // With multi-suspend continuations, we'll already have eliminated the 1071 // original returns and inserted returns before all the suspend points, 1072 // so we want to leave any returns in place. 1073 case coro::ABI::Retcon: 1074 break; 1075 // Async lowering will insert musttail call functions at all suspend points 1076 // followed by a return. 1077 // Don't change returns to unreachable because that will trip up the verifier. 1078 // These returns should be unreachable from the clone. 1079 case coro::ABI::Async: 1080 break; 1081 } 1082 1083 NewF->setAttributes(NewAttrs); 1084 NewF->setCallingConv(Shape.getResumeFunctionCC()); 1085 1086 // Set up the new entry block. 1087 replaceEntryBlock(); 1088 1089 // Turn symmetric transfers into musttail calls. 1090 for (CallInst *ResumeCall : Shape.SymmetricTransfers) { 1091 ResumeCall = cast<CallInst>(VMap[ResumeCall]); 1092 if (TTI.supportsTailCallFor(ResumeCall)) { 1093 // FIXME: Could we support symmetric transfer effectively without 1094 // musttail? 1095 ResumeCall->setTailCallKind(CallInst::TCK_MustTail); 1096 } 1097 1098 // Put a 'ret void' after the call, and split any remaining instructions to 1099 // an unreachable block. 1100 BasicBlock *BB = ResumeCall->getParent(); 1101 BB->splitBasicBlock(ResumeCall->getNextNode()); 1102 Builder.SetInsertPoint(BB->getTerminator()); 1103 Builder.CreateRetVoid(); 1104 BB->getTerminator()->eraseFromParent(); 1105 } 1106 1107 Builder.SetInsertPoint(&NewF->getEntryBlock().front()); 1108 NewFramePtr = deriveNewFramePointer(); 1109 1110 // Remap frame pointer. 1111 Value *OldFramePtr = VMap[Shape.FramePtr]; 1112 NewFramePtr->takeName(OldFramePtr); 1113 OldFramePtr->replaceAllUsesWith(NewFramePtr); 1114 1115 // Remap vFrame pointer. 1116 auto *NewVFrame = Builder.CreateBitCast( 1117 NewFramePtr, PointerType::getUnqual(Builder.getContext()), "vFrame"); 1118 Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]); 1119 if (OldVFrame != NewVFrame) 1120 OldVFrame->replaceAllUsesWith(NewVFrame); 1121 1122 // All uses of the arguments should have been resolved by this point, 1123 // so we can safely remove the dummy values. 
1124 for (Instruction *DummyArg : DummyArgs) { 1125 DummyArg->replaceAllUsesWith(PoisonValue::get(DummyArg->getType())); 1126 DummyArg->deleteValue(); 1127 } 1128 1129 switch (Shape.ABI) { 1130 case coro::ABI::Switch: 1131 // Rewrite final suspend handling as it is not done via switch (allows to 1132 // remove final case from the switch, since it is undefined behavior to 1133 // resume the coroutine suspended at the final suspend point. 1134 if (Shape.SwitchLowering.HasFinalSuspend) 1135 handleFinalSuspend(); 1136 break; 1137 case coro::ABI::Async: 1138 case coro::ABI::Retcon: 1139 case coro::ABI::RetconOnce: 1140 // Replace uses of the active suspend with the corresponding 1141 // continuation-function arguments. 1142 assert(ActiveSuspend != nullptr && 1143 "no active suspend when lowering a continuation-style coroutine"); 1144 replaceRetconOrAsyncSuspendUses(); 1145 break; 1146 } 1147 1148 // Handle suspends. 1149 replaceCoroSuspends(); 1150 1151 // Handle swifterror. 1152 replaceSwiftErrorOps(); 1153 1154 // Remove coro.end intrinsics. 1155 replaceCoroEnds(); 1156 1157 // Salvage debug info that points into the coroutine frame. 1158 salvageDebugInfo(); 1159 1160 // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, 1161 // to suppress deallocation code. 1162 if (Shape.ABI == coro::ABI::Switch) 1163 coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]), 1164 /*Elide=*/FKind == CoroCloner::Kind::SwitchCleanup); 1165 } 1166 1167 static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { 1168 assert(Shape.ABI == coro::ABI::Async); 1169 1170 auto *FuncPtrStruct = cast<ConstantStruct>( 1171 Shape.AsyncLowering.AsyncFuncPointer->getInitializer()); 1172 auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0); 1173 auto *OrigContextSize = FuncPtrStruct->getOperand(1); 1174 auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(), 1175 Shape.AsyncLowering.ContextSize); 1176 auto *NewFuncPtrStruct = ConstantStruct::get( 1177 FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize); 1178 1179 Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); 1180 } 1181 1182 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { 1183 if (Shape.ABI == coro::ABI::Async) 1184 updateAsyncFuncPointerContextSize(Shape); 1185 1186 for (CoroAlignInst *CA : Shape.CoroAligns) { 1187 CA->replaceAllUsesWith( 1188 ConstantInt::get(CA->getType(), Shape.FrameAlign.value())); 1189 CA->eraseFromParent(); 1190 } 1191 1192 if (Shape.CoroSizes.empty()) 1193 return; 1194 1195 // In the same function all coro.sizes should have the same result type. 1196 auto *SizeIntrin = Shape.CoroSizes.back(); 1197 Module *M = SizeIntrin->getModule(); 1198 const DataLayout &DL = M->getDataLayout(); 1199 auto Size = DL.getTypeAllocSize(Shape.FrameTy); 1200 auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); 1201 1202 for (CoroSizeInst *CS : Shape.CoroSizes) { 1203 CS->replaceAllUsesWith(SizeConstant); 1204 CS->eraseFromParent(); 1205 } 1206 } 1207 1208 static void postSplitCleanup(Function &F) { 1209 removeUnreachableBlocks(F); 1210 1211 #ifndef NDEBUG 1212 // For now, we do a mandatory verification step because we don't 1213 // entirely trust this pass. Note that we don't want to add a verifier 1214 // pass to FPM below because it will also verify all the global data. 1215 if (verifyFunction(F, &errs())) 1216 report_fatal_error("Broken function"); 1217 #endif 1218 } 1219 1220 // Coroutine has no suspend points. 
Remove heap allocation for the coroutine
1221 // frame if possible.
1222 static void handleNoSuspendCoroutine(coro::Shape &Shape) {
1223 auto *CoroBegin = Shape.CoroBegin;
1224 auto *CoroId = CoroBegin->getId();
1225 auto *AllocInst = CoroId->getCoroAlloc();
1226 switch (Shape.ABI) {
1227 case coro::ABI::Switch: {
1228 auto SwitchId = cast<CoroIdInst>(CoroId);
1229 coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
1230 if (AllocInst) {
1231 IRBuilder<> Builder(AllocInst);
1232 auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
1233 Frame->setAlignment(Shape.FrameAlign);
1234 AllocInst->replaceAllUsesWith(Builder.getFalse());
1235 AllocInst->eraseFromParent();
1236 CoroBegin->replaceAllUsesWith(Frame);
1237 } else {
1238 CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
1239 }
1240 
1241 break;
1242 }
1243 case coro::ABI::Async:
1244 case coro::ABI::Retcon:
1245 case coro::ABI::RetconOnce:
1246 CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
1247 break;
1248 }
1249 
1250 CoroBegin->eraseFromParent();
1251 Shape.CoroBegin = nullptr;
1252 }
1253 
1254 // SimplifySuspendPoint needs to check that there are no calls between
1255 // coro_save and coro_suspend, since any of those calls may potentially resume
1256 // the coroutine, in which case we cannot eliminate the suspend point.
1257 static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
1258 for (Instruction *I = From; I != To; I = I->getNextNode()) {
1259 // Assume that no intrinsic can resume the coroutine.
1260 if (isa<IntrinsicInst>(I))
1261 continue;
1262 
1263 if (isa<CallBase>(I))
1264 return true;
1265 }
1266 return false;
1267 }
1268 
1269 static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
1270 SmallPtrSet<BasicBlock *, 8> Set;
1271 SmallVector<BasicBlock *, 8> Worklist;
1272 
1273 Set.insert(SaveBB);
1274 Worklist.push_back(ResDesBB);
1275 
1276 // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
1277 // returns a token consumed by the suspend instruction, all blocks in between
1278 // will eventually have to hit SaveBB when going backwards from ResDesBB.
1279 while (!Worklist.empty()) {
1280 auto *BB = Worklist.pop_back_val();
1281 Set.insert(BB);
1282 for (auto *Pred : predecessors(BB))
1283 if (!Set.contains(Pred))
1284 Worklist.push_back(Pred);
1285 }
1286 
1287 // SaveBB and ResDesBB are checked separately in hasCallsBetween.
1288 Set.erase(SaveBB);
1289 Set.erase(ResDesBB);
1290 
1291 for (auto *BB : Set)
1292 if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
1293 return true;
1294 
1295 return false;
1296 }
1297 
1298 static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
1299 auto *SaveBB = Save->getParent();
1300 auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
1301 
1302 if (SaveBB == ResumeOrDestroyBB)
1303 return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
1304 
1305 // Any calls from Save to the end of the block?
1306 if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
1307 return true;
1308 
1309 // Any calls from the beginning of the block up to ResumeOrDestroy?
1310 if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
1311 ResumeOrDestroy))
1312 return true;
1313 
1314 // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
1315 if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
1316 return true;
1317 
1318 return false;
1319 }
1320 
1321 // If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
1322 // suspend point and replace it with normal control flow.
1323 static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
1324 CoroBeginInst *CoroBegin) {
1325 Instruction *Prev = Suspend->getPrevNode();
1326 if (!Prev) {
1327 auto *Pred = Suspend->getParent()->getSinglePredecessor();
1328 if (!Pred)
1329 return false;
1330 Prev = Pred->getTerminator();
1331 }
1332 
1333 CallBase *CB = dyn_cast<CallBase>(Prev);
1334 if (!CB)
1335 return false;
1336 
1337 auto *Callee = CB->getCalledOperand()->stripPointerCasts();
1338 
1339 // See if the callsite is for resumption or destruction of the coroutine.
1340 auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
1341 if (!SubFn)
1342 return false;
1343 
1344 // If it does not refer to the current coroutine, we cannot do anything with it.
1345 if (SubFn->getFrame() != CoroBegin)
1346 return false;
1347 
1348 // See if the transformation is safe. Specifically, see if there are any
1349 // calls in between Save and CallInstr. They can potentially resume the
1350 // coroutine, rendering this optimization unsafe.
1351 auto *Save = Suspend->getCoroSave();
1352 if (hasCallsBetween(Save, CB))
1353 return false;
1354 
1355 // Replace llvm.coro.suspend with the value that results in resumption over
1356 // the resume or cleanup path.
1357 Suspend->replaceAllUsesWith(SubFn->getRawIndex());
1358 Suspend->eraseFromParent();
1359 Save->eraseFromParent();
1360 
1361 // No longer need a call to coro.resume or coro.destroy.
1362 if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
1363 BranchInst::Create(Invoke->getNormalDest(), Invoke->getIterator());
1364 }
1365 
1366 // Grab the CalledValue from CB before erasing the CallInstr.
1367 auto *CalledValue = CB->getCalledOperand();
1368 CB->eraseFromParent();
1369 
1370 // If it has no more users, remove it. Usually it is a bitcast of SubFn.
1371 if (CalledValue != SubFn && CalledValue->user_empty())
1372 if (auto *I = dyn_cast<Instruction>(CalledValue))
1373 I->eraseFromParent();
1374 
1375 // Now we are good to remove SubFn.
1376 if (SubFn->user_empty())
1377 SubFn->eraseFromParent();
1378 
1379 return true;
1380 }
1381 
1382 // Remove suspend points that are simplified.
1383 static void simplifySuspendPoints(coro::Shape &Shape) {
1384 // Currently, the only simplification we do is switch-lowering-specific.
1385 if (Shape.ABI != coro::ABI::Switch)
1386 return;
1387 
1388 auto &S = Shape.CoroSuspends;
1389 size_t I = 0, N = S.size();
1390 if (N == 0)
1391 return;
1392 
1393 size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
1394 while (true) {
1395 auto SI = cast<CoroSuspendInst>(S[I]);
1396 // Leave final.suspend to handleFinalSuspend since it is undefined behavior
1397 // to resume a coroutine suspended at the final suspend point.
1398 if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
1399 if (--N == I)
1400 break;
1401 
1402 std::swap(S[I], S[N]);
1403 
1404 if (cast<CoroSuspendInst>(S[I])->isFinal()) {
1405 assert(Shape.SwitchLowering.HasFinalSuspend);
1406 ChangedFinalIndex = I;
1407 }
1408 
1409 continue;
1410 }
1411 if (++I == N)
1412 break;
1413 }
1414 S.resize(N);
1415 
1416 // Maintain final.suspend in case the final suspend was swapped, since we
1417 // require the final suspend to be the last element of CoroSuspends.
1418 if (ChangedFinalIndex < N) {
1419 assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
1420 std::swap(S[ChangedFinalIndex], S.back());
1421 }
1422 }
1423 
1424 namespace {
1425 
1426 struct SwitchCoroutineSplitter {
1427 static void split(Function &F, coro::Shape &Shape,
1428 SmallVectorImpl<Function *> &Clones,
1429 TargetTransformInfo &TTI) {
1430 assert(Shape.ABI == coro::ABI::Switch);
1431 
1432 createResumeEntryBlock(F, Shape);
1433 auto *ResumeClone =
1434 createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
1435 auto *DestroyClone =
1436 createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
1437 auto *CleanupClone =
1438 createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI);
1439 
1440 postSplitCleanup(*ResumeClone);
1441 postSplitCleanup(*DestroyClone);
1442 postSplitCleanup(*CleanupClone);
1443 
1444 // Store addresses of the resume/destroy/cleanup functions in the coroutine frame.
1445 updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
1446 
1447 assert(Clones.empty());
1448 Clones.push_back(ResumeClone);
1449 Clones.push_back(DestroyClone);
1450 Clones.push_back(CleanupClone);
1451 
1452 // Create a constant array referring to the resume/destroy/cleanup functions
1453 // pointed to by the last argument of @llvm.coro.info, so that the CoroElide
1454 // pass can determine the correct function to call.
1455 setCoroInfo(F, Shape, Clones);
1456 }
1457 
1458 private:
1459 // Create a resume clone by cloning the body of the original function, setting
1460 // a new entry block and replacing coro.suspend with an appropriate value to
1461 // force resume or cleanup at every suspend point.
1462 static Function *createClone(Function &F, const Twine &Suffix,
1463 coro::Shape &Shape, CoroCloner::Kind FKind,
1464 TargetTransformInfo &TTI) {
1465 CoroCloner Cloner(F, Suffix, Shape, FKind, TTI);
1466 Cloner.create();
1467 return Cloner.getFunction();
1468 }
1469 
1470 // Create an entry block for a resume function with a switch that will jump to
1471 // suspend points.
1472 static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
1473 LLVMContext &C = F.getContext();
1474 
1475 // resume.entry:
1476 // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
1477 // %index = load i32, i32* %index.addr
1478 // switch i32 %index, label %unreachable [
1479 // i32 0, label %resume.0
1480 // i32 1, label %resume.1
1481 // ...
1482 // ]
1483 
1484 auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
1485 auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);
1486 
1487 IRBuilder<> Builder(NewEntry);
1488 auto *FramePtr = Shape.FramePtr;
1489 auto *FrameTy = Shape.FrameTy;
1490 auto *GepIndex = Builder.CreateStructGEP(
1491 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
1492 auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
1493 auto *Switch =
1494 Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
1495 Shape.SwitchLowering.ResumeSwitch = Switch;
1496 
1497 size_t SuspendIndex = 0;
1498 for (auto *AnyS : Shape.CoroSuspends) {
1499 auto *S = cast<CoroSuspendInst>(AnyS);
1500 ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
1501 
1502 // Replace CoroSave with a store to Index:
1503 // %index.addr = getelementptr %f.frame...
      auto *Save = S->getCoroSave();
      Builder.SetInsertPoint(Save);
      if (S->isFinal()) {
        // The coroutine should be marked done if it reaches the final suspend
        // point.
        markCoroutineAsDone(Builder, Shape, FramePtr);
      } else {
        auto *GepIndex = Builder.CreateStructGEP(
            FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
        Builder.CreateStore(IndexVal, GepIndex);
      }

      Save->replaceAllUsesWith(ConstantTokenNone::get(C));
      Save->eraseFromParent();

      // Split the block before and after coro.suspend and add a jump from the
      // entry switch:
      //
      //  whateverBB:
      //    whatever
      //    %0 = call i8 @llvm.coro.suspend(token none, i1 false)
      //    switch i8 %0, label %suspend [i8 0, label %resume
      //                                  i8 1, label %cleanup]
      // becomes:
      //
      //  whateverBB:
      //    whatever
      //    br label %resume.0.landing
      //
      //  resume.0: ; <--- jump from the switch in the resume.entry
      //    %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
      //    br label %resume.0.landing
      //
      //  resume.0.landing:
      //    %1 = phi i8 [-1, %whateverBB], [%0, %resume.0]
      //    switch i8 %1, label %suspend [i8 0, label %resume
      //                                  i8 1, label %cleanup]

      auto *SuspendBB = S->getParent();
      auto *ResumeBB =
          SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
      auto *LandingBB = ResumeBB->splitBasicBlock(
          S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
      Switch->addCase(IndexVal, ResumeBB);

      cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
      auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "");
      PN->insertBefore(LandingBB->begin());
      S->replaceAllUsesWith(PN);
      PN->addIncoming(Builder.getInt8(-1), SuspendBB);
      PN->addIncoming(S, ResumeBB);

      ++SuspendIndex;
    }

    Builder.SetInsertPoint(UnreachBB);
    Builder.CreateUnreachable();

    Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
  }

  // Store addresses of the Resume/Destroy/Cleanup functions in the coroutine
  // frame.
  static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
                              Function *DestroyFn, Function *CleanupFn) {
    IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr());

    auto *ResumeAddr = Builder.CreateStructGEP(
        Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
        "resume.addr");
    Builder.CreateStore(ResumeFn, ResumeAddr);

    Value *DestroyOrCleanupFn = DestroyFn;

    CoroIdInst *CoroId = Shape.getSwitchCoroId();
    if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
      // If there is a CoroAlloc and it returns false (meaning we elided the
      // allocation), use CleanupFn instead of DestroyFn.
      DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
    }

    auto *DestroyAddr = Builder.CreateStructGEP(
        Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
        "destroy.addr");
    Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
  }

  // Create a global constant array containing pointers to the functions
  // provided and set the Info parameter of CoroBegin to point at this
  // constant. Example:
  //
  //   @f.resumers = internal constant [2 x void(%f.frame*)*]
  //     [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
  //   define void @f() {
  //     ...
  //     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
  //       i8* bitcast([2 x void(%f.frame*)*]* @f.resumers to i8*))
  //
  // Assumes that all the functions have the same signature.
  static void setCoroInfo(Function &F, coro::Shape &Shape,
                          ArrayRef<Function *> Fns) {
    // This only works under the switch-lowering ABI because coro elision
    // only works on the switch-lowering ABI.
    SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
    assert(!Args.empty());
    Function *Part = *Fns.begin();
    Module *M = Part->getParent();
    auto *ArrTy = ArrayType::get(Part->getType(), Args.size());

    auto *ConstVal = ConstantArray::get(ArrTy, Args);
    auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
                                  GlobalVariable::PrivateLinkage, ConstVal,
                                  F.getName() + Twine(".resumers"));

    // Update the coro.begin instruction to refer to this constant.
    LLVMContext &C = F.getContext();
    auto *BC = ConstantExpr::getPointerCast(GV, PointerType::getUnqual(C));
    Shape.getSwitchCoroId()->setInfo(BC);
  }
};

} // namespace

static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
                                       Value *Continuation) {
  auto *ResumeIntrinsic = Suspend->getResumeFunction();
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *Int8PtrTy = PointerType::getUnqual(Context);

  IRBuilder<> Builder(ResumeIntrinsic);
  auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
  ResumeIntrinsic->replaceAllUsesWith(Val);
  ResumeIntrinsic->eraseFromParent();
  Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
                      UndefValue::get(Int8PtrTy));
}

/// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs.
static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
                            ArrayRef<Value *> FnArgs,
                            SmallVectorImpl<Value *> &CallArgs) {
  size_t ArgIdx = 0;
  for (auto *paramTy : FnTy->params()) {
    assert(ArgIdx < FnArgs.size());
    if (paramTy != FnArgs[ArgIdx]->getType())
      CallArgs.push_back(
          Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy));
    else
      CallArgs.push_back(FnArgs[ArgIdx]);
    ++ArgIdx;
  }
}

CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
                                   TargetTransformInfo &TTI,
                                   ArrayRef<Value *> Arguments,
                                   IRBuilder<> &Builder) {
  auto *FnTy = MustTailCallFn->getFunctionType();
  // Coerce the arguments; LLVM optimizations seem to ignore the types in
  // vararg functions and throw away casts in optimized mode.
  SmallVector<Value *, 8> CallArgs;
  coerceArguments(Builder, FnTy, Arguments, CallArgs);

  auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs);
  // Only mark the call musttail on targets that support tail calls.
  if (TTI.supportsTailCallFor(TailCall)) {
    TailCall->setTailCallKind(CallInst::TCK_MustTail);
  }
  TailCall->setDebugLoc(Loc);
  TailCall->setCallingConv(MustTailCallFn->getCallingConv());
  return TailCall;
}

static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
                                SmallVectorImpl<Function *> &Clones,
                                TargetTransformInfo &TTI) {
  assert(Shape.ABI == coro::ABI::Async);
  assert(Clones.empty());
  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
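  // In particular, the split function will now contain returns, so a
  // previously inferred noreturn (or noalias/nonnull return attributes)
  // would no longer be valid.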
  F.removeFnAttr(Attribute::NoReturn);
  F.removeRetAttr(Attribute::NoAlias);
  F.removeRetAttr(Attribute::NonNull);

  auto &Context = F.getContext();
  auto *Int8PtrTy = PointerType::getUnqual(Context);

  auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
  IRBuilder<> Builder(Id);

  auto *FramePtr = Id->getStorage();
  FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
  FramePtr = Builder.CreateConstInBoundsGEP1_32(
      Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
      "async.ctx.frameptr");

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(FramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
    auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]);

    // Create the clone declaration.
    auto ResumeNameSuffix = ".resume.";
    auto ProjectionFunctionName =
        Suspend->getAsyncContextProjectionFunction()->getName();
    bool UseSwiftMangling = false;
    if (ProjectionFunctionName == "__swift_async_resume_project_context") {
      ResumeNameSuffix = "TQ";
      UseSwiftMangling = true;
    } else if (ProjectionFunctionName == "__swift_async_resume_get_context") {
      ResumeNameSuffix = "TY";
      UseSwiftMangling = true;
    }
    auto *Continuation = createCloneDeclaration(
        F, Shape,
        UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
                         : ResumeNameSuffix + Twine(Idx),
        NextF, Suspend);
    Clones.push_back(Continuation);

    // Insert a branch to a new return block immediately before the suspend
    // point.
    auto *SuspendBB = Suspend->getParent();
    auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Place it before the first suspend.
    auto *ReturnBB =
        BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
    Branch->setSuccessor(0, ReturnBB);

    IRBuilder<> Builder(ReturnBB);

    // Insert the call to the tail call function and inline it.
    auto *Fn = Suspend->getMustTailCallFunction();
    SmallVector<Value *, 8> Args(Suspend->args());
    auto FnArgs = ArrayRef<Value *>(Args).drop_front(
        CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
    auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(), Fn, TTI,
                                              FnArgs, Builder);
    Builder.CreateRetVoid();
    InlineFunctionInfo FnInfo;
    (void)InlineFunction(*TailCall, FnInfo);

    // Replace the llvm.coro.async.resume intrinsic call.
    replaceAsyncResumeFunction(Suspend, Continuation);
  }

  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
    auto *Suspend = Shape.CoroSuspends[Idx];
    auto *Clone = Clones[Idx];

    CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create();
  }
}

static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones,
                                 TargetTransformInfo &TTI) {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
  assert(Clones.empty());

  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeRetAttr(Attribute::NoAlias);
  F.removeRetAttr(Attribute::NonNull);

  // Allocate the frame.
  auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    // Determine the size of the frame.
    const DataLayout &DL = F.getDataLayout();
    auto Size = DL.getTypeAllocSize(Shape.FrameTy);

    // Allocate. We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    // FIXME: pass the required alignment
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
        Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    Builder.CreateStore(RawFramePtr, Id->getStorage());
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block.
  BasicBlock *ReturnBB = nullptr;
  SmallVector<PHINode *, 4> ReturnPHIs;

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);

    // Create the clone declaration.
    auto Continuation =
        createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF, nullptr);
    Clones.push_back(Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Create the unified return block.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB =
          BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // Create PHIs for all the return values.
      assert(ReturnPHIs.empty());

      // First, the continuation.
      ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
                                             Shape.CoroSuspends.size()));

      // Next, all the directly-yielded values.
      for (auto *ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(
            Builder.CreatePHI(ResultTy, Shape.CoroSuspends.size()));

      // Build the return value.
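      // The return type is either the continuation pointer by itself, or a
      // struct of the continuation followed by the directly-yielded values.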
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
          (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
      auto *CastedContinuation =
          Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);

      Value *RetV;
      if (ReturnPHIs.size() == 1) {
        RetV = CastedContinuation;
      } else {
        RetV = PoisonValue::get(RetTy);
        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
        for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
          RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
      }

      Builder.CreateRet(RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(0, ReturnBB);
    ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
    size_t NextPHIIndex = 1;
    for (auto &VUse : Suspend->value_operands())
      ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
    assert(NextPHIIndex == ReturnPHIs.size());
  }

  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = Shape.CoroSuspends[i];
    auto Clone = Clones[i];

    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create();
  }
}

namespace {
class PrettyStackTraceFunction : public PrettyStackTraceEntry {
  Function &F;

public:
  PrettyStackTraceFunction(Function &F) : F(F) {}
  void print(raw_ostream &OS) const override {
    OS << "While splitting coroutine ";
    F.printAsOperand(OS, /*print type*/ false, F.getParent());
    OS << "\n";
  }
};
} // namespace

static coro::Shape
splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
               TargetTransformInfo &TTI, bool OptimizeFrame,
               std::function<bool(Instruction &)> MaterializableCallback) {
  PrettyStackTraceFunction prettyStackTrace(F);

  // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
  // up by uses in unreachable blocks, so remove them as a first pass.
  removeUnreachableBlocks(F);

  coro::Shape Shape(F, OptimizeFrame);
  if (!Shape.CoroBegin)
    return Shape;

  lowerAwaitSuspends(F, Shape);

  simplifySuspendPoints(Shape);
  buildCoroutineFrame(F, Shape, TTI, MaterializableCallback);
  replaceFrameSizeAndAlignment(Shape);

  // If there are no suspend points, no split is required; just remove
  // the allocation and deallocation blocks, they are not needed.
  if (Shape.CoroSuspends.empty()) {
    handleNoSuspendCoroutine(Shape);
  } else {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
      break;
    case coro::ABI::Async:
      splitAsyncCoroutine(F, Shape, Clones, TTI);
      break;
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      splitRetconCoroutine(F, Shape, Clones, TTI);
      break;
    }
  }

  // Replace all the swifterror operations in the original function.
  // This invalidates SwiftErrorOps in the Shape.
  replaceSwiftErrorOps(F, Shape, nullptr);

  // Salvage debug intrinsics that point into the coroutine frame in the
  // original function. The Cloner has already salvaged debug info in the new
  // coroutine funclets.
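  // A single ArgToAllocaMap is threaded through all of the salvage calls
  // below so that state for a given argument can be reused between them.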
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
  auto [DbgInsts, DbgVariableRecords] = collectDbgVariableIntrinsics(F);
  for (auto *DDI : DbgInsts)
    coro::salvageDebugInfo(ArgToAllocaMap, *DDI, Shape.OptimizeFrame,
                           false /*UseEntryValue*/);
  for (DbgVariableRecord *DVR : DbgVariableRecords)
    coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame,
                           false /*UseEntryValue*/);
  return Shape;
}

/// Remove calls to llvm.coro.end in the original function.
static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
  if (Shape.ABI != coro::ABI::Switch) {
    for (auto *End : Shape.CoroEnds) {
      replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
    }
  } else {
    for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
      auto &Context = End->getContext();
      End->replaceAllUsesWith(ConstantInt::getFalse(Context));
      End->eraseFromParent();
    }
  }
}

static void updateCallGraphAfterCoroutineSplit(
    LazyCallGraph::Node &N, const coro::Shape &Shape,
    const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
    LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
    FunctionAnalysisManager &FAM) {

  if (!Clones.empty()) {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      // Each clone in the Switch lowering is independent of the other clones.
      // Let the LazyCallGraph know about each one separately.
      for (Function *Clone : Clones)
        CG.addSplitFunction(N.getFunction(), *Clone);
      break;
    case coro::ABI::Async:
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      // Each clone in the Async/Retcon lowering references the other clones.
      // Let the LazyCallGraph know about all of them at once.
      if (!Clones.empty())
        CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
      break;
    }

    // Let the CGSCC infra handle the changes to the original function.
    updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
  }

  // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
  // to the split functions.
  postSplitCleanup(N.getFunction());
  updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
}

/// Replace a call to llvm.coro.prepare.retcon.
static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
                           LazyCallGraph::SCC &C) {
  auto CastFn = Prepare->getArgOperand(0); // as an i8*
  auto Fn = CastFn->stripPointerCasts();   // as its original type

  // Attempt to peephole this pattern:
  //    %0 = bitcast [[TYPE]] @some_function to i8*
  //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
  //    %2 = bitcast %1 to [[TYPE]]
  // ==>
  //    %2 = @some_function
  for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
    // Look for bitcasts back to the original function type.
    auto *Cast = dyn_cast<BitCastInst>(U.getUser());
    if (!Cast || Cast->getType() != Fn->getType())
      continue;

    // Replace and remove the cast.
    Cast->replaceAllUsesWith(Fn);
    Cast->eraseFromParent();
  }

  // Replace any remaining uses with the function as an i8*.
  // This can never directly be a callee, so we don't need to update CG.
  Prepare->replaceAllUsesWith(CastFn);
  Prepare->eraseFromParent();

  // Kill dead bitcasts.
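  // Walk back through the chain of bitcasts feeding the prepare call,
  // erasing each cast once it no longer has uses.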
  while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
    if (!Cast->use_empty())
      break;
    CastFn = Cast->getOperand(0);
    Cast->eraseFromParent();
  }
}

static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
                               LazyCallGraph::SCC &C) {
  bool Changed = false;
  for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
    // Intrinsics can only be used in calls.
    auto *Prepare = cast<CallInst>(P.getUser());
    replacePrepare(Prepare, CG, C);
    Changed = true;
  }

  return Changed;
}

static void addPrepareFunction(const Module &M,
                               SmallVectorImpl<Function *> &Fns,
                               StringRef Name) {
  auto *PrepareFn = M.getFunction(Name);
  if (PrepareFn && !PrepareFn->use_empty())
    Fns.push_back(PrepareFn);
}

CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
    : MaterializableCallback(coro::defaultMaterializable),
      OptimizeFrame(OptimizeFrame) {}

PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
                                     CGSCCAnalysisManager &AM,
                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
  // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
  //     non-zero number of nodes, so we assume that here and grab the first
  //     node's function's module.
  Module &M = *C.begin()->getFunction().getParent();
  auto &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // Check for uses of llvm.coro.prepare.retcon/async.
  SmallVector<Function *, 2> PrepareFns;
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");

  // Find coroutines for processing.
  SmallVector<LazyCallGraph::Node *> Coroutines;
  for (LazyCallGraph::Node &N : C)
    if (N.getFunction().isPresplitCoroutine())
      Coroutines.push_back(&N);

  if (Coroutines.empty() && PrepareFns.empty())
    return PreservedAnalyses::all();

  // Split all the coroutines.
  for (LazyCallGraph::Node *N : Coroutines) {
    Function &F = N->getFunction();
    LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
                      << "'\n");
    F.setSplittedCoroutine();

    SmallVector<Function *, 4> Clones;
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
    const coro::Shape Shape =
        splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F),
                       OptimizeFrame, MaterializableCallback);
    removeCoroEndsFromRampFunction(Shape);
    updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);

    ORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F)
             << "Split '" << ore::NV("function", F.getName())
             << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize)
             << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")";
    });

    if (!Shape.CoroSuspends.empty()) {
      // Run the CGSCC pipeline on the original and newly split functions.
      UR.CWorklist.insert(&C);
      for (Function *Clone : Clones)
        UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
    }
  }

  for (auto *PrepareFn : PrepareFns) {
    replaceAllPrepares(PrepareFn, CG, C);
  }

  return PreservedAnalyses::none();
}