//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass builds the coroutine frame and outlines resume and destroy parts
// of the coroutine into separate functions.
//
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the
// coroutine as a single function for as long as possible. Shortly before the
// coroutine is eligible to be inlined into its callers, we split up the
// coroutine into parts corresponding to the initial, resume and destroy
// invocations of the coroutine, add them to the current SCC and restart the
// IPO pipeline to optimize the coroutine subfunctions we extracted before
// proceeding to the caller of the coroutine.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "CoroCloner.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "coro-split"

namespace {

/// Collect (a known) subset of global debug info metadata potentially used by
/// the function \p F.
///
/// This metadata set can be used to avoid cloning debug info not owned by \p F
/// and is shared among all potential clones of \p F.
MetadataSetTy collectCommonDebugInfo(Function &F) { TimeTraceScope FunctionScope("CollectCommonDebugInfo"); DebugInfoFinder DIFinder; DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning( F, CloneFunctionChangeType::LocalChangesOnly, DIFinder); return FindDebugInfoToIdentityMap(CloneFunctionChangeType::LocalChangesOnly, DIFinder, SPClonedWithinModule); } } // end anonymous namespace // FIXME: // Lower the intrinisc in CoroEarly phase if coroutine frame doesn't escape // and it is known that other transformations, for example, sanitizers // won't lead to incorrect code. static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB, coro::Shape &Shape) { auto Wrapper = CB->getWrapperFunction(); auto Awaiter = CB->getAwaiter(); auto FramePtr = CB->getFrame(); Builder.SetInsertPoint(CB); CallBase *NewCall = nullptr; // await_suspend has only 2 parameters, awaiter and handle. // Copy parameter attributes from the intrinsic call, but remove the last, // because the last parameter now becomes the function that is being called. AttributeList NewAttributes = CB->getAttributes().removeParamAttributes(CB->getContext(), 2); if (auto Invoke = dyn_cast(CB)) { auto WrapperInvoke = Builder.CreateInvoke(Wrapper, Invoke->getNormalDest(), Invoke->getUnwindDest(), {Awaiter, FramePtr}); WrapperInvoke->setCallingConv(Invoke->getCallingConv()); std::copy(Invoke->bundle_op_info_begin(), Invoke->bundle_op_info_end(), WrapperInvoke->bundle_op_info_begin()); WrapperInvoke->setAttributes(NewAttributes); WrapperInvoke->setDebugLoc(Invoke->getDebugLoc()); NewCall = WrapperInvoke; } else if (auto Call = dyn_cast(CB)) { auto WrapperCall = Builder.CreateCall(Wrapper, {Awaiter, FramePtr}); WrapperCall->setAttributes(NewAttributes); WrapperCall->setDebugLoc(Call->getDebugLoc()); NewCall = WrapperCall; } else { llvm_unreachable("Unexpected coro_await_suspend invocation method"); } if (CB->getCalledFunction()->getIntrinsicID() == Intrinsic::coro_await_suspend_handle) { // Follow the lowered await_suspend call above with a lowered resume call // to the returned coroutine. if (auto *Invoke = dyn_cast(CB)) { // If the await_suspend call is an invoke, we continue in the next block. Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt()); } coro::LowererBase LB(*Wrapper->getParent()); auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex, &*Builder.GetInsertPoint()); LLVMContext &Ctx = Builder.getContext(); FunctionType *ResumeTy = FunctionType::get( Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false); auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall}); ResumeCall->setCallingConv(CallingConv::Fast); // We can't insert the 'ret' instruction and adjust the cc until the // function has been split, so remember this for later. Shape.SymmetricTransfers.push_back(ResumeCall); NewCall = ResumeCall; } CB->replaceAllUsesWith(NewCall); CB->eraseFromParent(); } static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) { IRBuilder<> Builder(F.getContext()); for (auto *AWS : Shape.CoroAwaitSuspends) lowerAwaitSuspend(Builder, AWS, Shape); } static void maybeFreeRetconStorage(IRBuilder<> &Builder, const coro::Shape &Shape, Value *FramePtr, CallGraph *CG) { assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce); if (Shape.RetconLowering.IsFrameInlineInStorage) return; Shape.emitDealloc(Builder, FramePtr, CG); } /// Replace an llvm.coro.end.async. /// Will inline the must tail call function call if there is one. 
/// \returns true if cleanup of the coro.end block is needed, false otherwise. static bool replaceCoroEndAsync(AnyCoroEndInst *End) { IRBuilder<> Builder(End); auto *EndAsync = dyn_cast(End); if (!EndAsync) { Builder.CreateRetVoid(); return true /*needs cleanup of coro.end block*/; } auto *MustTailCallFunc = EndAsync->getMustTailCallFunction(); if (!MustTailCallFunc) { Builder.CreateRetVoid(); return true /*needs cleanup of coro.end block*/; } // Move the must tail call from the predecessor block into the end block. auto *CoroEndBlock = End->getParent(); auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor(); assert(MustTailCallFuncBlock && "Must have a single predecessor block"); auto It = MustTailCallFuncBlock->getTerminator()->getIterator(); auto *MustTailCall = cast(&*std::prev(It)); CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock, MustTailCall->getIterator()); // Insert the return instruction. Builder.SetInsertPoint(End); Builder.CreateRetVoid(); InlineFunctionInfo FnInfo; // Remove the rest of the block, by splitting it into an unreachable block. auto *BB = End->getParent(); BB->splitBasicBlock(End); BB->getTerminator()->eraseFromParent(); auto InlineRes = InlineFunction(*MustTailCall, FnInfo); assert(InlineRes.isSuccess() && "Expected inlining to succeed"); (void)InlineRes; // We have cleaned up the coro.end block above. return false; } /// Replace a non-unwind call to llvm.coro.end. static void replaceFallthroughCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, Value *FramePtr, bool InResume, CallGraph *CG) { // Start inserting right before the coro.end. IRBuilder<> Builder(End); // Create the return instruction. switch (Shape.ABI) { // The cloned functions in switch-lowering always return void. case coro::ABI::Switch: assert(!cast(End)->hasResults() && "switch coroutine should not return any values"); // coro.end doesn't immediately end the coroutine in the main function // in this lowering, because we need to deallocate the coroutine. if (!InResume) return; Builder.CreateRetVoid(); break; // In async lowering this returns. case coro::ABI::Async: { bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End); if (!CoroEndBlockNeedsCleanup) return; break; } // In unique continuation lowering, the continuations always return void. // But we may have implicitly allocated storage. case coro::ABI::RetconOnce: { maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); auto *CoroEnd = cast(End); auto *RetTy = Shape.getResumeFunctionType()->getReturnType(); if (!CoroEnd->hasResults()) { assert(RetTy->isVoidTy()); Builder.CreateRetVoid(); break; } auto *CoroResults = CoroEnd->getResults(); unsigned NumReturns = CoroResults->numReturns(); if (auto *RetStructTy = dyn_cast(RetTy)) { assert(RetStructTy->getNumElements() == NumReturns && "numbers of returns should match resume function singature"); Value *ReturnValue = PoisonValue::get(RetStructTy); unsigned Idx = 0; for (Value *RetValEl : CoroResults->return_values()) ReturnValue = Builder.CreateInsertValue(ReturnValue, RetValEl, Idx++); Builder.CreateRet(ReturnValue); } else if (NumReturns == 0) { assert(RetTy->isVoidTy()); Builder.CreateRetVoid(); } else { assert(NumReturns == 1); Builder.CreateRet(*CoroResults->retval_begin()); } CoroResults->replaceAllUsesWith( ConstantTokenNone::get(CoroResults->getContext())); CoroResults->eraseFromParent(); break; } // In non-unique continuation lowering, we signal completion by returning // a null continuation. 
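// For example, if the resume function type returns { ptr, i32 }, the
// completion path emitted below is, schematically (illustrative IR only,
// exact value names differ):
//   %agg = insertvalue { ptr, i32 } poison, ptr null, 0
//   ret { ptr, i32 } %agg
// i.e. a null continuation in the first slot and poison for the yielded
// values.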
  case coro::ABI::Retcon: {
    assert(!cast<CoroEndInst>(End)->hasResults() &&
           "retcon coroutine should not return any values");
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    auto RetTy = Shape.getResumeFunctionType()->getReturnType();
    auto RetStructTy = dyn_cast<StructType>(RetTy);
    PointerType *ContinuationTy =
        cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);

    Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
    if (RetStructTy) {
      ReturnValue = Builder.CreateInsertValue(PoisonValue::get(RetStructTy),
                                              ReturnValue, 0);
    }
    Builder.CreateRet(ReturnValue);
    break;
  }
  }

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();
}

// Mark a coroutine as done, which implies that the coroutine is finished and
// never gets resumed.
//
// In the resume-switched ABI, the done state is represented by storing zero
// in ResumeFnAddr.
//
// NOTE: We cannot omit the argument `FramePtr`. It is necessary because the
// pointer to the frame in the split function is not stored in `Shape`.
static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
                                Value *FramePtr) {
  assert(
      Shape.ABI == coro::ABI::Switch &&
      "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
  auto *GepIndex = Builder.CreateStructGEP(
      Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
      "ResumeFn.addr");
  auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
      Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
  Builder.CreateStore(NullPtr, GepIndex);

  // If the coroutine doesn't have an unwind coro.end, we can omit the store
  // to the final suspend point, since we can infer that the coroutine is
  // suspended at the final suspend point from the nullness of ResumeFnAddr.
  // However, we can't skip it if the coroutine has an unwind coro.end: a
  // coroutine that reaches an unwind coro.end would then look suspended at
  // the final suspend point (ResumeFnAddr is null) even though it hasn't
  // actually completed yet. We need the IndexVal for the final suspend point
  // to keep the states distinct.
  if (Shape.SwitchLowering.HasUnwindCoroEnd &&
      Shape.SwitchLowering.HasFinalSuspend) {
    assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
           "The final suspend should only live in the last position of "
           "CoroSuspends.");
    ConstantInt *IndexVal = Shape.getIndex(Shape.CoroSuspends.size() - 1);
    auto *FinalIndex = Builder.CreateStructGEP(
        Shape.FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");

    Builder.CreateStore(IndexVal, FinalIndex);
  }
}

/// Replace an unwind call to llvm.coro.end.
static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                                 Value *FramePtr, bool InResume,
                                 CallGraph *CG) {
  IRBuilder<> Builder(End);

  switch (Shape.ABI) {
  // In switch-lowering, this does nothing in the main function.
  case coro::ABI::Switch: {
    // In C++'s specification, the coroutine should be marked as done
    // if promise.unhandled_exception() throws. The frontend will
    // call coro.end(true) along this path.
    //
    // FIXME: We should refactor this once there is another language
    // that uses the Switch-Resumed style other than C++.
    markCoroutineAsDone(Builder, Shape, FramePtr);
    if (!InResume)
      return;
    break;
  }
  // In async lowering this does nothing.
  case coro::ABI::Async:
    break;

  // In continuation-lowering, this frees the continuation storage.
case coro::ABI::Retcon: case coro::ABI::RetconOnce: maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); break; } // If coro.end has an associated bundle, add cleanupret instruction. if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) { auto *FromPad = cast(Bundle->Inputs[0]); auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr); End->getParent()->splitBasicBlock(End); CleanupRet->getParent()->getTerminator()->eraseFromParent(); } } static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape, Value *FramePtr, bool InResume, CallGraph *CG) { if (End->isUnwind()) replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); else replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); auto &Context = End->getContext(); End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context)); End->eraseFromParent(); } // In the resume function, we remove the last case (when coro::Shape is built, // the final suspend point (if present) is always the last element of // CoroSuspends array) since it is an undefined behavior to resume a coroutine // suspended at the final suspend point. // In the destroy function, if it isn't possible that the ResumeFnAddr is NULL // and the coroutine doesn't suspend at the final suspend point actually (this // is possible since the coroutine is considered suspended at the final suspend // point if promise.unhandled_exception() exits via an exception), we can // remove the last case. void coro::BaseCloner::handleFinalSuspend() { assert(Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.HasFinalSuspend); if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd) return; auto *Switch = cast(VMap[Shape.SwitchLowering.ResumeSwitch]); auto FinalCaseIt = std::prev(Switch->case_end()); BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); Switch->removeCase(FinalCaseIt); if (isSwitchDestroyFunction()) { BasicBlock *OldSwitchBB = Switch->getParent(); auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch"); Builder.SetInsertPoint(OldSwitchBB->getTerminator()); if (NewF->isCoroOnlyDestroyWhenComplete()) { // When the coroutine can only be destroyed when complete, we don't need // to generate code for other cases. Builder.CreateBr(ResumeBB); } else { auto *GepIndex = Builder.CreateStructGEP( Shape.FrameTy, NewFramePtr, coro::Shape::SwitchFieldIndex::Resume, "ResumeFn.addr"); auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(), GepIndex); auto *Cond = Builder.CreateIsNull(Load); Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); } OldSwitchBB->getTerminator()->eraseFromParent(); } } static FunctionType * getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) { auto *AsyncSuspend = cast(Suspend); auto *StructTy = cast(AsyncSuspend->getType()); auto &Context = Suspend->getParent()->getParent()->getContext(); auto *VoidTy = Type::getVoidTy(Context); return FunctionType::get(VoidTy, StructTy->elements(), false); } static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, const Twine &Suffix, Module::iterator InsertBefore, AnyCoroSuspendInst *ActiveSuspend) { Module *M = OrigF.getParent(); auto *FnTy = (Shape.ABI != coro::ABI::Async) ? 
Shape.getResumeFunctionType() : getFunctionTypeFromAsyncSuspend(ActiveSuspend); Function *NewF = Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, OrigF.getName() + Suffix); M->getFunctionList().insert(InsertBefore, NewF); return NewF; } /// Replace uses of the active llvm.coro.suspend.retcon/async call with the /// arguments to the continuation function. /// /// This assumes that the builder has a meaningful insertion point. void coro::BaseCloner::replaceRetconOrAsyncSuspendUses() { assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async); auto NewS = VMap[ActiveSuspend]; if (NewS->use_empty()) return; // Copy out all the continuation arguments after the buffer pointer into // an easily-indexed data structure for convenience. SmallVector Args; // The async ABI includes all arguments -- including the first argument. bool IsAsyncABI = Shape.ABI == coro::ABI::Async; for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I) Args.push_back(&*I); // If the suspend returns a single scalar value, we can just do a simple // replacement. if (!isa(NewS->getType())) { assert(Args.size() == 1); NewS->replaceAllUsesWith(Args.front()); return; } // Try to peephole extracts of an aggregate return. for (Use &U : llvm::make_early_inc_range(NewS->uses())) { auto *EVI = dyn_cast(U.getUser()); if (!EVI || EVI->getNumIndices() != 1) continue; EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]); EVI->eraseFromParent(); } // If we have no remaining uses, we're done. if (NewS->use_empty()) return; // Otherwise, we need to create an aggregate. Value *Aggr = PoisonValue::get(NewS->getType()); for (auto [Idx, Arg] : llvm::enumerate(Args)) Aggr = Builder.CreateInsertValue(Aggr, Arg, Idx); NewS->replaceAllUsesWith(Aggr); } void coro::BaseCloner::replaceCoroSuspends() { Value *SuspendResult; switch (Shape.ABI) { // In switch lowering, replace coro.suspend with the appropriate value // for the type of function we're extracting. // Replacing coro.suspend with (0) will result in control flow proceeding to // a resume label associated with a suspend point, replacing it with (1) will // result in control flow proceeding to a cleanup label associated with this // suspend point. case coro::ABI::Switch: SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0); break; // In async lowering there are no uses of the result. case coro::ABI::Async: return; // In returned-continuation lowering, the arguments from earlier // continuations are theoretically arbitrary, and they should have been // spilled. case coro::ABI::RetconOnce: case coro::ABI::Retcon: return; } for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) { // The active suspend was handled earlier. if (CS == ActiveSuspend) continue; auto *MappedCS = cast(VMap[CS]); MappedCS->replaceAllUsesWith(SuspendResult); MappedCS->eraseFromParent(); } } void coro::BaseCloner::replaceCoroEnds() { for (AnyCoroEndInst *CE : Shape.CoroEnds) { // We use a null call graph because there's no call graph node for // the cloned function yet. We'll just be rebuilding that later. 
auto *NewCE = cast(VMap[CE]); replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr); } } static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, ValueToValueMapTy *VMap) { if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty()) return; Value *CachedSlot = nullptr; auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { if (CachedSlot) return CachedSlot; // Check if the function has a swifterror argument. for (auto &Arg : F.args()) { if (Arg.isSwiftError()) { CachedSlot = &Arg; return &Arg; } } // Create a swifterror alloca. IRBuilder<> Builder(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHIOrDbg()); auto Alloca = Builder.CreateAlloca(ValueTy); Alloca->setSwiftError(true); CachedSlot = Alloca; return Alloca; }; for (CallInst *Op : Shape.SwiftErrorOps) { auto MappedOp = VMap ? cast((*VMap)[Op]) : Op; IRBuilder<> Builder(MappedOp); // If there are no arguments, this is a 'get' operation. Value *MappedResult; if (Op->arg_empty()) { auto ValueTy = Op->getType(); auto Slot = getSwiftErrorSlot(ValueTy); MappedResult = Builder.CreateLoad(ValueTy, Slot); } else { assert(Op->arg_size() == 1); auto Value = MappedOp->getArgOperand(0); auto ValueTy = Value->getType(); auto Slot = getSwiftErrorSlot(ValueTy); Builder.CreateStore(Value, Slot); MappedResult = Slot; } MappedOp->replaceAllUsesWith(MappedResult); MappedOp->eraseFromParent(); } // If we're updating the original function, we've invalidated SwiftErrorOps. if (VMap == nullptr) { Shape.SwiftErrorOps.clear(); } } /// Returns all DbgVariableIntrinsic in F. static std::pair, SmallVector> collectDbgVariableIntrinsics(Function &F) { SmallVector Intrinsics; SmallVector DbgVariableRecords; for (auto &I : instructions(F)) { for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) DbgVariableRecords.push_back(&DVR); if (auto *DVI = dyn_cast(&I)) Intrinsics.push_back(DVI); } return {Intrinsics, DbgVariableRecords}; } void coro::BaseCloner::replaceSwiftErrorOps() { ::replaceSwiftErrorOps(*NewF, Shape, &VMap); } void coro::BaseCloner::salvageDebugInfo() { auto [Worklist, DbgVariableRecords] = collectDbgVariableIntrinsics(*NewF); SmallDenseMap ArgToAllocaMap; // Only 64-bit ABIs have a register we can refer to with the entry value. bool UseEntryValue = llvm::Triple(OrigF.getParent()->getTargetTriple()).isArch64Bit(); for (DbgVariableIntrinsic *DVI : Worklist) coro::salvageDebugInfo(ArgToAllocaMap, *DVI, UseEntryValue); for (DbgVariableRecord *DVR : DbgVariableRecords) coro::salvageDebugInfo(ArgToAllocaMap, *DVR, UseEntryValue); // Remove all salvaged dbg.declare intrinsics that became // either unreachable or stale due to the CoroSplit transformation. DominatorTree DomTree(*NewF); auto IsUnreachableBlock = [&](BasicBlock *BB) { return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr, &DomTree); }; auto RemoveOne = [&](auto *DVI) { if (IsUnreachableBlock(DVI->getParent())) DVI->eraseFromParent(); else if (isa_and_nonnull(DVI->getVariableLocationOp(0))) { // Count all non-debuginfo uses in reachable blocks. 
unsigned Uses = 0; for (auto *User : DVI->getVariableLocationOp(0)->users()) if (auto *I = dyn_cast(User)) if (!isa(I) && !IsUnreachableBlock(I->getParent())) ++Uses; if (!Uses) DVI->eraseFromParent(); } }; for_each(Worklist, RemoveOne); for_each(DbgVariableRecords, RemoveOne); } void coro::BaseCloner::replaceEntryBlock() { // In the original function, the AllocaSpillBlock is a block immediately // following the allocation of the frame object which defines GEPs for // all the allocas that have been moved into the frame, and it ends by // branching to the original beginning of the coroutine. Make this // the entry block of the cloned function. auto *Entry = cast(VMap[Shape.AllocaSpillBlock]); auto *OldEntry = &NewF->getEntryBlock(); Entry->setName("entry" + Suffix); Entry->moveBefore(OldEntry); Entry->getTerminator()->eraseFromParent(); // Clear all predecessors of the new entry block. There should be // exactly one predecessor, which we created when splitting out // AllocaSpillBlock to begin with. assert(Entry->hasOneUse()); auto BranchToEntry = cast(Entry->user_back()); assert(BranchToEntry->isUnconditional()); Builder.SetInsertPoint(BranchToEntry); Builder.CreateUnreachable(); BranchToEntry->eraseFromParent(); // Branch from the entry to the appropriate place. Builder.SetInsertPoint(Entry); switch (Shape.ABI) { case coro::ABI::Switch: { // In switch-lowering, we built a resume-entry block in the original // function. Make the entry block branch to this. auto *SwitchBB = cast(VMap[Shape.SwitchLowering.ResumeEntryBlock]); Builder.CreateBr(SwitchBB); break; } case coro::ABI::Async: case coro::ABI::Retcon: case coro::ABI::RetconOnce: { // In continuation ABIs, we want to branch to immediately after the // active suspend point. Earlier phases will have put the suspend in its // own basic block, so just thread our jump directly to its successor. assert((Shape.ABI == coro::ABI::Async && isa(ActiveSuspend)) || ((Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) && isa(ActiveSuspend))); auto *MappedCS = cast(VMap[ActiveSuspend]); auto Branch = cast(MappedCS->getNextNode()); assert(Branch->isUnconditional()); Builder.CreateBr(Branch->getSuccessor(0)); break; } } // Any static alloca that's still being used but not reachable from the new // entry needs to be moved to the new entry. Function *F = OldEntry->getParent(); DominatorTree DT{*F}; for (Instruction &I : llvm::make_early_inc_range(instructions(F))) { auto *Alloca = dyn_cast(&I); if (!Alloca || I.use_empty()) continue; if (DT.isReachableFromEntry(I.getParent()) || !isa(Alloca->getArraySize())) continue; I.moveBefore(*Entry, Entry->getFirstInsertionPt()); } } /// Derive the value of the new frame pointer. Value *coro::BaseCloner::deriveNewFramePointer() { // Builder should be inserting to the front of the new entry block. switch (Shape.ABI) { // In switch-lowering, the argument is the frame pointer. case coro::ABI::Switch: return &*NewF->arg_begin(); // In async-lowering, one of the arguments is an async context as determined // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of // the resume function from the async context projection function associated // with the active suspend. The frame is located as a tail to the async // context header. 
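  // Schematically (illustrative only, before the projection call is inlined
  // below):
  //   %caller.ctx = call ptr %project.fn(ptr %callee.ctx)
  //   %frame.ptr  = getelementptr inbounds i8, ptr %caller.ctx, i32 FrameOffset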
case coro::ABI::Async: { auto *ActiveAsyncSuspend = cast(ActiveSuspend); auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff; auto *CalleeContext = NewF->getArg(ContextIdx); auto *ProjectionFunc = ActiveAsyncSuspend->getAsyncContextProjectionFunction(); auto DbgLoc = cast(VMap[ActiveSuspend])->getDebugLoc(); // Calling i8* (i8*) auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(), ProjectionFunc, CalleeContext); CallerContext->setCallingConv(ProjectionFunc->getCallingConv()); CallerContext->setDebugLoc(DbgLoc); // The frame is located after the async_context header. auto &Context = Builder.getContext(); auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32( Type::getInt8Ty(Context), CallerContext, Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr"); // Inline the projection function. InlineFunctionInfo InlineInfo; auto InlineRes = InlineFunction(*CallerContext, InlineInfo); assert(InlineRes.isSuccess()); (void)InlineRes; return FramePtrAddr; } // In continuation-lowering, the argument is the opaque storage. case coro::ABI::Retcon: case coro::ABI::RetconOnce: { Argument *NewStorage = &*NewF->arg_begin(); auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext()); // If the storage is inline, just bitcast to the storage to the frame type. if (Shape.RetconLowering.IsFrameInlineInStorage) return NewStorage; // Otherwise, load the real frame from the opaque storage. return Builder.CreateLoad(FramePtrTy, NewStorage); } } llvm_unreachable("bad ABI"); } /// Adjust the scope line of the funclet to the first line number after the /// suspend point. This avoids a jump in the line table from the function /// declaration (where prologue instructions are attributed to) to the suspend /// point. /// Only adjust the scope line when the files are the same. /// If no candidate line number is found, fallback to the line of ActiveSuspend. static void updateScopeLine(Instruction *ActiveSuspend, DISubprogram &SPToUpdate) { if (!ActiveSuspend) return; // No subsequent instruction -> fallback to the location of ActiveSuspend. if (!ActiveSuspend->getNextNonDebugInstruction()) { if (auto DL = ActiveSuspend->getDebugLoc()) if (SPToUpdate.getFile() == DL->getFile()) SPToUpdate.setScopeLine(DL->getLine()); return; } BasicBlock::iterator Successor = ActiveSuspend->getNextNonDebugInstruction()->getIterator(); // Corosplit splits the BB around ActiveSuspend, so the meaningful // instructions are not in the same BB. if (auto *Branch = dyn_cast_or_null(Successor); Branch && Branch->isUnconditional()) Successor = Branch->getSuccessor(0)->getFirstNonPHIOrDbg(); // Find the first successor of ActiveSuspend with a non-zero line location. // If that matches the file of ActiveSuspend, use it. BasicBlock *PBB = Successor->getParent(); for (; Successor != PBB->end(); Successor = std::next(Successor)) { Successor = skipDebugIntrinsics(Successor); auto DL = Successor->getDebugLoc(); if (!DL || DL.getLine() == 0) continue; if (SPToUpdate.getFile() == DL->getFile()) { SPToUpdate.setScopeLine(DL.getLine()); return; } break; } // If the search above failed, fallback to the location of ActiveSuspend. 
if (auto DL = ActiveSuspend->getDebugLoc()) if (SPToUpdate.getFile() == DL->getFile()) SPToUpdate.setScopeLine(DL->getLine()); } static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context, unsigned ParamIndex, uint64_t Size, Align Alignment, bool NoAlias) { AttrBuilder ParamAttrs(Context); ParamAttrs.addAttribute(Attribute::NonNull); ParamAttrs.addAttribute(Attribute::NoUndef); if (NoAlias) ParamAttrs.addAttribute(Attribute::NoAlias); ParamAttrs.addAlignmentAttr(Alignment); ParamAttrs.addDereferenceableAttr(Size); Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); } static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context, unsigned ParamIndex) { AttrBuilder ParamAttrs(Context); ParamAttrs.addAttribute(Attribute::SwiftAsync); Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); } static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context, unsigned ParamIndex) { AttrBuilder ParamAttrs(Context); ParamAttrs.addAttribute(Attribute::SwiftSelf); Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); } /// Clone the body of the original function into a resume function of /// some sort. void coro::BaseCloner::create() { assert(NewF); // Replace all args with dummy instructions. If an argument is the old frame // pointer, the dummy will be replaced by the new frame pointer once it is // computed below. Uses of all other arguments should have already been // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine // frame. SmallVector DummyArgs; for (Argument &A : OrigF.args()) { DummyArgs.push_back(new FreezeInst(PoisonValue::get(A.getType()))); VMap[&A] = DummyArgs.back(); } SmallVector Returns; // Ignore attempts to change certain attributes of the function. // TODO: maybe there should be a way to suppress this during cloning? auto savedVisibility = NewF->getVisibility(); auto savedUnnamedAddr = NewF->getUnnamedAddr(); auto savedDLLStorageClass = NewF->getDLLStorageClass(); // NewF's linkage (which CloneFunctionInto does *not* change) might not // be compatible with the visibility of OrigF (which it *does* change), // so protect against that. auto savedLinkage = NewF->getLinkage(); NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); CloneFunctionAttributesInto(NewF, &OrigF, VMap, false); CloneFunctionMetadataInto(*NewF, OrigF, VMap, RF_None, nullptr, nullptr, &CommonDebugInfo); CloneFunctionBodyInto(*NewF, OrigF, VMap, RF_None, Returns, "", nullptr, nullptr, nullptr, &CommonDebugInfo); auto &Context = NewF->getContext(); if (DISubprogram *SP = NewF->getSubprogram()) { assert(SP != OrigF.getSubprogram() && SP->isDistinct()); updateScopeLine(ActiveSuspend, *SP); // Update the linkage name to reflect the modified symbol name. It // is necessary to update the linkage name in Swift, since the // mangling changes for resume functions. It might also be the // right thing to do in C++, but due to a limitation in LLVM's // AsmPrinter we can only do this if the function doesn't have an // abstract specification, since the DWARF backend expects the // abstract specification to contain the linkage name and asserts // that they are identical. 
if (SP->getUnit() && SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) { SP->replaceLinkageName(MDString::get(Context, NewF->getName())); if (auto *Decl = SP->getDeclaration()) { auto *NewDecl = DISubprogram::get( Decl->getContext(), Decl->getScope(), Decl->getName(), NewF->getName(), Decl->getFile(), Decl->getLine(), Decl->getType(), Decl->getScopeLine(), Decl->getContainingType(), Decl->getVirtualIndex(), Decl->getThisAdjustment(), Decl->getFlags(), Decl->getSPFlags(), Decl->getUnit(), Decl->getTemplateParams(), nullptr, Decl->getRetainedNodes(), Decl->getThrownTypes(), Decl->getAnnotations(), Decl->getTargetFuncName()); SP->replaceDeclaration(NewDecl); } } } NewF->setLinkage(savedLinkage); NewF->setVisibility(savedVisibility); NewF->setUnnamedAddr(savedUnnamedAddr); NewF->setDLLStorageClass(savedDLLStorageClass); // The function sanitizer metadata needs to match the signature of the // function it is being attached to. However this does not hold for split // functions here. Thus remove the metadata for split functions. if (Shape.ABI == coro::ABI::Switch && NewF->hasMetadata(LLVMContext::MD_func_sanitize)) NewF->eraseMetadata(LLVMContext::MD_func_sanitize); // Replace the attributes of the new function: auto OrigAttrs = NewF->getAttributes(); auto NewAttrs = AttributeList(); switch (Shape.ABI) { case coro::ABI::Switch: // Bootstrap attributes by copying function attributes from the // original function. This should include optimization settings and so on. NewAttrs = NewAttrs.addFnAttributes( Context, AttrBuilder(Context, OrigAttrs.getFnAttrs())); addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize, Shape.FrameAlign, /*NoAlias=*/false); break; case coro::ABI::Async: { auto *ActiveAsyncSuspend = cast(ActiveSuspend); if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo, Attribute::SwiftAsync)) { uint32_t ArgAttributeIndices = ActiveAsyncSuspend->getStorageArgumentIndex(); auto ContextArgIndex = ArgAttributeIndices & 0xff; addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex); // `swiftasync` must preceed `swiftself` so 0 is not a valid index for // `swiftself`. auto SwiftSelfIndex = ArgAttributeIndices >> 8; if (SwiftSelfIndex) addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex); } // Transfer the original function's attributes. auto FnAttrs = OrigF.getAttributes().getFnAttrs(); NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs)); break; } case coro::ABI::Retcon: case coro::ABI::RetconOnce: // If we have a continuation prototype, just use its attributes, // full-stop. NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); /// FIXME: Is it really good to add the NoAlias attribute? addFramePointerAttrs(NewAttrs, Context, 0, Shape.getRetconCoroId()->getStorageSize(), Shape.getRetconCoroId()->getStorageAlignment(), /*NoAlias=*/true); break; } switch (Shape.ABI) { // In these ABIs, the cloned functions always return 'void', and the // existing return sites are meaningless. Note that for unique // continuations, this includes the returns associated with suspends; // this is fine because we can't suspend twice. case coro::ABI::Switch: case coro::ABI::RetconOnce: // Remove old returns. for (ReturnInst *Return : Returns) changeToUnreachable(Return); break; // With multi-suspend continuations, we'll already have eliminated the // original returns and inserted returns before all the suspend points, // so we want to leave any returns in place. 
case coro::ABI::Retcon: break; // Async lowering will insert musttail call functions at all suspend points // followed by a return. // Don't change returns to unreachable because that will trip up the verifier. // These returns should be unreachable from the clone. case coro::ABI::Async: break; } NewF->setAttributes(NewAttrs); NewF->setCallingConv(Shape.getResumeFunctionCC()); // Set up the new entry block. replaceEntryBlock(); // Turn symmetric transfers into musttail calls. for (CallInst *ResumeCall : Shape.SymmetricTransfers) { ResumeCall = cast(VMap[ResumeCall]); if (TTI.supportsTailCallFor(ResumeCall)) { // FIXME: Could we support symmetric transfer effectively without // musttail? ResumeCall->setTailCallKind(CallInst::TCK_MustTail); } // Put a 'ret void' after the call, and split any remaining instructions to // an unreachable block. BasicBlock *BB = ResumeCall->getParent(); BB->splitBasicBlock(ResumeCall->getNextNode()); Builder.SetInsertPoint(BB->getTerminator()); Builder.CreateRetVoid(); BB->getTerminator()->eraseFromParent(); } Builder.SetInsertPoint(&NewF->getEntryBlock().front()); NewFramePtr = deriveNewFramePointer(); // Remap frame pointer. Value *OldFramePtr = VMap[Shape.FramePtr]; NewFramePtr->takeName(OldFramePtr); OldFramePtr->replaceAllUsesWith(NewFramePtr); // Remap vFrame pointer. auto *NewVFrame = Builder.CreateBitCast( NewFramePtr, PointerType::getUnqual(Builder.getContext()), "vFrame"); Value *OldVFrame = cast(VMap[Shape.CoroBegin]); if (OldVFrame != NewVFrame) OldVFrame->replaceAllUsesWith(NewVFrame); // All uses of the arguments should have been resolved by this point, // so we can safely remove the dummy values. for (Instruction *DummyArg : DummyArgs) { DummyArg->replaceAllUsesWith(PoisonValue::get(DummyArg->getType())); DummyArg->deleteValue(); } switch (Shape.ABI) { case coro::ABI::Switch: // Rewrite final suspend handling as it is not done via switch (allows to // remove final case from the switch, since it is undefined behavior to // resume the coroutine suspended at the final suspend point. if (Shape.SwitchLowering.HasFinalSuspend) handleFinalSuspend(); break; case coro::ABI::Async: case coro::ABI::Retcon: case coro::ABI::RetconOnce: // Replace uses of the active suspend with the corresponding // continuation-function arguments. assert(ActiveSuspend != nullptr && "no active suspend when lowering a continuation-style coroutine"); replaceRetconOrAsyncSuspendUses(); break; } // Handle suspends. replaceCoroSuspends(); // Handle swifterror. replaceSwiftErrorOps(); // Remove coro.end intrinsics. replaceCoroEnds(); // Salvage debug info that points into the coroutine frame. salvageDebugInfo(); } void coro::SwitchCloner::create() { // Create a new function matching the original type NewF = createCloneDeclaration(OrigF, Shape, Suffix, OrigF.getParent()->end(), ActiveSuspend); // Clone the function coro::BaseCloner::create(); // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, // to suppress deallocation code. 
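  // That is, in the .cleanup clone the result of llvm.coro.free is folded to
  // null so the conditional deallocation path goes away, while the .resume
  // and .destroy clones keep it (coro.free yields the frame pointer there)
  // and still free the frame.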
coro::replaceCoroFree(cast(VMap[Shape.CoroBegin->getId()]), /*Elide=*/FKind == coro::CloneKind::SwitchCleanup); } static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { assert(Shape.ABI == coro::ABI::Async); auto *FuncPtrStruct = cast( Shape.AsyncLowering.AsyncFuncPointer->getInitializer()); auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0); auto *OrigContextSize = FuncPtrStruct->getOperand(1); auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(), Shape.AsyncLowering.ContextSize); auto *NewFuncPtrStruct = ConstantStruct::get( FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize); Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); } static TypeSize getFrameSizeForShape(coro::Shape &Shape) { // In the same function all coro.sizes should have the same result type. auto *SizeIntrin = Shape.CoroSizes.back(); Module *M = SizeIntrin->getModule(); const DataLayout &DL = M->getDataLayout(); return DL.getTypeAllocSize(Shape.FrameTy); } static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); for (CoroAlignInst *CA : Shape.CoroAligns) { CA->replaceAllUsesWith( ConstantInt::get(CA->getType(), Shape.FrameAlign.value())); CA->eraseFromParent(); } if (Shape.CoroSizes.empty()) return; // In the same function all coro.sizes should have the same result type. auto *SizeIntrin = Shape.CoroSizes.back(); auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape)); for (CoroSizeInst *CS : Shape.CoroSizes) { CS->replaceAllUsesWith(SizeConstant); CS->eraseFromParent(); } } static void postSplitCleanup(Function &F) { removeUnreachableBlocks(F); #ifndef NDEBUG // For now, we do a mandatory verification step because we don't // entirely trust this pass. Note that we don't want to add a verifier // pass to FPM below because it will also verify all the global data. if (verifyFunction(F, &errs())) report_fatal_error("Broken function"); #endif } // Coroutine has no suspend points. Remove heap allocation for the coroutine // frame if possible. static void handleNoSuspendCoroutine(coro::Shape &Shape) { auto *CoroBegin = Shape.CoroBegin; switch (Shape.ABI) { case coro::ABI::Switch: { auto SwitchId = Shape.getSwitchCoroId(); auto *AllocInst = SwitchId->getCoroAlloc(); coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); if (AllocInst) { IRBuilder<> Builder(AllocInst); auto *Frame = Builder.CreateAlloca(Shape.FrameTy); Frame->setAlignment(Shape.FrameAlign); AllocInst->replaceAllUsesWith(Builder.getFalse()); AllocInst->eraseFromParent(); CoroBegin->replaceAllUsesWith(Frame); } else { CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); } break; } case coro::ABI::Async: case coro::ABI::Retcon: case coro::ABI::RetconOnce: CoroBegin->replaceAllUsesWith(PoisonValue::get(CoroBegin->getType())); break; } CoroBegin->eraseFromParent(); Shape.CoroBegin = nullptr; } // SimplifySuspendPoint needs to check that there is no calls between // coro_save and coro_suspend, since any of the calls may potentially resume // the coroutine and if that is the case we cannot eliminate the suspend point. static bool hasCallsInBlockBetween(iterator_range R) { for (Instruction &I : R) { // Assume that no intrinsic can resume the coroutine. 
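    // For example, `call void @llvm.dbg.value(...)` between coro.save and
    // coro.suspend does not block the simplification below, but a plain
    // `call void @foo()` does, since foo() could resume the coroutine.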
    if (isa<IntrinsicInst>(I))
      continue;

    if (isa<CallBase>(I))
      return true;
  }
  return false;
}

static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
  SmallPtrSet<BasicBlock *, 8> Set;
  SmallVector<BasicBlock *, 8> Worklist;

  Set.insert(SaveBB);
  Worklist.push_back(ResDesBB);

  // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
  // returns a token consumed by suspend instruction, all blocks in between
  // will have to eventually hit SaveBB when going backwards from ResDesBB.
  while (!Worklist.empty()) {
    auto *BB = Worklist.pop_back_val();
    Set.insert(BB);
    for (auto *Pred : predecessors(BB))
      if (!Set.contains(Pred))
        Worklist.push_back(Pred);
  }

  // SaveBB and ResDesBB are checked separately in hasCallsBetween.
  Set.erase(SaveBB);
  Set.erase(ResDesBB);

  for (auto *BB : Set)
    if (hasCallsInBlockBetween({BB->getFirstNonPHIIt(), BB->end()}))
      return true;

  return false;
}

static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
  auto *SaveBB = Save->getParent();
  auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
  BasicBlock::iterator SaveIt = Save->getIterator();
  BasicBlock::iterator ResumeOrDestroyIt = ResumeOrDestroy->getIterator();

  if (SaveBB == ResumeOrDestroyBB)
    return hasCallsInBlockBetween({std::next(SaveIt), ResumeOrDestroyIt});

  // Any calls from Save to the end of the block?
  if (hasCallsInBlockBetween({std::next(SaveIt), SaveBB->end()}))
    return true;

  // Any calls from the beginning of the block up to ResumeOrDestroy?
  if (hasCallsInBlockBetween(
          {ResumeOrDestroyBB->getFirstNonPHIIt(), ResumeOrDestroyIt}))
    return true;

  // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
  if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
    return true;

  return false;
}

// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
// suspend point and replace it with normal control flow.
static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
                                 CoroBeginInst *CoroBegin) {
  Instruction *Prev = Suspend->getPrevNode();
  if (!Prev) {
    auto *Pred = Suspend->getParent()->getSinglePredecessor();
    if (!Pred)
      return false;
    Prev = Pred->getTerminator();
  }

  CallBase *CB = dyn_cast<CallBase>(Prev);
  if (!CB)
    return false;

  auto *Callee = CB->getCalledOperand()->stripPointerCasts();

  // See if the callsite is for resumption or destruction of the coroutine.
  auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
  if (!SubFn)
    return false;

  // Does not refer to the current coroutine, we cannot do anything with it.
  if (SubFn->getFrame() != CoroBegin)
    return false;

  // See if the transformation is safe. Specifically, see if there are any
  // calls in between Save and CallInstr. They can potentially resume the
  // coroutine, rendering this optimization unsafe.
  auto *Save = Suspend->getCoroSave();
  if (hasCallsBetween(Save, CB))
    return false;

  // Replace llvm.coro.suspend with the value that results in resumption over
  // the resume or cleanup path.
  Suspend->replaceAllUsesWith(SubFn->getRawIndex());
  Suspend->eraseFromParent();
  Save->eraseFromParent();

  // No longer need a call to coro.resume or coro.destroy.
  if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
    BranchInst::Create(Invoke->getNormalDest(), Invoke->getIterator());
  }

  // Grab the CalledValue from CB before erasing the CallInstr.
  auto *CalledValue = CB->getCalledOperand();
  CB->eraseFromParent();

  // If no more users remove it. Usually it is a bitcast of SubFn.
  if (CalledValue != SubFn && CalledValue->user_empty())
    if (auto *I = dyn_cast<Instruction>(CalledValue))
      I->eraseFromParent();

  // Now we are good to remove SubFn.
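  // At this point a pattern that was originally, roughly:
  //   call void @llvm.coro.resume(ptr %hdl)   ; or @llvm.coro.destroy
  //   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
  // has been folded into straight-line control flow to the matching resume
  // (index 0) or cleanup (index 1) label; only the now-unused
  // llvm.coro.subfn.addr call may remain and is cleaned up below.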
  if (SubFn->user_empty())
    SubFn->eraseFromParent();

  return true;
}

// Remove suspend points that are simplified.
static void simplifySuspendPoints(coro::Shape &Shape) {
  // Currently, the only simplification we do is switch-lowering-specific.
  if (Shape.ABI != coro::ABI::Switch)
    return;

  auto &S = Shape.CoroSuspends;
  size_t I = 0, N = S.size();
  if (N == 0)
    return;

  size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
  while (true) {
    auto SI = cast<CoroSuspendInst>(S[I]);
    // Leave final.suspend to handleFinalSuspend since it is undefined behavior
    // to resume a coroutine suspended at the final suspend point.
    if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
      if (--N == I)
        break;

      std::swap(S[I], S[N]);

      if (cast<CoroSuspendInst>(S[I])->isFinal()) {
        assert(Shape.SwitchLowering.HasFinalSuspend);
        ChangedFinalIndex = I;
      }

      continue;
    }
    if (++I == N)
      break;
  }
  S.resize(N);

  // Maintain final.suspend in case the final suspend was swapped, since we
  // require the final suspend to be the last element of CoroSuspends.
  if (ChangedFinalIndex < N) {
    assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
    std::swap(S[ChangedFinalIndex], S.back());
  }
}

namespace {

struct SwitchCoroutineSplitter {
  static void split(Function &F, coro::Shape &Shape,
                    SmallVectorImpl<Function *> &Clones,
                    TargetTransformInfo &TTI) {
    assert(Shape.ABI == coro::ABI::Switch);

    MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};

    // Create a resume clone by cloning the body of the original function,
    // setting a new entry block and replacing coro.suspend with an
    // appropriate value to force resumption or cleanup at every suspend
    // point.
    createResumeEntryBlock(F, Shape);
    auto *ResumeClone = coro::SwitchCloner::createClone(
        F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI,
        CommonDebugInfo);
    auto *DestroyClone = coro::SwitchCloner::createClone(
        F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI,
        CommonDebugInfo);
    auto *CleanupClone = coro::SwitchCloner::createClone(
        F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI,
        CommonDebugInfo);

    postSplitCleanup(*ResumeClone);
    postSplitCleanup(*DestroyClone);
    postSplitCleanup(*CleanupClone);

    // Store addresses of the resume/destroy/cleanup functions in the
    // coroutine frame.
    updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);

    assert(Clones.empty());
    Clones.push_back(ResumeClone);
    Clones.push_back(DestroyClone);
    Clones.push_back(CleanupClone);

    // Create a constant array referring to the resume/destroy/cleanup
    // functions, pointed to by the last argument of @llvm.coro.info, so that
    // the CoroElide pass can determine the correct function to call.
    setCoroInfo(F, Shape, Clones);
  }

  // Create a variant of the ramp function that does not perform heap
  // allocation for a switch ABI coroutine.
  //
  // The newly split `.noalloc` ramp function has the following differences:
  //  - Takes one additional frame pointer parameter in lieu of dynamic
  //    allocation.
  //  - Suppresses allocations by replacing coro.alloc and coro.free.
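  // For instance (hypothetical signature), a ramp function
  //   declare ptr @f(i32 %n)
  // gets a variant roughly like
  //   define internal ptr @f.noalloc(i32 %n, ptr %frame)
  // where %frame is caller-provided storage of at least Shape.FrameSize bytes.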
static Function *createNoAllocVariant(Function &F, coro::Shape &Shape, SmallVectorImpl &Clones) { assert(Shape.ABI == coro::ABI::Switch); auto *OrigFnTy = F.getFunctionType(); auto OldParams = OrigFnTy->params(); SmallVector NewParams; NewParams.reserve(OldParams.size() + 1); NewParams.append(OldParams.begin(), OldParams.end()); NewParams.push_back(PointerType::getUnqual(Shape.FrameTy->getContext())); auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams, OrigFnTy->isVarArg()); Function *NoAllocF = Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc"); ValueToValueMapTy VMap; unsigned int Idx = 0; for (const auto &I : F.args()) { VMap[&I] = NoAllocF->getArg(Idx++); } // We just appended the frame pointer as the last argument of the new // function. auto FrameIdx = NoAllocF->arg_size() - 1; SmallVector Returns; CloneFunctionInto(NoAllocF, &F, VMap, CloneFunctionChangeType::LocalChangesOnly, Returns); if (Shape.CoroBegin) { auto *NewCoroBegin = cast_if_present(VMap[Shape.CoroBegin]); auto *NewCoroId = cast(NewCoroBegin->getId()); coro::replaceCoroFree(NewCoroId, /*Elide=*/true); coro::suppressCoroAllocs(NewCoroId); NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(FrameIdx)); NewCoroBegin->eraseFromParent(); } Module *M = F.getParent(); M->getFunctionList().insert(M->end(), NoAllocF); removeUnreachableBlocks(*NoAllocF); auto NewAttrs = NoAllocF->getAttributes(); // When we elide allocation, we read these attributes to determine the // frame size and alignment. addFramePointerAttrs(NewAttrs, NoAllocF->getContext(), FrameIdx, Shape.FrameSize, Shape.FrameAlign, /*NoAlias=*/false); NoAllocF->setAttributes(NewAttrs); Clones.push_back(NoAllocF); // Reset the original function's coro info, make the new noalloc variant // connected to the original ramp function. setCoroInfo(F, Shape, Clones); // After copying, set the linkage to internal linkage. Original function // may have different linkage, but optimization dependent on this function // generally relies on LTO. NoAllocF->setLinkage(llvm::GlobalValue::InternalLinkage); return NoAllocF; } private: // Create an entry block for a resume function with a switch that will jump to // suspend points. static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { LLVMContext &C = F.getContext(); // resume.entry: // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 // 0, i32 2 % index = load i32, i32* %index.addr switch i32 %index, label // %unreachable [ // i32 0, label %resume.0 // i32 1, label %resume.1 // ... // ] auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F); auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F); IRBuilder<> Builder(NewEntry); auto *FramePtr = Shape.FramePtr; auto *FrameTy = Shape.FrameTy; auto *GepIndex = Builder.CreateStructGEP( FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); auto *Switch = Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); Shape.SwitchLowering.ResumeSwitch = Switch; size_t SuspendIndex = 0; for (auto *AnyS : Shape.CoroSuspends) { auto *S = cast(AnyS); ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); // Replace CoroSave with a store to Index: // %index.addr = getelementptr %f.frame... (index field number) // store i32 %IndexVal, i32* %index.addr1 auto *Save = S->getCoroSave(); Builder.SetInsertPoint(Save); if (S->isFinal()) { // The coroutine should be marked done if it reaches the final suspend // point. 
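        // (Done is encoded by storing a null resume-function pointer rather
        // than an index; see markCoroutineAsDone above.)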
markCoroutineAsDone(Builder, Shape, FramePtr); } else { auto *GepIndex = Builder.CreateStructGEP( FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); Builder.CreateStore(IndexVal, GepIndex); } Save->replaceAllUsesWith(ConstantTokenNone::get(C)); Save->eraseFromParent(); // Split block before and after coro.suspend and add a jump from an entry // switch: // // whateverBB: // whatever // %0 = call i8 @llvm.coro.suspend(token none, i1 false) // switch i8 %0, label %suspend[i8 0, label %resume // i8 1, label %cleanup] // becomes: // // whateverBB: // whatever // br label %resume.0.landing // // resume.0: ; <--- jump from the switch in the resume.entry // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false) // br label %resume.0.landing // // resume.0.landing: // %1 = phi i8[-1, %whateverBB], [%0, %resume.0] // switch i8 % 1, label %suspend [i8 0, label %resume // i8 1, label %cleanup] auto *SuspendBB = S->getParent(); auto *ResumeBB = SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex)); auto *LandingBB = ResumeBB->splitBasicBlock( S->getNextNode(), ResumeBB->getName() + Twine(".landing")); Switch->addCase(IndexVal, ResumeBB); cast(SuspendBB->getTerminator())->setSuccessor(0, LandingBB); auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, ""); PN->insertBefore(LandingBB->begin()); S->replaceAllUsesWith(PN); PN->addIncoming(Builder.getInt8(-1), SuspendBB); PN->addIncoming(S, ResumeBB); ++SuspendIndex; } Builder.SetInsertPoint(UnreachBB); Builder.CreateUnreachable(); Shape.SwitchLowering.ResumeEntryBlock = NewEntry; } // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, Function *DestroyFn, Function *CleanupFn) { IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr()); auto *ResumeAddr = Builder.CreateStructGEP( Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume, "resume.addr"); Builder.CreateStore(ResumeFn, ResumeAddr); Value *DestroyOrCleanupFn = DestroyFn; CoroIdInst *CoroId = Shape.getSwitchCoroId(); if (CoroAllocInst *CA = CoroId->getCoroAlloc()) { // If there is a CoroAlloc and it returns false (meaning we elide the // allocation, use CleanupFn instead of DestroyFn). DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn); } auto *DestroyAddr = Builder.CreateStructGEP( Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy, "destroy.addr"); Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr); } // Create a global constant array containing pointers to functions provided // and set Info parameter of CoroBegin to point at this constant. Example: // // @f.resumers = internal constant [2 x void(%f.frame*)*] // [void(%f.frame*)* @f.resume, void(%f.frame*)* // @f.destroy] // define void @f() { // ... // call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to // i8*)) // // Assumes that all the functions have the same signature. static void setCoroInfo(Function &F, coro::Shape &Shape, ArrayRef Fns) { // This only works under the switch-lowering ABI because coro elision // only works on the switch-lowering ABI. 
SmallVector Args(Fns); assert(!Args.empty()); Function *Part = *Fns.begin(); Module *M = Part->getParent(); auto *ArrTy = ArrayType::get(Part->getType(), Args.size()); auto *ConstVal = ConstantArray::get(ArrTy, Args); auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true, GlobalVariable::PrivateLinkage, ConstVal, F.getName() + Twine(".resumers")); // Update coro.begin instruction to refer to this constant. LLVMContext &C = F.getContext(); auto *BC = ConstantExpr::getPointerCast(GV, PointerType::getUnqual(C)); Shape.getSwitchCoroId()->setInfo(BC); } }; } // namespace static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend, Value *Continuation) { auto *ResumeIntrinsic = Suspend->getResumeFunction(); auto &Context = Suspend->getParent()->getParent()->getContext(); auto *Int8PtrTy = PointerType::getUnqual(Context); IRBuilder<> Builder(ResumeIntrinsic); auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy); ResumeIntrinsic->replaceAllUsesWith(Val); ResumeIntrinsic->eraseFromParent(); Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg, PoisonValue::get(Int8PtrTy)); } /// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs. static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy, ArrayRef FnArgs, SmallVectorImpl &CallArgs) { size_t ArgIdx = 0; for (auto *paramTy : FnTy->params()) { assert(ArgIdx < FnArgs.size()); if (paramTy != FnArgs[ArgIdx]->getType()) CallArgs.push_back( Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy)); else CallArgs.push_back(FnArgs[ArgIdx]); ++ArgIdx; } } CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn, TargetTransformInfo &TTI, ArrayRef Arguments, IRBuilder<> &Builder) { auto *FnTy = MustTailCallFn->getFunctionType(); // Coerce the arguments, llvm optimizations seem to ignore the types in // vaarg functions and throws away casts in optimized mode. SmallVector CallArgs; coerceArguments(Builder, FnTy, Arguments, CallArgs); auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs); // Skip targets which don't support tail call. if (TTI.supportsTailCallFor(TailCall)) { TailCall->setTailCallKind(CallInst::TCK_MustTail); } TailCall->setDebugLoc(Loc); TailCall->setCallingConv(MustTailCallFn->getCallingConv()); return TailCall; } void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape, SmallVectorImpl &Clones, TargetTransformInfo &TTI) { assert(Shape.ABI == coro::ABI::Async); assert(Clones.empty()); // Reset various things that the optimizer might have decided it // "knows" about the coroutine function due to not seeing a return. F.removeFnAttr(Attribute::NoReturn); F.removeRetAttr(Attribute::NoAlias); F.removeRetAttr(Attribute::NonNull); auto &Context = F.getContext(); auto *Int8PtrTy = PointerType::getUnqual(Context); auto *Id = Shape.getAsyncCoroId(); IRBuilder<> Builder(Id); auto *FramePtr = Id->getStorage(); FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy); FramePtr = Builder.CreateConstInBoundsGEP1_32( Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr"); // Map all uses of llvm.coro.begin to the allocated frame pointer. { // Make sure we don't invalidate Shape.FramePtr. TrackingVH Handle(Shape.FramePtr); Shape.CoroBegin->replaceAllUsesWith(FramePtr); Shape.FramePtr = Handle.getValPtr(); } // Create all the functions in order after the main function. auto NextF = std::next(F.getIterator()); // Create a continuation function for each of the suspend points. 
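  // For a coroutine @f the clones end up named "f.resume.0", "f.resume.1",
  // ... or, when the Swift async context projection functions are recognized
  // below, "fTQ0_", "fTY1_", and so on.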
  Clones.reserve(Shape.CoroSuspends.size());
  for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
    auto *Suspend = cast<CoroSuspendAsyncInst>(CS);

    // Create the clone declaration.
    auto ResumeNameSuffix = ".resume.";
    auto ProjectionFunctionName =
        Suspend->getAsyncContextProjectionFunction()->getName();
    bool UseSwiftMangling = false;
    if (ProjectionFunctionName == "__swift_async_resume_project_context") {
      ResumeNameSuffix = "TQ";
      UseSwiftMangling = true;
    } else if (ProjectionFunctionName == "__swift_async_resume_get_context") {
      ResumeNameSuffix = "TY";
      UseSwiftMangling = true;
    }
    auto *Continuation = createCloneDeclaration(
        F, Shape,
        UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
                         : ResumeNameSuffix + Twine(Idx),
        NextF, Suspend);
    Clones.push_back(Continuation);

    // Insert a branch to a new return block immediately before the suspend
    // point.
    auto *SuspendBB = Suspend->getParent();
    auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Place it before the first suspend.
    auto *ReturnBB =
        BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
    Branch->setSuccessor(0, ReturnBB);

    IRBuilder<> Builder(ReturnBB);

    // Insert the call to the tail call function and inline it.
    auto *Fn = Suspend->getMustTailCallFunction();
    SmallVector<Value *> Args(Suspend->args());
    auto FnArgs = ArrayRef(Args).drop_front(
        CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
    auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(), Fn, TTI,
                                              FnArgs, Builder);
    Builder.CreateRetVoid();
    InlineFunctionInfo FnInfo;
    (void)InlineFunction(*TailCall, FnInfo);

    // Replace the llvm.coro.async.resume intrinsic call.
    replaceAsyncResumeFunction(Suspend, Continuation);
  }

  assert(Clones.size() == Shape.CoroSuspends.size());

  MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};

  for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
    auto *Suspend = CS;
    auto *Clone = Clones[Idx];

    coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
                                  Suspend, TTI, CommonDebugInfo);
  }
}

void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
                                        SmallVectorImpl<Function *> &Clones,
                                        TargetTransformInfo &TTI) {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
  assert(Clones.empty());

  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeRetAttr(Attribute::NoAlias);
  F.removeRetAttr(Attribute::NonNull);

  // Allocate the frame.
  auto *Id = Shape.getRetconCoroId();
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    // Determine the size of the frame.
    const DataLayout &DL = F.getDataLayout();
    auto Size = DL.getTypeAllocSize(Shape.FrameTy);

    // Allocate. We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    // FIXME: pass the required alignment
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
        Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    Builder.CreateStore(RawFramePtr, Id->getStorage());
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Value> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block.
  BasicBlock *ReturnBB = nullptr;
  PHINode *ContinuationPhi = nullptr;
  SmallVector<PHINode *> ReturnPHIs;

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
    auto Suspend = cast<CoroSuspendRetconInst>(CS);

    // Create the clone declaration.
    auto Continuation = createCloneDeclaration(
        F, Shape, ".resume." + Twine(Idx), NextF, nullptr);
    Clones.push_back(Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Create the unified return block.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB =
          BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // First, the continuation.
      ContinuationPhi =
          Builder.CreatePHI(Continuation->getType(), Shape.CoroSuspends.size());

      // Create PHIs for all other return values.
      assert(ReturnPHIs.empty());

      // Next, all the directly-yielded values.
      for (auto *ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(
            Builder.CreatePHI(ResultTy, Shape.CoroSuspends.size()));

      // Build the return value.
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
          (ReturnPHIs.empty() ? RetTy : RetTy->getStructElementType(0));
      auto *CastedContinuation =
          Builder.CreateBitCast(ContinuationPhi, CastedContinuationTy);

      Value *RetV = CastedContinuation;
      if (!ReturnPHIs.empty()) {
        auto ValueIdx = 0;
        RetV = PoisonValue::get(RetTy);
        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, ValueIdx++);

        for (auto Phi : ReturnPHIs)
          RetV = Builder.CreateInsertValue(RetV, Phi, ValueIdx++);
      }

      Builder.CreateRet(RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(0, ReturnBB);
    assert(ContinuationPhi);
    ContinuationPhi->addIncoming(Continuation, SuspendBB);
    for (auto [Phi, VUse] :
         llvm::zip_equal(ReturnPHIs, Suspend->value_operands()))
      Phi->addIncoming(VUse, SuspendBB);
  }

  assert(Clones.size() == Shape.CoroSuspends.size());

  MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};

  for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
    auto Suspend = CS;
    auto Clone = Clones[Idx];

    coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
                                  Suspend, TTI, CommonDebugInfo);
  }
}

namespace {
class PrettyStackTraceFunction : public PrettyStackTraceEntry {
  Function &F;

public:
  PrettyStackTraceFunction(Function &F) : F(F) {}
  void print(raw_ostream &OS) const override {
    OS << "While splitting coroutine ";
    F.printAsOperand(OS, /*print type*/ false, F.getParent());
    OS << "\n";
  }
};
} // namespace

/// Remove calls to llvm.coro.end in the original function.
static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
  if (Shape.ABI != coro::ABI::Switch) {
    for (auto *End : Shape.CoroEnds) {
      replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
    }
  } else {
    for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
      auto &Context = End->getContext();
      End->replaceAllUsesWith(ConstantInt::getFalse(Context));
      End->eraseFromParent();
    }
  }
}

static bool hasSafeElideCaller(Function &F) {
  for (auto *U : F.users()) {
    if (auto *CB = dyn_cast<CallBase>(U)) {
      auto *Caller = CB->getFunction();
      if (Caller && Caller->isPresplitCoroutine() &&
          CB->hasFnAttr(llvm::Attribute::CoroElideSafe))
        return true;
    }
  }
  return false;
}

void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape,
                                     SmallVectorImpl<Function *> &Clones,
                                     TargetTransformInfo &TTI) {
  SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
}

static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
                             coro::BaseABI &ABI, TargetTransformInfo &TTI,
                             bool OptimizeFrame) {
  PrettyStackTraceFunction prettyStackTrace(F);

  auto &Shape = ABI.Shape;
  assert(Shape.CoroBegin);

  lowerAwaitSuspends(F, Shape);

  simplifySuspendPoints(Shape);

  normalizeCoroutine(F, Shape, TTI);
  ABI.buildCoroutineFrame(OptimizeFrame);
  replaceFrameSizeAndAlignment(Shape);

  bool isNoSuspendCoroutine = Shape.CoroSuspends.empty();

  bool shouldCreateNoAllocVariant =
      !isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch &&
      hasSafeElideCaller(F) && !F.hasFnAttribute(llvm::Attribute::NoInline);

  // If there are no suspend points, no split is required; just remove
  // the allocation and deallocation blocks, they are not needed.
  if (isNoSuspendCoroutine) {
    handleNoSuspendCoroutine(Shape);
  } else {
    ABI.splitCoroutine(F, Shape, Clones, TTI);
  }

  // Replace all the swifterror operations in the original function.
  // This invalidates SwiftErrorOps in the Shape.
  replaceSwiftErrorOps(F, Shape, nullptr);

  // Salvage debug intrinsics that point into the coroutine frame in the
  // original function. The Cloner has already salvaged debug info in the new
  // coroutine funclets.
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
  auto [DbgInsts, DbgVariableRecords] = collectDbgVariableIntrinsics(F);
  for (auto *DDI : DbgInsts)
    coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
  for (DbgVariableRecord *DVR : DbgVariableRecords)
    coro::salvageDebugInfo(ArgToAllocaMap, *DVR, false /*UseEntryValue*/);

  removeCoroEndsFromRampFunction(Shape);

  if (shouldCreateNoAllocVariant)
    SwitchCoroutineSplitter::createNoAllocVariant(F, Shape, Clones);
}

static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit(
    LazyCallGraph::Node &N, const coro::Shape &Shape,
    const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
    LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
    FunctionAnalysisManager &FAM) {
  auto *CurrentSCC = &C;
  if (!Clones.empty()) {
    switch (Shape.ABI) {
    case coro::ABI::Switch:
      // Each clone in the Switch lowering is independent of the other clones.
      // Let the LazyCallGraph know about each one separately.
      for (Function *Clone : Clones)
        CG.addSplitFunction(N.getFunction(), *Clone);
      break;
    case coro::ABI::Async:
    case coro::ABI::Retcon:
    case coro::ABI::RetconOnce:
      // Each clone in the Async/Retcon lowering references the other clones.
      // Let the LazyCallGraph know about all of them at once.
      if (!Clones.empty())
        CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
      break;
    }

    // Let the CGSCC infra handle the changes to the original function.
    CurrentSCC = &updateCGAndAnalysisManagerForCGSCCPass(CG, *CurrentSCC, N,
                                                         AM, UR, FAM);
  }

  // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
  // to the split functions.
  postSplitCleanup(N.getFunction());

  CurrentSCC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentSCC, N,
                                                          AM, UR, FAM);
  return *CurrentSCC;
}

/// Replace a call to llvm.coro.prepare.retcon.
static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
                           LazyCallGraph::SCC &C) {
  auto CastFn = Prepare->getArgOperand(0); // as an i8*
  auto Fn = CastFn->stripPointerCasts();   // as its original type

  // Attempt to peephole this pattern:
  //   %0 = bitcast [[TYPE]] @some_function to i8*
  //   %1 = call @llvm.coro.prepare.retcon(i8* %0)
  //   %2 = bitcast %1 to [[TYPE]]
  // ==>
  //   %2 = @some_function
  for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
    // Look for bitcasts back to the original function type.
    auto *Cast = dyn_cast<BitCastInst>(U.getUser());
    if (!Cast || Cast->getType() != Fn->getType())
      continue;

    // Replace and remove the cast.
    Cast->replaceAllUsesWith(Fn);
    Cast->eraseFromParent();
  }

  // Replace any remaining uses with the function as an i8*.
  // This can never directly be a callee, so we don't need to update CG.
  Prepare->replaceAllUsesWith(CastFn);
  Prepare->eraseFromParent();

  // Kill dead bitcasts.
  while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
    if (!Cast->use_empty())
      break;
    CastFn = Cast->getOperand(0);
    Cast->eraseFromParent();
  }
}

static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
                               LazyCallGraph::SCC &C) {
  bool Changed = false;
  for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
    // Intrinsics can only be used in calls.
    auto *Prepare = cast<CallInst>(P.getUser());
    replacePrepare(Prepare, CG, C);
    Changed = true;
  }

  return Changed;
}

static void addPrepareFunction(const Module &M,
                               SmallVectorImpl<Function *> &Fns,
                               StringRef Name) {
  auto *PrepareFn = M.getFunction(Name);
  if (PrepareFn && !PrepareFn->use_empty())
    Fns.push_back(PrepareFn);
}

static std::unique_ptr<coro::BaseABI>
CreateNewABI(Function &F, coro::Shape &S,
             std::function<bool(Instruction &)> IsMatCallback,
             const SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs) {
  if (S.CoroBegin->hasCustomABI()) {
    unsigned CustomABI = S.CoroBegin->getCustomABI();
    if (CustomABI >= GenCustomABIs.size())
      llvm_unreachable("Custom ABI not found among those specified");
    return GenCustomABIs[CustomABI](F, S);
  }

  switch (S.ABI) {
  case coro::ABI::Switch:
    return std::make_unique<coro::SwitchABI>(F, S, IsMatCallback);
  case coro::ABI::Async:
    return std::make_unique<coro::AsyncABI>(F, S, IsMatCallback);
  case coro::ABI::Retcon:
    return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback);
  case coro::ABI::RetconOnce:
    return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback);
  }
  llvm_unreachable("Unknown ABI");
}

CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
    : CreateAndInitABI([](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, coro::isTriviallyMaterializable, {});
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}

CoroSplitPass::CoroSplitPass(
    SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame)
    : CreateAndInitABI([=](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, coro::isTriviallyMaterializable, GenCustomABIs);
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}

// For back compatibility, this constructor takes a materializable callback and
// creates a generator for an ABI with a modified materializable callback.
CoroSplitPass::CoroSplitPass(std::function<bool(Instruction &)> IsMatCallback,
                             bool OptimizeFrame)
    : CreateAndInitABI([=](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, IsMatCallback, {});
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}

// For back compatibility, this constructor takes a materializable callback and
// creates a generator for an ABI with a modified materializable callback.
CoroSplitPass::CoroSplitPass(
    std::function<bool(Instruction &)> IsMatCallback,
    SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame)
    : CreateAndInitABI([=](Function &F, coro::Shape &S) {
        std::unique_ptr<coro::BaseABI> ABI =
            CreateNewABI(F, S, IsMatCallback, GenCustomABIs);
        ABI->init();
        return ABI;
      }),
      OptimizeFrame(OptimizeFrame) {}

PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
                                     CGSCCAnalysisManager &AM,
                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
  // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
  //     non-zero number of nodes, so we assume that here and grab the first
  //     node's function's module.
  Module &M = *C.begin()->getFunction().getParent();
  auto &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // Check for uses of llvm.coro.prepare.retcon/async.
  SmallVector<Function *> PrepareFns;
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
  addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");

  // Find coroutines for processing.
  SmallVector<LazyCallGraph::Node *> Coroutines;
  for (LazyCallGraph::Node &N : C)
    if (N.getFunction().isPresplitCoroutine())
      Coroutines.push_back(&N);

  if (Coroutines.empty() && PrepareFns.empty())
    return PreservedAnalyses::all();

  auto *CurrentSCC = &C;
  // Split all the coroutines.
  for (LazyCallGraph::Node *N : Coroutines) {
    Function &F = N->getFunction();
    LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
                      << "'\n");

    // The suspend-crossing algorithm in buildCoroutineFrame gets tripped up
    // by unreachable blocks, so remove them as a first pass. Remove the
    // unreachable blocks before collecting intrinsics into Shape.
    removeUnreachableBlocks(F);

    coro::Shape Shape(F);
    if (!Shape.CoroBegin)
      continue;

    F.setSplittedCoroutine();

    std::unique_ptr<coro::BaseABI> ABI = CreateAndInitABI(F, Shape);

    SmallVector<Function *> Clones;
    auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
    doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame);
    CurrentSCC = &updateCallGraphAfterCoroutineSplit(
        *N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);

    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
    ORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F)
             << "Split '" << ore::NV("function", F.getName())
             << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize)
             << ", align=" << ore::NV("align", Shape.FrameAlign.value())
             << ")";
    });

    if (!Shape.CoroSuspends.empty()) {
      // Run the CGSCC pipeline on the original and newly split functions.
      UR.CWorklist.insert(CurrentSCC);
      for (Function *Clone : Clones)
        UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
    }
  }

  for (auto *PrepareFn : PrepareFns) {
    replaceAllPrepares(PrepareFn, CG, *CurrentSCC);
  }

  return PreservedAnalyses::none();
}
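
// Usage note (illustrative sketch, not part of the pass itself): CoroSplitPass
// is a CGSCC pass, so out-of-tree users normally schedule it through a
// new-pass-manager pipeline rather than calling run() directly. The harness
// below is a hypothetical example under that assumption; all API names are the
// standard PassBuilder ones, and `M` stands for some llvm::Module that already
// contains presplit coroutines.
//
//   #include "llvm/Passes/PassBuilder.h"
//   #include "llvm/Transforms/Coroutines/CoroSplit.h"
//
//   llvm::LoopAnalysisManager LAM;
//   llvm::FunctionAnalysisManager FAM;
//   llvm::CGSCCAnalysisManager CGAM;
//   llvm::ModuleAnalysisManager MAM;
//   llvm::PassBuilder PB;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//
//   llvm::ModulePassManager MPM;
//   // Wrap the CGSCC pass so it can run at module scope.
//   MPM.addPass(llvm::createModuleToPostOrderCGSCCPassAdaptor(
//       llvm::CoroSplitPass(/*OptimizeFrame=*/true)));
//   MPM.run(M, MAM);
//
// The same lowering is reachable from opt via -passes='cgscc(coro-split)'.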