//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass builds the coroutine frame and outlines resume and destroy parts
// of the coroutine into separate functions.
//
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the coroutine
// as a single function for as long as possible. Shortly before the coroutine is
// eligible to be inlined into its callers, we split up the coroutine into parts
// corresponding to the initial, resume and destroy invocations of the
// coroutine, add them to the current SCC and restart the IPO pipeline to
// optimize the coroutine subfunctions we extracted before proceeding to the
// caller of the coroutine.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "CoroCloner.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "coro-split"

namespace {
/// Collect (a known) subset of global debug info metadata potentially used by
/// the function \p F.
///
/// This metadata set can be used to avoid cloning debug info not owned by \p F
/// and is shared among all potential clones of \p F.
MetadataSetTy collectCommonDebugInfo(Function &F) {
  TimeTraceScope FunctionScope("CollectCommonDebugInfo");

  DebugInfoFinder DIFinder;
  DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning(
      F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);

  return FindDebugInfoToIdentityMap(CloneFunctionChangeType::LocalChangesOnly,
                                    DIFinder, SPClonedWithinModule);
}
} // end anonymous namespace

// FIXME:
// Lower the intrinsic in the CoroEarly phase if the coroutine frame doesn't
// escape and it is known that other transformations, for example, sanitizers,
// won't lead to incorrect code.
static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
                              coro::Shape &Shape) {
  auto Wrapper = CB->getWrapperFunction();
  auto Awaiter = CB->getAwaiter();
  auto FramePtr = CB->getFrame();

  Builder.SetInsertPoint(CB);

  CallBase *NewCall = nullptr;
  // await_suspend has only 2 parameters, awaiter and handle.
  // Copy parameter attributes from the intrinsic call, but remove the last,
  // because the last parameter now becomes the function that is being called.
  AttributeList NewAttributes =
      CB->getAttributes().removeParamAttributes(CB->getContext(), 2);

  if (auto Invoke = dyn_cast<InvokeInst>(CB)) {
    auto WrapperInvoke =
        Builder.CreateInvoke(Wrapper, Invoke->getNormalDest(),
                             Invoke->getUnwindDest(), {Awaiter, FramePtr});

    WrapperInvoke->setCallingConv(Invoke->getCallingConv());
    std::copy(Invoke->bundle_op_info_begin(), Invoke->bundle_op_info_end(),
              WrapperInvoke->bundle_op_info_begin());
    WrapperInvoke->setAttributes(NewAttributes);
    WrapperInvoke->setDebugLoc(Invoke->getDebugLoc());
    NewCall = WrapperInvoke;
  } else if (auto Call = dyn_cast<CallInst>(CB)) {
    auto WrapperCall = Builder.CreateCall(Wrapper, {Awaiter, FramePtr});

    WrapperCall->setAttributes(NewAttributes);
    WrapperCall->setDebugLoc(Call->getDebugLoc());
    NewCall = WrapperCall;
  } else {
    llvm_unreachable("Unexpected coro_await_suspend invocation method");
  }

  if (CB->getCalledFunction()->getIntrinsicID() ==
      Intrinsic::coro_await_suspend_handle) {
    // Follow the lowered await_suspend call above with a lowered resume call
    // to the returned coroutine.
    if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
      // If the await_suspend call is an invoke, we continue in the next block.
      Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt());
    }

    coro::LowererBase LB(*Wrapper->getParent());
    auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex,
                                        &*Builder.GetInsertPoint());

    LLVMContext &Ctx = Builder.getContext();
    FunctionType *ResumeTy = FunctionType::get(
        Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false);
    auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall});
    ResumeCall->setCallingConv(CallingConv::Fast);

    // We can't insert the 'ret' instruction and adjust the cc until the
    // function has been split, so remember this for later.
    Shape.SymmetricTransfers.push_back(ResumeCall);

    NewCall = ResumeCall;
  }

  CB->replaceAllUsesWith(NewCall);
  CB->eraseFromParent();
}

static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) {
  IRBuilder<> Builder(F.getContext());
  for (auto *AWS : Shape.CoroAwaitSuspends)
    lowerAwaitSuspend(Builder, AWS, Shape);
}

static void maybeFreeRetconStorage(IRBuilder<> &Builder,
                                   const coro::Shape &Shape, Value *FramePtr,
                                   CallGraph *CG) {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
  if (Shape.RetconLowering.IsFrameInlineInStorage)
    return;

  Shape.emitDealloc(Builder, FramePtr, CG);
}

/// Replace an llvm.coro.end.async.
/// Will inline the must tail call function call if there is one.
/// \returns true if cleanup of the coro.end block is needed, false otherwise.
static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
  IRBuilder<> Builder(End);

  auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End);
  if (!EndAsync) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
  if (!MustTailCallFunc) {
    Builder.CreateRetVoid();
    return true /*needs cleanup of coro.end block*/;
  }

  // Move the must tail call from the predecessor block into the end block.
  auto *CoroEndBlock = End->getParent();
  auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
  assert(MustTailCallFuncBlock && "Must have a single predecessor block");
  auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
  auto *MustTailCall = cast<CallInst>(&*std::prev(It));
  CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock,
                       MustTailCall->getIterator());

  // Insert the return instruction.
  Builder.SetInsertPoint(End);
  Builder.CreateRetVoid();
  InlineFunctionInfo FnInfo;

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();

  auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
  assert(InlineRes.isSuccess() && "Expected inlining to succeed");
  (void)InlineRes;

  // We have cleaned up the coro.end block above.
  return false;
}

/// Replace a non-unwind call to llvm.coro.end.
static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
                                      const coro::Shape &Shape, Value *FramePtr,
                                      bool InResume, CallGraph *CG) {
  // Start inserting right before the coro.end.
  IRBuilder<> Builder(End);

  // Create the return instruction.
  switch (Shape.ABI) {
  // The cloned functions in switch-lowering always return void.
  case coro::ABI::Switch:
    assert(!cast<CoroEndInst>(End)->hasResults() &&
           "switch coroutine should not return any values");
    // coro.end doesn't immediately end the coroutine in the main function
    // in this lowering, because we need to deallocate the coroutine.
    if (!InResume)
      return;
    Builder.CreateRetVoid();
    break;

  // In async lowering this returns.
  case coro::ABI::Async: {
    bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
    if (!CoroEndBlockNeedsCleanup)
      return;
    break;
  }

  // In unique continuation lowering, the continuations always return void.
  // But we may have implicitly allocated storage.
  case coro::ABI::RetconOnce: {
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    auto *CoroEnd = cast<CoroEndInst>(End);
    auto *RetTy = Shape.getResumeFunctionType()->getReturnType();

    if (!CoroEnd->hasResults()) {
      assert(RetTy->isVoidTy());
      Builder.CreateRetVoid();
      break;
    }

    auto *CoroResults = CoroEnd->getResults();
    unsigned NumReturns = CoroResults->numReturns();

    if (auto *RetStructTy = dyn_cast<StructType>(RetTy)) {
      assert(RetStructTy->getNumElements() == NumReturns &&
             "numbers of returns should match resume function signature");
      Value *ReturnValue = PoisonValue::get(RetStructTy);
      unsigned Idx = 0;
      for (Value *RetValEl : CoroResults->return_values())
        ReturnValue = Builder.CreateInsertValue(ReturnValue, RetValEl, Idx++);
      Builder.CreateRet(ReturnValue);
    } else if (NumReturns == 0) {
      assert(RetTy->isVoidTy());
      Builder.CreateRetVoid();
    } else {
      assert(NumReturns == 1);
      Builder.CreateRet(*CoroResults->retval_begin());
    }
    CoroResults->replaceAllUsesWith(
        ConstantTokenNone::get(CoroResults->getContext()));
    CoroResults->eraseFromParent();
    break;
  }

  // In non-unique continuation lowering, we signal completion by returning
  // a null continuation.
  case coro::ABI::Retcon: {
    assert(!cast<CoroEndInst>(End)->hasResults() &&
           "retcon coroutine should not return any values");
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    auto RetTy = Shape.getResumeFunctionType()->getReturnType();
    auto RetStructTy = dyn_cast<StructType>(RetTy);
    PointerType *ContinuationTy =
        cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);

    Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
    if (RetStructTy) {
      ReturnValue = Builder.CreateInsertValue(PoisonValue::get(RetStructTy),
                                              ReturnValue, 0);
    }
    Builder.CreateRet(ReturnValue);
    break;
  }
  }

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();
}

// Mark a coroutine as done, which implies that the coroutine is finished and
// never gets resumed.
//
// In the resume-switched ABI, the done state is represented by storing zero in
// ResumeFnAddr.
//
// NOTE: We cannot omit the argument `FramePtr`. It is necessary because the
// pointer to the frame in the split function is not stored in `Shape`.
static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
                                Value *FramePtr) {
  assert(
      Shape.ABI == coro::ABI::Switch &&
      "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
  auto *GepIndex = Builder.CreateStructGEP(
      Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
      "ResumeFn.addr");
  auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
      Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
  Builder.CreateStore(NullPtr, GepIndex);

  // If the coroutine doesn't have an unwind coro.end, we can omit the store
  // to the final suspend point, since we can infer that the coroutine is
  // suspended at the final suspend point from the nullness of ResumeFnAddr.
  // However, we can't skip it if the coroutine has an unwind coro.end:
  // a coroutine that reaches an unwind coro.end is considered suspended at
  // the final suspend point (the ResumeFnAddr is null) even though it hasn't
  // actually completed yet, so we need the IndexVal for the final suspend
  // point to make the states distinguishable.
  if (Shape.SwitchLowering.HasUnwindCoroEnd &&
      Shape.SwitchLowering.HasFinalSuspend) {
    assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
           "The final suspend should only live in the last position of "
           "CoroSuspends.");
    ConstantInt *IndexVal = Shape.getIndex(Shape.CoroSuspends.size() - 1);
    auto *FinalIndex = Builder.CreateStructGEP(
        Shape.FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");

    Builder.CreateStore(IndexVal, FinalIndex);
  }
}

/// Replace an unwind call to llvm.coro.end.
static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                                 Value *FramePtr, bool InResume,
                                 CallGraph *CG) {
  IRBuilder<> Builder(End);

  switch (Shape.ABI) {
  // In switch-lowering, this does nothing in the main function.
  case coro::ABI::Switch: {
    // According to the C++ specification, the coroutine should be marked as
    // done if promise.unhandled_exception() throws. The frontend will
    // call coro.end(true) along this path.
    //
    // FIXME: We should refactor this once there are other languages that use
    // the Switch-Resumed style besides C++.
    markCoroutineAsDone(Builder, Shape, FramePtr);
    if (!InResume)
      return;
    break;
  }
  // In async lowering this does nothing.
  case coro::ABI::Async:
    break;
  // In continuation-lowering, this frees the continuation storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    break;
  }

  // If coro.end has an associated bundle, add a cleanupret instruction.
  if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
    auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
    auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
    End->getParent()->splitBasicBlock(End);
    CleanupRet->getParent()->getTerminator()->eraseFromParent();
  }
}

static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
                           Value *FramePtr, bool InResume, CallGraph *CG) {
  if (End->isUnwind())
    replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
  else
    replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);

  auto &Context = End->getContext();
  End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
                                   : ConstantInt::getFalse(Context));
  End->eraseFromParent();
}

// In the resume function, we remove the last case (when coro::Shape is built,
// the final suspend point (if present) is always the last element of the
// CoroSuspends array) since it is undefined behavior to resume a coroutine
// suspended at the final suspend point.
// In the destroy function, if it isn't possible for ResumeFnAddr to be NULL
// while the coroutine is not actually suspended at the final suspend point
// (this is possible since the coroutine is considered suspended at the final
// suspend point if promise.unhandled_exception() exits via an exception), we
// can remove the last case.
void coro::BaseCloner::handleFinalSuspend() {
  assert(Shape.ABI == coro::ABI::Switch &&
         Shape.SwitchLowering.HasFinalSuspend);

  if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd)
    return;

  auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
  auto FinalCaseIt = std::prev(Switch->case_end());
  BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
  Switch->removeCase(FinalCaseIt);
  if (isSwitchDestroyFunction()) {
    BasicBlock *OldSwitchBB = Switch->getParent();
    auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
    Builder.SetInsertPoint(OldSwitchBB->getTerminator());

    if (NewF->isCoroOnlyDestroyWhenComplete()) {
      // When the coroutine can only be destroyed when complete, we don't need
      // to generate code for other cases.
      Builder.CreateBr(ResumeBB);
    } else {
      auto *GepIndex = Builder.CreateStructGEP(
          Shape.FrameTy, NewFramePtr, coro::Shape::SwitchFieldIndex::Resume,
          "ResumeFn.addr");
      auto *Load =
          Builder.CreateLoad(Shape.getSwitchResumePointerType(), GepIndex);
      auto *Cond = Builder.CreateIsNull(Load);
      Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
    }
    OldSwitchBB->getTerminator()->eraseFromParent();
  }
}

static FunctionType *
getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
  auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
  auto *StructTy = cast<StructType>(AsyncSuspend->getType());
  auto &Context = Suspend->getParent()->getParent()->getContext();
  auto *VoidTy = Type::getVoidTy(Context);
  return FunctionType::get(VoidTy, StructTy->elements(), false);
}

static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
                                        const Twine &Suffix,
                                        Module::iterator InsertBefore,
                                        AnyCoroSuspendInst *ActiveSuspend) {
  Module *M = OrigF.getParent();
  auto *FnTy = (Shape.ABI != coro::ABI::Async)
                   ? Shape.getResumeFunctionType()
                   : getFunctionTypeFromAsyncSuspend(ActiveSuspend);

  Function *NewF =
      Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                       OrigF.getName() + Suffix);

  M->getFunctionList().insert(InsertBefore, NewF);

  return NewF;
}

/// Replace uses of the active llvm.coro.suspend.retcon/async call with the
/// arguments to the continuation function.
///
/// This assumes that the builder has a meaningful insertion point.
void coro::BaseCloner::replaceRetconOrAsyncSuspendUses() {
  assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
         Shape.ABI == coro::ABI::Async);

  auto NewS = VMap[ActiveSuspend];
  if (NewS->use_empty())
    return;

  // Copy out all the continuation arguments after the buffer pointer into
  // an easily-indexed data structure for convenience.
  SmallVector<Value *, 8> Args;
  // The async ABI includes all arguments -- including the first argument.
  bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
  for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()),
            E = NewF->arg_end();
       I != E; ++I)
    Args.push_back(&*I);

  // If the suspend returns a single scalar value, we can just do a simple
  // replacement.
  if (!isa<StructType>(NewS->getType())) {
    assert(Args.size() == 1);
    NewS->replaceAllUsesWith(Args.front());
    return;
  }

  // Try to peephole extracts of an aggregate return.
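  // Each single-index extractvalue of the suspend's aggregate result can be
  // rewritten directly to the corresponding continuation argument.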
  for (Use &U : llvm::make_early_inc_range(NewS->uses())) {
    auto *EVI = dyn_cast<ExtractValueInst>(U.getUser());
    if (!EVI || EVI->getNumIndices() != 1)
      continue;

    EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
    EVI->eraseFromParent();
  }

  // If we have no remaining uses, we're done.
  if (NewS->use_empty())
    return;

  // Otherwise, we need to create an aggregate.
  Value *Aggr = PoisonValue::get(NewS->getType());
  for (auto [Idx, Arg] : llvm::enumerate(Args))
    Aggr = Builder.CreateInsertValue(Aggr, Arg, Idx);

  NewS->replaceAllUsesWith(Aggr);
}

void coro::BaseCloner::replaceCoroSuspends() {
  Value *SuspendResult;

  switch (Shape.ABI) {
  // In switch lowering, replace coro.suspend with the appropriate value
  // for the type of function we're extracting.
  // Replacing coro.suspend with (0) will result in control flow proceeding to
  // a resume label associated with a suspend point, replacing it with (1) will
  // result in control flow proceeding to a cleanup label associated with this
  // suspend point.
  case coro::ABI::Switch:
    SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
    break;

  // In async lowering there are no uses of the result.
  case coro::ABI::Async:
    return;

  // In returned-continuation lowering, the arguments from earlier
  // continuations are theoretically arbitrary, and they should have been
  // spilled.
  case coro::ABI::RetconOnce:
  case coro::ABI::Retcon:
    return;
  }

  for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
    // The active suspend was handled earlier.
    if (CS == ActiveSuspend)
      continue;

    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
    MappedCS->replaceAllUsesWith(SuspendResult);
    MappedCS->eraseFromParent();
  }
}

void coro::BaseCloner::replaceCoroEnds() {
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    // We use a null call graph because there's no call graph node for
    // the cloned function yet. We'll just be rebuilding that later.
    auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]);
    replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
  }
}

static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
                                 ValueToValueMapTy *VMap) {
  if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
    return;
  Value *CachedSlot = nullptr;
  auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
    if (CachedSlot)
      return CachedSlot;

    // Check if the function has a swifterror argument.
    for (auto &Arg : F.args()) {
      if (Arg.isSwiftError()) {
        CachedSlot = &Arg;
        return &Arg;
      }
    }

    // Create a swifterror alloca.
    IRBuilder<> Builder(&F.getEntryBlock(),
                        F.getEntryBlock().getFirstNonPHIOrDbg());
    auto Alloca = Builder.CreateAlloca(ValueTy);
    Alloca->setSwiftError(true);

    CachedSlot = Alloca;
    return Alloca;
  };

  for (CallInst *Op : Shape.SwiftErrorOps) {
    auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
    IRBuilder<> Builder(MappedOp);

    // If there are no arguments, this is a 'get' operation.
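    // Otherwise, a call with a single argument is a 'set' operation that
    // stores the value into the swifterror slot.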
    Value *MappedResult;
    if (Op->arg_empty()) {
      auto ValueTy = Op->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      MappedResult = Builder.CreateLoad(ValueTy, Slot);
    } else {
      assert(Op->arg_size() == 1);
      auto Value = MappedOp->getArgOperand(0);
      auto ValueTy = Value->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      Builder.CreateStore(Value, Slot);
      MappedResult = Slot;
    }

    MappedOp->replaceAllUsesWith(MappedResult);
    MappedOp->eraseFromParent();
  }

  // If we're updating the original function, we've invalidated SwiftErrorOps.
  if (VMap == nullptr) {
    Shape.SwiftErrorOps.clear();
  }
}

/// Returns all DbgVariableIntrinsics and DbgVariableRecords in \p F.
static std::pair<SmallVector<DbgVariableIntrinsic *, 8>,
                 SmallVector<DbgVariableRecord *>>
collectDbgVariableIntrinsics(Function &F) {
  SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
  SmallVector<DbgVariableRecord *> DbgVariableRecords;
  for (auto &I : instructions(F)) {
    for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
      DbgVariableRecords.push_back(&DVR);
    if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
      Intrinsics.push_back(DVI);
  }
  return {Intrinsics, DbgVariableRecords};
}

void coro::BaseCloner::replaceSwiftErrorOps() {
  ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
}

void coro::BaseCloner::salvageDebugInfo() {
  auto [Worklist, DbgVariableRecords] = collectDbgVariableIntrinsics(*NewF);
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;

  // Only 64-bit ABIs have a register we can refer to with the entry value.
  bool UseEntryValue =
      llvm::Triple(OrigF.getParent()->getTargetTriple()).isArch64Bit();
  for (DbgVariableIntrinsic *DVI : Worklist)
    coro::salvageDebugInfo(ArgToAllocaMap, *DVI, UseEntryValue);
  for (DbgVariableRecord *DVR : DbgVariableRecords)
    coro::salvageDebugInfo(ArgToAllocaMap, *DVR, UseEntryValue);

  // Remove all salvaged dbg.declare intrinsics that became
  // either unreachable or stale due to the CoroSplit transformation.
  DominatorTree DomTree(*NewF);
  auto IsUnreachableBlock = [&](BasicBlock *BB) {
    return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
                                   &DomTree);
  };
  auto RemoveOne = [&](auto *DVI) {
    if (IsUnreachableBlock(DVI->getParent()))
      DVI->eraseFromParent();
    else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
      // Count all non-debuginfo uses in reachable blocks.
      unsigned Uses = 0;
      for (auto *User : DVI->getVariableLocationOp(0)->users())
        if (auto *I = dyn_cast<Instruction>(User))
          if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
            ++Uses;
      if (!Uses)
        DVI->eraseFromParent();
    }
  };
  for_each(Worklist, RemoveOne);
  for_each(DbgVariableRecords, RemoveOne);
}

void coro::BaseCloner::replaceEntryBlock() {
  // In the original function, the AllocaSpillBlock is a block immediately
  // following the allocation of the frame object which defines GEPs for
  // all the allocas that have been moved into the frame, and it ends by
  // branching to the original beginning of the coroutine. Make this
  // the entry block of the cloned function.
  auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
  auto *OldEntry = &NewF->getEntryBlock();
  Entry->setName("entry" + Suffix);
  Entry->moveBefore(OldEntry);
  Entry->getTerminator()->eraseFromParent();

  // Clear all predecessors of the new entry block. There should be
  // exactly one predecessor, which we created when splitting out
  // AllocaSpillBlock to begin with.
  assert(Entry->hasOneUse());
  auto BranchToEntry = cast<BranchInst>(Entry->user_back());
  assert(BranchToEntry->isUnconditional());
  Builder.SetInsertPoint(BranchToEntry);
  Builder.CreateUnreachable();
  BranchToEntry->eraseFromParent();

  // Branch from the entry to the appropriate place.
  Builder.SetInsertPoint(Entry);
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In switch-lowering, we built a resume-entry block in the original
    // function. Make the entry block branch to this.
    auto *SwitchBB =
        cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
    Builder.CreateBr(SwitchBB);
    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    // In continuation ABIs, we want to branch to immediately after the
    // active suspend point. Earlier phases will have put the suspend in its
    // own basic block, so just thread our jump directly to its successor.
    assert((Shape.ABI == coro::ABI::Async &&
            isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
           ((Shape.ABI == coro::ABI::Retcon ||
             Shape.ABI == coro::ABI::RetconOnce) &&
            isa<CoroSuspendRetconInst>(ActiveSuspend)));
    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
    auto Branch = cast<BranchInst>(MappedCS->getNextNode());
    assert(Branch->isUnconditional());
    Builder.CreateBr(Branch->getSuccessor(0));
    break;
  }
  }

  // Any static alloca that's still being used but not reachable from the new
  // entry needs to be moved to the new entry.
  Function *F = OldEntry->getParent();
  DominatorTree DT{*F};
  for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
    auto *Alloca = dyn_cast<AllocaInst>(&I);
    if (!Alloca || I.use_empty())
      continue;
    if (DT.isReachableFromEntry(I.getParent()) ||
        !isa<ConstantInt>(Alloca->getArraySize()))
      continue;
    I.moveBefore(*Entry, Entry->getFirstInsertionPt());
  }
}

/// Derive the value of the new frame pointer.
Value *coro::BaseCloner::deriveNewFramePointer() {
  // Builder should be inserting to the front of the new entry block.

  switch (Shape.ABI) {
  // In switch-lowering, the argument is the frame pointer.
  case coro::ABI::Switch:
    return &*NewF->arg_begin();
  // In async-lowering, one of the arguments is an async context as determined
  // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context
  // of the resume function from the async context projection function
  // associated with the active suspend. The frame is located as a tail to the
  // async context header.
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
    auto *CalleeContext = NewF->getArg(ContextIdx);
    auto *ProjectionFunc =
        ActiveAsyncSuspend->getAsyncContextProjectionFunction();
    auto DbgLoc =
        cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
    // Calling i8* (i8*)
    auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(),
                                             ProjectionFunc, CalleeContext);
    CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
    CallerContext->setDebugLoc(DbgLoc);
    // The frame is located after the async_context header.
    auto &Context = Builder.getContext();
    auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
        Type::getInt8Ty(Context), CallerContext,
        Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
    // Inline the projection function.
    InlineFunctionInfo InlineInfo;
    auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
    assert(InlineRes.isSuccess());
    (void)InlineRes;
    return FramePtrAddr;
  }
  // In continuation-lowering, the argument is the opaque storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    Argument *NewStorage = &*NewF->arg_begin();
    auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext());

    // If the storage is inline, just bitcast the storage to the frame type.
    if (Shape.RetconLowering.IsFrameInlineInStorage)
      return NewStorage;

    // Otherwise, load the real frame from the opaque storage.
    return Builder.CreateLoad(FramePtrTy, NewStorage);
  }
  }
  llvm_unreachable("bad ABI");
}

/// Adjust the scope line of the funclet to the first line number after the
/// suspend point. This avoids a jump in the line table from the function
/// declaration (where prologue instructions are attributed to) to the suspend
/// point.
/// Only adjust the scope line when the files are the same.
/// If no candidate line number is found, fall back to the line of
/// ActiveSuspend.
static void updateScopeLine(Instruction *ActiveSuspend,
                            DISubprogram &SPToUpdate) {
  if (!ActiveSuspend)
    return;

  // No subsequent instruction -> fall back to the location of ActiveSuspend.
  if (!ActiveSuspend->getNextNonDebugInstruction()) {
    if (auto DL = ActiveSuspend->getDebugLoc())
      if (SPToUpdate.getFile() == DL->getFile())
        SPToUpdate.setScopeLine(DL->getLine());
    return;
  }

  BasicBlock::iterator Successor =
      ActiveSuspend->getNextNonDebugInstruction()->getIterator();
  // CoroSplit splits the BB around ActiveSuspend, so the meaningful
  // instructions are not in the same BB.
  if (auto *Branch = dyn_cast_or_null<BranchInst>(Successor);
      Branch && Branch->isUnconditional())
    Successor = Branch->getSuccessor(0)->getFirstNonPHIOrDbg();

  // Find the first successor of ActiveSuspend with a non-zero line location.
  // If that matches the file of ActiveSuspend, use it.
  BasicBlock *PBB = Successor->getParent();
  for (; Successor != PBB->end(); Successor = std::next(Successor)) {
    Successor = skipDebugIntrinsics(Successor);
    auto DL = Successor->getDebugLoc();
    if (!DL || DL.getLine() == 0)
      continue;

    if (SPToUpdate.getFile() == DL->getFile()) {
      SPToUpdate.setScopeLine(DL.getLine());
      return;
    }

    break;
  }

  // If the search above failed, fall back to the location of ActiveSuspend.
  if (auto DL = ActiveSuspend->getDebugLoc())
    if (SPToUpdate.getFile() == DL->getFile())
      SPToUpdate.setScopeLine(DL->getLine());
}

static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
                                 unsigned ParamIndex, uint64_t Size,
                                 Align Alignment, bool NoAlias) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::NonNull);
  ParamAttrs.addAttribute(Attribute::NoUndef);

  if (NoAlias)
    ParamAttrs.addAttribute(Attribute::NoAlias);

  ParamAttrs.addAlignmentAttr(Alignment);
  ParamAttrs.addDereferenceableAttr(Size);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
                                 unsigned ParamIndex) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::SwiftAsync);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
                              unsigned ParamIndex) {
  AttrBuilder ParamAttrs(Context);
  ParamAttrs.addAttribute(Attribute::SwiftSelf);
  Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
}

/// Clone the body of the original function into a resume function of
/// some sort.
void coro::BaseCloner::create() {
  assert(NewF);

  // Replace all args with dummy instructions. If an argument is the old frame
  // pointer, the dummy will be replaced by the new frame pointer once it is
  // computed below. Uses of all other arguments should have already been
  // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine
  // frame.
  SmallVector<Instruction *> DummyArgs;
  for (Argument &A : OrigF.args()) {
    DummyArgs.push_back(new FreezeInst(PoisonValue::get(A.getType())));
    VMap[&A] = DummyArgs.back();
  }

  SmallVector<ReturnInst *, 4> Returns;

  // Ignore attempts to change certain attributes of the function.
  // TODO: maybe there should be a way to suppress this during cloning?
  auto savedVisibility = NewF->getVisibility();
  auto savedUnnamedAddr = NewF->getUnnamedAddr();
  auto savedDLLStorageClass = NewF->getDLLStorageClass();

  // NewF's linkage (which CloneFunctionInto does *not* change) might not
  // be compatible with the visibility of OrigF (which it *does* change),
  // so protect against that.
  auto savedLinkage = NewF->getLinkage();
  NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);

  CloneFunctionAttributesInto(NewF, &OrigF, VMap, false);
  CloneFunctionMetadataInto(*NewF, OrigF, VMap, RF_None, nullptr, nullptr,
                            &CommonDebugInfo);
  CloneFunctionBodyInto(*NewF, OrigF, VMap, RF_None, Returns, "", nullptr,
                        nullptr, nullptr, &CommonDebugInfo);

  auto &Context = NewF->getContext();

  if (DISubprogram *SP = NewF->getSubprogram()) {
    assert(SP != OrigF.getSubprogram() && SP->isDistinct());
    updateScopeLine(ActiveSuspend, *SP);

    // Update the linkage name to reflect the modified symbol name. It
    // is necessary to update the linkage name in Swift, since the
    // mangling changes for resume functions. It might also be the
    // right thing to do in C++, but due to a limitation in LLVM's
    // AsmPrinter we can only do this if the function doesn't have an
    // abstract specification, since the DWARF backend expects the
    // abstract specification to contain the linkage name and asserts
    // that they are identical.
    if (SP->getUnit() &&
        SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) {
      SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
      if (auto *Decl = SP->getDeclaration()) {
        auto *NewDecl = DISubprogram::get(
            Decl->getContext(), Decl->getScope(), Decl->getName(),
            NewF->getName(), Decl->getFile(), Decl->getLine(), Decl->getType(),
            Decl->getScopeLine(), Decl->getContainingType(),
            Decl->getVirtualIndex(), Decl->getThisAdjustment(),
            Decl->getFlags(), Decl->getSPFlags(), Decl->getUnit(),
            Decl->getTemplateParams(), nullptr, Decl->getRetainedNodes(),
            Decl->getThrownTypes(), Decl->getAnnotations(),
            Decl->getTargetFuncName());
        SP->replaceDeclaration(NewDecl);
      }
    }
  }

  NewF->setLinkage(savedLinkage);
  NewF->setVisibility(savedVisibility);
  NewF->setUnnamedAddr(savedUnnamedAddr);
  NewF->setDLLStorageClass(savedDLLStorageClass);
  // The function sanitizer metadata needs to match the signature of the
  // function it is being attached to. However, this does not hold for split
  // functions here. Thus remove the metadata for split functions.
  if (Shape.ABI == coro::ABI::Switch &&
      NewF->hasMetadata(LLVMContext::MD_func_sanitize))
    NewF->eraseMetadata(LLVMContext::MD_func_sanitize);

  // Replace the attributes of the new function:
  auto OrigAttrs = NewF->getAttributes();
  auto NewAttrs = AttributeList();

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Bootstrap attributes by copying function attributes from the
    // original function. This should include optimization settings and so on.
    NewAttrs = NewAttrs.addFnAttributes(
        Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));

    addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
                         Shape.FrameAlign, /*NoAlias=*/false);
    break;
  case coro::ABI::Async: {
    auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
    if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
                                Attribute::SwiftAsync)) {
      uint32_t ArgAttributeIndices =
          ActiveAsyncSuspend->getStorageArgumentIndex();
      auto ContextArgIndex = ArgAttributeIndices & 0xff;
      addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);

      // `swiftasync` must precede `swiftself` so 0 is not a valid index for
      // `swiftself`.
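      // The upper bits therefore hold the `swiftself` index; a value of zero
      // means the suspend carries no `swiftself` argument.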
      auto SwiftSelfIndex = ArgAttributeIndices >> 8;
      if (SwiftSelfIndex)
        addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
    }

    // Transfer the original function's attributes.
    auto FnAttrs = OrigF.getAttributes().getFnAttrs();
    NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs));
    break;
  }
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // If we have a continuation prototype, just use its attributes,
    // full-stop.
    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();

    /// FIXME: Is it really good to add the NoAlias attribute?
    addFramePointerAttrs(NewAttrs, Context, 0,
                         Shape.getRetconCoroId()->getStorageSize(),
                         Shape.getRetconCoroId()->getStorageAlignment(),
                         /*NoAlias=*/true);

    break;
  }

  switch (Shape.ABI) {
  // In these ABIs, the cloned functions always return 'void', and the
  // existing return sites are meaningless. Note that for unique
  // continuations, this includes the returns associated with suspends;
  // this is fine because we can't suspend twice.
  case coro::ABI::Switch:
  case coro::ABI::RetconOnce:
    // Remove old returns.
    for (ReturnInst *Return : Returns)
      changeToUnreachable(Return);
    break;

  // With multi-suspend continuations, we'll already have eliminated the
  // original returns and inserted returns before all the suspend points,
  // so we want to leave any returns in place.
  case coro::ABI::Retcon:
    break;
  // Async lowering will insert musttail call functions at all suspend points
  // followed by a return.
  // Don't change returns to unreachable because that will trip up the
  // verifier. These returns should be unreachable from the clone.
  case coro::ABI::Async:
    break;
  }

  NewF->setAttributes(NewAttrs);
  NewF->setCallingConv(Shape.getResumeFunctionCC());

  // Set up the new entry block.
  replaceEntryBlock();

  // Turn symmetric transfers into musttail calls.
  for (CallInst *ResumeCall : Shape.SymmetricTransfers) {
    ResumeCall = cast<CallInst>(VMap[ResumeCall]);
    if (TTI.supportsTailCallFor(ResumeCall)) {
      // FIXME: Could we support symmetric transfer effectively without
      // musttail?
      ResumeCall->setTailCallKind(CallInst::TCK_MustTail);
    }

    // Put a 'ret void' after the call, and split any remaining instructions to
    // an unreachable block.
    BasicBlock *BB = ResumeCall->getParent();
    BB->splitBasicBlock(ResumeCall->getNextNode());
    Builder.SetInsertPoint(BB->getTerminator());
    Builder.CreateRetVoid();
    BB->getTerminator()->eraseFromParent();
  }

  Builder.SetInsertPoint(&NewF->getEntryBlock().front());
  NewFramePtr = deriveNewFramePointer();

  // Remap frame pointer.
  Value *OldFramePtr = VMap[Shape.FramePtr];
  NewFramePtr->takeName(OldFramePtr);
  OldFramePtr->replaceAllUsesWith(NewFramePtr);

  // Remap vFrame pointer.
  auto *NewVFrame = Builder.CreateBitCast(
      NewFramePtr, PointerType::getUnqual(Builder.getContext()), "vFrame");
  Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
  if (OldVFrame != NewVFrame)
    OldVFrame->replaceAllUsesWith(NewVFrame);

  // All uses of the arguments should have been resolved by this point,
  // so we can safely remove the dummy values.
  for (Instruction *DummyArg : DummyArgs) {
    DummyArg->replaceAllUsesWith(PoisonValue::get(DummyArg->getType()));
    DummyArg->deleteValue();
  }

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Rewrite final suspend handling as it is not done via the switch (this
    // allows us to remove the final case from the switch, since it is
    // undefined behavior to resume a coroutine suspended at the final suspend
    // point).
    if (Shape.SwitchLowering.HasFinalSuspend)
      handleFinalSuspend();
    break;
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // Replace uses of the active suspend with the corresponding
    // continuation-function arguments.
    assert(ActiveSuspend != nullptr &&
           "no active suspend when lowering a continuation-style coroutine");
    replaceRetconOrAsyncSuspendUses();
    break;
  }

  // Handle suspends.
  replaceCoroSuspends();

  // Handle swifterror.
  replaceSwiftErrorOps();

  // Remove coro.end intrinsics.
  replaceCoroEnds();

  // Salvage debug info that points into the coroutine frame.
  salvageDebugInfo();
}

void coro::SwitchCloner::create() {
  // Create a new function matching the original type.
  NewF = createCloneDeclaration(OrigF, Shape, Suffix, OrigF.getParent()->end(),
                                ActiveSuspend);

  // Clone the function.
  coro::BaseCloner::create();

  // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
  // to suppress deallocation code.
  coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
                        /*Elide=*/FKind == coro::CloneKind::SwitchCleanup);
}

static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
  assert(Shape.ABI == coro::ABI::Async);

  auto *FuncPtrStruct = cast<ConstantStruct>(
      Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
  auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0);
  auto *OrigContextSize = FuncPtrStruct->getOperand(1);
  auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
                                          Shape.AsyncLowering.ContextSize);
  auto *NewFuncPtrStruct = ConstantStruct::get(
      FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize);

  Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
}

static TypeSize getFrameSizeForShape(coro::Shape &Shape) {
  // In the same function all coro.sizes should have the same result type.
  auto *SizeIntrin = Shape.CoroSizes.back();
  Module *M = SizeIntrin->getModule();
  const DataLayout &DL = M->getDataLayout();
  return DL.getTypeAllocSize(Shape.FrameTy);
}

static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
  if (Shape.ABI == coro::ABI::Async)
    updateAsyncFuncPointerContextSize(Shape);

  for (CoroAlignInst *CA : Shape.CoroAligns) {
    CA->replaceAllUsesWith(
        ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
    CA->eraseFromParent();
  }

  if (Shape.CoroSizes.empty())
    return;

  // In the same function all coro.sizes should have the same result type.
  auto *SizeIntrin = Shape.CoroSizes.back();
  auto *SizeConstant =
      ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape));

  for (CoroSizeInst *CS : Shape.CoroSizes) {
    CS->replaceAllUsesWith(SizeConstant);
    CS->eraseFromParent();
  }
}

static void postSplitCleanup(Function &F) {
  removeUnreachableBlocks(F);

#ifndef NDEBUG
  // For now, we do a mandatory verification step because we don't
  // entirely trust this pass. Note that we don't want to add a verifier
  // pass to FPM below because it will also verify all the global data.
  if (verifyFunction(F, &errs()))
    report_fatal_error("Broken function");
#endif
}

// Coroutine has no suspend points. Remove heap allocation for the coroutine
// frame if possible.
static void handleNoSuspendCoroutine(coro::Shape &Shape) {
  auto *CoroBegin = Shape.CoroBegin;
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    auto SwitchId = Shape.getSwitchCoroId();
    auto *AllocInst = SwitchId->getCoroAlloc();
    coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
    if (AllocInst) {
      IRBuilder<> Builder(AllocInst);
      auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
      Frame->setAlignment(Shape.FrameAlign);
      AllocInst->replaceAllUsesWith(Builder.getFalse());
      AllocInst->eraseFromParent();
      CoroBegin->replaceAllUsesWith(Frame);
    } else {
      CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
    }

    break;
  }
  case coro::ABI::Async:
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    CoroBegin->replaceAllUsesWith(PoisonValue::get(CoroBegin->getType()));
    break;
  }

  CoroBegin->eraseFromParent();
  Shape.CoroBegin = nullptr;
}

// SimplifySuspendPoint needs to check that there are no calls between
// coro_save and coro_suspend, since any of the calls may potentially resume
// the coroutine, and if that is the case we cannot eliminate the suspend
// point.
static bool hasCallsInBlockBetween(iterator_range<BasicBlock::iterator> R) {
  for (Instruction &I : R) {
    // Assume that no intrinsic can resume the coroutine.
    if (isa<IntrinsicInst>(I))
      continue;

    if (isa<CallBase>(I))
      return true;
  }
  return false;
}

static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
  SmallPtrSet<BasicBlock *, 8> Set;
  SmallVector<BasicBlock *, 8> Worklist;

  Set.insert(SaveBB);
  Worklist.push_back(ResDesBB);

  // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
  // returns a token consumed by the suspend instruction, all blocks in
  // between will have to eventually hit SaveBB when going backwards from
  // ResDesBB.
  while (!Worklist.empty()) {
    auto *BB = Worklist.pop_back_val();
    Set.insert(BB);
    for (auto *Pred : predecessors(BB))
      if (!Set.contains(Pred))
        Worklist.push_back(Pred);
  }

  // SaveBB and ResDesBB are checked separately in hasCallsBetween.
  Set.erase(SaveBB);
  Set.erase(ResDesBB);

  for (auto *BB : Set)
    if (hasCallsInBlockBetween({BB->getFirstNonPHIIt(), BB->end()}))
      return true;

  return false;
}

static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
  auto *SaveBB = Save->getParent();
  auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
  BasicBlock::iterator SaveIt = Save->getIterator();
  BasicBlock::iterator ResumeOrDestroyIt = ResumeOrDestroy->getIterator();

  if (SaveBB == ResumeOrDestroyBB)
    return hasCallsInBlockBetween({std::next(SaveIt), ResumeOrDestroyIt});

  // Any calls from Save to the end of the block?
  if (hasCallsInBlockBetween({std::next(SaveIt), SaveBB->end()}))
    return true;

  // Any calls from the beginning of the block up to ResumeOrDestroy?
  if (hasCallsInBlockBetween(
          {ResumeOrDestroyBB->getFirstNonPHIIt(), ResumeOrDestroyIt}))
    return true;

  // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
  if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
    return true;

  return false;
}

// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
// suspend point and replace it with normal control flow.
static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
                                 CoroBeginInst *CoroBegin) {
  Instruction *Prev = Suspend->getPrevNode();
  if (!Prev) {
    auto *Pred = Suspend->getParent()->getSinglePredecessor();
    if (!Pred)
      return false;
    Prev = Pred->getTerminator();
  }

  CallBase *CB = dyn_cast<CallBase>(Prev);
  if (!CB)
    return false;

  auto *Callee = CB->getCalledOperand()->stripPointerCasts();

  // See if the callsite is for resumption or destruction of the coroutine.
  auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
  if (!SubFn)
    return false;

  // If it does not refer to the current coroutine, we cannot do anything with
  // it.
  if (SubFn->getFrame() != CoroBegin)
    return false;

  // See if the transformation is safe. Specifically, see if there are any
  // calls in between Save and CallInstr. They can potentially resume the
  // coroutine, rendering this optimization unsafe.
  auto *Save = Suspend->getCoroSave();
  if (hasCallsBetween(Save, CB))
    return false;

  // Replace llvm.coro.suspend with the value that results in resumption over
  // the resume or cleanup path.
  Suspend->replaceAllUsesWith(SubFn->getRawIndex());
  Suspend->eraseFromParent();
  Save->eraseFromParent();

  // No longer need a call to coro.resume or coro.destroy.
  if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
    BranchInst::Create(Invoke->getNormalDest(), Invoke->getIterator());
  }

  // Grab the CalledValue from CB before erasing the CallInstr.
  auto *CalledValue = CB->getCalledOperand();
  CB->eraseFromParent();

  // If there are no more users, remove it. Usually it is a bitcast of SubFn.
  if (CalledValue != SubFn && CalledValue->user_empty())
    if (auto *I = dyn_cast<Instruction>(CalledValue))
      I->eraseFromParent();

  // Now we are good to remove SubFn.
  if (SubFn->user_empty())
    SubFn->eraseFromParent();

  return true;
}

// Remove suspend points that are simplified.
static void simplifySuspendPoints(coro::Shape &Shape) {
  // Currently, the only simplification we do is switch-lowering-specific.
  if (Shape.ABI != coro::ABI::Switch)
    return;

  auto &S = Shape.CoroSuspends;
  size_t I = 0, N = S.size();
  if (N == 0)
    return;

  size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
  while (true) {
    auto SI = cast<CoroSuspendInst>(S[I]);
    // Leave final.suspend to handleFinalSuspend since it is undefined behavior
    // to resume a coroutine suspended at the final suspend point.
    if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
      if (--N == I)
        break;

      std::swap(S[I], S[N]);

      if (cast<CoroSuspendInst>(S[I])->isFinal()) {
        assert(Shape.SwitchLowering.HasFinalSuspend);
        ChangedFinalIndex = I;
      }

      continue;
    }
    if (++I == N)
      break;
  }
  S.resize(N);

  // Maintain final.suspend in case the final suspend was swapped, since we
  // require the final suspend to be the last element of CoroSuspends.
  if (ChangedFinalIndex < N) {
    assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
    std::swap(S[ChangedFinalIndex], S.back());
  }
}

namespace {

struct SwitchCoroutineSplitter {
  static void split(Function &F, coro::Shape &Shape,
                    SmallVectorImpl<Function *> &Clones,
                    TargetTransformInfo &TTI) {
    assert(Shape.ABI == coro::ABI::Switch);

    MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};

    // Create a resume clone by cloning the body of the original function,
    // setting a new entry block and replacing coro.suspend with an
    // appropriate value to force the resume or cleanup path for every suspend
    // point.
    createResumeEntryBlock(F, Shape);
    auto *ResumeClone = coro::SwitchCloner::createClone(
        F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI,
        CommonDebugInfo);
    auto *DestroyClone = coro::SwitchCloner::createClone(
        F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI,
        CommonDebugInfo);
    auto *CleanupClone = coro::SwitchCloner::createClone(
        F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI,
        CommonDebugInfo);

    postSplitCleanup(*ResumeClone);
    postSplitCleanup(*DestroyClone);
    postSplitCleanup(*CleanupClone);

    // Store addresses of the resume/destroy/cleanup functions in the
    // coroutine frame.
    updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);

    assert(Clones.empty());
    Clones.push_back(ResumeClone);
    Clones.push_back(DestroyClone);
    Clones.push_back(CleanupClone);

    // Create a constant array referring to the resume/destroy/cleanup
    // functions, pointed to by the last argument of @llvm.coro.info, so that
    // the CoroElide pass can determine the correct function to call.
    setCoroInfo(F, Shape, Clones);
  }

  // Create a variant of the ramp function that does not perform heap
  // allocation for a switch ABI coroutine.
  //
  // The newly split `.noalloc` ramp function has the following differences:
  //  - Has one additional frame pointer parameter in lieu of dynamic
  //    allocation.
  //  - Suppresses allocations by replacing coro.alloc and coro.free.
  static Function *createNoAllocVariant(Function &F, coro::Shape &Shape,
                                        SmallVectorImpl<Function *> &Clones) {
    assert(Shape.ABI == coro::ABI::Switch);
    auto *OrigFnTy = F.getFunctionType();
    auto OldParams = OrigFnTy->params();

    SmallVector<Type *> NewParams;
    NewParams.reserve(OldParams.size() + 1);
    NewParams.append(OldParams.begin(), OldParams.end());
    NewParams.push_back(PointerType::getUnqual(Shape.FrameTy->getContext()));

    auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams,
                                      OrigFnTy->isVarArg());
    Function *NoAllocF =
        Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc");

    ValueToValueMapTy VMap;
    unsigned int Idx = 0;
    for (const auto &I : F.args()) {
      VMap[&I] = NoAllocF->getArg(Idx++);
    }
    // We just appended the frame pointer as the last argument of the new
    // function.
    auto FrameIdx = NoAllocF->arg_size() - 1;
    SmallVector<ReturnInst *, 4> Returns;
    CloneFunctionInto(NoAllocF, &F, VMap,
                      CloneFunctionChangeType::LocalChangesOnly, Returns);

    if (Shape.CoroBegin) {
      auto *NewCoroBegin =
          cast_if_present<CoroBeginInst>(VMap[Shape.CoroBegin]);
      auto *NewCoroId = cast<CoroIdInst>(NewCoroBegin->getId());
      coro::replaceCoroFree(NewCoroId, /*Elide=*/true);
      coro::suppressCoroAllocs(NewCoroId);
      NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(FrameIdx));
      NewCoroBegin->eraseFromParent();
    }

    Module *M = F.getParent();
    M->getFunctionList().insert(M->end(), NoAllocF);

    removeUnreachableBlocks(*NoAllocF);
    auto NewAttrs = NoAllocF->getAttributes();
    // When we elide allocation, we read these attributes to determine the
    // frame size and alignment.
    addFramePointerAttrs(NewAttrs, NoAllocF->getContext(), FrameIdx,
                         Shape.FrameSize, Shape.FrameAlign,
                         /*NoAlias=*/false);

    NoAllocF->setAttributes(NewAttrs);

    Clones.push_back(NoAllocF);
    // Reset the original function's coro info so that the new noalloc variant
    // is connected to the original ramp function.
    setCoroInfo(F, Shape, Clones);
    // After copying, set the linkage to internal linkage. The original
    // function may have different linkage, but the optimization dependent on
    // this function generally relies on LTO.
    NoAllocF->setLinkage(llvm::GlobalValue::InternalLinkage);
    return NoAllocF;
  }

private:
  // Create an entry block for a resume function with a switch that will jump
  // to suspend points.
  static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
    LLVMContext &C = F.getContext();

    // resume.entry:
    //   %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr,
    //                 i32 0, i32 2
    //   %index = load i32, i32* %index.addr
    //   switch i32 %index, label %unreachable [
    //     i32 0, label %resume.0
    //     i32 1, label %resume.1
    //     ...
1529 // ] 1530 1531 auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F); 1532 auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F); 1533 1534 IRBuilder<> Builder(NewEntry); 1535 auto *FramePtr = Shape.FramePtr; 1536 auto *FrameTy = Shape.FrameTy; 1537 auto *GepIndex = Builder.CreateStructGEP( 1538 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); 1539 auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); 1540 auto *Switch = 1541 Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); 1542 Shape.SwitchLowering.ResumeSwitch = Switch; 1543 1544 size_t SuspendIndex = 0; 1545 for (auto *AnyS : Shape.CoroSuspends) { 1546 auto *S = cast<CoroSuspendInst>(AnyS); 1547 ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); 1548 1549 // Replace CoroSave with a store to Index: 1550 // %index.addr = getelementptr %f.frame... (index field number) 1551 // store i32 %IndexVal, i32* %index.addr1 1552 auto *Save = S->getCoroSave(); 1553 Builder.SetInsertPoint(Save); 1554 if (S->isFinal()) { 1555 // The coroutine should be marked done if it reaches the final suspend 1556 // point. 1557 markCoroutineAsDone(Builder, Shape, FramePtr); 1558 } else { 1559 auto *GepIndex = Builder.CreateStructGEP( 1560 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr"); 1561 Builder.CreateStore(IndexVal, GepIndex); 1562 } 1563 1564 Save->replaceAllUsesWith(ConstantTokenNone::get(C)); 1565 Save->eraseFromParent(); 1566 1567 // Split block before and after coro.suspend and add a jump from an entry 1568 // switch: 1569 // 1570 // whateverBB: 1571 // whatever 1572 // %0 = call i8 @llvm.coro.suspend(token none, i1 false) 1573 // switch i8 %0, label %suspend[i8 0, label %resume 1574 // i8 1, label %cleanup] 1575 // becomes: 1576 // 1577 // whateverBB: 1578 // whatever 1579 // br label %resume.0.landing 1580 // 1581 // resume.0: ; <--- jump from the switch in the resume.entry 1582 // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false) 1583 // br label %resume.0.landing 1584 // 1585 // resume.0.landing: 1586 // %1 = phi i8[-1, %whateverBB], [%0, %resume.0] 1587 // switch i8 % 1, label %suspend [i8 0, label %resume 1588 // i8 1, label %cleanup] 1589 1590 auto *SuspendBB = S->getParent(); 1591 auto *ResumeBB = 1592 SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex)); 1593 auto *LandingBB = ResumeBB->splitBasicBlock( 1594 S->getNextNode(), ResumeBB->getName() + Twine(".landing")); 1595 Switch->addCase(IndexVal, ResumeBB); 1596 1597 cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB); 1598 auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, ""); 1599 PN->insertBefore(LandingBB->begin()); 1600 S->replaceAllUsesWith(PN); 1601 PN->addIncoming(Builder.getInt8(-1), SuspendBB); 1602 PN->addIncoming(S, ResumeBB); 1603 1604 ++SuspendIndex; 1605 } 1606 1607 Builder.SetInsertPoint(UnreachBB); 1608 Builder.CreateUnreachable(); 1609 1610 Shape.SwitchLowering.ResumeEntryBlock = NewEntry; 1611 } 1612 1613 // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. 
1614   static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
1615                               Function *DestroyFn, Function *CleanupFn) {
1616     IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr());
1617 
1618     auto *ResumeAddr = Builder.CreateStructGEP(
1619         Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
1620         "resume.addr");
1621     Builder.CreateStore(ResumeFn, ResumeAddr);
1622 
1623     Value *DestroyOrCleanupFn = DestroyFn;
1624 
1625     CoroIdInst *CoroId = Shape.getSwitchCoroId();
1626     if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
1627       // If there is a CoroAlloc and it returns false (meaning we elide the
1628       // allocation), use CleanupFn instead of DestroyFn.
1629       DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
1630     }
1631 
1632     auto *DestroyAddr = Builder.CreateStructGEP(
1633         Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
1634         "destroy.addr");
1635     Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
1636   }
1637 
1638   // Create a global constant array containing pointers to functions provided
1639   // and set Info parameter of CoroBegin to point at this constant. Example:
1640   //
1641   //   @f.resumers = internal constant [2 x void(%f.frame*)*]
1642   //                 [void(%f.frame*)* @f.resume, void(%f.frame*)*
1643   //                 @f.destroy]
1644   //   define void @f() {
1645   //     ...
1646   //     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
1647   //                    i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to
1648   //                    i8*))
1649   //
1650   // Assumes that all the functions have the same signature.
1651   static void setCoroInfo(Function &F, coro::Shape &Shape,
1652                           ArrayRef<Function *> Fns) {
1653     // This only works under the switch-lowering ABI because coro elision
1654     // only works on the switch-lowering ABI.
1655     SmallVector<Constant *, 4> Args(Fns);
1656     assert(!Args.empty());
1657     Function *Part = *Fns.begin();
1658     Module *M = Part->getParent();
1659     auto *ArrTy = ArrayType::get(Part->getType(), Args.size());
1660 
1661     auto *ConstVal = ConstantArray::get(ArrTy, Args);
1662     auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
1663                                   GlobalVariable::PrivateLinkage, ConstVal,
1664                                   F.getName() + Twine(".resumers"));
1665 
1666     // Update coro.begin instruction to refer to this constant.
1667     LLVMContext &C = F.getContext();
1668     auto *BC = ConstantExpr::getPointerCast(GV, PointerType::getUnqual(C));
1669     Shape.getSwitchCoroId()->setInfo(BC);
1670   }
1671 };
1672 
1673 } // namespace
1674 
1675 static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
1676                                        Value *Continuation) {
1677   auto *ResumeIntrinsic = Suspend->getResumeFunction();
1678   auto &Context = Suspend->getParent()->getParent()->getContext();
1679   auto *Int8PtrTy = PointerType::getUnqual(Context);
1680 
1681   IRBuilder<> Builder(ResumeIntrinsic);
1682   auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
1683   ResumeIntrinsic->replaceAllUsesWith(Val);
1684   ResumeIntrinsic->eraseFromParent();
1685   Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
1686                       PoisonValue::get(Int8PtrTy));
1687 }
1688 
1689 /// Coerce the arguments in \p FnArgs according to \p FnTy in \p CallArgs.
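///
/// For example (hypothetical operand types): if \p FnTy expects (ptr, i64)
/// but \p FnArgs holds (i64, i64), the first operand is rewritten with
/// CreateBitOrPointerCast to ptr and the second is forwarded unchanged.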
1690 static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
1691                             ArrayRef<Value *> FnArgs,
1692                             SmallVectorImpl<Value *> &CallArgs) {
1693   size_t ArgIdx = 0;
1694   for (auto *paramTy : FnTy->params()) {
1695     assert(ArgIdx < FnArgs.size());
1696     if (paramTy != FnArgs[ArgIdx]->getType())
1697       CallArgs.push_back(
1698           Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy));
1699     else
1700       CallArgs.push_back(FnArgs[ArgIdx]);
1701     ++ArgIdx;
1702   }
1703 }
1704 
1705 CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
1706                                    TargetTransformInfo &TTI,
1707                                    ArrayRef<Value *> Arguments,
1708                                    IRBuilder<> &Builder) {
1709   auto *FnTy = MustTailCallFn->getFunctionType();
1710   // Coerce the arguments; LLVM optimizations seem to ignore the types in
1711   // vararg functions and throw away casts in optimized mode.
1712   SmallVector<Value *, 8> CallArgs;
1713   coerceArguments(Builder, FnTy, Arguments, CallArgs);
1714 
1715   auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs);
1716   // Skip targets which don't support tail calls.
1717   if (TTI.supportsTailCallFor(TailCall)) {
1718     TailCall->setTailCallKind(CallInst::TCK_MustTail);
1719   }
1720   TailCall->setDebugLoc(Loc);
1721   TailCall->setCallingConv(MustTailCallFn->getCallingConv());
1722   return TailCall;
1723 }
1724 
1725 void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
1726                                     SmallVectorImpl<Function *> &Clones,
1727                                     TargetTransformInfo &TTI) {
1728   assert(Shape.ABI == coro::ABI::Async);
1729   assert(Clones.empty());
1730   // Reset various things that the optimizer might have decided it
1731   // "knows" about the coroutine function due to not seeing a return.
1732   F.removeFnAttr(Attribute::NoReturn);
1733   F.removeRetAttr(Attribute::NoAlias);
1734   F.removeRetAttr(Attribute::NonNull);
1735 
1736   auto &Context = F.getContext();
1737   auto *Int8PtrTy = PointerType::getUnqual(Context);
1738 
1739   auto *Id = Shape.getAsyncCoroId();
1740   IRBuilder<> Builder(Id);
1741 
1742   auto *FramePtr = Id->getStorage();
1743   FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
1744   FramePtr = Builder.CreateConstInBoundsGEP1_32(
1745       Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
1746       "async.ctx.frameptr");
1747 
1748   // Map all uses of llvm.coro.begin to the allocated frame pointer.
1749   {
1750     // Make sure we don't invalidate Shape.FramePtr.
1751     TrackingVH<Value> Handle(Shape.FramePtr);
1752     Shape.CoroBegin->replaceAllUsesWith(FramePtr);
1753     Shape.FramePtr = Handle.getValPtr();
1754   }
1755 
1756   // Create all the functions in order after the main function.
1757   auto NextF = std::next(F.getIterator());
1758 
1759   // Create a continuation function for each of the suspend points.
1760   Clones.reserve(Shape.CoroSuspends.size());
1761   for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
1762     auto *Suspend = cast<CoroSuspendAsyncInst>(CS);
1763 
1764     // Create the clone declaration.
1765     auto ResumeNameSuffix = ".resume.";
1766     auto ProjectionFunctionName =
1767         Suspend->getAsyncContextProjectionFunction()->getName();
1768     bool UseSwiftMangling = false;
1769     if (ProjectionFunctionName == "__swift_async_resume_project_context") {
1770       ResumeNameSuffix = "TQ";
1771       UseSwiftMangling = true;
1772     } else if (ProjectionFunctionName == "__swift_async_resume_get_context") {
1773       ResumeNameSuffix = "TY";
1774       UseSwiftMangling = true;
1775     }
1776     auto *Continuation = createCloneDeclaration(
1777         F, Shape,
1778         UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
1779                          : ResumeNameSuffix + Twine(Idx),
1780         NextF, Suspend);
1781     Clones.push_back(Continuation);
1782 
1783     // Insert a branch to a new return block immediately before the suspend
1784     // point.
1785     auto *SuspendBB = Suspend->getParent();
1786     auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
1787     auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());
1788 
1789     // Place it before the first suspend.
1790     auto *ReturnBB =
1791         BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
1792     Branch->setSuccessor(0, ReturnBB);
1793 
1794     IRBuilder<> Builder(ReturnBB);
1795 
1796     // Insert the call to the tail call function and inline it.
1797     auto *Fn = Suspend->getMustTailCallFunction();
1798     SmallVector<Value *, 8> Args(Suspend->args());
1799     auto FnArgs = ArrayRef<Value *>(Args).drop_front(
1800         CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
1801     auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(), Fn, TTI,
1802                                               FnArgs, Builder);
1803     Builder.CreateRetVoid();
1804     InlineFunctionInfo FnInfo;
1805     (void)InlineFunction(*TailCall, FnInfo);
1806 
1807     // Replace the llvm.coro.async.resume intrinsic call.
1808     replaceAsyncResumeFunction(Suspend, Continuation);
1809   }
1810 
1811   assert(Clones.size() == Shape.CoroSuspends.size());
1812 
1813   MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)};
1814 
1815   for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
1816     auto *Suspend = CS;
1817     auto *Clone = Clones[Idx];
1818 
1819     coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
1820                                   Suspend, TTI, CommonDebugInfo);
1821   }
1822 }
1823 
1824 void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
1825                                         SmallVectorImpl<Function *> &Clones,
1826                                         TargetTransformInfo &TTI) {
1827   assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
1828   assert(Clones.empty());
1829 
1830   // Reset various things that the optimizer might have decided it
1831   // "knows" about the coroutine function due to not seeing a return.
1832   F.removeFnAttr(Attribute::NoReturn);
1833   F.removeRetAttr(Attribute::NoAlias);
1834   F.removeRetAttr(Attribute::NonNull);
1835 
1836   // Allocate the frame.
1837   auto *Id = Shape.getRetconCoroId();
1838   Value *RawFramePtr;
1839   if (Shape.RetconLowering.IsFrameInlineInStorage) {
1840     RawFramePtr = Id->getStorage();
1841   } else {
1842     IRBuilder<> Builder(Id);
1843 
1844     // Determine the size of the frame.
1845     const DataLayout &DL = F.getDataLayout();
1846     auto Size = DL.getTypeAllocSize(Shape.FrameTy);
1847 
1848     // Allocate. We don't need to update the call graph node because we're
1849     // going to recompute it from scratch after splitting.
1850     // FIXME: pass the required alignment
1851     RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
1852     RawFramePtr =
1853         Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
1854 
1855     // Stash the allocated frame pointer in the continuation storage.
1856     Builder.CreateStore(RawFramePtr, Id->getStorage());
1857   }
1858 
1859   // Map all uses of llvm.coro.begin to the allocated frame pointer.
1860   {
1861     // Make sure we don't invalidate Shape.FramePtr.
1862     TrackingVH<Value> Handle(Shape.FramePtr);
1863     Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
1864     Shape.FramePtr = Handle.getValPtr();
1865   }
1866 
1867   // Create a unique return block.
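  //
  // Illustrative shape of that block for a retcon coroutine yielding one i32
  // (all names are hypothetical):
  //
  //   coro.return:
  //     %cont = phi ptr [ @f.resume.0, %susp0 ], [ @f.resume.1, %susp1 ]
  //     %val = phi i32 [ %x, %susp0 ], [ %y, %susp1 ]
  //     %agg0 = insertvalue { ptr, i32 } poison, ptr %cont, 0
  //     %agg1 = insertvalue { ptr, i32 } %agg0, i32 %val, 1
  //     ret { ptr, i32 } %agg1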
1868 BasicBlock *ReturnBB = nullptr; 1869 PHINode *ContinuationPhi = nullptr; 1870 SmallVector<PHINode *, 4> ReturnPHIs; 1871 1872 // Create all the functions in order after the main function. 1873 auto NextF = std::next(F.getIterator()); 1874 1875 // Create a continuation function for each of the suspend points. 1876 Clones.reserve(Shape.CoroSuspends.size()); 1877 for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) { 1878 auto Suspend = cast<CoroSuspendRetconInst>(CS); 1879 1880 // Create the clone declaration. 1881 auto Continuation = createCloneDeclaration( 1882 F, Shape, ".resume." + Twine(Idx), NextF, nullptr); 1883 Clones.push_back(Continuation); 1884 1885 // Insert a branch to the unified return block immediately before 1886 // the suspend point. 1887 auto SuspendBB = Suspend->getParent(); 1888 auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend); 1889 auto Branch = cast<BranchInst>(SuspendBB->getTerminator()); 1890 1891 // Create the unified return block. 1892 if (!ReturnBB) { 1893 // Place it before the first suspend. 1894 ReturnBB = 1895 BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB); 1896 Shape.RetconLowering.ReturnBlock = ReturnBB; 1897 1898 IRBuilder<> Builder(ReturnBB); 1899 1900 // First, the continuation. 1901 ContinuationPhi = 1902 Builder.CreatePHI(Continuation->getType(), Shape.CoroSuspends.size()); 1903 1904 // Create PHIs for all other return values. 1905 assert(ReturnPHIs.empty()); 1906 1907 // Next, all the directly-yielded values. 1908 for (auto *ResultTy : Shape.getRetconResultTypes()) 1909 ReturnPHIs.push_back( 1910 Builder.CreatePHI(ResultTy, Shape.CoroSuspends.size())); 1911 1912 // Build the return value. 1913 auto RetTy = F.getReturnType(); 1914 1915 // Cast the continuation value if necessary. 1916 // We can't rely on the types matching up because that type would 1917 // have to be infinite. 1918 auto CastedContinuationTy = 1919 (ReturnPHIs.empty() ? RetTy : RetTy->getStructElementType(0)); 1920 auto *CastedContinuation = 1921 Builder.CreateBitCast(ContinuationPhi, CastedContinuationTy); 1922 1923 Value *RetV = CastedContinuation; 1924 if (!ReturnPHIs.empty()) { 1925 auto ValueIdx = 0; 1926 RetV = PoisonValue::get(RetTy); 1927 RetV = Builder.CreateInsertValue(RetV, CastedContinuation, ValueIdx++); 1928 1929 for (auto Phi : ReturnPHIs) 1930 RetV = Builder.CreateInsertValue(RetV, Phi, ValueIdx++); 1931 } 1932 1933 Builder.CreateRet(RetV); 1934 } 1935 1936 // Branch to the return block. 1937 Branch->setSuccessor(0, ReturnBB); 1938 assert(ContinuationPhi); 1939 ContinuationPhi->addIncoming(Continuation, SuspendBB); 1940 for (auto [Phi, VUse] : 1941 llvm::zip_equal(ReturnPHIs, Suspend->value_operands())) 1942 Phi->addIncoming(VUse, SuspendBB); 1943 } 1944 1945 assert(Clones.size() == Shape.CoroSuspends.size()); 1946 1947 MetadataSetTy CommonDebugInfo{collectCommonDebugInfo(F)}; 1948 1949 for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) { 1950 auto Suspend = CS; 1951 auto Clone = Clones[Idx]; 1952 1953 coro::BaseCloner::createClone(F, "resume." 
+ Twine(Idx), Shape, Clone, 1954 Suspend, TTI, CommonDebugInfo); 1955 } 1956 } 1957 1958 namespace { 1959 class PrettyStackTraceFunction : public PrettyStackTraceEntry { 1960 Function &F; 1961 1962 public: 1963 PrettyStackTraceFunction(Function &F) : F(F) {} 1964 void print(raw_ostream &OS) const override { 1965 OS << "While splitting coroutine "; 1966 F.printAsOperand(OS, /*print type*/ false, F.getParent()); 1967 OS << "\n"; 1968 } 1969 }; 1970 } // namespace 1971 1972 /// Remove calls to llvm.coro.end in the original function. 1973 static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) { 1974 if (Shape.ABI != coro::ABI::Switch) { 1975 for (auto *End : Shape.CoroEnds) { 1976 replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); 1977 } 1978 } else { 1979 for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { 1980 auto &Context = End->getContext(); 1981 End->replaceAllUsesWith(ConstantInt::getFalse(Context)); 1982 End->eraseFromParent(); 1983 } 1984 } 1985 } 1986 1987 static bool hasSafeElideCaller(Function &F) { 1988 for (auto *U : F.users()) { 1989 if (auto *CB = dyn_cast<CallBase>(U)) { 1990 auto *Caller = CB->getFunction(); 1991 if (Caller && Caller->isPresplitCoroutine() && 1992 CB->hasFnAttr(llvm::Attribute::CoroElideSafe)) 1993 return true; 1994 } 1995 } 1996 return false; 1997 } 1998 1999 void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape, 2000 SmallVectorImpl<Function *> &Clones, 2001 TargetTransformInfo &TTI) { 2002 SwitchCoroutineSplitter::split(F, Shape, Clones, TTI); 2003 } 2004 2005 static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, 2006 coro::BaseABI &ABI, TargetTransformInfo &TTI, 2007 bool OptimizeFrame) { 2008 PrettyStackTraceFunction prettyStackTrace(F); 2009 2010 auto &Shape = ABI.Shape; 2011 assert(Shape.CoroBegin); 2012 2013 lowerAwaitSuspends(F, Shape); 2014 2015 simplifySuspendPoints(Shape); 2016 2017 normalizeCoroutine(F, Shape, TTI); 2018 ABI.buildCoroutineFrame(OptimizeFrame); 2019 replaceFrameSizeAndAlignment(Shape); 2020 2021 bool isNoSuspendCoroutine = Shape.CoroSuspends.empty(); 2022 2023 bool shouldCreateNoAllocVariant = 2024 !isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch && 2025 hasSafeElideCaller(F) && !F.hasFnAttribute(llvm::Attribute::NoInline); 2026 2027 // If there are no suspend points, no split required, just remove 2028 // the allocation and deallocation blocks, they are not needed. 2029 if (isNoSuspendCoroutine) { 2030 handleNoSuspendCoroutine(Shape); 2031 } else { 2032 ABI.splitCoroutine(F, Shape, Clones, TTI); 2033 } 2034 2035 // Replace all the swifterror operations in the original function. 2036 // This invalidates SwiftErrorOps in the Shape. 2037 replaceSwiftErrorOps(F, Shape, nullptr); 2038 2039 // Salvage debug intrinsics that point into the coroutine frame in the 2040 // original function. The Cloner has already salvaged debug info in the new 2041 // coroutine funclets. 
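  //
  // Sketch of the typical effect (variable names and offsets hypothetical):
  // a record such as
  //   #dbg_declare(ptr %x.addr, !var, !DIExpression(), ...)
  // for a local spilled into the frame ends up describing the frame slot
  // instead, e.g.
  //   #dbg_declare(ptr %frame, !var,
  //                !DIExpression(DW_OP_plus_uconst, <offset>), ...)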
2042   SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
2043   auto [DbgInsts, DbgVariableRecords] = collectDbgVariableIntrinsics(F);
2044   for (auto *DDI : DbgInsts)
2045     coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
2046   for (DbgVariableRecord *DVR : DbgVariableRecords)
2047     coro::salvageDebugInfo(ArgToAllocaMap, *DVR, false /*UseEntryValue*/);
2048 
2049   removeCoroEndsFromRampFunction(Shape);
2050 
2051   if (shouldCreateNoAllocVariant)
2052     SwitchCoroutineSplitter::createNoAllocVariant(F, Shape, Clones);
2053 }
2054 
2055 static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit(
2056     LazyCallGraph::Node &N, const coro::Shape &Shape,
2057     const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
2058     LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
2059     FunctionAnalysisManager &FAM) {
2060 
2061   auto *CurrentSCC = &C;
2062   if (!Clones.empty()) {
2063     switch (Shape.ABI) {
2064     case coro::ABI::Switch:
2065       // Each clone in the Switch lowering is independent of the other clones.
2066       // Let the LazyCallGraph know about each one separately.
2067       for (Function *Clone : Clones)
2068         CG.addSplitFunction(N.getFunction(), *Clone);
2069       break;
2070     case coro::ABI::Async:
2071     case coro::ABI::Retcon:
2072     case coro::ABI::RetconOnce:
2073       // Each clone in the Async/Retcon lowering references the other clones.
2074       // Let the LazyCallGraph know about all of them at once.
2075       if (!Clones.empty())
2076         CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
2077       break;
2078     }
2079 
2080     // Let the CGSCC infra handle the changes to the original function.
2081     CurrentSCC = &updateCGAndAnalysisManagerForCGSCCPass(CG, *CurrentSCC, N, AM,
2082                                                          UR, FAM);
2083   }
2084 
2085   // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
2086   // to the split functions.
2087   postSplitCleanup(N.getFunction());
2088   CurrentSCC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentSCC, N,
2089                                                           AM, UR, FAM);
2090   return *CurrentSCC;
2091 }
2092 
2093 /// Replace a call to llvm.coro.prepare.retcon.
2094 static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
2095                            LazyCallGraph::SCC &C) {
2096   auto CastFn = Prepare->getArgOperand(0); // as an i8*
2097   auto Fn = CastFn->stripPointerCasts();   // as its original type
2098 
2099   // Attempt to peephole this pattern:
2100   //   %0 = bitcast [[TYPE]] @some_function to i8*
2101   //   %1 = call @llvm.coro.prepare.retcon(i8* %0)
2102   //   %2 = bitcast %1 to [[TYPE]]
2103   // ==>
2104   //   %2 = @some_function
2105   for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
2106     // Look for bitcasts back to the original function type.
2107     auto *Cast = dyn_cast<BitCastInst>(U.getUser());
2108     if (!Cast || Cast->getType() != Fn->getType())
2109       continue;
2110 
2111     // Replace and remove the cast.
2112     Cast->replaceAllUsesWith(Fn);
2113     Cast->eraseFromParent();
2114   }
2115 
2116   // Replace any remaining uses with the function as an i8*.
2117   // This can never directly be a callee, so we don't need to update CG.
2118   Prepare->replaceAllUsesWith(CastFn);
2119   Prepare->eraseFromParent();
2120 
2121   // Kill dead bitcasts.
2122   while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
2123     if (!Cast->use_empty())
2124       break;
2125     CastFn = Cast->getOperand(0);
2126     Cast->eraseFromParent();
2127   }
2128 }
2129 
2130 static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
2131                                LazyCallGraph::SCC &C) {
2132   bool Changed = false;
2133   for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
2134     // Intrinsics can only be used in calls.
2135     auto *Prepare = cast<CallInst>(P.getUser());
2136     replacePrepare(Prepare, CG, C);
2137     Changed = true;
2138   }
2139 
2140   return Changed;
2141 }
2142 
2143 static void addPrepareFunction(const Module &M,
2144                                SmallVectorImpl<Function *> &Fns,
2145                                StringRef Name) {
2146   auto *PrepareFn = M.getFunction(Name);
2147   if (PrepareFn && !PrepareFn->use_empty())
2148     Fns.push_back(PrepareFn);
2149 }
2150 
2151 static std::unique_ptr<coro::BaseABI>
2152 CreateNewABI(Function &F, coro::Shape &S,
2153              std::function<bool(Instruction &)> IsMatCallback,
2154              const SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs) {
2155   if (S.CoroBegin->hasCustomABI()) {
2156     unsigned CustomABI = S.CoroBegin->getCustomABI();
2157     if (CustomABI >= GenCustomABIs.size())
2158       llvm_unreachable("Custom ABI not found among those specified");
2159     return GenCustomABIs[CustomABI](F, S);
2160   }
2161 
2162   switch (S.ABI) {
2163   case coro::ABI::Switch:
2164     return std::make_unique<coro::SwitchABI>(F, S, IsMatCallback);
2165   case coro::ABI::Async:
2166     return std::make_unique<coro::AsyncABI>(F, S, IsMatCallback);
2167   case coro::ABI::Retcon:
2168     return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback);
2169   case coro::ABI::RetconOnce:
2170     return std::make_unique<coro::AnyRetconABI>(F, S, IsMatCallback);
2171   }
2172   llvm_unreachable("Unknown ABI");
2173 }
2174 
2175 CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
2176     : CreateAndInitABI([](Function &F, coro::Shape &S) {
2177         std::unique_ptr<coro::BaseABI> ABI =
2178             CreateNewABI(F, S, coro::isTriviallyMaterializable, {});
2179         ABI->init();
2180         return ABI;
2181       }),
2182       OptimizeFrame(OptimizeFrame) {}
2183 
2184 CoroSplitPass::CoroSplitPass(
2185     SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame)
2186     : CreateAndInitABI([=](Function &F, coro::Shape &S) {
2187         std::unique_ptr<coro::BaseABI> ABI =
2188             CreateNewABI(F, S, coro::isTriviallyMaterializable, GenCustomABIs);
2189         ABI->init();
2190         return ABI;
2191       }),
2192       OptimizeFrame(OptimizeFrame) {}
2193 
2194 // For backward compatibility, this constructor takes a materializable callback
2195 // and creates a generator for an ABI with a modified materializable callback.
2196 CoroSplitPass::CoroSplitPass(std::function<bool(Instruction &)> IsMatCallback,
2197                              bool OptimizeFrame)
2198     : CreateAndInitABI([=](Function &F, coro::Shape &S) {
2199         std::unique_ptr<coro::BaseABI> ABI =
2200             CreateNewABI(F, S, IsMatCallback, {});
2201         ABI->init();
2202         return ABI;
2203       }),
2204       OptimizeFrame(OptimizeFrame) {}
2205 
2206 // For backward compatibility, this constructor takes a materializable callback
2207 // and creates a generator for an ABI with a modified materializable callback.
2208 CoroSplitPass::CoroSplitPass( 2209 std::function<bool(Instruction &)> IsMatCallback, 2210 SmallVector<CoroSplitPass::BaseABITy> GenCustomABIs, bool OptimizeFrame) 2211 : CreateAndInitABI([=](Function &F, coro::Shape &S) { 2212 std::unique_ptr<coro::BaseABI> ABI = 2213 CreateNewABI(F, S, IsMatCallback, GenCustomABIs); 2214 ABI->init(); 2215 return ABI; 2216 }), 2217 OptimizeFrame(OptimizeFrame) {} 2218 2219 PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, 2220 CGSCCAnalysisManager &AM, 2221 LazyCallGraph &CG, CGSCCUpdateResult &UR) { 2222 // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a 2223 // non-zero number of nodes, so we assume that here and grab the first 2224 // node's function's module. 2225 Module &M = *C.begin()->getFunction().getParent(); 2226 auto &FAM = 2227 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 2228 2229 // Check for uses of llvm.coro.prepare.retcon/async. 2230 SmallVector<Function *, 2> PrepareFns; 2231 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon"); 2232 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async"); 2233 2234 // Find coroutines for processing. 2235 SmallVector<LazyCallGraph::Node *> Coroutines; 2236 for (LazyCallGraph::Node &N : C) 2237 if (N.getFunction().isPresplitCoroutine()) 2238 Coroutines.push_back(&N); 2239 2240 if (Coroutines.empty() && PrepareFns.empty()) 2241 return PreservedAnalyses::all(); 2242 2243 auto *CurrentSCC = &C; 2244 // Split all the coroutines. 2245 for (LazyCallGraph::Node *N : Coroutines) { 2246 Function &F = N->getFunction(); 2247 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName() 2248 << "\n"); 2249 2250 // The suspend-crossing algorithm in buildCoroutineFrame gets tripped up 2251 // by unreachable blocks, so remove them as a first pass. Remove the 2252 // unreachable blocks before collecting intrinsics into Shape. 2253 removeUnreachableBlocks(F); 2254 2255 coro::Shape Shape(F); 2256 if (!Shape.CoroBegin) 2257 continue; 2258 2259 F.setSplittedCoroutine(); 2260 2261 std::unique_ptr<coro::BaseABI> ABI = CreateAndInitABI(F, Shape); 2262 2263 SmallVector<Function *, 4> Clones; 2264 auto &TTI = FAM.getResult<TargetIRAnalysis>(F); 2265 doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame); 2266 CurrentSCC = &updateCallGraphAfterCoroutineSplit( 2267 *N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM); 2268 2269 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2270 ORE.emit([&]() { 2271 return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F) 2272 << "Split '" << ore::NV("function", F.getName()) 2273 << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize) 2274 << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")"; 2275 }); 2276 2277 if (!Shape.CoroSuspends.empty()) { 2278 // Run the CGSCC pipeline on the original and newly split functions. 2279 UR.CWorklist.insert(CurrentSCC); 2280 for (Function *Clone : Clones) 2281 UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone))); 2282 } 2283 } 2284 2285 for (auto *PrepareFn : PrepareFns) { 2286 replaceAllPrepares(PrepareFn, CG, *CurrentSCC); 2287 } 2288 2289 return PreservedAnalyses::none(); 2290 } 2291