1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file contains classes used to discover whether, for a particular
9 // value, its definition precedes and its uses follow a suspend block. Such a
10 // value is referred to as a suspend-crossing value.
11 //
12 // Using the information discovered we form a Coroutine Frame structure to
13 // contain those values. All uses of those values are replaced with appropriate
14 // GEP + load from the coroutine frame. At the point of the definition we spill
15 // the value into the coroutine frame.
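//
// For example (illustrative only): a value %x that is defined before a
// co_await and used after it gets a field in the frame; a store to that field
// is emitted at the definition, and every use after the suspend is rewritten
// to a GEP into the frame plus a load.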
16 //===----------------------------------------------------------------------===//
17 
18 #include "ABI.h"
19 #include "CoroInternal.h"
20 #include "MaterializationUtils.h"
21 #include "SpillUtils.h"
22 #include "SuspendCrossingInfo.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/ScopeExit.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/Analysis/StackLifetime.h"
27 #include "llvm/IR/DIBuilder.h"
28 #include "llvm/IR/DebugInfo.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstIterator.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/OptimizedStructLayout.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include "llvm/Transforms/Utils/Local.h"
38 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
39 #include <algorithm>
40 #include <optional>
41 
42 using namespace llvm;
43 
44 extern cl::opt<bool> UseNewDbgInfoFormat;
45 
46 #define DEBUG_TYPE "coro-frame"
47 
48 namespace {
49 class FrameTypeBuilder;
50 // Mapping from the to-be-spilled value to all the users that need reload.
51 struct FrameDataInfo {
52   // All the values (that are not allocas) that need to be spilled to the
53   // frame.
54   coro::SpillInfo &Spills;
55   // Allocas contains all values defined as allocas that need to live in the
56   // frame.
57   SmallVectorImpl<coro::AllocaInfo> &Allocas;
58 
59   FrameDataInfo(coro::SpillInfo &Spills,
60                 SmallVectorImpl<coro::AllocaInfo> &Allocas)
61       : Spills(Spills), Allocas(Allocas) {}
62 
63   SmallVector<Value *, 8> getAllDefs() const {
64     SmallVector<Value *, 8> Defs;
65     for (const auto &P : Spills)
66       Defs.push_back(P.first);
67     for (const auto &A : Allocas)
68       Defs.push_back(A.Alloca);
69     return Defs;
70   }
71 
72   uint32_t getFieldIndex(Value *V) const {
73     auto Itr = FieldIndexMap.find(V);
74     assert(Itr != FieldIndexMap.end() &&
75            "Value does not have a frame field index");
76     return Itr->second;
77   }
78 
79   void setFieldIndex(Value *V, uint32_t Index) {
80     assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
81            "Cannot set the index for the same field twice.");
82     FieldIndexMap[V] = Index;
83   }
84 
85   Align getAlign(Value *V) const {
86     auto Iter = FieldAlignMap.find(V);
87     assert(Iter != FieldAlignMap.end());
88     return Iter->second;
89   }
90 
91   void setAlign(Value *V, Align AL) {
92     assert(FieldAlignMap.count(V) == 0);
93     FieldAlignMap.insert({V, AL});
94   }
95 
96   uint64_t getDynamicAlign(Value *V) const {
97     auto Iter = FieldDynamicAlignMap.find(V);
98     assert(Iter != FieldDynamicAlignMap.end());
99     return Iter->second;
100   }
101 
102   void setDynamicAlign(Value *V, uint64_t Align) {
103     assert(FieldDynamicAlignMap.count(V) == 0);
104     FieldDynamicAlignMap.insert({V, Align});
105   }
106 
107   uint64_t getOffset(Value *V) const {
108     auto Iter = FieldOffsetMap.find(V);
109     assert(Iter != FieldOffsetMap.end());
110     return Iter->second;
111   }
112 
113   void setOffset(Value *V, uint64_t Offset) {
114     assert(FieldOffsetMap.count(V) == 0);
115     FieldOffsetMap.insert({V, Offset});
116   }
117 
118   // Remap the index of every field in the frame, using the final layout index.
119   void updateLayoutIndex(FrameTypeBuilder &B);
120 
121 private:
122   // LayoutIndexUpdateStarted is used to avoid updating the index of any field
123   // twice by mistake.
124   bool LayoutIndexUpdateStarted = false;
125   // Map from values to their slot indexes on the frame. They will be first set
126   // with their original insertion field index. After the frame is built, their
127   // indexes will be updated into the final layout index.
128   DenseMap<Value *, uint32_t> FieldIndexMap;
129   // Map from values to their alignment on the frame. They will be set after
130   // the frame is built.
131   DenseMap<Value *, Align> FieldAlignMap;
132   DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
133   // Map from values to their offset on the frame. They will be set after
134   // the frame is built.
135   DenseMap<Value *, uint64_t> FieldOffsetMap;
136 };
137 } // namespace
138 
139 #ifndef NDEBUG
140 static void dumpSpills(StringRef Title, const coro::SpillInfo &Spills) {
141   dbgs() << "------------- " << Title << " --------------\n";
142   for (const auto &E : Spills) {
143     E.first->dump();
144     dbgs() << "   user: ";
145     for (auto *I : E.second)
146       I->dump();
147   }
148 }
149 
150 static void dumpAllocas(const SmallVectorImpl<coro::AllocaInfo> &Allocas) {
151   dbgs() << "------------- Allocas --------------\n";
152   for (const auto &A : Allocas) {
153     A.Alloca->dump();
154   }
155 }
156 #endif
157 
158 namespace {
159 using FieldIDType = size_t;
160 // We cannot rely solely on the natural alignment of a type when building a
161 // coroutine frame; if the alignment specified on the Alloca instruction
162 // differs from the natural alignment of the alloca type, we will need to
163 // insert padding.
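// For example (illustrative): an `alloca i32, align 64` must be placed at a
// 64-byte aligned offset in the frame even though the natural alignment of i32
// is only 4, which may require padding bytes before the field.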
164 class FrameTypeBuilder {
165 private:
166   struct Field {
167     uint64_t Size;
168     uint64_t Offset;
169     Type *Ty;
170     FieldIDType LayoutFieldIndex;
171     Align Alignment;
172     Align TyAlignment;
173     uint64_t DynamicAlignBuffer;
174   };
175 
176   const DataLayout &DL;
177   LLVMContext &Context;
178   uint64_t StructSize = 0;
179   Align StructAlign;
180   bool IsFinished = false;
181 
182   std::optional<Align> MaxFrameAlignment;
183 
184   SmallVector<Field, 8> Fields;
185   DenseMap<Value*, unsigned> FieldIndexByKey;
186 
187 public:
188   FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
189                    std::optional<Align> MaxFrameAlignment)
190       : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
191 
192   /// Add a field to this structure for the storage of an `alloca`
193   /// instruction.
194   [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
195                                               bool IsHeader = false) {
196     Type *Ty = AI->getAllocatedType();
197 
198     // Make an array type if this is a static array allocation.
199     if (AI->isArrayAllocation()) {
200       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
201         Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
202       else
203         report_fatal_error("Coroutines cannot handle non static allocas yet");
204     }
205 
206     return addField(Ty, AI->getAlign(), IsHeader);
207   }
208 
209   /// We want to put allocas whose lifetime ranges do not overlap
210   /// into one slot of the coroutine frame.
211   /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
212   ///
213   ///     cppcoro::task<void> alternative_paths(bool cond) {
214   ///         if (cond) {
215   ///             big_structure a;
216   ///             process(a);
217   ///             co_await something();
218   ///         } else {
219   ///             big_structure b;
220   ///             process2(b);
221   ///             co_await something();
222   ///         }
223   ///     }
224   ///
225   /// We want to put variable a and variable b in the same slot to
226   /// reduce the size of the coroutine frame.
227   ///
228   /// This function uses the StackLifetime algorithm to partition the
229   /// AllocaInsts in Spills into non-overlapping sets, so that allocas in the
230   /// same non-overlapping set can share a slot in the coroutine frame. It then
231   /// adds a field for each non-overlapping set, using the largest alloca's
232   /// type as the field type.
233   ///
234   /// Side effects: because we sort the allocas, the order of allocas in the
235   /// frame may differ from their order in the source code.
236   void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
237                           coro::Shape &Shape, bool OptimizeFrame);
238 
239   /// Add a field to this structure.
240   [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
241                                      bool IsHeader = false,
242                                      bool IsSpillOfValue = false) {
243     assert(!IsFinished && "adding fields to a finished builder");
244     assert(Ty && "must provide a type for a field");
245 
246     // The field size is always the alloc size of the type.
247     uint64_t FieldSize = DL.getTypeAllocSize(Ty);
248 
249     // For an alloca with size=0, we don't need to add a field; it can
250     // just point to any index in the frame. Use index 0.
251     if (FieldSize == 0) {
252       return 0;
253     }
254 
255     // The field alignment might not be the type alignment, but we need
256     // to remember the type alignment anyway to build the type.
257     // If we are spilling values we don't need to worry about ABI alignment
258     // concerns.
259     Align ABIAlign = DL.getABITypeAlign(Ty);
260     Align TyAlignment = ABIAlign;
261     if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
262       TyAlignment = *MaxFrameAlignment;
263     Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);
264 
265     // The field alignment could be bigger than the max frame alignment; in
266     // that case we request additional storage to be able to dynamically align
267     // the pointer.
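    // For example (illustrative): with MaxFrameAlignment = 16 and a field that
    // requires 64-byte alignment, offsetToAlignment(16, 64) = 48 extra bytes
    // are reserved; a 16-aligned field address is then at most 48 bytes below
    // the next 64-byte boundary, so the pointer can always be realigned inside
    // the buffer (see GetFramePointer in insertSpills).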
268     uint64_t DynamicAlignBuffer = 0;
269     if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
270       DynamicAlignBuffer =
271           offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
272       FieldAlignment = *MaxFrameAlignment;
273       FieldSize = FieldSize + DynamicAlignBuffer;
274     }
275 
276     // Lay out header fields immediately.
277     uint64_t Offset;
278     if (IsHeader) {
279       Offset = alignTo(StructSize, FieldAlignment);
280       StructSize = Offset + FieldSize;
281 
282       // Everything else has a flexible offset.
283     } else {
284       Offset = OptimizedStructLayoutField::FlexibleOffset;
285     }
286 
287     Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
288                       DynamicAlignBuffer});
289     return Fields.size() - 1;
290   }
291 
292   /// Finish the layout and set the body on the given type.
293   void finish(StructType *Ty);
294 
295   uint64_t getStructSize() const {
296     assert(IsFinished && "not yet finished!");
297     return StructSize;
298   }
299 
300   Align getStructAlign() const {
301     assert(IsFinished && "not yet finished!");
302     return StructAlign;
303   }
304 
305   FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
306     assert(IsFinished && "not yet finished!");
307     return Fields[Id].LayoutFieldIndex;
308   }
309 
310   Field getLayoutField(FieldIDType Id) const {
311     assert(IsFinished && "not yet finished!");
312     return Fields[Id];
313   }
314 };
315 } // namespace
316 
317 void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
318   auto Updater = [&](Value *I) {
319     auto Field = B.getLayoutField(getFieldIndex(I));
320     setFieldIndex(I, Field.LayoutFieldIndex);
321     setAlign(I, Field.Alignment);
322     uint64_t dynamicAlign =
323         Field.DynamicAlignBuffer
324             ? Field.DynamicAlignBuffer + Field.Alignment.value()
325             : 0;
326     setDynamicAlign(I, dynamicAlign);
327     setOffset(I, Field.Offset);
328   };
329   LayoutIndexUpdateStarted = true;
330   for (auto &S : Spills)
331     Updater(S.first);
332   for (const auto &A : Allocas)
333     Updater(A.Alloca);
334   LayoutIndexUpdateStarted = false;
335 }
336 
337 void FrameTypeBuilder::addFieldForAllocas(const Function &F,
338                                           FrameDataInfo &FrameData,
339                                           coro::Shape &Shape,
340                                           bool OptimizeFrame) {
341   using AllocaSetType = SmallVector<AllocaInst *, 4>;
342   SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
343 
344   // We need to add a field for the allocas at the end of this function.
345   auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
346     for (auto AllocaList : NonOverlapedAllocas) {
347       auto *LargestAI = *AllocaList.begin();
348       FieldIDType Id = addFieldForAlloca(LargestAI);
349       for (auto *Alloca : AllocaList)
350         FrameData.setFieldIndex(Alloca, Id);
351     }
352   });
353 
354   if (!OptimizeFrame) {
355     for (const auto &A : FrameData.Allocas) {
356       AllocaInst *Alloca = A.Alloca;
357       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
358     }
359     return;
360   }
361 
362   // Because there are paths from the lifetime.start to coro.end
363   // for each alloca, the live ranges of all allocas overlap in the
364   // blocks that contain coro.end and their successor blocks.
365   // So we choose to skip those blocks when we calculate the live range
366   // for each alloca. That should be reasonable, since there shouldn't be uses
367   // in these blocks and the coroutine frame shouldn't be used outside the
368   // coroutine body.
369   //
370   // Note that the user of coro.suspend may not be a SwitchInst. However, that
371   // case seems too complex to handle, and it is harmless to skip these
372   // patterns since doing so merely prevents the affected allocas from sharing
373   // a slot.
374   DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
375   for (auto *CoroSuspendInst : Shape.CoroSuspends) {
376     for (auto *U : CoroSuspendInst->users()) {
377       if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
378         auto *SWI = const_cast<SwitchInst *>(ConstSWI);
379         DefaultSuspendDest[SWI] = SWI->getDefaultDest();
380         SWI->setDefaultDest(SWI->getSuccessor(1));
381       }
382     }
383   }
384 
385   auto ExtractAllocas = [&]() {
386     AllocaSetType Allocas;
387     Allocas.reserve(FrameData.Allocas.size());
388     for (const auto &A : FrameData.Allocas)
389       Allocas.push_back(A.Alloca);
390     return Allocas;
391   };
392   StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
393                                       StackLifetime::LivenessType::May);
394   StackLifetimeAnalyzer.run();
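  // Two allocas interfere if their may-live ranges overlap; interfering
  // allocas must not be merged into the same frame slot.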
395   auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
396     return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
397         StackLifetimeAnalyzer.getLiveRange(AI2));
398   };
399   auto GetAllocaSize = [&](const coro::AllocaInfo &A) {
400     std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
401     assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
402     assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
403     return RetSize->getFixedValue();
404   };
405   // Put larger allocas at the front, so that larger allocas have higher
406   // priority to merge, which can potentially save more space. This also keeps
407   // each AllocaSet ordered, so we can easily get the largest Alloca in an
408   // AllocaSet.
409   sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
410     return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
411   });
412   for (const auto &A : FrameData.Allocas) {
413     AllocaInst *Alloca = A.Alloca;
414     bool Merged = false;
415     // Check whether the Alloca interferes with any existing
416     // NonOverlappedAllocaSet. If it does not, insert the alloca into that
417     // NonOverlappedAllocaSet.
418     for (auto &AllocaSet : NonOverlapedAllocas) {
419       assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
420       bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
421         return DoAllocasInterfere(Alloca, Iter);
422       });
423       // If the alignment of A is a multiple of the alignment of B, the
424       // address of A satisfies the alignment requirement of B.
425       //
426       // There may be more fine-grained strategies to handle the alignment
427       // information during the merging process, but they seem hard to
428       // implement and would benefit little.
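      // For example (illustrative): an alloca requiring align 4 can reuse a
      // slot whose largest alloca has align 16, but an alloca requiring
      // align 16 cannot reuse a slot that is only 4-byte aligned.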
429       bool Alignable = [&]() -> bool {
430         auto *LargestAlloca = *AllocaSet.begin();
431         return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
432                0;
433       }();
434       bool CouldMerge = NoInterference && Alignable;
435       if (!CouldMerge)
436         continue;
437       AllocaSet.push_back(Alloca);
438       Merged = true;
439       break;
440     }
441     if (!Merged) {
442       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
443     }
444   }
445   // Restore the default destination of each switch statement that we
446   // redirected above.
447   for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
448     SwitchInst *SWI = SwitchAndDefaultDest.first;
449     BasicBlock *DestBB = SwitchAndDefaultDest.second;
450     SWI->setDefaultDest(DestBB);
451   }
452   // This debug output tells us which allocas were merged into one slot.
453   LLVM_DEBUG(for (auto &AllocaSet
454                   : NonOverlapedAllocas) {
455     if (AllocaSet.size() > 1) {
456       dbgs() << "In Function:" << F.getName() << "\n";
457       dbgs() << "Find Union Set "
458              << "\n";
459       dbgs() << "\tAllocas are \n";
460       for (auto Alloca : AllocaSet)
461         dbgs() << "\t\t" << *Alloca << "\n";
462     }
463   });
464 }
465 
466 void FrameTypeBuilder::finish(StructType *Ty) {
467   assert(!IsFinished && "already finished!");
468 
469   // Prepare the optimal-layout field array.
470   // The Id in the layout field is a pointer to our Field for it.
471   SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
472   LayoutFields.reserve(Fields.size());
473   for (auto &Field : Fields) {
474     LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
475                               Field.Offset);
476   }
477 
478   // Perform layout.
479   auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
480   StructSize = SizeAndAlign.first;
481   StructAlign = SizeAndAlign.second;
482 
483   auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
484     return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
485   };
486 
487   // We need to produce a packed struct type if there's a field whose
488   // assigned offset isn't a multiple of its natural type alignment.
489   bool Packed = [&] {
490     for (auto &LayoutField : LayoutFields) {
491       auto &F = getField(LayoutField);
492       if (!isAligned(F.TyAlignment, LayoutField.Offset))
493         return true;
494     }
495     return false;
496   }();
497 
498   // Build the struct body.
499   SmallVector<Type*, 16> FieldTypes;
500   FieldTypes.reserve(LayoutFields.size() * 3 / 2);
501   uint64_t LastOffset = 0;
502   for (auto &LayoutField : LayoutFields) {
503     auto &F = getField(LayoutField);
504 
505     auto Offset = LayoutField.Offset;
506 
507     // Add a padding field if there's a padding gap and we're either
508     // building a packed struct or the padding gap is more than we'd
509     // get from aligning to the field type's natural alignment.
510     assert(Offset >= LastOffset);
511     if (Offset != LastOffset) {
512       if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
513         FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
514                                             Offset - LastOffset));
515     }
516 
517     F.Offset = Offset;
518     F.LayoutFieldIndex = FieldTypes.size();
519 
520     FieldTypes.push_back(F.Ty);
521     if (F.DynamicAlignBuffer) {
522       FieldTypes.push_back(
523           ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
524     }
525     LastOffset = Offset + F.Size;
526   }
527 
528   Ty->setBody(FieldTypes, Packed);
529 
530 #ifndef NDEBUG
531   // Check that the IR layout matches the offsets we expect.
532   auto Layout = DL.getStructLayout(Ty);
533   for (auto &F : Fields) {
534     assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
535     assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
536   }
537 #endif
538 
539   IsFinished = true;
540 }
541 
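/// Cache the DILocalVariable for each value in the frame, if one can be found
/// among the value's dbg.declare users (both the intrinsic and the
/// DbgVariableRecord forms). Only declares with an empty expression are used,
/// so the variable describes the value directly.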
542 static void cacheDIVar(FrameDataInfo &FrameData,
543                        DenseMap<Value *, DILocalVariable *> &DIVarCache) {
544   for (auto *V : FrameData.getAllDefs()) {
545     if (DIVarCache.contains(V))
546       continue;
547 
548     auto CacheIt = [&DIVarCache, V](const auto &Container) {
549       auto *I = llvm::find_if(Container, [](auto *DDI) {
550         return DDI->getExpression()->getNumElements() == 0;
551       });
552       if (I != Container.end())
553         DIVarCache.insert({V, (*I)->getVariable()});
554     };
555     CacheIt(findDbgDeclares(V));
556     CacheIt(findDVRDeclares(V));
557   }
558 }
559 
560 /// Create a name for Type. It uses MDString to store the newly created
561 /// string to avoid a memory leak.
562 static StringRef solveTypeName(Type *Ty) {
563   if (Ty->isIntegerTy()) {
564     // The longest name in common use may be '__int_128', which has 9 characters.
565     SmallString<16> Buffer;
566     raw_svector_ostream OS(Buffer);
567     OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
568     auto *MDName = MDString::get(Ty->getContext(), OS.str());
569     return MDName->getString();
570   }
571 
572   if (Ty->isFloatingPointTy()) {
573     if (Ty->isFloatTy())
574       return "__float_";
575     if (Ty->isDoubleTy())
576       return "__double_";
577     return "__floating_type_";
578   }
579 
580   if (Ty->isPointerTy())
581     return "PointerType";
582 
583   if (Ty->isStructTy()) {
584     if (!cast<StructType>(Ty)->hasName())
585       return "__LiteralStructType_";
586 
587     auto Name = Ty->getStructName();
588 
589     SmallString<16> Buffer(Name);
590     for (auto &Iter : Buffer)
591       if (Iter == '.' || Iter == ':')
592         Iter = '_';
593     auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
594     return MDName->getString();
595   }
596 
597   return "UnknownType";
598 }
599 
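/// Build a best-effort, artificial DIType for an IR type. This is used for
/// frame fields that have no corresponding source-level debug variable:
/// integers, floating-point types, pointers, and structs get dedicated types,
/// and anything else falls back to an array of unsigned chars covering the
/// type's size.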
600 static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
601                            const DataLayout &Layout, DIScope *Scope,
602                            unsigned LineNum,
603                            DenseMap<Type *, DIType *> &DITypeCache) {
604   if (DIType *DT = DITypeCache.lookup(Ty))
605     return DT;
606 
607   StringRef Name = solveTypeName(Ty);
608 
609   DIType *RetType = nullptr;
610 
611   if (Ty->isIntegerTy()) {
612     auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
613     RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
614                                       llvm::DINode::FlagArtificial);
615   } else if (Ty->isFloatingPointTy()) {
616     RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
617                                       dwarf::DW_ATE_float,
618                                       llvm::DINode::FlagArtificial);
619   } else if (Ty->isPointerTy()) {
620     // Construct a PointerType pointing to null (aka void *) instead of
621     // exploring the pointee type, to avoid an infinite recursion problem. For
622     // example, we would be in trouble if we traversed recursively:
623     //
624     //  struct Node {
625     //      Node* ptr;
626     //  };
627     RetType =
628         Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
629                                   Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
630                                   /*DWARFAddressSpace=*/std::nullopt, Name);
631   } else if (Ty->isStructTy()) {
632     auto *DIStruct = Builder.createStructType(
633         Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
634         Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
635         llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray());
636 
637     auto *StructTy = cast<StructType>(Ty);
638     SmallVector<Metadata *, 16> Elements;
639     for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
640       DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
641                                  Scope, LineNum, DITypeCache);
642       assert(DITy);
643       Elements.push_back(Builder.createMemberType(
644           Scope, DITy->getName(), Scope->getFile(), LineNum,
645           DITy->getSizeInBits(), DITy->getAlignInBits(),
646           Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
647           llvm::DINode::FlagArtificial, DITy));
648     }
649 
650     Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));
651 
652     RetType = DIStruct;
653   } else {
654     LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
655     TypeSize Size = Layout.getTypeSizeInBits(Ty);
656     auto *CharSizeType = Builder.createBasicType(
657         Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);
658 
659     if (Size <= 8)
660       RetType = CharSizeType;
661     else {
662       if (Size % 8 != 0)
663         Size = TypeSize::getFixed(Size + 8 - (Size % 8));
664 
665       RetType = Builder.createArrayType(
666           Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
667           Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
668     }
669   }
670 
671   DITypeCache.insert({Ty, RetType});
672   return RetType;
673 }
674 
675 /// Build artificial debug info for C++ coroutine frames to allow users to
676 /// inspect the contents of the frame directly.
677 ///
678 /// Create debug information for the coroutine frame with the debug name
679 /// "__coro_frame". The debug information for the fields of the coroutine
680 /// frame is constructed in the following way:
681 /// 1. For each value in the frame, we search its dbg.declare uses to find the
682 ///    corresponding debug variable for the value. If we can find the debug
683 ///    variable, we get full and accurate debug information.
684 /// 2. If we can't get debug information in step 1, we fall back to building a
685 ///    DIType from the IR type. We do this in solveDIType. We only handle
686 ///    integer, floating-point, pointer, and struct types for now.
687 static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
688                                 FrameDataInfo &FrameData) {
689   DISubprogram *DIS = F.getSubprogram();
690   // If there is no DISubprogram for F, it implies the function was not
691   // compiled with debug info, so we don't need to generate debug info for
692   // the frame either.
693   if (!DIS || !DIS->getUnit() ||
694       !dwarf::isCPlusPlus(
695           (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()) ||
696       DIS->getUnit()->getEmissionKind() != DICompileUnit::DebugEmissionKind::FullDebug)
697     return;
698 
699   assert(Shape.ABI == coro::ABI::Switch &&
700          "We can only build debug information for C++ coroutines now.\n");
701 
702   DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
703 
704   assert(Shape.getPromiseAlloca() &&
705          "Coroutine with switch ABI should own Promise alloca");
706 
707   DIFile *DFile = DIS->getFile();
708   unsigned LineNum = DIS->getLine();
709 
710   DICompositeType *FrameDITy = DBuilder.createStructType(
711       DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
712       DFile, LineNum, Shape.FrameSize * 8,
713       Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
714       llvm::DINodeArray());
715   StructType *FrameTy = Shape.FrameTy;
716   SmallVector<Metadata *, 16> Elements;
717   DataLayout Layout = F.getDataLayout();
718 
719   DenseMap<Value *, DILocalVariable *> DIVarCache;
720   cacheDIVar(FrameData, DIVarCache);
721 
722   unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
723   unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
724   unsigned IndexIndex = Shape.SwitchLowering.IndexField;
725 
726   DenseMap<unsigned, StringRef> NameCache;
727   NameCache.insert({ResumeIndex, "__resume_fn"});
728   NameCache.insert({DestroyIndex, "__destroy_fn"});
729   NameCache.insert({IndexIndex, "__coro_index"});
730 
731   Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
732        *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
733        *IndexTy = FrameTy->getElementType(IndexIndex);
734 
735   DenseMap<unsigned, DIType *> TyCache;
736   TyCache.insert(
737       {ResumeIndex, DBuilder.createPointerType(
738                         nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
739   TyCache.insert(
740       {DestroyIndex, DBuilder.createPointerType(
741                          nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});
742 
743   /// FIXME: If we fill the field `SizeInBits` with the actual size of
744   /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
745   TyCache.insert({IndexIndex, DBuilder.createBasicType(
746                                   "__coro_index",
747                                   (Layout.getTypeSizeInBits(IndexTy) < 8)
748                                       ? 8
749                                       : Layout.getTypeSizeInBits(IndexTy),
750                                   dwarf::DW_ATE_unsigned_char)});
751 
752   for (auto *V : FrameData.getAllDefs()) {
753     if (!DIVarCache.contains(V))
754       continue;
755 
756     auto Index = FrameData.getFieldIndex(V);
757 
758     NameCache.insert({Index, DIVarCache[V]->getName()});
759     TyCache.insert({Index, DIVarCache[V]->getType()});
760   }
761 
762   // Cache from field index to (Align, Offset) pair.
763   DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
764   // The Align and Offset of Resume function and Destroy function are fixed.
765   OffsetCache.insert({ResumeIndex, {8, 0}});
766   OffsetCache.insert({DestroyIndex, {8, 8}});
767   OffsetCache.insert(
768       {IndexIndex,
769        {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
770 
771   for (auto *V : FrameData.getAllDefs()) {
772     auto Index = FrameData.getFieldIndex(V);
773 
774     OffsetCache.insert(
775         {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
776   }
777 
778   DenseMap<Type *, DIType *> DITypeCache;
779   // This counter is used to avoid duplicate type names. E.g., there may be
780   // many i32 and i64 types in one coroutine, and we would use i32_0 and
781   // i32_1 to distinguish them, since it makes no sense for the names of the
782   // fields to conflict with each other.
783   unsigned UnknownTypeNum = 0;
784   for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
785     if (!OffsetCache.contains(Index))
786       continue;
787 
788     std::string Name;
789     uint64_t SizeInBits;
790     uint32_t AlignInBits;
791     uint64_t OffsetInBits;
792     DIType *DITy = nullptr;
793 
794     Type *Ty = FrameTy->getElementType(Index);
795     assert(Ty->isSized() && "We can't handle type which is not sized.\n");
796     SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
797     AlignInBits = OffsetCache[Index].first * 8;
798     OffsetInBits = OffsetCache[Index].second * 8;
799 
800     if (auto It = NameCache.find(Index); It != NameCache.end()) {
801       Name = It->second.str();
802       DITy = TyCache[Index];
803     } else {
804       DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
805       assert(DITy && "SolveDIType shouldn't return nullptr.\n");
806       Name = DITy->getName().str();
807       Name += "_" + std::to_string(UnknownTypeNum);
808       UnknownTypeNum++;
809     }
810 
811     Elements.push_back(DBuilder.createMemberType(
812         FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
813         llvm::DINode::FlagArtificial, DITy));
814   }
815 
816   DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));
817 
818   auto *FrameDIVar =
819       DBuilder.createAutoVariable(DIS, "__coro_frame", DFile, LineNum,
820                                   FrameDITy, true, DINode::FlagArtificial);
821 
822   // The subprogram has a RetainedNodes field which records the debug
823   // variables it contains. So we need to add __coro_frame to its
824   // RetainedNodes.
825   //
826   // If we don't add __coro_frame to the RetainedNodes, the user may get
827   // `no symbol __coro_frame in context` rather than the more precise
828   // `__coro_frame` has been optimized out.
829   auto RetainedNodes = DIS->getRetainedNodes();
830   SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
831                                                RetainedNodes.end());
832   RetainedNodesVec.push_back(FrameDIVar);
833   DIS->replaceOperandWith(7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
834 
835   // Construct the location for the frame debug variable. The column number
836   // is fake but it should be fine.
837   DILocation *DILoc =
838       DILocation::get(DIS->getContext(), LineNum, /*Column=*/1, DIS);
839   assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));
840 
841   if (UseNewDbgInfoFormat) {
842     DbgVariableRecord *NewDVR =
843         new DbgVariableRecord(ValueAsMetadata::get(Shape.FramePtr), FrameDIVar,
844                               DBuilder.createExpression(), DILoc,
845                               DbgVariableRecord::LocationType::Declare);
846     BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
847     It->getParent()->insertDbgRecordBefore(NewDVR, It);
848   } else {
849     DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
850                            DBuilder.createExpression(), DILoc,
851                            &*Shape.getInsertPtAfterFramePtr());
852   }
853 }
854 
855 // Build a struct that will keep state for an active coroutine.
856 //   struct f.frame {
857 //     ResumeFnTy ResumeFnAddr;
858 //     ResumeFnTy DestroyFnAddr;
859 //     ... promise (if present) ...
860 //     int ResumeIndex;
861 //     ... spills ...
862 //   };
863 static StructType *buildFrameType(Function &F, coro::Shape &Shape,
864                                   FrameDataInfo &FrameData,
865                                   bool OptimizeFrame) {
866   LLVMContext &C = F.getContext();
867   const DataLayout &DL = F.getDataLayout();
868   StructType *FrameTy = [&] {
869     SmallString<32> Name(F.getName());
870     Name.append(".Frame");
871     return StructType::create(C, Name);
872   }();
873 
874   // We will use this value to cap the alignment of spilled values.
875   std::optional<Align> MaxFrameAlignment;
876   if (Shape.ABI == coro::ABI::Async)
877     MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
878   FrameTypeBuilder B(C, DL, MaxFrameAlignment);
879 
880   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
881   std::optional<FieldIDType> SwitchIndexFieldId;
882 
883   if (Shape.ABI == coro::ABI::Switch) {
884     auto *FnPtrTy = PointerType::getUnqual(C);
885 
886     // Add header fields for the resume and destroy functions.
887     // We can rely on these being perfectly packed.
888     (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
889     (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
890 
891     // PromiseAlloca field needs to be explicitly added here because it's
892     // a header field with a fixed offset based on its alignment. Hence it
893     // needs special handling and cannot be added to FrameData.Allocas.
894     if (PromiseAlloca)
895       FrameData.setFieldIndex(
896           PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));
897 
898     // Add a field to store the suspend index.  This doesn't need to
899     // be in the header.
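    // For example (illustrative): a coroutine with 3 suspend points needs
    // ceil(log2(3)) = 2 bits to distinguish them, so the index field is i2.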
900     unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
901     Type *IndexType = Type::getIntNTy(C, IndexBits);
902 
903     SwitchIndexFieldId = B.addField(IndexType, std::nullopt);
904   } else {
905     assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
906   }
907 
908   // Because multiple allocas may share the same field slot,
909   // we add the allocas to fields here.
910   B.addFieldForAllocas(F, FrameData, Shape, OptimizeFrame);
911   // Add PromiseAlloca to Allocas list so that
912   // 1. updateLayoutIndex could update its index after
913   // `performOptimizedStructLayout`
914   // 2. it is processed in insertSpills.
915   if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
916     // We assume that the promise alloca won't be modified before
917     // CoroBegin and no alias will be created before CoroBegin.
918     FrameData.Allocas.emplace_back(
919         PromiseAlloca, DenseMap<Instruction *, std::optional<APInt>>{}, false);
920   // Create an entry for every spilled value.
921   for (auto &S : FrameData.Spills) {
922     Type *FieldType = S.first->getType();
923     // For byval arguments, we need to store the pointed value in the frame,
924     // instead of the pointer itself.
925     if (const Argument *A = dyn_cast<Argument>(S.first))
926       if (A->hasByValAttr())
927         FieldType = A->getParamByValType();
928     FieldIDType Id = B.addField(FieldType, std::nullopt, false /*header*/,
929                                 true /*IsSpillOfValue*/);
930     FrameData.setFieldIndex(S.first, Id);
931   }
932 
933   B.finish(FrameTy);
934   FrameData.updateLayoutIndex(B);
935   Shape.FrameAlign = B.getStructAlign();
936   Shape.FrameSize = B.getStructSize();
937 
938   switch (Shape.ABI) {
939   case coro::ABI::Switch: {
940     // In the switch ABI, remember the switch-index field.
941     auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
942     Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
943     Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
944     Shape.SwitchLowering.IndexOffset = IndexField.Offset;
945 
946     // Also round the frame size up to a multiple of its alignment, as is
947     // generally expected in C/C++.
948     Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
949     break;
950   }
951 
952   // In the retcon ABI, remember whether the frame is inline in the storage.
953   case coro::ABI::Retcon:
954   case coro::ABI::RetconOnce: {
955     auto Id = Shape.getRetconCoroId();
956     Shape.RetconLowering.IsFrameInlineInStorage
957       = (B.getStructSize() <= Id->getStorageSize() &&
958          B.getStructAlign() <= Id->getStorageAlignment());
959     break;
960   }
961   case coro::ABI::Async: {
962     Shape.AsyncLowering.FrameOffset =
963         alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
964     // Also make the final context size a multiple of the context alignment to
965     // make allocation easier for allocators.
966     Shape.AsyncLowering.ContextSize =
967         alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
968                 Shape.AsyncLowering.getContextAlignment());
969     if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
970       report_fatal_error(
971           "The alignment requirement of frame variables cannot be higher than "
972           "the alignment of the async function context");
973     }
974     break;
975   }
976   }
977 
978   return FrameTy;
979 }
980 
981 // Replace all alloca and SSA values that are accessed across suspend points
982 // with GetElementPointer from coroutine frame + loads and stores. Create an
983 // AllocaSpillBB that will become the new entry block for the resume parts of
984 // the coroutine:
985 //
986 //    %hdl = coro.begin(...)
987 //    whatever
988 //
989 // becomes:
990 //
991 //    %hdl = coro.begin(...)
992 //    br label %AllocaSpillBB
993 //
994 //  AllocaSpillBB:
995 //    ; geps corresponding to allocas that were moved to coroutine frame
996 //    br label PostSpill
997 //
998 //  PostSpill:
999 //    whatever
1000 //
1001 //
1002 static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
1003   LLVMContext &C = Shape.CoroBegin->getContext();
1004   Function *F = Shape.CoroBegin->getFunction();
1005   IRBuilder<> Builder(C);
1006   StructType *FrameTy = Shape.FrameTy;
1007   Value *FramePtr = Shape.FramePtr;
1008   DominatorTree DT(*F);
1009   SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
1010 
1011   // Create a GEP with the given index into the coroutine frame for the original
1012   // value Orig. Appends an extra 0 index for array-allocas, preserving the
1013   // original type.
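  // For example (illustrative): an alloca assigned field N of %f.Frame is
  // rewritten to
  //   getelementptr inbounds %f.Frame, ptr %FramePtr, i32 0, i32 N
  // with a trailing `, i32 0` appended when the alloca is an array allocation.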
1014   auto GetFramePointer = [&](Value *Orig) -> Value * {
1015     FieldIDType Index = FrameData.getFieldIndex(Orig);
1016     SmallVector<Value *, 3> Indices = {
1017         ConstantInt::get(Type::getInt32Ty(C), 0),
1018         ConstantInt::get(Type::getInt32Ty(C), Index),
1019     };
1020 
1021     if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1022       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
1023         auto Count = CI->getValue().getZExtValue();
1024         if (Count > 1) {
1025           Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
1026         }
1027       } else {
1028         report_fatal_error("Coroutines cannot handle non static allocas yet");
1029       }
1030     }
1031 
1032     auto GEP = cast<GetElementPtrInst>(
1033         Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
1034     if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1035       if (FrameData.getDynamicAlign(Orig) != 0) {
1036         assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
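        // Round the field address up to the requested alignment inside the
        // dynamic-align buffer: (addr + (align - 1)) & ~(align - 1).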
1037         auto *M = AI->getModule();
1038         auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
1039         auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
1040         auto *AlignMask =
1041             ConstantInt::get(IntPtrTy, AI->getAlign().value() - 1);
1042         PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
1043         PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
1044         return Builder.CreateIntToPtr(PtrValue, AI->getType());
1045       }
1046       // If the type of the GEP is not equal to the type of the AllocaInst, it
1047       // implies that the AllocaInst may be sharing a frame slot with another
1048       // AllocaInst. So we cast the GEP to the AllocaInst's type here to reuse
1049       // the frame storage.
1050       //
1051       // Note: If we change the strategy dealing with alignment, we need to refine
1052       // this casting.
1053       if (GEP->getType() != Orig->getType())
1054         return Builder.CreateAddrSpaceCast(GEP, Orig->getType(),
1055                                            Orig->getName() + Twine(".cast"));
1056     }
1057     return GEP;
1058   };
1059 
1060   for (auto const &E : FrameData.Spills) {
1061     Value *Def = E.first;
1062     auto SpillAlignment = Align(FrameData.getAlign(Def));
1063     // Create a store instruction storing the value into the
1064     // coroutine frame.
1065     BasicBlock::iterator InsertPt = coro::getSpillInsertionPt(Shape, Def, DT);
1066 
1067     Type *ByValTy = nullptr;
1068     if (auto *Arg = dyn_cast<Argument>(Def)) {
1069       // If we're spilling an Argument, make sure we clear 'nocapture'
1070       // from the coroutine function.
1071       Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
1072 
1073       if (Arg->hasByValAttr())
1074         ByValTy = Arg->getParamByValType();
1075     }
1076 
1077     auto Index = FrameData.getFieldIndex(Def);
1078     Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1079     auto *G = Builder.CreateConstInBoundsGEP2_32(
1080         FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
1081     if (ByValTy) {
1082       // For byval arguments, we need to store the pointed value in the frame,
1083       // instead of the pointer itself.
1084       auto *Value = Builder.CreateLoad(ByValTy, Def);
1085       Builder.CreateAlignedStore(Value, G, SpillAlignment);
1086     } else {
1087       Builder.CreateAlignedStore(Def, G, SpillAlignment);
1088     }
1089 
1090     BasicBlock *CurrentBlock = nullptr;
1091     Value *CurrentReload = nullptr;
1092     for (auto *U : E.second) {
1093       // If we have not seen the use block, create a load instruction to reload
1094       // the spilled value from the coroutine frame. Populates the Value pointer
1095       // reference provided with the frame GEP.
1096       if (CurrentBlock != U->getParent()) {
1097         CurrentBlock = U->getParent();
1098         Builder.SetInsertPoint(CurrentBlock,
1099                                CurrentBlock->getFirstInsertionPt());
1100 
1101         auto *GEP = GetFramePointer(E.first);
1102         GEP->setName(E.first->getName() + Twine(".reload.addr"));
1103         if (ByValTy)
1104           CurrentReload = GEP;
1105         else
1106           CurrentReload = Builder.CreateAlignedLoad(
1107               FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1108               SpillAlignment, E.first->getName() + Twine(".reload"));
1109 
1110         TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(Def);
1111         TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(Def);
1112         // Try our best to find a dbg.declare. If the spill is a temp, there may not
1113         // be a direct dbg.declare. Walk up the load chain to find one from an
1114         // alias.
1115         if (F->getSubprogram()) {
1116           auto *CurDef = Def;
1117           while (DIs.empty() && DVRs.empty() && isa<LoadInst>(CurDef)) {
1118             auto *LdInst = cast<LoadInst>(CurDef);
1119             // Only consider ptr to ptr same type load.
1120             if (LdInst->getPointerOperandType() != LdInst->getType())
1121               break;
1122             CurDef = LdInst->getPointerOperand();
1123             if (!isa<AllocaInst, LoadInst>(CurDef))
1124               break;
1125             DIs = findDbgDeclares(CurDef);
1126             DVRs = findDVRDeclares(CurDef);
1127           }
1128         }
1129 
1130         auto SalvageOne = [&](auto *DDI) {
1131           bool AllowUnresolved = false;
1132           // This dbg.declare is preserved for all coro-split function
1133           // fragments. It will be unreachable in the main function, and
1134           // processed by coro::salvageDebugInfo() by CoroCloner.
1135           if (UseNewDbgInfoFormat) {
1136             DbgVariableRecord *NewDVR = new DbgVariableRecord(
1137                 ValueAsMetadata::get(CurrentReload), DDI->getVariable(),
1138                 DDI->getExpression(), DDI->getDebugLoc(),
1139                 DbgVariableRecord::LocationType::Declare);
1140             Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
1141                 NewDVR, Builder.GetInsertPoint());
1142           } else {
1143             DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
1144                 .insertDeclare(CurrentReload, DDI->getVariable(),
1145                                DDI->getExpression(), DDI->getDebugLoc(),
1146                                &*Builder.GetInsertPoint());
1147           }
1148           // This dbg.declare is for the main function entry point.  It
1149           // will be deleted in all coro-split functions.
1150           coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
1151         };
1152         for_each(DIs, SalvageOne);
1153         for_each(DVRs, SalvageOne);
1154       }
1155 
1156       // If we have a single-edge PHINode, remove it and replace it with a
1157       // reload from the coroutine frame. (We already took care of multi-edge
1158       // PHINodes by normalizing them in the rewritePHIs function.)
1159       if (auto *PN = dyn_cast<PHINode>(U)) {
1160         assert(PN->getNumIncomingValues() == 1 &&
1161                "unexpected number of incoming "
1162                "values in the PHINode");
1163         PN->replaceAllUsesWith(CurrentReload);
1164         PN->eraseFromParent();
1165         continue;
1166       }
1167 
1168       // Replace all uses of CurrentValue in the current instruction with
1169       // reload.
1170       U->replaceUsesOfWith(Def, CurrentReload);
1171       // Instructions are added to Def's user list if the attached
1172       // debug records use Def. Update those now.
1173       for (DbgVariableRecord &DVR : filterDbgVars(U->getDbgRecordRange()))
1174         DVR.replaceVariableLocationOp(Def, CurrentReload, true);
1175     }
1176   }
1177 
1178   BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();
1179 
1180   auto SpillBlock = FramePtrBB->splitBasicBlock(
1181       Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB");
1182   SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
1183   Shape.AllocaSpillBlock = SpillBlock;
1184 
1185   // retcon and retcon.once lowering assumes all uses have been sunk.
1186   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
1187       Shape.ABI == coro::ABI::Async) {
1188     // If we found any allocas, replace all of their remaining uses with GEPs.
1189     Builder.SetInsertPoint(SpillBlock, SpillBlock->begin());
1190     for (const auto &P : FrameData.Allocas) {
1191       AllocaInst *Alloca = P.Alloca;
1192       auto *G = GetFramePointer(Alloca);
1193 
1194       // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
1195       // here, as we are changing location of the instruction.
1196       G->takeName(Alloca);
1197       Alloca->replaceAllUsesWith(G);
1198       Alloca->eraseFromParent();
1199     }
1200     return;
1201   }
1202 
1203   // If we found any allocas, replace all of their remaining uses with GEP
1204   // instructions. To retain debuggability, we replace the uses of allocas in
1205   // dbg.declares and dbg.values with the reload from the frame.
1206   // Note: We cannot replace the alloca with GEP instructions indiscriminately,
1207   // as some of the uses may not be dominated by CoroBegin.
1208   Builder.SetInsertPoint(Shape.AllocaSpillBlock,
1209                          Shape.AllocaSpillBlock->begin());
1210   SmallVector<Instruction *, 4> UsersToUpdate;
1211   for (const auto &A : FrameData.Allocas) {
1212     AllocaInst *Alloca = A.Alloca;
1213     UsersToUpdate.clear();
1214     for (User *U : Alloca->users()) {
1215       auto *I = cast<Instruction>(U);
1216       if (DT.dominates(Shape.CoroBegin, I))
1217         UsersToUpdate.push_back(I);
1218     }
1219     if (UsersToUpdate.empty())
1220       continue;
1221     auto *G = GetFramePointer(Alloca);
1222     G->setName(Alloca->getName() + Twine(".reload.addr"));
1223 
1224     SmallVector<DbgVariableIntrinsic *, 4> DIs;
1225     SmallVector<DbgVariableRecord *> DbgVariableRecords;
1226     findDbgUsers(DIs, Alloca, &DbgVariableRecords);
1227     for (auto *DVI : DIs)
1228       DVI->replaceUsesOfWith(Alloca, G);
1229     for (auto *DVR : DbgVariableRecords)
1230       DVR->replaceVariableLocationOp(Alloca, G);
1231 
1232     for (Instruction *I : UsersToUpdate) {
1233       // It is meaningless to retain lifetime intrinsics that refer to members
1234       // of the coroutine frame, and such meaningless lifetime intrinsics may
1235       // block further optimizations.
1236       if (I->isLifetimeStartOrEnd()) {
1237         I->eraseFromParent();
1238         continue;
1239       }
1240 
1241       I->replaceUsesOfWith(Alloca, G);
1242     }
1243   }
1244   Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
1245   for (const auto &A : FrameData.Allocas) {
1246     AllocaInst *Alloca = A.Alloca;
1247     if (A.MayWriteBeforeCoroBegin) {
1248       // MayWriteBeforeCoroBegin means the alloca is potentially modified before CoroBegin.
1249       if (Alloca->isArrayAllocation())
1250         report_fatal_error(
1251             "Coroutines cannot handle copying of array allocas yet");
1252 
1253       auto *G = GetFramePointer(Alloca);
1254       auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
1255       Builder.CreateStore(Value, G);
1256     }
1257     // For each alias to Alloca created before CoroBegin but used after
1258     // CoroBegin, we recreate them after CoroBegin by applying the offset
1259     // to the pointer in the frame.
1260     for (const auto &Alias : A.Aliases) {
1261       auto *FramePtr = GetFramePointer(Alloca);
1262       auto &Value = *Alias.second;
1263       auto ITy = IntegerType::get(C, Value.getBitWidth());
1264       auto *AliasPtr =
1265           Builder.CreatePtrAdd(FramePtr, ConstantInt::get(ITy, Value));
1266       Alias.first->replaceUsesWithIf(
1267           AliasPtr, [&](Use &U) { return DT.dominates(Shape.CoroBegin, U); });
1268     }
1269   }
1270 
1271   // PromiseAlloca is added to FrameData.Allocas with MayWriteBeforeCoroBegin
1272   // set to false, so the handling above does not cover writes to the
1273   // PromiseAlloca before CoroBegin. That may be problematic in edge cases. See
1274   // https://github.com/llvm/llvm-project/issues/57861 for an example.
1275   if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
1276     AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca;
1277     // Check whether there is a memory access to the promise alloca before CoroBegin.
1278     bool HasAccessingPromiseBeforeCB = llvm::any_of(PA->uses(), [&](Use &U) {
1279       auto *Inst = dyn_cast<Instruction>(U.getUser());
1280       if (!Inst || DT.dominates(Shape.CoroBegin, Inst))
1281         return false;
1282 
1283       if (auto *CI = dyn_cast<CallInst>(Inst)) {
1284         // It is fine if the call wouldn't write to the Promise.
1285         // This is possible for @llvm.coro.id intrinsics, which
1286         // would take the promise as the second argument as a
1287         // marker.
1288         if (CI->onlyReadsMemory() ||
1289             CI->onlyReadsMemory(CI->getArgOperandNo(&U)))
1290           return false;
1291         return true;
1292       }
1293 
1294       return isa<StoreInst>(Inst) ||
1295              // It may take too much time to track the uses.
1296              // Be conservative in case the use may escape.
1297              isa<GetElementPtrInst>(Inst) ||
1298              // There would always be a bitcast for the promise alloca
1299              // before opaque pointers were enabled. And now that
1300              // opaque pointers are enabled by default, this should be
1301              // fine.
1302              isa<BitCastInst>(Inst);
1303     });
1304     if (HasAccessingPromiseBeforeCB) {
1305       Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
1306       auto *G = GetFramePointer(PA);
1307       auto *Value = Builder.CreateLoad(PA->getAllocatedType(), PA);
1308       Builder.CreateStore(Value, G);
1309     }
1310   }
1311 }
1312 
1313 // Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
1314 // PHI in InsertedBB.
1315 static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
1316                                          BasicBlock *InsertedBB,
1317                                          BasicBlock *PredBB,
1318                                          PHINode *UntilPHI = nullptr) {
1319   auto *PN = cast<PHINode>(&SuccBB->front());
1320   do {
1321     int Index = PN->getBasicBlockIndex(InsertedBB);
1322     Value *V = PN->getIncomingValue(Index);
1323     PHINode *InputV = PHINode::Create(
1324         V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName());
1325     InputV->insertBefore(InsertedBB->begin());
1326     InputV->addIncoming(V, PredBB);
1327     PN->setIncomingValue(Index, InputV);
1328     PN = dyn_cast<PHINode>(PN->getNextNode());
1329   } while (PN != UntilPHI);
1330 }
1331 
1332 // Rewrites the PHI Nodes in a cleanuppad.
1333 static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
1334                                      CleanupPadInst *CleanupPad) {
1335   // For every incoming edge to a CleanupPad we will create a new block holding
1336   // all incoming values in single-value PHI nodes. We will then create another
1337   // block to act as a dispatcher (as all unwind edges for related EH blocks
1338   // must be the same).
1339   //
1340   // cleanuppad:
1341   //    %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
1342   //    %3 = cleanuppad within none []
1343   //
1344   // It will create:
1345   //
1346   // cleanuppad.corodispatch
1347   //    %2 = phi i8[0, %catchswitch], [1, %catch.1]
1348   //    %3 = cleanuppad within none []
1349   //    switch i8 %2, label %unreachable
1350   //            [i8 0, label %cleanuppad.from.catchswitch
1351   //             i8 1, label %cleanuppad.from.catch.1]
1352   // cleanuppad.from.catchswitch:
1353   //    %4 = phi i32 [%0, %catchswitch]
1354   //    br label %cleanuppad
1355   // cleanuppad.from.catch.1:
1356   //    %6 = phi i32 [%1, %catch.1]
1357   //    br label %cleanuppad
1358   // cleanuppad:
1359   //    %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
1360   //                 [%6, %cleanuppad.from.catch.1]
1361 
1362   // Unreachable BB, in case the dispatcher switches on an invalid value.
1363   auto *UnreachBB = BasicBlock::Create(
1364       CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
1365   IRBuilder<> Builder(UnreachBB);
1366   Builder.CreateUnreachable();
1367 
1368   // Create a new cleanuppad which will be the dispatcher.
1369   auto *NewCleanupPadBB =
1370       BasicBlock::Create(CleanupPadBB->getContext(),
1371                          CleanupPadBB->getName() + Twine(".corodispatch"),
1372                          CleanupPadBB->getParent(), CleanupPadBB);
1373   Builder.SetInsertPoint(NewCleanupPadBB);
1374   auto *SwitchType = Builder.getInt8Ty();
1375   auto *SetDispatchValuePN =
1376       Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
1377   CleanupPad->removeFromParent();
1378   CleanupPad->insertAfter(SetDispatchValuePN);
1379   auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
1380                                                 pred_size(CleanupPadBB));
1381 
1382   int SwitchIndex = 0;
1383   SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
1384   for (BasicBlock *Pred : Preds) {
1385     // Create a new cleanuppad and move the PHI values to there.
1386     auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
1387                                       CleanupPadBB->getName() +
1388                                           Twine(".from.") + Pred->getName(),
1389                                       CleanupPadBB->getParent(), CleanupPadBB);
1390     updatePhiNodes(CleanupPadBB, Pred, CaseBB);
1391     CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
1392                     Pred->getName());
1393     Builder.SetInsertPoint(CaseBB);
1394     Builder.CreateBr(CleanupPadBB);
1395     movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);
1396 
1397     // Update this Pred to the new unwind point.
1398     setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);
1399 
1400     // Setup the switch in the dispatcher.
1401     auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
1402     SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
1403     SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
1404     SwitchIndex++;
1405   }
1406 }
1407 
1408 static void cleanupSinglePredPHIs(Function &F) {
1409   SmallVector<PHINode *, 32> Worklist;
1410   for (auto &BB : F) {
1411     for (auto &Phi : BB.phis()) {
1412       if (Phi.getNumIncomingValues() == 1) {
1413         Worklist.push_back(&Phi);
1414       } else
1415         break;
1416     }
1417   }
1418   while (!Worklist.empty()) {
1419     auto *Phi = Worklist.pop_back_val();
1420     auto *OriginalValue = Phi->getIncomingValue(0);
1421     Phi->replaceAllUsesWith(OriginalValue);
1422   }
1423 }
1424 
1425 static void rewritePHIs(BasicBlock &BB) {
1426   // For every incoming edge we will create a block holding all
1427   // incoming values in single-value PHI nodes.
1428   //
1429   // loop:
1430   //    %n.val = phi i32[%n, %entry], [%inc, %loop]
1431   //
1432   // It will create:
1433   //
1434   // loop.from.entry:
1435   //    %n.loop.pre = phi i32 [%n, %entry]
1436   //    br label %loop
1437   // loop.from.loop:
1438   //    %inc.loop.pre = phi i32 [%inc, %loop]
1439   //    br label %loop
1440   //
1441   // After this rewrite, further analysis will ignore any phi nodes with more
1442   // than one incoming edge.
1443 
1444   // TODO: Simplify PHINodes in the basic block to remove duplicate
1445   // predecessors.
1446 
1447   // Special case for CleanupPad: all EH blocks must have the same unwind edge
1448   // so we need to create an additional "dispatcher" block.
1449   if (auto *CleanupPad =
1450           dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
1451     SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1452     for (BasicBlock *Pred : Preds) {
1453       if (CatchSwitchInst *CS =
1454               dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
1455         // CleanupPad with a CatchSwitch predecessor: therefore this is an
1456         // unwind destination that needs to be handled specially.
1457         assert(CS->getUnwindDest() == &BB);
1458         (void)CS;
1459         rewritePHIsForCleanupPad(&BB, CleanupPad);
1460         return;
1461       }
1462     }
1463   }
1464 
1465   LandingPadInst *LandingPad = nullptr;
1466   PHINode *ReplPHI = nullptr;
1467   if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
1468     // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
1469     // We replace the original landing pad with a PHINode that will collect the
1470     // results from all of them.
1471     ReplPHI = PHINode::Create(LandingPad->getType(), 1, "");
1472     ReplPHI->insertBefore(LandingPad->getIterator());
1473     ReplPHI->takeName(LandingPad);
1474     LandingPad->replaceAllUsesWith(ReplPHI);
1475     // We will erase the original landing pad at the end of this function after
1476     // ehAwareSplitEdge cloned it in the transition blocks.
1477   }
1478 
1479   SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1480   for (BasicBlock *Pred : Preds) {
1481     auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
1482     IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
1483 
1484     // Stop the moving of values at ReplPHI, as this is either null or the PHI
1485     // that replaced the landing pad.
1486     movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
1487   }
1488 
1489   if (LandingPad) {
1490     // Calls to the ehAwareSplitEdge function cloned the original landing
1491     // pad; we no longer need it.
1492     LandingPad->eraseFromParent();
1493   }
1494 }
1495 
1496 static void rewritePHIs(Function &F) {
1497   SmallVector<BasicBlock *, 8> WorkList;
1498 
1499   for (BasicBlock &BB : F)
1500     if (auto *PN = dyn_cast<PHINode>(&BB.front()))
1501       if (PN->getNumIncomingValues() > 1)
1502         WorkList.push_back(&BB);
1503 
1504   for (BasicBlock *BB : WorkList)
1505     rewritePHIs(*BB);
1506 }
1507 
1508 // Splits the block at a particular instruction unless it is the first
1509 // instruction in the block with a single predecessor.
1510 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
1511   auto *BB = I->getParent();
1512   if (&BB->front() == I) {
1513     if (BB->getSinglePredecessor()) {
1514       BB->setName(Name);
1515       return BB;
1516     }
1517   }
1518   return BB->splitBasicBlock(I, Name);
1519 }
1520 
1521 // Split above and below a particular instruction so that it
1522 // will be all alone by itself in a block.
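// For illustration, a sketch of the CFG effect (block names follow the Name
// argument): splitAround(S, "CoroSuspend") applied to
//   bb:
//     ...
//     S
//     ...
// leaves S alone in its own block:
//   bb:
//     ...
//     br label %CoroSuspend
//   CoroSuspend:
//     S
//     br label %AfterCoroSuspend
//   AfterCoroSuspend:
//     ...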
1523 static void splitAround(Instruction *I, const Twine &Name) {
1524   splitBlockIfNotFirst(I, Name);
1525   splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1526 }
1527 
1528 /// After we split the coroutine, will the given basic block be along
1529 /// an obvious exit path for the resumption function?
1530 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1531                                               unsigned depth = 3) {
1532   // If we've bottomed out our depth count, stop searching and assume
1533   // that the path might loop back.
1534   if (depth == 0) return false;
1535 
1536   // If this is a suspend block, we're about to exit the resumption function.
1537   if (coro::isSuspendBlock(BB))
1538     return true;
1539 
1540   // Recurse into the successors.
1541   for (auto *Succ : successors(BB)) {
1542     if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1543       return false;
1544   }
1545 
1546   // If none of the successors leads back in a loop, we're on an exit/abort.
1547   return true;
1548 }
1549 
1550 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1551   // Look for a free that isn't sufficiently obviously followed by
1552   // either a suspend or a termination, i.e. something that will leave
1553   // the coro resumption frame.
1554   for (auto *U : AI->users()) {
1555     auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1556     if (!FI) continue;
1557 
1558     if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1559       return true;
1560   }
1561 
1562   // If we never found one, we don't need a stack save.
1563   return false;
1564 }
1565 
1566 /// Turn each of the given local allocas into a normal (dynamic) alloca
1567 /// instruction.
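///
/// A sketch of the lowering, for illustration only (operands abbreviated;
/// the stacksave/stackrestore pair is only emitted when
/// localAllocaNeedsStackSave returns true):
///   %id  = call token @llvm.coro.alloca.alloc(...)
///   %ptr = call ptr @llvm.coro.alloca.get(token %id)
///   ...
///   call void @llvm.coro.alloca.free(token %id)
/// becomes roughly
///   %save = call ptr @llvm.stacksave.p0()
///   %mem  = alloca i8, i64 <size>
///   ...uses of %ptr now use %mem...
///   call void @llvm.stackrestore.p0(ptr %save)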
1568 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1569                               SmallVectorImpl<Instruction*> &DeadInsts) {
1570   for (auto *AI : LocalAllocas) {
1571     IRBuilder<> Builder(AI);
1572 
1573     // Save the stack depth.  Try to avoid doing this if the stackrestore
1574     // would immediately precede a suspend or other exit from the function.
1575     Value *StackSave = nullptr;
1576     if (localAllocaNeedsStackSave(AI))
1577       StackSave = Builder.CreateStackSave();
1578 
1579     // Allocate memory.
1580     auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
1581     Alloca->setAlignment(AI->getAlignment());
1582 
1583     for (auto *U : AI->users()) {
1584       // Replace gets with the allocation.
1585       if (isa<CoroAllocaGetInst>(U)) {
1586         U->replaceAllUsesWith(Alloca);
1587 
1588       // Replace frees with stackrestores.  This is safe because
1589       // alloca.alloc is required to obey a stack discipline, although we
1590       // don't enforce that structurally.
1591       } else {
1592         auto FI = cast<CoroAllocaFreeInst>(U);
1593         if (StackSave) {
1594           Builder.SetInsertPoint(FI);
1595           Builder.CreateStackRestore(StackSave);
1596         }
1597       }
1598       DeadInsts.push_back(cast<Instruction>(U));
1599     }
1600 
1601     DeadInsts.push_back(AI);
1602   }
1603 }
1604 
1605 /// Get the current swifterror value.
1606 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1607                                      coro::Shape &Shape) {
1608   // Make a fake function pointer as a sort of intrinsic.
1609   auto FnTy = FunctionType::get(ValueTy, {}, false);
1610   auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
1611 
1612   auto Call = Builder.CreateCall(FnTy, Fn, {});
1613   Shape.SwiftErrorOps.push_back(Call);
1614 
1615   return Call;
1616 }
1617 
1618 /// Set the given value as the current swifterror value.
1619 ///
1620 /// Returns a slot that can be used as a swifterror slot.
1621 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1622                                      coro::Shape &Shape) {
1623   // Make a fake function pointer as a sort of intrinsic.
1624   auto FnTy = FunctionType::get(Builder.getPtrTy(),
1625                                 {V->getType()}, false);
1626   auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
1627 
1628   auto Call = Builder.CreateCall(FnTy, Fn, { V });
1629   Shape.SwiftErrorOps.push_back(Call);
1630 
1631   return Call;
1632 }
1633 
1634 /// Set the swifterror value from the given alloca before a call,
1635 /// then put it back in the alloca afterwards.
1636 ///
1637 /// Returns an address that will stand in for the swifterror slot
1638 /// until splitting.
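///
/// A sketch of the resulting IR, for illustration only (the "null" callees
/// are the placeholder swifterror accessors produced by the helpers above;
/// they are resolved when the coroutine is split):
///   %v    = load ptr, ptr %alloca
///   %slot = call ptr null(ptr %v)           ; set the swifterror value
///   call void @foo(ptr swifterror %slot)    ; caller redirects the use to %slot
///   %v2   = call ptr null()                 ; get the swifterror value
///   store ptr %v2, ptr %alloca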
1639 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1640                                                  AllocaInst *Alloca,
1641                                                  coro::Shape &Shape) {
1642   auto ValueTy = Alloca->getAllocatedType();
1643   IRBuilder<> Builder(Call);
1644 
1645   // Load the current value from the alloca and set it as the
1646   // swifterror value.
1647   auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1648   auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1649 
1650   // Move to after the call.  Since swifterror only has a guaranteed
1651   // value on normal exits, we can ignore implicit and explicit unwind
1652   // edges.
1653   if (isa<CallInst>(Call)) {
1654     Builder.SetInsertPoint(Call->getNextNode());
1655   } else {
1656     auto Invoke = cast<InvokeInst>(Call);
1657     Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1658   }
1659 
1660   // Get the current swifterror value and store it to the alloca.
1661   auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1662   Builder.CreateStore(ValueAfterCall, Alloca);
1663 
1664   return Addr;
1665 }
1666 
1667 /// Eliminate a formerly-swifterror alloca by inserting the get/set
1668 /// intrinsics and attempting to MemToReg the alloca away.
1669 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1670                                       coro::Shape &Shape) {
1671   for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
1672     // swifterror values can only be used in very specific ways.
1673     // We take advantage of that here.
1674     auto User = Use.getUser();
1675     if (isa<LoadInst>(User) || isa<StoreInst>(User))
1676       continue;
1677 
1678     assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1679     auto Call = cast<Instruction>(User);
1680 
1681     auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1682 
1683     // Use the returned slot address as the call argument.
1684     Use.set(Addr);
1685   }
1686 
1687   // All the uses should be loads and stores now.
1688   assert(isAllocaPromotable(Alloca));
1689 }
1690 
1691 /// "Eliminate" a swifterror argument by reducing it to the alloca case
1692 /// and then loading and storing in the prologue and epilogue.
1693 ///
1694 /// The argument keeps the swifterror flag.
1695 static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
1696                                         coro::Shape &Shape,
1697                              SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
1698   IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
1699 
1700   auto ArgTy = cast<PointerType>(Arg.getType());
1701   auto ValueTy = PointerType::getUnqual(F.getContext());
1702 
1703   // Reduce to the alloca case:
1704 
1705   // Create an alloca and replace all uses of the arg with it.
1706   auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
1707   Arg.replaceAllUsesWith(Alloca);
1708 
1709   // Set an initial value in the alloca.  swifterror is always null on entry.
1710   auto InitialValue = Constant::getNullValue(ValueTy);
1711   Builder.CreateStore(InitialValue, Alloca);
1712 
1713   // Find all the suspends in the function and save and restore around them.
1714   for (auto *Suspend : Shape.CoroSuspends) {
1715     (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
1716   }
1717 
1718   // Find all the coro.ends in the function and restore the error value.
1719   for (auto *End : Shape.CoroEnds) {
1720     Builder.SetInsertPoint(End);
1721     auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
1722     (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
1723   }
1724 
1725   // Now we can use the alloca logic.
1726   AllocasToPromote.push_back(Alloca);
1727   eliminateSwiftErrorAlloca(F, Alloca, Shape);
1728 }
1729 
1730 /// Eliminate all problematic uses of swifterror arguments and allocas
1731 /// from the function.  We'll fix them up later when splitting the function.
1732 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1733   SmallVector<AllocaInst*, 4> AllocasToPromote;
1734 
1735   // Look for a swifterror argument.
1736   for (auto &Arg : F.args()) {
1737     if (!Arg.hasSwiftErrorAttr()) continue;
1738 
1739     eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1740     break;
1741   }
1742 
1743   // Look for swifterror allocas.
1744   for (auto &Inst : F.getEntryBlock()) {
1745     auto Alloca = dyn_cast<AllocaInst>(&Inst);
1746     if (!Alloca || !Alloca->isSwiftError()) continue;
1747 
1748     // Clear the swifterror flag.
1749     Alloca->setSwiftError(false);
1750 
1751     AllocasToPromote.push_back(Alloca);
1752     eliminateSwiftErrorAlloca(F, Alloca, Shape);
1753   }
1754 
1755   // If we have any allocas to promote, compute a dominator tree and
1756   // promote them en masse.
1757   if (!AllocasToPromote.empty()) {
1758     DominatorTree DT(F);
1759     PromoteMemToReg(AllocasToPromote, DT);
1760   }
1761 }
1762 
1763 /// For each local variable whose uses all live inside a single region
1764 /// between suspend points, sink its lifetime.start marker into the block
1765 /// just after that suspend block. Doing so minimizes the lifetime of each
1766 /// variable, hence minimizing the amount of data we put on the frame.
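///
/// A sketch of the effect, for illustration only (block and value names are
/// made up):
///   entry:
///     %x = alloca i32
///     call void @llvm.lifetime.start.p0(i64 4, ptr %x)   ; original marker
///     ...
///   suspend.bb:
///     %s = call i8 @llvm.coro.suspend(token %save, i1 false)
///   resume.bb:                              ; all uses of %x are below here
///     call void @llvm.lifetime.start.p0(i64 4, ptr %x)   ; sunk marker
///     store i32 0, ptr %x
/// Since %x is no longer live across the suspend point, it does not need a
/// slot in the coroutine frame.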
1767 static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
1768                                      SuspendCrossingInfo &Checker,
1769                                      const DominatorTree &DT) {
1770   if (F.hasOptNone())
1771     return;
1772 
1773   // Collect all possible basic blocks which may dominate all uses of allocas.
1774   SmallPtrSet<BasicBlock *, 4> DomSet;
1775   DomSet.insert(&F.getEntryBlock());
1776   for (auto *CSI : Shape.CoroSuspends) {
1777     BasicBlock *SuspendBlock = CSI->getParent();
1778     assert(coro::isSuspendBlock(SuspendBlock) &&
1779            SuspendBlock->getSingleSuccessor() &&
1780            "should have split coro.suspend into its own block");
1781     DomSet.insert(SuspendBlock->getSingleSuccessor());
1782   }
1783 
1784   for (Instruction &I : instructions(F)) {
1785     AllocaInst* AI = dyn_cast<AllocaInst>(&I);
1786     if (!AI)
1787       continue;
1788 
1789     for (BasicBlock *DomBB : DomSet) {
1790       bool Valid = true;
1791       SmallVector<Instruction *, 1> Lifetimes;
1792 
1793       auto isLifetimeStart = [](Instruction* I) {
1794         if (auto* II = dyn_cast<IntrinsicInst>(I))
1795           return II->getIntrinsicID() == Intrinsic::lifetime_start;
1796         return false;
1797       };
1798 
1799       auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
1800         if (isLifetimeStart(U)) {
1801           Lifetimes.push_back(U);
1802           return true;
1803         }
1804         if (!U->hasOneUse() || U->stripPointerCasts() != AI)
1805           return false;
1806         if (isLifetimeStart(U->user_back())) {
1807           Lifetimes.push_back(U->user_back());
1808           return true;
1809         }
1810         return false;
1811       };
1812 
1813       for (User *U : AI->users()) {
1814         Instruction *UI = cast<Instruction>(U);
1815         // For all users except lifetime.start markers: if they are all
1816         // dominated by one of the candidate basic blocks and none of them
1817         // crosses a suspend point, then there is no need to spill the
1818         // instruction.
1819         if (!DT.dominates(DomBB, UI->getParent()) ||
1820             Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
1821           // Skip lifetime.start, GEP and bitcast used by lifetime.start
1822           // markers.
1823           if (collectLifetimeStart(UI, AI))
1824             continue;
1825           Valid = false;
1826           break;
1827         }
1828       }
1829       // Sink the lifetime.start markers into the dominating block when the
1830       // existing markers sit outside of it.
1831       if (Valid && Lifetimes.size() != 0) {
1832         auto *NewLifetime = Lifetimes[0]->clone();
1833         NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI);
1834         NewLifetime->insertBefore(DomBB->getTerminator());
1835 
1836         // The lifetime.start markers outside the block are no longer needed.
1837         for (Instruction *S : Lifetimes)
1838           S->eraseFromParent();
1839 
1840         break;
1841       }
1842     }
1843   }
1844 }
1845 
1846 static std::optional<std::pair<Value &, DIExpression &>>
1847 salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1848                      bool UseEntryValue, Function *F, Value *Storage,
1849                      DIExpression *Expr, bool SkipOutermostLoad) {
1850   IRBuilder<> Builder(F->getContext());
1851   auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
1852   while (isa<IntrinsicInst>(InsertPt))
1853     ++InsertPt;
1854   Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
1855 
1856   while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
1857     if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
1858       Storage = LdInst->getPointerOperand();
1859       // FIXME: This is a heuristic that works around the fact that
1860       // LLVM IR debug intrinsics cannot yet distinguish between
1861       // memory and value locations: Because a dbg.declare(alloca) is
1862       // implicitly a memory location, no DW_OP_deref operation for the
1863       // last direct load from an alloca is necessary.  This condition
1864       // effectively drops the *last* DW_OP_deref in the expression.
1865       if (!SkipOutermostLoad)
1866         Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
1867     } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
1868       Storage = StInst->getValueOperand();
1869     } else {
1870       SmallVector<uint64_t, 16> Ops;
1871       SmallVector<Value *, 0> AdditionalValues;
1872       Value *Op = llvm::salvageDebugInfoImpl(
1873           *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
1874           AdditionalValues);
1875       if (!Op || !AdditionalValues.empty()) {
1876         // If salvaging failed or salvaging produced more than one location
1877         // operand, give up.
1878         break;
1879       }
1880       Storage = Op;
1881       Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
1882     }
1883     SkipOutermostLoad = false;
1884   }
1885   if (!Storage)
1886     return std::nullopt;
1887 
1888   auto *StorageAsArg = dyn_cast<Argument>(Storage);
1889   const bool IsSwiftAsyncArg =
1890       StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);
1891 
1892   // Swift async arguments are described by an entry value of the ABI-defined
1893   // register containing the coroutine context.
1894   // Entry values in variadic expressions are not supported.
1895   if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
1896       Expr->isSingleLocationExpression())
1897     Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);
1898 
1899   // If the coroutine frame is an Argument, store it in an alloca to improve
1900   // its availability (e.g. registers may be clobbered).
1901   // Avoid this if the value is guaranteed to be available through other means
1902   // (e.g. swift ABI guarantees).
1903   if (StorageAsArg && !IsSwiftAsyncArg) {
1904     auto &Cached = ArgToAllocaMap[StorageAsArg];
1905     if (!Cached) {
1906       Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
1907                                     Storage->getName() + ".debug");
1908       Builder.CreateStore(Storage, Cached);
1909     }
1910     Storage = Cached;
1911     // FIXME: LLVM lacks nuanced semantics to differentiate between
1912     // memory and direct locations at the IR level. The backend will
1913     // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
1914     // location. Thus, if there are deref and offset operations in the
1915     // expression, we need to add a DW_OP_deref at the *start* of the
1916     // expression to first load the contents of the alloca before
1917     // adjusting it with the expression.
1918     Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
1919   }
1920 
1921   Expr = Expr->foldConstantMath();
1922   return {{*Storage, *Expr}};
1923 }
1924 
1925 void coro::salvageDebugInfo(
1926     SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1927     DbgVariableIntrinsic &DVI, bool UseEntryValue) {
1928 
1929   Function *F = DVI.getFunction();
1930   // Follow the pointer arithmetic all the way to the incoming
1931   // function argument and convert into a DIExpression.
1932   bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
1933   Value *OriginalStorage = DVI.getVariableLocationOp(0);
1934 
1935   auto SalvagedInfo =
1936       ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
1937                              DVI.getExpression(), SkipOutermostLoad);
1938   if (!SalvagedInfo)
1939     return;
1940 
1941   Value *Storage = &SalvagedInfo->first;
1942   DIExpression *Expr = &SalvagedInfo->second;
1943 
1944   DVI.replaceVariableLocationOp(OriginalStorage, Storage);
1945   DVI.setExpression(Expr);
1946   // We only hoist dbg.declare today; it doesn't make sense to hoist
1947   // dbg.value since it does not have the same function-wide guarantees
1948   // that dbg.declare does.
1949   if (isa<DbgDeclareInst>(DVI)) {
1950     std::optional<BasicBlock::iterator> InsertPt;
1951     if (auto *I = dyn_cast<Instruction>(Storage)) {
1952       InsertPt = I->getInsertionPointAfterDef();
1953       // Update DILocation only if variable was not inlined.
1954       DebugLoc ILoc = I->getDebugLoc();
1955       DebugLoc DVILoc = DVI.getDebugLoc();
1956       if (ILoc && DVILoc &&
1957           DVILoc->getScope()->getSubprogram() ==
1958               ILoc->getScope()->getSubprogram())
1959         DVI.setDebugLoc(I->getDebugLoc());
1960     } else if (isa<Argument>(Storage))
1961       InsertPt = F->getEntryBlock().begin();
1962     if (InsertPt)
1963       DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
1964   }
1965 }
1966 
1967 void coro::salvageDebugInfo(
1968     SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1969     DbgVariableRecord &DVR, bool UseEntryValue) {
1970 
1971   Function *F = DVR.getFunction();
1972   // Follow the pointer arithmetic all the way to the incoming
1973   // function argument and convert into a DIExpression.
1974   bool SkipOutermostLoad = DVR.isDbgDeclare();
1975   Value *OriginalStorage = DVR.getVariableLocationOp(0);
1976 
1977   auto SalvagedInfo =
1978       ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
1979                              DVR.getExpression(), SkipOutermostLoad);
1980   if (!SalvagedInfo)
1981     return;
1982 
1983   Value *Storage = &SalvagedInfo->first;
1984   DIExpression *Expr = &SalvagedInfo->second;
1985 
1986   DVR.replaceVariableLocationOp(OriginalStorage, Storage);
1987   DVR.setExpression(Expr);
1988   // We only hoist dbg.declare today; it doesn't make sense to hoist
1989   // dbg.value since it does not have the same function-wide guarantees
1990   // that dbg.declare does.
1991   if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
1992     std::optional<BasicBlock::iterator> InsertPt;
1993     if (auto *I = dyn_cast<Instruction>(Storage)) {
1994       InsertPt = I->getInsertionPointAfterDef();
1995       // Update DILocation only if variable was not inlined.
1996       DebugLoc ILoc = I->getDebugLoc();
1997       DebugLoc DVRLoc = DVR.getDebugLoc();
1998       if (ILoc && DVRLoc &&
1999           DVRLoc->getScope()->getSubprogram() ==
2000               ILoc->getScope()->getSubprogram())
2001         DVR.setDebugLoc(ILoc);
2002     } else if (isa<Argument>(Storage))
2003       InsertPt = F->getEntryBlock().begin();
2004     if (InsertPt) {
2005       DVR.removeFromParent();
2006       (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
2007     }
2008   }
2009 }
2010 
2011 void coro::normalizeCoroutine(Function &F, coro::Shape &Shape,
2012                               TargetTransformInfo &TTI) {
2013   // Don't eliminate swifterror in async functions that won't be split.
2014   if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
2015     eliminateSwiftError(F, Shape);
2016 
2017   if (Shape.ABI == coro::ABI::Switch &&
2018       Shape.SwitchLowering.PromiseAlloca) {
2019     Shape.getSwitchCoroId()->clearPromise();
2020   }
2021 
2022   // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
2023   // intrinsics are in their own blocks to simplify the logic of building up
2024   // SuspendCrossing data.
2025   for (auto *CSI : Shape.CoroSuspends) {
2026     if (auto *Save = CSI->getCoroSave())
2027       splitAround(Save, "CoroSave");
2028     splitAround(CSI, "CoroSuspend");
2029   }
2030 
2031   // Put CoroEnds into their own blocks.
2032   for (AnyCoroEndInst *CE : Shape.CoroEnds) {
2033     splitAround(CE, "CoroEnd");
2034 
2035     // Emit the musttail call function in a new block before the CoroEnd.
2036     // We do this here so that the right suspend-crossing info is computed
2037     // for the uses of the musttail call. (Arguments to the coro.end
2038     // instruction itself would be ignored.)
2039     if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
2040       auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
2041       if (!MustTailCallFn)
2042         continue;
2043       IRBuilder<> Builder(AsyncEnd);
2044       SmallVector<Value *, 8> Args(AsyncEnd->args());
2045       auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
2046       auto *Call = coro::createMustTailCall(
2047           AsyncEnd->getDebugLoc(), MustTailCallFn, TTI, Arguments, Builder);
2048       splitAround(Call, "MustTailCall.Before.CoroEnd");
2049     }
2050   }
2051 
2052   // Later code makes structural assumptions about single-predecessor PHIs,
2053   // e.g. that they are not live across a suspend point.
2054   cleanupSinglePredPHIs(F);
2055 
2056   // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
2057   // never have its definition separated from the PHI by the suspend point.
2058   rewritePHIs(F);
2059 }
2060 
2061 void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) {
2062   SuspendCrossingInfo Checker(F, Shape.CoroSuspends, Shape.CoroEnds);
2063   doRematerializations(F, Checker, IsMaterializable);
2064 
2065   const DominatorTree DT(F);
2066   if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
2067       Shape.ABI != coro::ABI::RetconOnce)
2068     sinkLifetimeStartMarkers(F, Shape, Checker, DT);
2069 
2070   // All values (that are not allocas) that need to be spilled to the frame.
2071   coro::SpillInfo Spills;
2072   // All values defined as allocas that need to live in the frame.
2073   SmallVector<coro::AllocaInfo, 8> Allocas;
2074 
2075   // Collect the spills for arguments and other not-materializable values.
2076   coro::collectSpillsFromArgs(Spills, F, Checker);
2077   SmallVector<Instruction *, 4> DeadInstructions;
2078   SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
2079   coro::collectSpillsAndAllocasFromInsts(Spills, Allocas, DeadInstructions,
2080                                          LocalAllocas, F, Checker, DT, Shape);
2081   coro::collectSpillsFromDbgInfo(Spills, F, Checker);
2082 
2083   LLVM_DEBUG(dumpAllocas(Allocas));
2084   LLVM_DEBUG(dumpSpills("Spills", Spills));
2085 
2086   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
2087       Shape.ABI == coro::ABI::Async)
2088     sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas);
2089 
2090   // Build frame
2091   FrameDataInfo FrameData(Spills, Allocas);
2092   Shape.FrameTy = buildFrameType(F, Shape, FrameData, OptimizeFrame);
2093   Shape.FramePtr = Shape.CoroBegin;
2094   // For now, this works for C++ programs only.
2095   buildFrameDebugInfo(F, Shape, FrameData);
2096   // Insert spills and reloads
2097   insertSpills(FrameData, Shape);
2098   lowerLocalAllocas(LocalAllocas, DeadInstructions);
2099 
2100   for (auto *I : DeadInstructions)
2101     I->eraseFromParent();
2102 }
2103