1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file contains classes used to discover whether, for a particular
9 // value, its definition precedes and its uses follow a suspend point. Such a
10 // value is referred to as a suspend-crossing value.
11 //
12 // Using the information discovered we form a Coroutine Frame structure to
13 // contain those values. All uses of those values are replaced with appropriate
14 // GEP + load from the coroutine frame. At the point of the definition we spill
15 // the value into the coroutine frame.
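//
// As a simplified, hand-written illustration (not taken from an actual test;
// %hdl is the coroutine handle and N is a placeholder field index), a value
// %x that is live across a suspend point:
//
//     %x = call i32 @f()
//     %save = call token @llvm.coro.save(ptr %hdl)
//     %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
//     ...
//     call void @use(i32 %x)
//
// is spilled right after its definition and reloaded before its use:
//
//     %x = call i32 @f()
//     %x.spill.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 N
//     store i32 %x, ptr %x.spill.addr
//     ...
//     %x.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 N
//     %x.reload = load i32, ptr %x.reload.addr
//     call void @use(i32 %x.reload)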
16 //===----------------------------------------------------------------------===//
17 
18 #include "ABI.h"
19 #include "CoroInternal.h"
20 #include "MaterializationUtils.h"
21 #include "SpillUtils.h"
22 #include "SuspendCrossingInfo.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/ScopeExit.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/Analysis/StackLifetime.h"
27 #include "llvm/IR/DIBuilder.h"
28 #include "llvm/IR/DebugInfo.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstIterator.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/OptimizedStructLayout.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include "llvm/Transforms/Utils/Local.h"
38 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
39 #include <algorithm>
40 #include <optional>
41 
42 using namespace llvm;
43 
44 extern cl::opt<bool> UseNewDbgInfoFormat;
45 
46 #define DEBUG_TYPE "coro-frame"
47 
48 namespace {
49 class FrameTypeBuilder;
50 // Mapping from the to-be-spilled value to all the users that need reload.
51 struct FrameDataInfo {
52   // All the values (that are not allocas) that need to be spilled to the
53   // frame.
54   coro::SpillInfo &Spills;
55   // Allocas contains all values defined as allocas that need to live in the
56   // frame.
57   SmallVectorImpl<coro::AllocaInfo> &Allocas;
58 
59   FrameDataInfo(coro::SpillInfo &Spills,
60                 SmallVectorImpl<coro::AllocaInfo> &Allocas)
61       : Spills(Spills), Allocas(Allocas) {}
62 
63   SmallVector<Value *, 8> getAllDefs() const {
64     SmallVector<Value *, 8> Defs;
65     for (const auto &P : Spills)
66       Defs.push_back(P.first);
67     for (const auto &A : Allocas)
68       Defs.push_back(A.Alloca);
69     return Defs;
70   }
71 
72   uint32_t getFieldIndex(Value *V) const {
73     auto Itr = FieldIndexMap.find(V);
74     assert(Itr != FieldIndexMap.end() &&
75            "Value does not have a frame field index");
76     return Itr->second;
77   }
78 
79   void setFieldIndex(Value *V, uint32_t Index) {
80     assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
81            "Cannot set the index for the same field twice.");
82     FieldIndexMap[V] = Index;
83   }
84 
85   Align getAlign(Value *V) const {
86     auto Iter = FieldAlignMap.find(V);
87     assert(Iter != FieldAlignMap.end());
88     return Iter->second;
89   }
90 
91   void setAlign(Value *V, Align AL) {
92     assert(FieldAlignMap.count(V) == 0);
93     FieldAlignMap.insert({V, AL});
94   }
95 
96   uint64_t getDynamicAlign(Value *V) const {
97     auto Iter = FieldDynamicAlignMap.find(V);
98     assert(Iter != FieldDynamicAlignMap.end());
99     return Iter->second;
100   }
101 
102   void setDynamicAlign(Value *V, uint64_t Align) {
103     assert(FieldDynamicAlignMap.count(V) == 0);
104     FieldDynamicAlignMap.insert({V, Align});
105   }
106 
107   uint64_t getOffset(Value *V) const {
108     auto Iter = FieldOffsetMap.find(V);
109     assert(Iter != FieldOffsetMap.end());
110     return Iter->second;
111   }
112 
113   void setOffset(Value *V, uint64_t Offset) {
114     assert(FieldOffsetMap.count(V) == 0);
115     FieldOffsetMap.insert({V, Offset});
116   }
117 
118   // Remap the index of every field in the frame, using the final layout index.
119   void updateLayoutIndex(FrameTypeBuilder &B);
120 
121 private:
122   // LayoutIndexUpdateStarted is used to avoid updating the index of any field
123   // twice by mistake.
124   bool LayoutIndexUpdateStarted = false;
125   // Map from values to their slot indexes on the frame. They are first set
126   // to their original insertion field index. After the frame is built, their
127   // indexes are updated to the final layout index.
128   DenseMap<Value *, uint32_t> FieldIndexMap;
129   // Map from values to their alignment on the frame. They are set after
130   // the frame is built.
131   DenseMap<Value *, Align> FieldAlignMap;
132   DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
133   // Map from values to their offset on the frame. They are set after
134   // the frame is built.
135   DenseMap<Value *, uint64_t> FieldOffsetMap;
136 };
137 } // namespace
138 
139 #ifndef NDEBUG
140 static void dumpSpills(StringRef Title, const coro::SpillInfo &Spills) {
141   dbgs() << "------------- " << Title << " --------------\n";
142   for (const auto &E : Spills) {
143     E.first->dump();
144     dbgs() << "   user: ";
145     for (auto *I : E.second)
146       I->dump();
147   }
148 }
149 
150 static void dumpAllocas(const SmallVectorImpl<coro::AllocaInfo> &Allocas) {
151   dbgs() << "------------- Allocas --------------\n";
152   for (const auto &A : Allocas) {
153     A.Alloca->dump();
154   }
155 }
156 #endif
157 
158 namespace {
159 using FieldIDType = size_t;
160 // We cannot rely solely on the natural alignment of a type when building a
161 // coroutine frame: if the alignment specified on the Alloca instruction
162 // differs from the natural alignment of the alloca type, we will need to
163 // insert padding.
164 class FrameTypeBuilder {
165 private:
166   struct Field {
167     uint64_t Size;
168     uint64_t Offset;
169     Type *Ty;
170     FieldIDType LayoutFieldIndex;
171     Align Alignment;
172     Align TyAlignment;
173     uint64_t DynamicAlignBuffer;
174   };
175 
176   const DataLayout &DL;
177   LLVMContext &Context;
178   uint64_t StructSize = 0;
179   Align StructAlign;
180   bool IsFinished = false;
181 
182   std::optional<Align> MaxFrameAlignment;
183 
184   SmallVector<Field, 8> Fields;
185   DenseMap<Value*, unsigned> FieldIndexByKey;
186 
187 public:
188   FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
189                    std::optional<Align> MaxFrameAlignment)
190       : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
191 
192   /// Add a field to this structure for the storage of an `alloca`
193   /// instruction.
194   [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
195                                               bool IsHeader = false) {
196     Type *Ty = AI->getAllocatedType();
197 
198     // Make an array type if this is a static array allocation.
199     if (AI->isArrayAllocation()) {
200       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
201         Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
202       else
203         report_fatal_error("Coroutines cannot handle non static allocas yet");
204     }
205 
206     return addField(Ty, AI->getAlign(), IsHeader);
207   }
208 
209   /// We want to put allocas whose lifetime ranges do not overlap
210   /// into one slot of the coroutine frame.
211   /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
212   ///
213   ///     cppcoro::task<void> alternative_paths(bool cond) {
214   ///         if (cond) {
215   ///             big_structure a;
216   ///             process(a);
217   ///             co_await something();
218   ///         } else {
219   ///             big_structure b;
220   ///             process2(b);
221   ///             co_await something();
222   ///         }
223   ///     }
224   ///
225   /// We want to put variable a and variable b in the same slot to
226   /// reduce the size of the coroutine frame.
227   ///
228   /// This function uses the StackLifetime algorithm to partition the
229   /// AllocaInsts in Spills into non-overlapping sets so that allocas in the
230   /// same set can share a slot in the coroutine frame. It then adds a field
231   /// for each non-overlapping set, using the largest alloca type in the set
232   /// as the field type.
233   ///
234   /// Side effect: because we sort the allocas, the order of allocas in the
235   /// frame may differ from their order in the source code.
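  ///
  /// As a hypothetical illustration (not a layout taken from a real frame):
  /// if `a` and `b` above are both 512-byte structures with non-overlapping
  /// live ranges, they land in the same non-overlapping set and share a single
  /// 512-byte field of the frame instead of occupying 1024 bytes.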
236   void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
237                           coro::Shape &Shape);
238 
239   /// Add a field to this structure.
240   [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
241                                      bool IsHeader = false,
242                                      bool IsSpillOfValue = false) {
243     assert(!IsFinished && "adding fields to a finished builder");
244     assert(Ty && "must provide a type for a field");
245 
246     // The field size is always the alloc size of the type.
247     uint64_t FieldSize = DL.getTypeAllocSize(Ty);
248 
249     // For an alloca with size=0, we don't need to add a field; it can
250     // just point to any index in the frame. Use index 0.
251     if (FieldSize == 0) {
252       return 0;
253     }
254 
255     // The field alignment might not be the type alignment, but we need
256     // to remember the type alignment anyway to build the type.
257     // If we are spilling values we don't need to worry about ABI alignment
258     // concerns.
259     Align ABIAlign = DL.getABITypeAlign(Ty);
260     Align TyAlignment = ABIAlign;
261     if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
262       TyAlignment = *MaxFrameAlignment;
263     Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);
264 
265     // The field alignment could be bigger than the max frame alignment; in
266     // that case we request additional storage so we can dynamically align
267     // the pointer.
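    // For illustration (hypothetical numbers, not from a real frame): with
    // MaxFrameAlignment = 16 and a field that requires 32-byte alignment,
    // offsetToAlignment(16, Align(32)) yields a 16-byte DynamicAlignBuffer,
    // the field alignment is clamped to 16, and the field size grows by 16 so
    // that GetFramePointer in insertSpills can later round the address up to
    // a 32-byte boundary inside the frame.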
268     uint64_t DynamicAlignBuffer = 0;
269     if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
270       DynamicAlignBuffer =
271           offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
272       FieldAlignment = *MaxFrameAlignment;
273       FieldSize = FieldSize + DynamicAlignBuffer;
274     }
275 
276     // Lay out header fields immediately.
277     uint64_t Offset;
278     if (IsHeader) {
279       Offset = alignTo(StructSize, FieldAlignment);
280       StructSize = Offset + FieldSize;
281 
282       // Everything else has a flexible offset.
283     } else {
284       Offset = OptimizedStructLayoutField::FlexibleOffset;
285     }
286 
287     Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
288                       DynamicAlignBuffer});
289     return Fields.size() - 1;
290   }
291 
292   /// Finish the layout and set the body on the given type.
293   void finish(StructType *Ty);
294 
295   uint64_t getStructSize() const {
296     assert(IsFinished && "not yet finished!");
297     return StructSize;
298   }
299 
300   Align getStructAlign() const {
301     assert(IsFinished && "not yet finished!");
302     return StructAlign;
303   }
304 
305   FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
306     assert(IsFinished && "not yet finished!");
307     return Fields[Id].LayoutFieldIndex;
308   }
309 
310   Field getLayoutField(FieldIDType Id) const {
311     assert(IsFinished && "not yet finished!");
312     return Fields[Id];
313   }
314 };
315 } // namespace
316 
317 void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
318   auto Updater = [&](Value *I) {
319     auto Field = B.getLayoutField(getFieldIndex(I));
320     setFieldIndex(I, Field.LayoutFieldIndex);
321     setAlign(I, Field.Alignment);
322     uint64_t dynamicAlign =
323         Field.DynamicAlignBuffer
324             ? Field.DynamicAlignBuffer + Field.Alignment.value()
325             : 0;
326     setDynamicAlign(I, dynamicAlign);
327     setOffset(I, Field.Offset);
328   };
329   LayoutIndexUpdateStarted = true;
330   for (auto &S : Spills)
331     Updater(S.first);
332   for (const auto &A : Allocas)
333     Updater(A.Alloca);
334   LayoutIndexUpdateStarted = false;
335 }
336 
337 void FrameTypeBuilder::addFieldForAllocas(const Function &F,
338                                           FrameDataInfo &FrameData,
339                                           coro::Shape &Shape) {
340   using AllocaSetType = SmallVector<AllocaInst *, 4>;
341   SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
342 
343   // We need to add fields for the allocas at the end of this function.
344   auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
345     for (auto AllocaList : NonOverlapedAllocas) {
346       auto *LargestAI = *AllocaList.begin();
347       FieldIDType Id = addFieldForAlloca(LargestAI);
348       for (auto *Alloca : AllocaList)
349         FrameData.setFieldIndex(Alloca, Id);
350     }
351   });
352 
353   if (!Shape.OptimizeFrame) {
354     for (const auto &A : FrameData.Allocas) {
355       AllocaInst *Alloca = A.Alloca;
356       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
357     }
358     return;
359   }
360 
361   // Because there are paths from the lifetime.start to coro.end
362   // for each alloca, the live ranges of all allocas overlap
363   // in the blocks that contain coro.end and their successor blocks.
364   // So we choose to skip these blocks when we calculate the live range
365   // for each alloca. This should be reasonable since there shouldn't be uses
366   // in these blocks and the coroutine frame shouldn't be used outside the
367   // coroutine body.
368   //
369   // Note that the user of coro.suspend may not be a SwitchInst. However, that
370   // case seems too complex to handle. And it is harmless to skip such
371   // patterns since doing so merely prevents the allocas from sharing the same
372   // slot.
373   DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
374   for (auto *CoroSuspendInst : Shape.CoroSuspends) {
375     for (auto *U : CoroSuspendInst->users()) {
376       if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
377         auto *SWI = const_cast<SwitchInst *>(ConstSWI);
378         DefaultSuspendDest[SWI] = SWI->getDefaultDest();
379         SWI->setDefaultDest(SWI->getSuccessor(1));
380       }
381     }
382   }
383 
384   auto ExtractAllocas = [&]() {
385     AllocaSetType Allocas;
386     Allocas.reserve(FrameData.Allocas.size());
387     for (const auto &A : FrameData.Allocas)
388       Allocas.push_back(A.Alloca);
389     return Allocas;
390   };
391   StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
392                                       StackLifetime::LivenessType::May);
393   StackLifetimeAnalyzer.run();
394   auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
395     return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
396         StackLifetimeAnalyzer.getLiveRange(AI2));
397   };
398   auto GetAllocaSize = [&](const coro::AllocaInfo &A) {
399     std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
400     assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
401     assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
402     return RetSize->getFixedValue();
403   };
404   // Put larger allocas at the front so that they have higher priority to
405   // merge, which can potentially save more space. This also keeps each
406   // AllocaSet ordered, so we can easily get the largest Alloca in an
407   // AllocaSet.
408   sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
409     return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
410   });
411   for (const auto &A : FrameData.Allocas) {
412     AllocaInst *Alloca = A.Alloca;
413     bool Merged = false;
414     // Check whether the Alloca interferes with any existing
415     // NonOverlappedAllocaSet. If it does not, insert the alloca into that
416     // NonOverlappedAllocaSet.
417     for (auto &AllocaSet : NonOverlapedAllocas) {
418       assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
419       bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
420         return DoAllocasInterfere(Alloca, Iter);
421       });
422       // If the alignment of A is a multiple of the alignment of B, the
423       // address of A satisfies the alignment requirement of B.
424       //
425       // There may be other, more fine-grained strategies to handle alignment
426       // information during the merging process, but they seem hard to
427       // implement and would benefit little.
428       bool Alignable = [&]() -> bool {
429         auto *LargestAlloca = *AllocaSet.begin();
430         return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
431                0;
432       }();
433       bool CouldMerge = NoInterference && Alignable;
434       if (!CouldMerge)
435         continue;
436       AllocaSet.push_back(Alloca);
437       Merged = true;
438       break;
439     }
440     if (!Merged) {
441       NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
442     }
443   }
444   // Restore the default destination that we saved above for each switch
445   // statement.
446   for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
447     SwitchInst *SWI = SwitchAndDefaultDest.first;
448     BasicBlock *DestBB = SwitchAndDefaultDest.second;
449     SWI->setDefaultDest(DestBB);
450   }
451   // This debug output tells us which allocas were merged into one slot.
452   LLVM_DEBUG(for (auto &AllocaSet
453                   : NonOverlapedAllocas) {
454     if (AllocaSet.size() > 1) {
455       dbgs() << "In Function:" << F.getName() << "\n";
456       dbgs() << "Find Union Set "
457              << "\n";
458       dbgs() << "\tAllocas are \n";
459       for (auto Alloca : AllocaSet)
460         dbgs() << "\t\t" << *Alloca << "\n";
461     }
462   });
463 }
464 
465 void FrameTypeBuilder::finish(StructType *Ty) {
466   assert(!IsFinished && "already finished!");
467 
468   // Prepare the optimal-layout field array.
469   // The Id in the layout field is a pointer to our Field for it.
470   SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
471   LayoutFields.reserve(Fields.size());
472   for (auto &Field : Fields) {
473     LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
474                               Field.Offset);
475   }
476 
477   // Perform layout.
478   auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
479   StructSize = SizeAndAlign.first;
480   StructAlign = SizeAndAlign.second;
481 
482   auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
483     return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
484   };
485 
486   // We need to produce a packed struct type if there's a field whose
487   // assigned offset isn't a multiple of its natural type alignment.
488   bool Packed = [&] {
489     for (auto &LayoutField : LayoutFields) {
490       auto &F = getField(LayoutField);
491       if (!isAligned(F.TyAlignment, LayoutField.Offset))
492         return true;
493     }
494     return false;
495   }();
496 
497   // Build the struct body.
498   SmallVector<Type*, 16> FieldTypes;
499   FieldTypes.reserve(LayoutFields.size() * 3 / 2);
500   uint64_t LastOffset = 0;
501   for (auto &LayoutField : LayoutFields) {
502     auto &F = getField(LayoutField);
503 
504     auto Offset = LayoutField.Offset;
505 
506     // Add a padding field if there's a padding gap and we're either
507     // building a packed struct or the padding gap is more than we'd
508     // get from aligning to the field type's natural alignment.
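    // For example (hypothetical offsets): if the previous field ended at
    // offset 20 and this field has TyAlignment 8 but was assigned offset 32,
    // then alignTo(20, 8) == 24 != 32, so an explicit [12 x i8] padding field
    // is emitted to keep the IR layout in sync with the computed offsets.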
509     assert(Offset >= LastOffset);
510     if (Offset != LastOffset) {
511       if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
512         FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
513                                             Offset - LastOffset));
514     }
515 
516     F.Offset = Offset;
517     F.LayoutFieldIndex = FieldTypes.size();
518 
519     FieldTypes.push_back(F.Ty);
520     if (F.DynamicAlignBuffer) {
521       FieldTypes.push_back(
522           ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
523     }
524     LastOffset = Offset + F.Size;
525   }
526 
527   Ty->setBody(FieldTypes, Packed);
528 
529 #ifndef NDEBUG
530   // Check that the IR layout matches the offsets we expect.
531   auto Layout = DL.getStructLayout(Ty);
532   for (auto &F : Fields) {
533     assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
534     assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
535   }
536 #endif
537 
538   IsFinished = true;
539 }
540 
541 static void cacheDIVar(FrameDataInfo &FrameData,
542                        DenseMap<Value *, DILocalVariable *> &DIVarCache) {
543   for (auto *V : FrameData.getAllDefs()) {
544     if (DIVarCache.contains(V))
545       continue;
546 
547     auto CacheIt = [&DIVarCache, V](const auto &Container) {
548       auto *I = llvm::find_if(Container, [](auto *DDI) {
549         return DDI->getExpression()->getNumElements() == 0;
550       });
551       if (I != Container.end())
552         DIVarCache.insert({V, (*I)->getVariable()});
553     };
554     CacheIt(findDbgDeclares(V));
555     CacheIt(findDVRDeclares(V));
556   }
557 }
558 
559 /// Create a name for Type. It uses MDString to store the newly created string
560 /// so as to avoid a memory leak.
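/// For example, an i32 becomes "__int_32", a float becomes "__float_", a
/// pointer becomes "PointerType", and a struct named "struct.Foo.Bar" becomes
/// "struct_Foo_Bar" ('.' and ':' are replaced with '_').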
561 static StringRef solveTypeName(Type *Ty) {
562   if (Ty->isIntegerTy()) {
563     // The longest name in common use may be '__int_128', which is 9 characters.
564     SmallString<16> Buffer;
565     raw_svector_ostream OS(Buffer);
566     OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
567     auto *MDName = MDString::get(Ty->getContext(), OS.str());
568     return MDName->getString();
569   }
570 
571   if (Ty->isFloatingPointTy()) {
572     if (Ty->isFloatTy())
573       return "__float_";
574     if (Ty->isDoubleTy())
575       return "__double_";
576     return "__floating_type_";
577   }
578 
579   if (Ty->isPointerTy())
580     return "PointerType";
581 
582   if (Ty->isStructTy()) {
583     if (!cast<StructType>(Ty)->hasName())
584       return "__LiteralStructType_";
585 
586     auto Name = Ty->getStructName();
587 
588     SmallString<16> Buffer(Name);
589     for (auto &Iter : Buffer)
590       if (Iter == '.' || Iter == ':')
591         Iter = '_';
592     auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
593     return MDName->getString();
594   }
595 
596   return "UnknownType";
597 }
598 
599 static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
600                            const DataLayout &Layout, DIScope *Scope,
601                            unsigned LineNum,
602                            DenseMap<Type *, DIType *> &DITypeCache) {
603   if (DIType *DT = DITypeCache.lookup(Ty))
604     return DT;
605 
606   StringRef Name = solveTypeName(Ty);
607 
608   DIType *RetType = nullptr;
609 
610   if (Ty->isIntegerTy()) {
611     auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
612     RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
613                                       llvm::DINode::FlagArtificial);
614   } else if (Ty->isFloatingPointTy()) {
615     RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
616                                       dwarf::DW_ATE_float,
617                                       llvm::DINode::FlagArtificial);
618   } else if (Ty->isPointerTy()) {
619     // Construct a PointerType that points to null (aka void *) instead of
620     // exploring the pointee type, to avoid an infinite search problem. For
621     // example, we would be in trouble if we traversed recursively:
622     //
623     //  struct Node {
624     //      Node* ptr;
625     //  };
626     RetType =
627         Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
628                                   Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
629                                   /*DWARFAddressSpace=*/std::nullopt, Name);
630   } else if (Ty->isStructTy()) {
631     auto *DIStruct = Builder.createStructType(
632         Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
633         Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
634         llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray());
635 
636     auto *StructTy = cast<StructType>(Ty);
637     SmallVector<Metadata *, 16> Elements;
638     for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
639       DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
640                                  Scope, LineNum, DITypeCache);
641       assert(DITy);
642       Elements.push_back(Builder.createMemberType(
643           Scope, DITy->getName(), Scope->getFile(), LineNum,
644           DITy->getSizeInBits(), DITy->getAlignInBits(),
645           Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
646           llvm::DINode::FlagArtificial, DITy));
647     }
648 
649     Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));
650 
651     RetType = DIStruct;
652   } else {
653     LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
654     TypeSize Size = Layout.getTypeSizeInBits(Ty);
655     auto *CharSizeType = Builder.createBasicType(
656         Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);
657 
658     if (Size <= 8)
659       RetType = CharSizeType;
660     else {
661       if (Size % 8 != 0)
662         Size = TypeSize::getFixed(Size + 8 - (Size % 8));
663 
664       RetType = Builder.createArrayType(
665           Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
666           Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
667     }
668   }
669 
670   DITypeCache.insert({Ty, RetType});
671   return RetType;
672 }
673 
674 /// Build artificial debug info for C++ coroutine frames to allow users to
675 /// inspect the contents of the frame directly.
676 ///
677 /// Create debug information for the coroutine frame with the debug name
678 /// "__coro_frame". The debug information for the fields of the coroutine
679 /// frame is constructed in the following way:
680 /// 1. For every value in the frame, we search the uses of dbg.declare to find
681 ///    the corresponding debug variable for the value. If we can find the
682 ///    debug variable, we get full and accurate debug information.
683 /// 2. If we can't get debug information in step 1, we fall back to building
684 ///    the DIType from the Type alone. We do this in solveDIType. We only
685 ///    handle integer, floating-point, pointer, and struct types for now.
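///
/// As an illustration (the names follow the scheme used below; the exact
/// members depend on the frame layout): for a coroutine f, this creates a
/// DICompositeType "f.coro_frame_ty" with members such as "__resume_fn",
/// "__destroy_fn" and "__coro_index", plus one member per spilled value, and
/// binds it to an artificial local variable "__coro_frame" declared right
/// after the frame pointer is established.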
686 static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
687                                 FrameDataInfo &FrameData) {
688   DISubprogram *DIS = F.getSubprogram();
689   // If there is no DISubprogram for F, it implies the function was not
690   // compiled with debug info. So we don't need to generate debug info for
691   // the frame either.
692   if (!DIS || !DIS->getUnit() ||
693       !dwarf::isCPlusPlus(
694           (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()) ||
695       DIS->getUnit()->getEmissionKind() != DICompileUnit::DebugEmissionKind::FullDebug)
696     return;
697 
698   assert(Shape.ABI == coro::ABI::Switch &&
699          "We could only build debug infomation for C++ coroutine now.\n");
700 
701   DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
702 
703   assert(Shape.getPromiseAlloca() &&
704          "Coroutine with switch ABI should own Promise alloca");
705 
706   DIFile *DFile = DIS->getFile();
707   unsigned LineNum = DIS->getLine();
708 
709   DICompositeType *FrameDITy = DBuilder.createStructType(
710       DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
711       DFile, LineNum, Shape.FrameSize * 8,
712       Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
713       llvm::DINodeArray());
714   StructType *FrameTy = Shape.FrameTy;
715   SmallVector<Metadata *, 16> Elements;
716   DataLayout Layout = F.getDataLayout();
717 
718   DenseMap<Value *, DILocalVariable *> DIVarCache;
719   cacheDIVar(FrameData, DIVarCache);
720 
721   unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
722   unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
723   unsigned IndexIndex = Shape.SwitchLowering.IndexField;
724 
725   DenseMap<unsigned, StringRef> NameCache;
726   NameCache.insert({ResumeIndex, "__resume_fn"});
727   NameCache.insert({DestroyIndex, "__destroy_fn"});
728   NameCache.insert({IndexIndex, "__coro_index"});
729 
730   Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
731        *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
732        *IndexTy = FrameTy->getElementType(IndexIndex);
733 
734   DenseMap<unsigned, DIType *> TyCache;
735   TyCache.insert(
736       {ResumeIndex, DBuilder.createPointerType(
737                         nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
738   TyCache.insert(
739       {DestroyIndex, DBuilder.createPointerType(
740                          nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});
741 
742   /// FIXME: If we fill the field `SizeInBits` with the actual size of
743   /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
744   TyCache.insert({IndexIndex, DBuilder.createBasicType(
745                                   "__coro_index",
746                                   (Layout.getTypeSizeInBits(IndexTy) < 8)
747                                       ? 8
748                                       : Layout.getTypeSizeInBits(IndexTy),
749                                   dwarf::DW_ATE_unsigned_char)});
750 
751   for (auto *V : FrameData.getAllDefs()) {
752     if (!DIVarCache.contains(V))
753       continue;
754 
755     auto Index = FrameData.getFieldIndex(V);
756 
757     NameCache.insert({Index, DIVarCache[V]->getName()});
758     TyCache.insert({Index, DIVarCache[V]->getType()});
759   }
760 
761   // Cache from index to (Align, Offset) pair.
762   DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
763   // The Align and Offset of Resume function and Destroy function are fixed.
764   OffsetCache.insert({ResumeIndex, {8, 0}});
765   OffsetCache.insert({DestroyIndex, {8, 8}});
766   OffsetCache.insert(
767       {IndexIndex,
768        {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
769 
770   for (auto *V : FrameData.getAllDefs()) {
771     auto Index = FrameData.getFieldIndex(V);
772 
773     OffsetCache.insert(
774         {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
775   }
776 
777   DenseMap<Type *, DIType *> DITypeCache;
778   // This counter is used to avoid duplicate type names. E.g., there may be
779   // many i32 and i64 types in one coroutine, and we would use i32_0 and
780   // i32_1 to distinguish them, since it makes no sense for the names of the
781   // fields to conflict with each other.
782   unsigned UnknownTypeNum = 0;
783   for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
784     if (!OffsetCache.contains(Index))
785       continue;
786 
787     std::string Name;
788     uint64_t SizeInBits;
789     uint32_t AlignInBits;
790     uint64_t OffsetInBits;
791     DIType *DITy = nullptr;
792 
793     Type *Ty = FrameTy->getElementType(Index);
794     assert(Ty->isSized() && "We can't handle type which is not sized.\n");
795     SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
796     AlignInBits = OffsetCache[Index].first * 8;
797     OffsetInBits = OffsetCache[Index].second * 8;
798 
799     if (NameCache.contains(Index)) {
800       Name = NameCache[Index].str();
801       DITy = TyCache[Index];
802     } else {
803       DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
804       assert(DITy && "SolveDIType shouldn't return nullptr.\n");
805       Name = DITy->getName().str();
806       Name += "_" + std::to_string(UnknownTypeNum);
807       UnknownTypeNum++;
808     }
809 
810     Elements.push_back(DBuilder.createMemberType(
811         FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
812         llvm::DINode::FlagArtificial, DITy));
813   }
814 
815   DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));
816 
817   auto *FrameDIVar =
818       DBuilder.createAutoVariable(DIS, "__coro_frame", DFile, LineNum,
819                                   FrameDITy, true, DINode::FlagArtificial);
820 
821   // The subprogram has a RetainedNodes field which records the debug
822   // variables it contains. So we need to add __coro_frame to its
823   // RetainedNodes.
824   //
825   // If we don't add __coro_frame to the RetainedNodes, the user may get
826   // `no symbol __coro_frame in context` rather than the more precise
827   // message that `__coro_frame` was optimized out.
828   auto RetainedNodes = DIS->getRetainedNodes();
829   SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
830                                                RetainedNodes.end());
831   RetainedNodesVec.push_back(FrameDIVar);
832   DIS->replaceOperandWith(7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
833 
834   // Construct the location for the frame debug variable. The column number
835   // is fake but it should be fine.
836   DILocation *DILoc =
837       DILocation::get(DIS->getContext(), LineNum, /*Column=*/1, DIS);
838   assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));
839 
840   if (UseNewDbgInfoFormat) {
841     DbgVariableRecord *NewDVR =
842         new DbgVariableRecord(ValueAsMetadata::get(Shape.FramePtr), FrameDIVar,
843                               DBuilder.createExpression(), DILoc,
844                               DbgVariableRecord::LocationType::Declare);
845     BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
846     It->getParent()->insertDbgRecordBefore(NewDVR, It);
847   } else {
848     DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
849                            DBuilder.createExpression(), DILoc,
850                            &*Shape.getInsertPtAfterFramePtr());
851   }
852 }
853 
854 // Build a struct that will keep state for an active coroutine.
855 //   struct f.frame {
856 //     ResumeFnTy ResumeFnAddr;
857 //     ResumeFnTy DestroyFnAddr;
858 //     ... promise (if present) ...
859 //     int ResumeIndex;
860 //     ... spills ...
861 //   };
862 static StructType *buildFrameType(Function &F, coro::Shape &Shape,
863                                   FrameDataInfo &FrameData) {
864   LLVMContext &C = F.getContext();
865   const DataLayout &DL = F.getDataLayout();
866   StructType *FrameTy = [&] {
867     SmallString<32> Name(F.getName());
868     Name.append(".Frame");
869     return StructType::create(C, Name);
870   }();
871 
872   // We will use this value to cap the alignment of spilled values.
873   std::optional<Align> MaxFrameAlignment;
874   if (Shape.ABI == coro::ABI::Async)
875     MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
876   FrameTypeBuilder B(C, DL, MaxFrameAlignment);
877 
878   AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
879   std::optional<FieldIDType> SwitchIndexFieldId;
880 
881   if (Shape.ABI == coro::ABI::Switch) {
882     auto *FnPtrTy = PointerType::getUnqual(C);
883 
884     // Add header fields for the resume and destroy functions.
885     // We can rely on these being perfectly packed.
886     (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
887     (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
888 
889     // PromiseAlloca field needs to be explicitly added here because it's
890     // a header field with a fixed offset based on its alignment. Hence it
891     // needs special handling and cannot be added to FrameData.Allocas.
892     if (PromiseAlloca)
893       FrameData.setFieldIndex(
894           PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));
895 
896     // Add a field to store the suspend index.  This doesn't need to
897     // be in the header.
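    // For example (an illustrative count, not tied to a particular coroutine):
    // with three suspend points, std::max(1U, Log2_64_Ceil(3)) == 2, so the
    // index is stored as an i2.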
898     unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
899     Type *IndexType = Type::getIntNTy(C, IndexBits);
900 
901     SwitchIndexFieldId = B.addField(IndexType, std::nullopt);
902   } else {
903     assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
904   }
905 
906   // Because multiple allocas may share the same field slot,
907   // we add the fields for allocas here.
908   B.addFieldForAllocas(F, FrameData, Shape);
909   // Add PromiseAlloca to Allocas list so that
910   // 1. updateLayoutIndex could update its index after
911   // `performOptimizedStructLayout`
912   // 2. it is processed in insertSpills.
913   if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
914     // We assume that the promise alloca won't be modified before
915     // CoroBegin and no alias will be created before CoroBegin.
916     FrameData.Allocas.emplace_back(
917         PromiseAlloca, DenseMap<Instruction *, std::optional<APInt>>{}, false);
918   // Create an entry for every spilled value.
919   for (auto &S : FrameData.Spills) {
920     Type *FieldType = S.first->getType();
921     // For byval arguments, we need to store the pointed value in the frame,
922     // instead of the pointer itself.
923     if (const Argument *A = dyn_cast<Argument>(S.first))
924       if (A->hasByValAttr())
925         FieldType = A->getParamByValType();
926     FieldIDType Id = B.addField(FieldType, std::nullopt, false /*header*/,
927                                 true /*IsSpillOfValue*/);
928     FrameData.setFieldIndex(S.first, Id);
929   }
930 
931   B.finish(FrameTy);
932   FrameData.updateLayoutIndex(B);
933   Shape.FrameAlign = B.getStructAlign();
934   Shape.FrameSize = B.getStructSize();
935 
936   switch (Shape.ABI) {
937   case coro::ABI::Switch: {
938     // In the switch ABI, remember the switch-index field.
939     auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
940     Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
941     Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
942     Shape.SwitchLowering.IndexOffset = IndexField.Offset;
943 
944     // Also round the frame size up to a multiple of its alignment, as is
945     // generally expected in C/C++.
946     Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
947     break;
948   }
949 
950   // In the retcon ABI, remember whether the frame is inline in the storage.
951   case coro::ABI::Retcon:
952   case coro::ABI::RetconOnce: {
953     auto Id = Shape.getRetconCoroId();
954     Shape.RetconLowering.IsFrameInlineInStorage
955       = (B.getStructSize() <= Id->getStorageSize() &&
956          B.getStructAlign() <= Id->getStorageAlignment());
957     break;
958   }
959   case coro::ABI::Async: {
960     Shape.AsyncLowering.FrameOffset =
961         alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
962     // Also make the final context size a multiple of the context alignment to
963     // make allocation easier for allocators.
964     Shape.AsyncLowering.ContextSize =
965         alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
966                 Shape.AsyncLowering.getContextAlignment());
967     if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
968       report_fatal_error(
969           "The alignment requirment of frame variables cannot be higher than "
970           "the alignment of the async function context");
971     }
972     break;
973   }
974   }
975 
976   return FrameTy;
977 }
978 
979 // Replace all alloca and SSA values that are accessed across suspend points
980 // with GetElementPointer from coroutine frame + loads and stores. Create an
981 // AllocaSpillBB that will become the new entry block for the resume parts of
982 // the coroutine:
983 //
984 //    %hdl = coro.begin(...)
985 //    whatever
986 //
987 // becomes:
988 //
989 //    %hdl = coro.begin(...)
990 //    br label %AllocaSpillBB
991 //
992 //  AllocaSpillBB:
993 //    ; geps corresponding to allocas that were moved to coroutine frame
994 //    br label PostSpill
995 //
996 //  PostSpill:
997 //    whatever
998 //
999 //
1000 static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
1001   LLVMContext &C = Shape.CoroBegin->getContext();
1002   Function *F = Shape.CoroBegin->getFunction();
1003   IRBuilder<> Builder(C);
1004   StructType *FrameTy = Shape.FrameTy;
1005   Value *FramePtr = Shape.FramePtr;
1006   DominatorTree DT(*F);
1007   SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
1008 
1009   // Create a GEP with the given index into the coroutine frame for the original
1010   // value Orig. Appends an extra 0 index for array-allocas, preserving the
1011   // original type.
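  // For illustration (hypothetical values): for "%buf = alloca i32, i32 4" the
  // frame field is a [4 x i32]; if its field index is 3, the GEP built here is
  //   getelementptr inbounds %f.Frame, ptr %FramePtr, i32 0, i32 3, i32 0
  // so the returned pointer addresses the first element, just as the original
  // array alloca did.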
1012   auto GetFramePointer = [&](Value *Orig) -> Value * {
1013     FieldIDType Index = FrameData.getFieldIndex(Orig);
1014     SmallVector<Value *, 3> Indices = {
1015         ConstantInt::get(Type::getInt32Ty(C), 0),
1016         ConstantInt::get(Type::getInt32Ty(C), Index),
1017     };
1018 
1019     if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1020       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
1021         auto Count = CI->getValue().getZExtValue();
1022         if (Count > 1) {
1023           Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
1024         }
1025       } else {
1026         report_fatal_error("Coroutines cannot handle non static allocas yet");
1027       }
1028     }
1029 
1030     auto GEP = cast<GetElementPtrInst>(
1031         Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
1032     if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1033       if (FrameData.getDynamicAlign(Orig) != 0) {
1034         assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
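        // Round the frame address up to the alloca's required alignment using
        // the extra DynamicAlignBuffer bytes reserved in addField, i.e.
        // aligned = (addr + align - 1) & ~(align - 1); for example (a
        // hypothetical value), a 32-byte alignment gives an AlignMask of 31.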
1035         auto *M = AI->getModule();
1036         auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
1037         auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
1038         auto *AlignMask =
1039             ConstantInt::get(IntPtrTy, AI->getAlign().value() - 1);
1040         PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
1041         PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
1042         return Builder.CreateIntToPtr(PtrValue, AI->getType());
1043       }
1044       // If the type of the GEP is not equal to the type of the AllocaInst, it
1045       // implies that the AllocaInst may be reused in the frame slot of another
1046       // AllocaInst. So we cast the GEP to the AllocaInst's type here to reuse
1047       // the frame storage.
1048       //
1049       // Note: If we change the strategy for dealing with alignment, we need to
1050       // refine this casting.
1051       if (GEP->getType() != Orig->getType())
1052         return Builder.CreateAddrSpaceCast(GEP, Orig->getType(),
1053                                            Orig->getName() + Twine(".cast"));
1054     }
1055     return GEP;
1056   };
1057 
1058   for (auto const &E : FrameData.Spills) {
1059     Value *Def = E.first;
1060     auto SpillAlignment = Align(FrameData.getAlign(Def));
1061     // Create a store instruction storing the value into the
1062     // coroutine frame.
1063     BasicBlock::iterator InsertPt = coro::getSpillInsertionPt(Shape, Def, DT);
1064 
1065     Type *ByValTy = nullptr;
1066     if (auto *Arg = dyn_cast<Argument>(Def)) {
1067       // If we're spilling an Argument, make sure we clear 'nocapture'
1068       // from the coroutine function.
1069       Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
1070 
1071       if (Arg->hasByValAttr())
1072         ByValTy = Arg->getParamByValType();
1073     }
1074 
1075     auto Index = FrameData.getFieldIndex(Def);
1076     Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1077     auto *G = Builder.CreateConstInBoundsGEP2_32(
1078         FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
1079     if (ByValTy) {
1080       // For byval arguments, we need to store the pointed value in the frame,
1081       // instead of the pointer itself.
1082       auto *Value = Builder.CreateLoad(ByValTy, Def);
1083       Builder.CreateAlignedStore(Value, G, SpillAlignment);
1084     } else {
1085       Builder.CreateAlignedStore(Def, G, SpillAlignment);
1086     }
1087 
1088     BasicBlock *CurrentBlock = nullptr;
1089     Value *CurrentReload = nullptr;
1090     for (auto *U : E.second) {
1091       // If we have not yet seen the use block, create a load instruction to
1092       // reload the spilled value from the coroutine frame (for byval
1093       // arguments, the frame GEP itself is used as the reload).
1094       if (CurrentBlock != U->getParent()) {
1095         CurrentBlock = U->getParent();
1096         Builder.SetInsertPoint(CurrentBlock,
1097                                CurrentBlock->getFirstInsertionPt());
1098 
1099         auto *GEP = GetFramePointer(E.first);
1100         GEP->setName(E.first->getName() + Twine(".reload.addr"));
1101         if (ByValTy)
1102           CurrentReload = GEP;
1103         else
1104           CurrentReload = Builder.CreateAlignedLoad(
1105               FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1106               SpillAlignment, E.first->getName() + Twine(".reload"));
1107 
1108         TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(Def);
1109         TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(Def);
1110         // Try our best to find a dbg.declare. If the spill is a temp, there may
1111         // not be a direct dbg.declare. Walk up the load chain to find one from
1112         // an alias.
1113         if (F->getSubprogram()) {
1114           auto *CurDef = Def;
1115           while (DIs.empty() && DVRs.empty() && isa<LoadInst>(CurDef)) {
1116             auto *LdInst = cast<LoadInst>(CurDef);
1117             // Only consider ptr to ptr same type load.
1118             if (LdInst->getPointerOperandType() != LdInst->getType())
1119               break;
1120             CurDef = LdInst->getPointerOperand();
1121             if (!isa<AllocaInst, LoadInst>(CurDef))
1122               break;
1123             DIs = findDbgDeclares(CurDef);
1124             DVRs = findDVRDeclares(CurDef);
1125           }
1126         }
1127 
1128         auto SalvageOne = [&](auto *DDI) {
1129           bool AllowUnresolved = false;
1130           // This dbg.declare is preserved for all coro-split function
1131           // fragments. It will be unreachable in the main function, and
1132           // processed by coro::salvageDebugInfo() by CoroCloner.
1133           if (UseNewDbgInfoFormat) {
1134             DbgVariableRecord *NewDVR = new DbgVariableRecord(
1135                 ValueAsMetadata::get(CurrentReload), DDI->getVariable(),
1136                 DDI->getExpression(), DDI->getDebugLoc(),
1137                 DbgVariableRecord::LocationType::Declare);
1138             Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
1139                 NewDVR, Builder.GetInsertPoint());
1140           } else {
1141             DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
1142                 .insertDeclare(CurrentReload, DDI->getVariable(),
1143                                DDI->getExpression(), DDI->getDebugLoc(),
1144                                &*Builder.GetInsertPoint());
1145           }
1146           // This dbg.declare is for the main function entry point.  It
1147           // will be deleted in all coro-split functions.
1148           coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
1149         };
1150         for_each(DIs, SalvageOne);
1151         for_each(DVRs, SalvageOne);
1152       }
1153 
1154       // If we have a single edge PHINode, remove it and replace it with a
1155       // reload from the coroutine frame. (We already took care of multi edge
1156       // PHINodes by normalizing them in the rewritePHIs function).
1157       if (auto *PN = dyn_cast<PHINode>(U)) {
1158         assert(PN->getNumIncomingValues() == 1 &&
1159                "unexpected number of incoming "
1160                "values in the PHINode");
1161         PN->replaceAllUsesWith(CurrentReload);
1162         PN->eraseFromParent();
1163         continue;
1164       }
1165 
1166       // Replace all uses of Def in the current instruction with the
1167       // reload.
1168       U->replaceUsesOfWith(Def, CurrentReload);
1169       // Instructions are added to Def's user list if the attached
1170       // debug records use Def. Update those now.
1171       for (DbgVariableRecord &DVR : filterDbgVars(U->getDbgRecordRange()))
1172         DVR.replaceVariableLocationOp(Def, CurrentReload, true);
1173     }
1174   }
1175 
1176   BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();
1177 
1178   auto SpillBlock = FramePtrBB->splitBasicBlock(
1179       Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB");
1180   SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
1181   Shape.AllocaSpillBlock = SpillBlock;
1182 
1183   // retcon and retcon.once lowering assumes all uses have been sunk.
1184   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
1185       Shape.ABI == coro::ABI::Async) {
1186     // If we found any allocas, replace all of their remaining uses with Geps.
1187     Builder.SetInsertPoint(SpillBlock, SpillBlock->begin());
1188     for (const auto &P : FrameData.Allocas) {
1189       AllocaInst *Alloca = P.Alloca;
1190       auto *G = GetFramePointer(Alloca);
1191 
1192       // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
1193       // here, as we are changing location of the instruction.
1194       G->takeName(Alloca);
1195       Alloca->replaceAllUsesWith(G);
1196       Alloca->eraseFromParent();
1197     }
1198     return;
1199   }
1200 
1201   // If we found any allocas, replace all of their remaining uses with GEP
1202   // instructions. To retain debuggability, we replace the uses of allocas in
1203   // dbg.declares and dbg.values with the reload from the frame.
1204   // Note: We cannot replace the alloca with GEP instructions indiscriminately,
1205   // as some of the uses may not be dominated by CoroBegin.
1206   Builder.SetInsertPoint(Shape.AllocaSpillBlock,
1207                          Shape.AllocaSpillBlock->begin());
1208   SmallVector<Instruction *, 4> UsersToUpdate;
1209   for (const auto &A : FrameData.Allocas) {
1210     AllocaInst *Alloca = A.Alloca;
1211     UsersToUpdate.clear();
1212     for (User *U : Alloca->users()) {
1213       auto *I = cast<Instruction>(U);
1214       if (DT.dominates(Shape.CoroBegin, I))
1215         UsersToUpdate.push_back(I);
1216     }
1217     if (UsersToUpdate.empty())
1218       continue;
1219     auto *G = GetFramePointer(Alloca);
1220     G->setName(Alloca->getName() + Twine(".reload.addr"));
1221 
1222     SmallVector<DbgVariableIntrinsic *, 4> DIs;
1223     SmallVector<DbgVariableRecord *> DbgVariableRecords;
1224     findDbgUsers(DIs, Alloca, &DbgVariableRecords);
1225     for (auto *DVI : DIs)
1226       DVI->replaceUsesOfWith(Alloca, G);
1227     for (auto *DVR : DbgVariableRecords)
1228       DVR->replaceVariableLocationOp(Alloca, G);
1229 
1230     for (Instruction *I : UsersToUpdate) {
1231       // It is meaningless to retain the lifetime intrinsics refer for the
1232       // member of coroutine frames and the meaningless lifetime intrinsics
1233       // are possible to block further optimizations.
1234       if (I->isLifetimeStartOrEnd()) {
1235         I->eraseFromParent();
1236         continue;
1237       }
1238 
1239       I->replaceUsesOfWith(Alloca, G);
1240     }
1241   }
1242   Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
1243   for (const auto &A : FrameData.Allocas) {
1244     AllocaInst *Alloca = A.Alloca;
1245     if (A.MayWriteBeforeCoroBegin) {
1246       // isEscaped really means potentially modified before CoroBegin.
1247       if (Alloca->isArrayAllocation())
1248         report_fatal_error(
1249             "Coroutines cannot handle copying of array allocas yet");
1250 
1251       auto *G = GetFramePointer(Alloca);
1252       auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
1253       Builder.CreateStore(Value, G);
1254     }
1255     // For each alias of Alloca that was created before CoroBegin but is used
1256     // after CoroBegin, we recreate it after CoroBegin by applying the offset
1257     // to the pointer in the frame.
1258     for (const auto &Alias : A.Aliases) {
1259       auto *FramePtr = GetFramePointer(Alloca);
1260       auto &Value = *Alias.second;
1261       auto ITy = IntegerType::get(C, Value.getBitWidth());
1262       auto *AliasPtr =
1263           Builder.CreatePtrAdd(FramePtr, ConstantInt::get(ITy, Value));
1264       Alias.first->replaceUsesWithIf(
1265           AliasPtr, [&](Use &U) { return DT.dominates(Shape.CoroBegin, U); });
1266     }
1267   }
1268 
1269   // PromiseAlloca is not collected in FrameData.Allocas, so the code above
1270   // does not handle the case where the PromiseAlloca is written before
1271   // CoroBegin. That can be problematic in edge cases. See
1272   // https://github.com/llvm/llvm-project/issues/57861 for an example.
1273   if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
1274     AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca;
1275     // Check whether there is a memory access to the promise alloca before CoroBegin.
1276     bool HasAccessingPromiseBeforeCB = llvm::any_of(PA->uses(), [&](Use &U) {
1277       auto *Inst = dyn_cast<Instruction>(U.getUser());
1278       if (!Inst || DT.dominates(Shape.CoroBegin, Inst))
1279         return false;
1280 
1281       if (auto *CI = dyn_cast<CallInst>(Inst)) {
1282         // It is fine if the call doesn't write to the Promise.
1283         // This is possible for the @llvm.coro.id intrinsic, which
1284         // takes the promise as its second argument purely as a
1285         // marker.
1286         if (CI->onlyReadsMemory() ||
1287             CI->onlyReadsMemory(CI->getArgOperandNo(&U)))
1288           return false;
1289         return true;
1290       }
1291 
1292       return isa<StoreInst>(Inst) ||
1293              // It may take too much time to track the uses, so be
1294              // conservative in case the use escapes.
1295              isa<GetElementPtrInst>(Inst) ||
1296              // Before opaque pointers were enabled, there would always
1297              // be a bitcast for the promise alloca. Now that opaque
1298              // pointers are enabled by default, this should be
1299              // fine.
1300              isa<BitCastInst>(Inst);
1301     });
1302     if (HasAccessingPromiseBeforeCB) {
1303       Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
1304       auto *G = GetFramePointer(PA);
1305       auto *Value = Builder.CreateLoad(PA->getAllocatedType(), PA);
1306       Builder.CreateStore(Value, G);
1307     }
1308   }
1309 }
1310 
1311 // Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
1312 // PHI in InsertedBB.
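// For illustration (hypothetical IR): if SuccBB contains
//   %p = phi i32 [ %v, %InsertedBB ], ...
// this creates "%v.SuccBB = phi i32 [ %v, %PredBB ]" at the top of InsertedBB
// and rewrites SuccBB's PHI to take %v.SuccBB from InsertedBB instead.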
1313 static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
1314                                          BasicBlock *InsertedBB,
1315                                          BasicBlock *PredBB,
1316                                          PHINode *UntilPHI = nullptr) {
1317   auto *PN = cast<PHINode>(&SuccBB->front());
1318   do {
1319     int Index = PN->getBasicBlockIndex(InsertedBB);
1320     Value *V = PN->getIncomingValue(Index);
1321     PHINode *InputV = PHINode::Create(
1322         V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName());
1323     InputV->insertBefore(InsertedBB->begin());
1324     InputV->addIncoming(V, PredBB);
1325     PN->setIncomingValue(Index, InputV);
1326     PN = dyn_cast<PHINode>(PN->getNextNode());
1327   } while (PN != UntilPHI);
1328 }
1329 
1330 // Rewrites the PHI Nodes in a cleanuppad.
1331 static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
1332                                      CleanupPadInst *CleanupPad) {
1333   // For every incoming edge to a CleanupPad we will create a new block holding
1334   // all incoming values in single-value PHI nodes. We will then create another
1335   // block to act as a dispatcher (as all unwind edges for related EH blocks
1336   // must be the same).
1337   //
1338   // cleanuppad:
1339   //    %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
1340   //    %3 = cleanuppad within none []
1341   //
1342   // It will create:
1343   //
1344   // cleanuppad.corodispatch
1345   //    %2 = phi i8[0, %catchswitch], [1, %catch.1]
1346   //    %3 = cleanuppad within none []
1347   //    switch i8 %2, label %unreachable
1348   //            [i8 0, label %cleanuppad.from.catchswitch
1349   //             i8 1, label %cleanuppad.from.catch.1]
1350   // cleanuppad.from.catchswitch:
1351   //    %4 = phi i32 [%0, %catchswitch]
1352   //    br label %cleanuppad
1353   // cleanuppad.from.catch.1:
1354   //    %6 = phi i32 [%1, %catch.1]
1355   //    br label %cleanuppad
1356   // cleanuppad:
1357   //    %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
1358   //                 [%6, %cleanuppad.from.catch.1]
1359 
1360   // Unreachable BB, in case switching on an invalid value in the dispatcher.
1361   auto *UnreachBB = BasicBlock::Create(
1362       CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
1363   IRBuilder<> Builder(UnreachBB);
1364   Builder.CreateUnreachable();
1365 
1366   // Create a new cleanuppad which will be the dispatcher.
1367   auto *NewCleanupPadBB =
1368       BasicBlock::Create(CleanupPadBB->getContext(),
1369                          CleanupPadBB->getName() + Twine(".corodispatch"),
1370                          CleanupPadBB->getParent(), CleanupPadBB);
1371   Builder.SetInsertPoint(NewCleanupPadBB);
1372   auto *SwitchType = Builder.getInt8Ty();
1373   auto *SetDispatchValuePN =
1374       Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
1375   CleanupPad->removeFromParent();
1376   CleanupPad->insertAfter(SetDispatchValuePN);
1377   auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
1378                                                 pred_size(CleanupPadBB));
1379 
1380   int SwitchIndex = 0;
1381   SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
1382   for (BasicBlock *Pred : Preds) {
1383     // Create a new cleanuppad and move the PHI values to there.
1384     auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
1385                                       CleanupPadBB->getName() +
1386                                           Twine(".from.") + Pred->getName(),
1387                                       CleanupPadBB->getParent(), CleanupPadBB);
1388     updatePhiNodes(CleanupPadBB, Pred, CaseBB);
1389     CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
1390                     Pred->getName());
1391     Builder.SetInsertPoint(CaseBB);
1392     Builder.CreateBr(CleanupPadBB);
1393     movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);
1394 
1395     // Update this Pred to the new unwind point.
1396     setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);
1397 
1398     // Setup the switch in the dispatcher.
1399     auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
1400     SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
1401     SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
1402     SwitchIndex++;
1403   }
1404 }
1405 
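     // Replace all uses of PHIs that have a single incoming value with the
     // incoming value itself, so that such trivial PHIs are never live across
     // a suspend point (an assumption later code relies on).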
1406 static void cleanupSinglePredPHIs(Function &F) {
1407   SmallVector<PHINode *, 32> Worklist;
1408   for (auto &BB : F) {
1409     for (auto &Phi : BB.phis()) {
1410       if (Phi.getNumIncomingValues() == 1) {
1411         Worklist.push_back(&Phi);
1412       } else
1413         break;
1414     }
1415   }
1416   while (!Worklist.empty()) {
1417     auto *Phi = Worklist.pop_back_val();
1418     auto *OriginalValue = Phi->getIncomingValue(0);
1419     Phi->replaceAllUsesWith(OriginalValue);
1420   }
1421 }
1422 
1423 static void rewritePHIs(BasicBlock &BB) {
1424   // For every incoming edge we will create a block holding all
1425   // incoming values in single-value PHI nodes.
1426   //
1427   // loop:
1428   //    %n.val = phi i32[%n, %entry], [%inc, %loop]
1429   //
1430   // It will create:
1431   //
1432   // loop.from.entry:
1433   //    %n.loop.pre = phi i32 [%n, %entry]
1434   //    br label %loop
1435   // loop.from.loop:
1436   //    %inc.loop.pre = phi i32 [%inc, %loop]
1437   //    br label %loop
1438   //
1439   // After this rewrite, further analysis will ignore any phi nodes with more
1440   // than one incoming edge.
1441 
1442   // TODO: Simplify PHINodes in the basic block to remove duplicate
1443   // predecessors.
1444 
1445   // Special case for CleanupPad: all EH blocks must have the same unwind edge
1446   // so we need to create an additional "dispatcher" block.
1447   if (auto *CleanupPad =
1448           dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
1449     SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1450     for (BasicBlock *Pred : Preds) {
1451       if (CatchSwitchInst *CS =
1452               dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
1453         // CleanupPad with a CatchSwitch predecessor: this is an unwind
1454         // destination that needs to be handled specially.
1455         assert(CS->getUnwindDest() == &BB);
1456         (void)CS;
1457         rewritePHIsForCleanupPad(&BB, CleanupPad);
1458         return;
1459       }
1460     }
1461   }
1462 
1463   LandingPadInst *LandingPad = nullptr;
1464   PHINode *ReplPHI = nullptr;
1465   if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
1466     // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
1467     // We replace the original landing pad with a PHINode that will collect the
1468     // results from all of them.
1469     ReplPHI = PHINode::Create(LandingPad->getType(), 1, "");
1470     ReplPHI->insertBefore(LandingPad->getIterator());
1471     ReplPHI->takeName(LandingPad);
1472     LandingPad->replaceAllUsesWith(ReplPHI);
1473     // We will erase the original landing pad at the end of this function after
1474     // ehAwareSplitEdge cloned it in the transition blocks.
1475   }
1476 
1477   SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1478   for (BasicBlock *Pred : Preds) {
1479     auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
1480     IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
1481 
1482     // Stop the moving of values at ReplPHI, as this is either null or the PHI
1483     // that replaced the landing pad.
1484     movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
1485   }
1486 
1487   if (LandingPad) {
1488     // The calls to ehAwareSplitEdge above cloned the original landing
1489     // pad, so it is no longer needed.
1490     LandingPad->eraseFromParent();
1491   }
1492 }
1493 
1494 static void rewritePHIs(Function &F) {
1495   SmallVector<BasicBlock *, 8> WorkList;
1496 
1497   for (BasicBlock &BB : F)
1498     if (auto *PN = dyn_cast<PHINode>(&BB.front()))
1499       if (PN->getNumIncomingValues() > 1)
1500         WorkList.push_back(&BB);
1501 
1502   for (BasicBlock *BB : WorkList)
1503     rewritePHIs(*BB);
1504 }
1505 
1506 // Splits the block at a particular instruction unless it is the first
1507 // instruction in the block with a single predecessor.
1508 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
1509   auto *BB = I->getParent();
1510   if (&BB->front() == I) {
1511     if (BB->getSinglePredecessor()) {
1512       BB->setName(Name);
1513       return BB;
1514     }
1515   }
1516   return BB->splitBasicBlock(I, Name);
1517 }
1518 
1519 // Split above and below a particular instruction so that it
1520 // will be all alone by itself in a block.
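     // For example (an illustrative sketch; names are invented), splitting
     // around a coro.suspend
     //
     //   bb:
     //     ...
     //     %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
     //     ...
     //
     // yields a block "CoroSuspend" containing only the suspend and its
     // terminator, followed by a block "AfterCoroSuspend" with the rest.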
1521 static void splitAround(Instruction *I, const Twine &Name) {
1522   splitBlockIfNotFirst(I, Name);
1523   splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1524 }
1525 
1526 /// After we split the coroutine, will the given basic block be along
1527 /// an obvious exit path for the resumption function?
1528 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1529                                               unsigned depth = 3) {
1530   // If we've bottomed out our depth count, stop searching and assume
1531   // that the path might loop back.
1532   if (depth == 0) return false;
1533 
1534   // If this is a suspend block, we're about to exit the resumption function.
1535   if (coro::isSuspendBlock(BB))
1536     return true;
1537 
1538   // Recurse into the successors.
1539   for (auto *Succ : successors(BB)) {
1540     if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1541       return false;
1542   }
1543 
1544   // If none of the successors leads back in a loop, we're on an exit/abort.
1545   return true;
1546 }
1547 
1548 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1549   // Look for a free that isn't sufficiently obviously followed by
1550   // either a suspend or a termination, i.e. something that will leave
1551   // the coro resumption frame.
1552   for (auto *U : AI->users()) {
1553     auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1554     if (!FI) continue;
1555 
1556     if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1557       return true;
1558   }
1559 
1560   // If we never found one, we don't need a stack save.
1561   return false;
1562 }
1563 
1564 /// Turn each of the given local allocas into a normal (dynamic) alloca
1565 /// instruction.
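     /// An illustrative lowering (a sketch only; sizes and names are invented):
     ///
     ///   %id  = call token @llvm.coro.alloca.alloc(i64 64, i32 8)
     ///   %ptr = call ptr @llvm.coro.alloca.get(token %id)
     ///   ...
     ///   call void @llvm.coro.alloca.free(token %id)
     ///
     /// roughly becomes
     ///
     ///   %save   = call ptr @llvm.stacksave()      ; only if a save is needed
     ///   %alloca = alloca i8, i64 64, align 8
     ///   ...
     ///   call void @llvm.stackrestore(ptr %save)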
1566 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1567                               SmallVectorImpl<Instruction*> &DeadInsts) {
1568   for (auto *AI : LocalAllocas) {
1569     IRBuilder<> Builder(AI);
1570 
1571     // Save the stack depth.  Try to avoid doing this if the stackrestore
1572     // is going to immediately precede a return or something.
1573     Value *StackSave = nullptr;
1574     if (localAllocaNeedsStackSave(AI))
1575       StackSave = Builder.CreateStackSave();
1576 
1577     // Allocate memory.
1578     auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
1579     Alloca->setAlignment(AI->getAlignment());
1580 
1581     for (auto *U : AI->users()) {
1582       // Replace gets with the allocation.
1583       if (isa<CoroAllocaGetInst>(U)) {
1584         U->replaceAllUsesWith(Alloca);
1585 
1586       // Replace frees with stackrestores.  This is safe because
1587       // alloca.alloc is required to obey a stack discipline, although we
1588       // don't enforce that structurally.
1589       } else {
1590         auto FI = cast<CoroAllocaFreeInst>(U);
1591         if (StackSave) {
1592           Builder.SetInsertPoint(FI);
1593           Builder.CreateStackRestore(StackSave);
1594         }
1595       }
1596       DeadInsts.push_back(cast<Instruction>(U));
1597     }
1598 
1599     DeadInsts.push_back(AI);
1600   }
1601 }
1602 
1603 /// Get the current swifterror value.
1604 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1605                                      coro::Shape &Shape) {
1606   // Make a fake function pointer as a sort of intrinsic.
1607   auto FnTy = FunctionType::get(ValueTy, {}, false);
1608   auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
1609 
1610   auto Call = Builder.CreateCall(FnTy, Fn, {});
1611   Shape.SwiftErrorOps.push_back(Call);
1612 
1613   return Call;
1614 }
1615 
1616 /// Set the given value as the current swifterror value.
1617 ///
1618 /// Returns a slot that can be used as a swifterror slot.
1619 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1620                                      coro::Shape &Shape) {
1621   // Make a fake function pointer as a sort of intrinsic.
1622   auto FnTy = FunctionType::get(Builder.getPtrTy(),
1623                                 {V->getType()}, false);
1624   auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
1625 
1626   auto Call = Builder.CreateCall(FnTy, Fn, { V });
1627   Shape.SwiftErrorOps.push_back(Call);
1628 
1629   return Call;
1630 }
1631 
1632 /// Set the swifterror value from the given alloca before a call,
1633 /// then put it back in the alloca afterwards.
1634 ///
1635 /// Returns an address that will stand in for the swifterror slot
1636 /// until splitting.
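     /// A minimal sketch of the result (the calls through a null function
     /// pointer are the placeholder "intrinsics" emitted by the helpers above):
     ///
     ///   %v    = load ptr, ptr %alloca
     ///   %slot = call ptr null(ptr %v)      ; set swifterror value
     ///   call void @callee(...)             ; the original call
     ///   %v.2  = call ptr null()            ; get swifterror value
     ///   store ptr %v.2, ptr %alloca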
1637 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1638                                                  AllocaInst *Alloca,
1639                                                  coro::Shape &Shape) {
1640   auto ValueTy = Alloca->getAllocatedType();
1641   IRBuilder<> Builder(Call);
1642 
1643   // Load the current value from the alloca and set it as the
1644   // swifterror value.
1645   auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1646   auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1647 
1648   // Move to after the call.  Since swifterror only has a guaranteed
1649   // value on normal exits, we can ignore implicit and explicit unwind
1650   // edges.
1651   if (isa<CallInst>(Call)) {
1652     Builder.SetInsertPoint(Call->getNextNode());
1653   } else {
1654     auto Invoke = cast<InvokeInst>(Call);
1655     Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1656   }
1657 
1658   // Get the current swifterror value and store it to the alloca.
1659   auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1660   Builder.CreateStore(ValueAfterCall, Alloca);
1661 
1662   return Addr;
1663 }
1664 
1665 /// Eliminate a formerly-swifterror alloca by inserting the get/set
1666 /// intrinsics and attempting to MemToReg the alloca away.
1667 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1668                                       coro::Shape &Shape) {
1669   for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
1670     // swifterror values can only be used in very specific ways.
1671     // We take advantage of that here.
1672     auto User = Use.getUser();
1673     if (isa<LoadInst>(User) || isa<StoreInst>(User))
1674       continue;
1675 
1676     assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1677     auto Call = cast<Instruction>(User);
1678 
1679     auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1680 
1681     // Use the returned slot address as the call argument.
1682     Use.set(Addr);
1683   }
1684 
1685   // All the uses should be loads and stores now.
1686   assert(isAllocaPromotable(Alloca));
1687 }
1688 
1689 /// "Eliminate" a swifterror argument by reducing it to the alloca case
1690 /// and then loading and storing in the prologue and epilog.
1691 ///
1692 /// The argument keeps the swifterror flag.
1693 static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
1694                                         coro::Shape &Shape,
1695                              SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
1696   IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
1697 
1698   auto ArgTy = cast<PointerType>(Arg.getType());
1699   auto ValueTy = PointerType::getUnqual(F.getContext());
1700 
1701   // Reduce to the alloca case:
1702 
1703   // Create an alloca and replace all uses of the arg with it.
1704   auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
1705   Arg.replaceAllUsesWith(Alloca);
1706 
1707   // Set an initial value in the alloca.  swifterror is always null on entry.
1708   auto InitialValue = Constant::getNullValue(ValueTy);
1709   Builder.CreateStore(InitialValue, Alloca);
1710 
1711   // Find all the suspends in the function and save and restore around them.
1712   for (auto *Suspend : Shape.CoroSuspends) {
1713     (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
1714   }
1715 
1716   // Find all the coro.ends in the function and restore the error value.
1717   for (auto *End : Shape.CoroEnds) {
1718     Builder.SetInsertPoint(End);
1719     auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
1720     (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
1721   }
1722 
1723   // Now we can use the alloca logic.
1724   AllocasToPromote.push_back(Alloca);
1725   eliminateSwiftErrorAlloca(F, Alloca, Shape);
1726 }
1727 
1728 /// Eliminate all problematic uses of swifterror arguments and allocas
1729 /// from the function.  We'll fix them up later when splitting the function.
1730 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1731   SmallVector<AllocaInst*, 4> AllocasToPromote;
1732 
1733   // Look for a swifterror argument.
1734   for (auto &Arg : F.args()) {
1735     if (!Arg.hasSwiftErrorAttr()) continue;
1736 
1737     eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1738     break;
1739   }
1740 
1741   // Look for swifterror allocas.
1742   for (auto &Inst : F.getEntryBlock()) {
1743     auto Alloca = dyn_cast<AllocaInst>(&Inst);
1744     if (!Alloca || !Alloca->isSwiftError()) continue;
1745 
1746     // Clear the swifterror flag.
1747     Alloca->setSwiftError(false);
1748 
1749     AllocasToPromote.push_back(Alloca);
1750     eliminateSwiftErrorAlloca(F, Alloca, Shape);
1751   }
1752 
1753   // If we have any allocas to promote, compute a dominator tree and
1754   // promote them en masse.
1755   if (!AllocasToPromote.empty()) {
1756     DominatorTree DT(F);
1757     PromoteMemToReg(AllocasToPromote, DT);
1758   }
1759 }
1760 
1761 /// For each local variable whose uses all live inside a single suspended
1762 /// region, sink its lifetime.start marker into the block right after the
1763 /// suspend block. Doing so minimizes the lifetime of each variable and hence
1764 /// the amount of data we end up putting on the frame.
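     /// A minimal sketch (illustrative names), assuming every use of %x is
     /// dominated by the suspend block's single successor %await.ready:
     ///
     ///   entry:
     ///     %x = alloca i32
     ///     call void @llvm.lifetime.start.p0(i64 4, ptr %x)
     ///     ...
     ///   await.suspend:
     ///     %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
     ///     ...
     ///   await.ready:
     ///     ; all uses of %x ...
     ///
     /// The lifetime.start in %entry is erased and a clone is inserted before
     /// the terminator of %await.ready, so %x no longer appears to live across
     /// the suspend point.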
1765 static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
1766                                      SuspendCrossingInfo &Checker,
1767                                      const DominatorTree &DT) {
1768   if (F.hasOptNone())
1769     return;
1770 
1771   // Collect all possible basic blocks which may dominate all uses of allocas.
1772   SmallPtrSet<BasicBlock *, 4> DomSet;
1773   DomSet.insert(&F.getEntryBlock());
1774   for (auto *CSI : Shape.CoroSuspends) {
1775     BasicBlock *SuspendBlock = CSI->getParent();
1776     assert(coro::isSuspendBlock(SuspendBlock) &&
1777            SuspendBlock->getSingleSuccessor() &&
1778            "should have split coro.suspend into its own block");
1779     DomSet.insert(SuspendBlock->getSingleSuccessor());
1780   }
1781 
1782   for (Instruction &I : instructions(F)) {
1783     AllocaInst* AI = dyn_cast<AllocaInst>(&I);
1784     if (!AI)
1785       continue;
1786 
1787     for (BasicBlock *DomBB : DomSet) {
1788       bool Valid = true;
1789       SmallVector<Instruction *, 1> Lifetimes;
1790 
1791       auto isLifetimeStart = [](Instruction* I) {
1792         if (auto* II = dyn_cast<IntrinsicInst>(I))
1793           return II->getIntrinsicID() == Intrinsic::lifetime_start;
1794         return false;
1795       };
1796 
1797       auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
1798         if (isLifetimeStart(U)) {
1799           Lifetimes.push_back(U);
1800           return true;
1801         }
1802         if (!U->hasOneUse() || U->stripPointerCasts() != AI)
1803           return false;
1804         if (isLifetimeStart(U->user_back())) {
1805           Lifetimes.push_back(U->user_back());
1806           return true;
1807         }
1808         return false;
1809       };
1810 
1811       for (User *U : AI->users()) {
1812         Instruction *UI = cast<Instruction>(U);
1813         // For all users other than lifetime.start markers: if every one
1814         // of them is dominated by DomBB and does not cross a suspend
1815         // point, there is no need to spill the alloca.
1817         if (!DT.dominates(DomBB, UI->getParent()) ||
1818             Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
1819           // Skip lifetime.start, GEP and bitcast used by lifetime.start
1820           // markers.
1821           if (collectLifetimeStart(UI, AI))
1822             continue;
1823           Valid = false;
1824           break;
1825         }
1826       }
1827       // Sink the lifetime.start markers into the dominating block when
1828       // they are the only uses outside of it.
1829       if (Valid && Lifetimes.size() != 0) {
1830         auto *NewLifetime = Lifetimes[0]->clone();
1831         NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI);
1832         NewLifetime->insertBefore(DomBB->getTerminator());
1833 
1834         // The original lifetime.start markers are no longer necessary.
1835         for (Instruction *S : Lifetimes)
1836           S->eraseFromParent();
1837 
1838         break;
1839       }
1840     }
1841   }
1842 }
1843 
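     /// Walk the storage of a variable location back through loads, stores and
     /// salvageable pointer arithmetic to a base value, updating the
     /// DIExpression along the way, so the variable can still be described
     /// after the coroutine frame rewrite.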
1844 static std::optional<std::pair<Value &, DIExpression &>>
1845 salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1846                      bool UseEntryValue, Function *F, Value *Storage,
1847                      DIExpression *Expr, bool SkipOutermostLoad) {
1848   IRBuilder<> Builder(F->getContext());
1849   auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
1850   while (isa<IntrinsicInst>(InsertPt))
1851     ++InsertPt;
1852   Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
1853 
1854   while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
1855     if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
1856       Storage = LdInst->getPointerOperand();
1857       // FIXME: This is a heuristic that works around the fact that
1858       // LLVM IR debug intrinsics cannot yet distinguish between
1859       // memory and value locations: Because a dbg.declare(alloca) is
1860       // implicitly a memory location no DW_OP_deref operation for the
1861       // last direct load from an alloca is necessary.  This condition
1862       // effectively drops the *last* DW_OP_deref in the expression.
1863       if (!SkipOutermostLoad)
1864         Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
1865     } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
1866       Storage = StInst->getValueOperand();
1867     } else {
1868       SmallVector<uint64_t, 16> Ops;
1869       SmallVector<Value *, 0> AdditionalValues;
1870       Value *Op = llvm::salvageDebugInfoImpl(
1871           *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
1872           AdditionalValues);
1873       if (!Op || !AdditionalValues.empty()) {
1874         // If salvaging failed or salvaging produced more than one location
1875         // operand, give up.
1876         break;
1877       }
1878       Storage = Op;
1879       Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
1880     }
1881     SkipOutermostLoad = false;
1882   }
1883   if (!Storage)
1884     return std::nullopt;
1885 
1886   auto *StorageAsArg = dyn_cast<Argument>(Storage);
1887   const bool IsSwiftAsyncArg =
1888       StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);
1889 
1890   // Swift async arguments are described by an entry value of the ABI-defined
1891   // register containing the coroutine context.
1892   // Entry values in variadic expressions are not supported.
1893   if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
1894       Expr->isSingleLocationExpression())
1895     Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);
1896 
1897   // If the coroutine frame is an Argument, store it in an alloca to improve
1898   // its availability (e.g. registers may be clobbered).
1899   // Avoid this if the value is guaranteed to be available through other means
1900   // (e.g. swift ABI guarantees).
1901   if (StorageAsArg && !IsSwiftAsyncArg) {
1902     auto &Cached = ArgToAllocaMap[StorageAsArg];
1903     if (!Cached) {
1904       Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
1905                                     Storage->getName() + ".debug");
1906       Builder.CreateStore(Storage, Cached);
1907     }
1908     Storage = Cached;
1909     // FIXME: LLVM lacks nuanced semantics to differentiate between
1910     // memory and direct locations at the IR level. The backend will
1911     // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
1912     // location. Thus, if there are deref and offset operations in the
1913     // expression, we need to add a DW_OP_deref at the *start* of the
1914     // expression to first load the contents of the alloca before
1915     // adjusting it with the expression.
1916     Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
1917   }
1918 
1919   Expr = Expr->foldConstantMath();
1920   return {{*Storage, *Expr}};
1921 }
1922 
1923 void coro::salvageDebugInfo(
1924     SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1925     DbgVariableIntrinsic &DVI, bool UseEntryValue) {
1926 
1927   Function *F = DVI.getFunction();
1928   // Follow the pointer arithmetic all the way to the incoming
1929   // function argument and convert into a DIExpression.
1930   bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
1931   Value *OriginalStorage = DVI.getVariableLocationOp(0);
1932 
1933   auto SalvagedInfo =
1934       ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
1935                              DVI.getExpression(), SkipOutermostLoad);
1936   if (!SalvagedInfo)
1937     return;
1938 
1939   Value *Storage = &SalvagedInfo->first;
1940   DIExpression *Expr = &SalvagedInfo->second;
1941 
1942   DVI.replaceVariableLocationOp(OriginalStorage, Storage);
1943   DVI.setExpression(Expr);
1944   // We only hoist dbg.declare today; hoisting dbg.value does not make
1945   // sense because it lacks the function-wide guarantees that dbg.declare
1946   // provides.
1947   if (isa<DbgDeclareInst>(DVI)) {
1948     std::optional<BasicBlock::iterator> InsertPt;
1949     if (auto *I = dyn_cast<Instruction>(Storage)) {
1950       InsertPt = I->getInsertionPointAfterDef();
1951       // Update DILocation only if variable was not inlined.
1952       DebugLoc ILoc = I->getDebugLoc();
1953       DebugLoc DVILoc = DVI.getDebugLoc();
1954       if (ILoc && DVILoc &&
1955           DVILoc->getScope()->getSubprogram() ==
1956               ILoc->getScope()->getSubprogram())
1957         DVI.setDebugLoc(I->getDebugLoc());
1958     } else if (isa<Argument>(Storage))
1959       InsertPt = F->getEntryBlock().begin();
1960     if (InsertPt)
1961       DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
1962   }
1963 }
1964 
1965 void coro::salvageDebugInfo(
1966     SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1967     DbgVariableRecord &DVR, bool UseEntryValue) {
1968 
1969   Function *F = DVR.getFunction();
1970   // Follow the pointer arithmetic all the way to the incoming
1971   // function argument and convert into a DIExpression.
1972   bool SkipOutermostLoad = DVR.isDbgDeclare();
1973   Value *OriginalStorage = DVR.getVariableLocationOp(0);
1974 
1975   auto SalvagedInfo =
1976       ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
1977                              DVR.getExpression(), SkipOutermostLoad);
1978   if (!SalvagedInfo)
1979     return;
1980 
1981   Value *Storage = &SalvagedInfo->first;
1982   DIExpression *Expr = &SalvagedInfo->second;
1983 
1984   DVR.replaceVariableLocationOp(OriginalStorage, Storage);
1985   DVR.setExpression(Expr);
1986   // We only hoist dbg.declare today; hoisting dbg.value does not make
1987   // sense because it lacks the function-wide guarantees that dbg.declare
1988   // provides.
1989   if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
1990     std::optional<BasicBlock::iterator> InsertPt;
1991     if (auto *I = dyn_cast<Instruction>(Storage)) {
1992       InsertPt = I->getInsertionPointAfterDef();
1993       // Update DILocation only if variable was not inlined.
1994       DebugLoc ILoc = I->getDebugLoc();
1995       DebugLoc DVRLoc = DVR.getDebugLoc();
1996       if (ILoc && DVRLoc &&
1997           DVRLoc->getScope()->getSubprogram() ==
1998               ILoc->getScope()->getSubprogram())
1999         DVR.setDebugLoc(ILoc);
2000     } else if (isa<Argument>(Storage))
2001       InsertPt = F->getEntryBlock().begin();
2002     if (InsertPt) {
2003       DVR.removeFromParent();
2004       (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
2005     }
2006   }
2007 }
2008 
2009 void coro::normalizeCoroutine(Function &F, coro::Shape &Shape,
2010                               TargetTransformInfo &TTI) {
2011   // Don't eliminate swifterror in async functions that won't be split.
2012   if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
2013     eliminateSwiftError(F, Shape);
2014 
2015   if (Shape.ABI == coro::ABI::Switch &&
2016       Shape.SwitchLowering.PromiseAlloca) {
2017     Shape.getSwitchCoroId()->clearPromise();
2018   }
2019 
2020   // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
2021   // intrinsics are in their own blocks to simplify the logic of building up
2022   // SuspendCrossing data.
2023   for (auto *CSI : Shape.CoroSuspends) {
2024     if (auto *Save = CSI->getCoroSave())
2025       splitAround(Save, "CoroSave");
2026     splitAround(CSI, "CoroSuspend");
2027   }
2028 
2029   // Put CoroEnds into their own blocks.
2030   for (AnyCoroEndInst *CE : Shape.CoroEnds) {
2031     splitAround(CE, "CoroEnd");
2032 
2033     // Emit the musttail call function in a new block before the CoroEnd.
2034     // We do this here so that the right suspend crossing info is
2035     // computed for the uses of the musttail call. (Arguments to the
2036     // coro.end instruction itself would be ignored.)
2037     if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
2038       auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
2039       if (!MustTailCallFn)
2040         continue;
2041       IRBuilder<> Builder(AsyncEnd);
2042       SmallVector<Value *, 8> Args(AsyncEnd->args());
2043       auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
2044       auto *Call = coro::createMustTailCall(
2045           AsyncEnd->getDebugLoc(), MustTailCallFn, TTI, Arguments, Builder);
2046       splitAround(Call, "MustTailCall.Before.CoroEnd");
2047     }
2048   }
2049 
2050   // Later code makes structural assumptions about single-predecessor
2051   // PHIs, e.g. that they are not live across a suspend point.
2052   cleanupSinglePredPHIs(F);
2053 
2054   // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
2055   // never have its definition separated from the PHI by the suspend point.
2056   rewritePHIs(F);
2057 }
2058 
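     // Build the coroutine frame: determine which values are live across
     // suspend points, rematerialize the cheap ones, lay out a frame type for
     // the rest, and rewrite their uses to go through the frame.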
2059 void coro::BaseABI::buildCoroutineFrame() {
2060   SuspendCrossingInfo Checker(F, Shape.CoroSuspends, Shape.CoroEnds);
2061   doRematerializations(F, Checker, IsMaterializable);
2062 
2063   const DominatorTree DT(F);
2064   if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
2065       Shape.ABI != coro::ABI::RetconOnce)
2066     sinkLifetimeStartMarkers(F, Shape, Checker, DT);
2067 
2068   // All values (that are not allocas) that need to be spilled to the frame.
2069   coro::SpillInfo Spills;
2070   // All values defined as allocas that need to live in the frame.
2071   SmallVector<coro::AllocaInfo, 8> Allocas;
2072 
2073   // Collect the spills for arguments and other not-materializable values.
2074   coro::collectSpillsFromArgs(Spills, F, Checker);
2075   SmallVector<Instruction *, 4> DeadInstructions;
2076   SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
2077   coro::collectSpillsAndAllocasFromInsts(Spills, Allocas, DeadInstructions,
2078                                          LocalAllocas, F, Checker, DT, Shape);
2079   coro::collectSpillsFromDbgInfo(Spills, F, Checker);
2080 
2081   LLVM_DEBUG(dumpAllocas(Allocas));
2082   LLVM_DEBUG(dumpSpills("Spills", Spills));
2083 
2084   if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
2085       Shape.ABI == coro::ABI::Async)
2086     sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas);
2087 
2088   // Build frame
2089   FrameDataInfo FrameData(Spills, Allocas);
2090   Shape.FrameTy = buildFrameType(F, Shape, FrameData);
2091   Shape.FramePtr = Shape.CoroBegin;
2092   // For now, this works for C++ programs only.
2093   buildFrameDebugInfo(F, Shape, FrameData);
2094   // Insert spills and reloads
2095   insertSpills(FrameData, Shape);
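       // After insertSpills, a use of a spilled value is rewritten to reload it
       // from its frame field, e.g. (illustrative frame type and field index):
       //   %v.addr   = getelementptr inbounds %f.Frame, ptr %FramePtr, i32 0, i32 2
       //   %v.reload = load i32, ptr %v.addr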
2096   lowerLocalAllocas(LocalAllocas, DeadInstructions);
2097 
2098   for (auto *I : DeadInstructions)
2099     I->eraseFromParent();
2100 }
2101