xref: /llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp (revision 0f46e31cfbf415fcd3d3ce121bef94e92c6ccfc8)
1 //===-- AMDGPULowerBufferFatPointers.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass lowers operations on buffer fat pointers (addrspace 7) to
10 // operations on buffer resources (addrspace 8) and is needed for correct
11 // codegen.
12 //
13 // # Background
14 //
15 // Address space 7 (the buffer fat pointer) is a 160-bit pointer that consists
16 // of a 128-bit buffer descriptor and a 32-bit offset into the buffer it describes.
17 // The buffer resource part needs to be a "raw" buffer resource
18 // (it must have a stride of 0 and bounds checks must be in raw buffer mode
19 // or disabled).
20 //
21 // When these requirements are met, a buffer resource can be treated as a
22 // typical (though quite wide) pointer that follows typical LLVM pointer
23 // semantics. This allows the frontend to reason about such buffers (which are
24 // often encountered in the context of SPIR-V kernels).
25 //
26 // However, because of their non-power-of-2 size, these fat pointers cannot be
27 // present during translation to MIR (though this restriction may be lifted
28 // during the transition to GlobalISel). Therefore, this pass is needed in order
29 // to correctly implement these fat pointers.
30 //
31 // The resource intrinsics take the resource part (the address space 8 pointer)
32 // and the offset part (the 32-bit integer) as separate arguments. In addition,
33 // many users of these buffers manipulate the offset while leaving the resource
34 // part alone. For these reasons, we want to typically separate the resource
35 // and offset parts into separate variables, but combine them together when
36 // encountering cases where this is required, such as by inserting these values
37 // into aggregates or moving them to memory.
38 //
39 // Therefore, at a high level, `ptr addrspace(7) %x` becomes `ptr addrspace(8)
40 // %x.rsrc` and `i32 %x.off`, which will be combined into `{ptr addrspace(8),
41 // i32} %x = {%x.rsrc, %x.off}` if needed. Similarly, `vector<Nxp7>` becomes
42 // `{vector<Nxp8>, vector<Nxi32>}` and its component parts.
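//
// For example (an illustrative sketch with invented value names), a GEP on a
// buffer fat pointer only touches the offset part:
// ```
//   %y = getelementptr i32, ptr addrspace(7) %x, i32 4
// ```
// becomes, in terms of the split values,
// ```
//   %y.rsrc = %x.rsrc            ; the resource part is unchanged
//   %y.off = add i32 %x.off, 16  ; 4 x i32 = 16 bytes
// ```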
43 //
44 // # Implementation
45 //
46 // This pass proceeds in three main phases:
47 //
48 // ## Rewriting loads and stores of p7
49 //
50 // The first phase is to rewrite away all loads and stores of `ptr addrspace(7)`,
51 // including aggregates containing such pointers, to ones that use `i160`. This
52 // is handled by `StoreFatPtrsAsIntsVisitor`, which visits loads, stores, and
53 // allocas and, if the loaded or stored type contains `ptr addrspace(7)`,
54 // rewrites that type to one where the p7s are replaced by i160s, copying other
55 // parts of aggregates as needed. In the case of a store, each pointer is
56 // `ptrtoint`d to i160 before storing, and loaded integers are `inttoptr`d back.
57 // This same transformation is applied to vectors of pointers.
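//
// As a sketch (with invented value names), a store of a buffer fat pointer
// ```
//   store ptr addrspace(7) %p, ptr %slot
// ```
// becomes
// ```
//   %p.int = ptrtoint ptr addrspace(7) %p to i160
//   store i160 %p.int, ptr %slot
// ```
// and the corresponding load becomes a `load i160` followed by an `inttoptr`.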
58 //
59 // Such a transformation allows the later phases of the pass to not need
60 // to handle buffer fat pointers moving to and from memory, where we would
61 // have to handle the incompatibility between a `{Nxp8, Nxi32}` representation
62 // and `Nxi160` directly. Instead, that transposing action (where the vectors
63 // of resources and vectors of offsets are concatenated before being stored to
64 // memory) is handled through implementing `inttoptr` and `ptrtoint` only.
65 //
66 // Atomic operations on `ptr addrspace(7)` values are not supported, as the
67 // hardware does not include a 160-bit atomic.
68 //
69 // ## Type remapping
70 //
71 // We use a `ValueMapper` to mangle uses of [vectors of] buffer fat pointers
72 // to the corresponding struct type, which has a resource part and an offset
73 // part.
74 //
75 // This uses a `BufferFatPtrToStructTypeMap` and a `FatPtrConstMaterializer`
76 // to remap the types, usually by way of `setType`ing values. Constants are
77 // handled here because there isn't a good way to fix them up later.
78 //
79 // This has the downside of leaving the IR in an invalid state (for example,
80 // the instruction `getelementptr {ptr addrspace(8), i32} %p, ...` will exist),
81 // but all such invalid states will be resolved by the third phase.
82 //
83 // Functions that don't take buffer fat pointers are modified in place. Those
84 // that do take such pointers have their basic blocks moved to a new function
85 // with {ptr addrspace(8), i32} arguments and return values.
86 // This phase also records intrinsics so that they can be remangled or deleted
87 // later.
88 //
89 //
90 // ## Splitting pointer structs
91 //
92 // The meat of this pass consists of defining semantics for operations that
93 // produce or consume [vectors of] buffer fat pointers in terms of their
94 // resource and offset parts. This is accomplished through the `SplitPtrStructs`
95 // visitor.
96 //
97 // In the first pass through each function that is being lowered, the splitter
98 // inserts new instructions to implement the split-structures behavior, which is
99 // needed for correctness and performance. It records a list of "split users",
100 // instructions that are being replaced by operations on the resource and offset
101 // parts.
102 //
103 // Split users do not necessarily need to produce parts themselves (
104 // a `load float, ptr addrspace(7)` does not, for example), but, if they do not
105 // generate buffer fat pointers, they must RAUW in their replacement
106 // instructions during the initial visit.
107 //
108 // When these new instructions are created, they use the split parts recorded
109 // for their initial arguments in order to generate their replacements, creating
110 // a parallel set of instructions that does not refer to the original fat
111 // pointer values but instead to their resource and offset components.
112 //
113 // Instructions, such as `extractvalue`, that produce buffer fat pointers from
114 // sources that do not have split parts, have such parts generated using
115 // `extractvalue`. This is also the initial handling of PHI nodes, which
116 // are then cleaned up.
117 //
118 // ### Conditionals
119 //
120 // PHI nodes are initially given resource parts via `extractvalue`. However,
121 // this is not an efficient rewrite of such nodes, as, in most cases, the
122 // resource part in a conditional or loop remains constant throughout the loop
123 // and only the offset varies. Failing to optimize away these constant resources
124 // would cause additional registers to be sent around loops and might lead to
125 // waterfall loops being generated for buffer operations due to the
126 // "non-uniform" resource argument.
127 //
128 // Therefore, after all instructions have been visited, the pointer splitter
129 // post-processes all encountered conditionals. Given a PHI node or select,
130 // getPossibleRsrcRoots() collects all values that the resource parts of that
131 // conditional's input could come from as well as collecting all conditional
132 // instructions encountered during the search. If, after filtering out the
133 // initial node itself, the set of encountered conditionals is a subset of the
134 // potential roots and there is a single potential resource that isn't in the
135 // conditional set, that value is the only possible value the resource argument
136 // could have throughout the control flow.
137 //
138 // If that condition is met, then a PHI node can have its resource part changed
139 // to the singleton value and then be replaced by a PHI on the offsets.
140 // Otherwise, each PHI node is split into two, one for the resource part and one
141 // for the offset part, which replace the temporary `extractvalue` instructions
142 // that were added during the first pass.
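//
// As a sketch (with invented value names), when the resource parts can differ,
// ```
//   %p = phi ptr addrspace(7) [ %a, %if ], [ %b, %else ]
// ```
// becomes
// ```
//   %p.rsrc = phi ptr addrspace(8) [ %a.rsrc, %if ], [ %b.rsrc, %else ]
//   %p.off = phi i32 [ %a.off, %if ], [ %b.off, %else ]
// ```
// while, if a single resource root is found, only the offset PHI is created
// and %p.rsrc is simply that shared value.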
143 //
144 // Similar logic applies to `select`, where
145 // `%z = select i1 %cond, ptr addrspace(7) %x, ptr addrspace(7) %y`
146 // can be split into `%z.rsrc = %x.rsrc` and
147 // `%z.off = select i1 %cond, i32 %x.off, i32 %y.off`
148 // if both `%x` and `%y` have the same resource part, but two `select`
149 // operations will be needed if they do not.
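//
// In that fallback case, the sketch becomes
// ```
//   %z.rsrc = select i1 %cond, ptr addrspace(8) %x.rsrc, ptr addrspace(8) %y.rsrc
//   %z.off = select i1 %cond, i32 %x.off, i32 %y.off
// ```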
150 //
151 // ### Final processing
152 //
153 // After conditionals have been cleaned up, the IR for each function is
154 // rewritten to remove all the old instructions that have been split up.
155 //
156 // Any instruction that used to produce a buffer fat pointer (and therefore now
157 // produces a resource-and-offset struct after type remapping) is
158 // replaced as follows:
159 // 1. All debug value annotations are cloned to reflect that the resource part
160 //    and offset parts are computed separately and constitute different
161 //    fragments of the underlying source language variable.
162 // 2. All uses that were themselves split are replaced by a `poison` of the
163 //    struct type, as they will themselves be erased soon. This rule, combined
164 //    with debug handling, should leave the use lists of split instructions
165 //    empty in almost all cases.
166 // 3. If a user of the original struct-valued result remains, the structure
167 //    needed for the new types to work is constructed out of the newly-defined
168 //    parts, and the original instruction is replaced by this structure
169 //    before being erased. Instructions requiring this construction include
170 //    `ret` and `insertvalue`.
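//
// For example (a sketch with invented value names), a fat-pointer `ret` that
// survives to this point has its operand rebuilt as
// ```
//   %r.0 = insertvalue {ptr addrspace(8), i32} poison, ptr addrspace(8) %p.rsrc, 0
//   %r = insertvalue {ptr addrspace(8), i32} %r.0, i32 %p.off, 1
//   ret {ptr addrspace(8), i32} %r
// ```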
171 //
172 // # Consequences
173 //
174 // This pass does not alter the CFG.
175 //
176 // Alias analysis information will become coarser, as the LLVM alias analyzer
177 // cannot handle the buffer intrinsics. Specifically, while we can determine
178 // that the following two loads do not alias:
179 // ```
180 //   %y = getelementptr i32, ptr addrspace(7) %x, i32 1
181 //   %a = load i32, ptr addrspace(7) %x
182 //   %b = load i32, ptr addrspace(7) %y
183 // ```
184 // we cannot (except through some code that runs during scheduling) determine
185 // that the rewritten loads below do not alias.
186 // ```
187 //   %y.off = add i32 %x.off, 1
188 //   %a = call @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) %x.rsrc, i32
189 //     %x.off, ...)
190 //   %b = call @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8)
191 //     %x.rsrc, i32 %y.off, ...)
192 // ```
193 // However, existing alias information is preserved.
194 //===----------------------------------------------------------------------===//
195 
196 #include "AMDGPU.h"
197 #include "AMDGPUTargetMachine.h"
198 #include "GCNSubtarget.h"
199 #include "SIDefines.h"
200 #include "llvm/ADT/SetOperations.h"
201 #include "llvm/ADT/SmallVector.h"
202 #include "llvm/Analysis/ConstantFolding.h"
203 #include "llvm/CodeGen/TargetPassConfig.h"
204 #include "llvm/IR/AttributeMask.h"
205 #include "llvm/IR/Constants.h"
206 #include "llvm/IR/DebugInfo.h"
207 #include "llvm/IR/DerivedTypes.h"
208 #include "llvm/IR/IRBuilder.h"
209 #include "llvm/IR/InstIterator.h"
210 #include "llvm/IR/InstVisitor.h"
211 #include "llvm/IR/Instructions.h"
212 #include "llvm/IR/Intrinsics.h"
213 #include "llvm/IR/IntrinsicsAMDGPU.h"
214 #include "llvm/IR/Metadata.h"
215 #include "llvm/IR/Operator.h"
216 #include "llvm/IR/PatternMatch.h"
217 #include "llvm/InitializePasses.h"
218 #include "llvm/Pass.h"
219 #include "llvm/Support/AtomicOrdering.h"
220 #include "llvm/Support/Debug.h"
221 #include "llvm/Support/ErrorHandling.h"
222 #include "llvm/Transforms/Utils/Cloning.h"
223 #include "llvm/Transforms/Utils/Local.h"
224 #include "llvm/Transforms/Utils/ValueMapper.h"
225 
226 #define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"
227 
228 using namespace llvm;
229 
230 static constexpr unsigned BufferOffsetWidth = 32;
231 
232 namespace {
233 /// Recursively replace instances of ptr addrspace(7) and vector<Nxptr
234 /// addrspace(7)> with some other type as defined by the relevant subclass.
235 class BufferFatPtrTypeLoweringBase : public ValueMapTypeRemapper {
236   DenseMap<Type *, Type *> Map;
237 
238   Type *remapTypeImpl(Type *Ty, SmallPtrSetImpl<StructType *> &Seen);
239 
240 protected:
241   virtual Type *remapScalar(PointerType *PT) = 0;
242   virtual Type *remapVector(VectorType *VT) = 0;
243 
244   const DataLayout &DL;
245 
246 public:
247   BufferFatPtrTypeLoweringBase(const DataLayout &DL) : DL(DL) {}
248   Type *remapType(Type *SrcTy) override;
249   void clear() { Map.clear(); }
250 };
251 
252 /// Remap ptr addrspace(7) to i160 and vector<Nxptr addrspace(7)> to
253 /// vector<Nxi160> in order to correctly handle loading/storing these values
254 /// from memory.
255 class BufferFatPtrToIntTypeMap : public BufferFatPtrTypeLoweringBase {
256   using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
257 
258 protected:
259   Type *remapScalar(PointerType *PT) override { return DL.getIntPtrType(PT); }
260   Type *remapVector(VectorType *VT) override { return DL.getIntPtrType(VT); }
261 };
262 
263 /// Remap ptr addrspace(7) to {ptr addrspace(8), i32} (the resource and offset
264 /// parts of the pointer) so that we can easily rewrite operations on these
265 /// values that aren't loading them from or storing them to memory.
266 class BufferFatPtrToStructTypeMap : public BufferFatPtrTypeLoweringBase {
267   using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
268 
269 protected:
270   Type *remapScalar(PointerType *PT) override;
271   Type *remapVector(VectorType *VT) override;
272 };
273 } // namespace
274 
275 // This code is adapted from the type remapper in lib/Linker/IRMover.cpp
276 Type *BufferFatPtrTypeLoweringBase::remapTypeImpl(
277     Type *Ty, SmallPtrSetImpl<StructType *> &Seen) {
278   Type **Entry = &Map[Ty];
279   if (*Entry)
280     return *Entry;
281   if (auto *PT = dyn_cast<PointerType>(Ty)) {
282     if (PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER) {
283       return *Entry = remapScalar(PT);
284     }
285   }
286   if (auto *VT = dyn_cast<VectorType>(Ty)) {
287     auto *PT = dyn_cast<PointerType>(VT->getElementType());
288     if (PT && PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER) {
289       return *Entry = remapVector(VT);
290     }
291     return *Entry = Ty;
292   }
293   // Whether the type is one that is structurally uniqued - that is, whether
294   // it is not a named struct (the only kind of type where multiple
295   // structurally identical types can have distinct `Type*`s).
296   StructType *TyAsStruct = dyn_cast<StructType>(Ty);
297   bool IsUniqued = !TyAsStruct || TyAsStruct->isLiteral();
298   // Base case for ints, floats, opaque pointers, and so on, which don't
299   // require recursion.
300   if (Ty->getNumContainedTypes() == 0 && IsUniqued)
301     return *Entry = Ty;
302   if (!IsUniqued) {
303     // If we've already started remapping this struct, create a placeholder to break the recursion.
304     if (!Seen.insert(TyAsStruct).second) {
305       StructType *Placeholder = StructType::create(Ty->getContext());
306       return *Entry = Placeholder;
307     }
308   }
309   bool Changed = false;
310   SmallVector<Type *> ElementTypes(Ty->getNumContainedTypes(), nullptr);
311   for (unsigned int I = 0, E = Ty->getNumContainedTypes(); I < E; ++I) {
312     Type *OldElem = Ty->getContainedType(I);
313     Type *NewElem = remapTypeImpl(OldElem, Seen);
314     ElementTypes[I] = NewElem;
315     Changed |= (OldElem != NewElem);
316   }
317   // Recursive calls to remapTypeImpl() may have invalidated the `Entry` pointer.
318   Entry = &Map[Ty];
319   if (!Changed) {
320     return *Entry = Ty;
321   }
322   if (auto *ArrTy = dyn_cast<ArrayType>(Ty))
323     return *Entry = ArrayType::get(ElementTypes[0], ArrTy->getNumElements());
324   if (auto *FnTy = dyn_cast<FunctionType>(Ty))
325     return *Entry = FunctionType::get(ElementTypes[0],
326                                       ArrayRef(ElementTypes).slice(1),
327                                       FnTy->isVarArg());
328   if (auto *STy = dyn_cast<StructType>(Ty)) {
329     // Genuine opaque types don't have a remapping.
330     if (STy->isOpaque())
331       return *Entry = Ty;
332     bool IsPacked = STy->isPacked();
333     if (IsUniqued)
334       return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked);
335     SmallString<16> Name(STy->getName());
336     STy->setName("");
337     Type **RecursionEntry = &Map[Ty];
338     if (*RecursionEntry) {
339       auto *Placeholder = cast<StructType>(*RecursionEntry);
340       Placeholder->setBody(ElementTypes, IsPacked);
341       Placeholder->setName(Name);
342       return *Entry = Placeholder;
343     }
344     return *Entry = StructType::create(Ty->getContext(), ElementTypes, Name,
345                                        IsPacked);
346   }
347   llvm_unreachable("Unknown type of type that contains elements");
348 }
349 
350 Type *BufferFatPtrTypeLoweringBase::remapType(Type *SrcTy) {
351   SmallPtrSet<StructType *, 2> Visited;
352   return remapTypeImpl(SrcTy, Visited);
353 }
354 
355 Type *BufferFatPtrToStructTypeMap::remapScalar(PointerType *PT) {
356   LLVMContext &Ctx = PT->getContext();
357   return StructType::get(PointerType::get(Ctx, AMDGPUAS::BUFFER_RESOURCE),
358                          IntegerType::get(Ctx, BufferOffsetWidth));
359 }
360 
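// For example, the remapping below turns `<4 x ptr addrspace(7)>` into the
// literal struct `{<4 x ptr addrspace(8)>, <4 x i32>}`.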
361 Type *BufferFatPtrToStructTypeMap::remapVector(VectorType *VT) {
362   ElementCount EC = VT->getElementCount();
363   LLVMContext &Ctx = VT->getContext();
364   Type *RsrcVec =
365       VectorType::get(PointerType::get(Ctx, AMDGPUAS::BUFFER_RESOURCE), EC);
366   Type *OffVec = VectorType::get(IntegerType::get(Ctx, BufferOffsetWidth), EC);
367   return StructType::get(RsrcVec, OffVec);
368 }
369 
370 static bool isBufferFatPtrOrVector(Type *Ty) {
371   if (auto *PT = dyn_cast<PointerType>(Ty->getScalarType()))
372     return PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER;
373   return false;
374 }
375 
376 // True if the type is {ptr addrspace(8), i32} or a struct containing vectors of
377 // those types. Used to quickly skip instructions we don't need to process.
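// For example, `{ptr addrspace(8), i32}` and `{<2 x ptr addrspace(8)>,
// <2 x i32>}` match, while named structs and `{ptr addrspace(8), i64}` do not.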
378 static bool isSplitFatPtr(Type *Ty) {
379   auto *ST = dyn_cast<StructType>(Ty);
380   if (!ST)
381     return false;
382   if (!ST->isLiteral() || ST->getNumElements() != 2)
383     return false;
384   auto *MaybeRsrc =
385       dyn_cast<PointerType>(ST->getElementType(0)->getScalarType());
386   auto *MaybeOff =
387       dyn_cast<IntegerType>(ST->getElementType(1)->getScalarType());
388   return MaybeRsrc && MaybeOff &&
389          MaybeRsrc->getAddressSpace() == AMDGPUAS::BUFFER_RESOURCE &&
390          MaybeOff->getBitWidth() == BufferOffsetWidth;
391 }
392 
393 // True if the result type or any operand types are buffer fat pointers.
394 static bool isBufferFatPtrConst(Constant *C) {
395   Type *T = C->getType();
396   return isBufferFatPtrOrVector(T) || any_of(C->operands(), [](const Use &U) {
397            return isBufferFatPtrOrVector(U.get()->getType());
398          });
399 }
400 
401 namespace {
402 /// Convert [vectors of] buffer fat pointers to integers when they are read from
403 /// or stored to memory. This ensures that these pointers will have the same
404 /// memory layout as before they are lowered, even though they will no longer
405 /// have their previous layout in registers/in the program (they'll be broken
406 /// down into resource and offset parts). This has the downside of imposing
407 /// marshalling costs when reading or storing these values, but since placing
408 /// such pointers into memory is an uncommon operation at best, we feel that
409 /// this cost is acceptable for better performance in the common case.
410 class StoreFatPtrsAsIntsVisitor
411     : public InstVisitor<StoreFatPtrsAsIntsVisitor, bool> {
412   BufferFatPtrToIntTypeMap *TypeMap;
413 
414   ValueToValueMapTy ConvertedForStore;
415 
416   IRBuilder<> IRB;
417 
418   // Convert all the buffer fat pointers within the input value to integers
419   // so that it can be stored in memory.
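  // For example (a sketch), a value of type `[2 x ptr addrspace(7)]` becomes
  // `[2 x i160]` by extracting each element, `ptrtoint`ing it, and inserting
  // it into a poison aggregate of the new type.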
420   Value *fatPtrsToInts(Value *V, Type *From, Type *To, const Twine &Name);
421   // Convert all the i160s that need to be buffer fat pointers (as specified
422   // by the To type) into those pointers to preserve the semantics of the rest
423   // of the program.
424   Value *intsToFatPtrs(Value *V, Type *From, Type *To, const Twine &Name);
425 
426 public:
427   StoreFatPtrsAsIntsVisitor(BufferFatPtrToIntTypeMap *TypeMap, LLVMContext &Ctx)
428       : TypeMap(TypeMap), IRB(Ctx) {}
429   bool processFunction(Function &F);
430 
431   bool visitInstruction(Instruction &I) { return false; }
432   bool visitAllocaInst(AllocaInst &I);
433   bool visitLoadInst(LoadInst &LI);
434   bool visitStoreInst(StoreInst &SI);
435   bool visitGetElementPtrInst(GetElementPtrInst &I);
436 };
437 } // namespace
438 
439 Value *StoreFatPtrsAsIntsVisitor::fatPtrsToInts(Value *V, Type *From, Type *To,
440                                                 const Twine &Name) {
441   if (From == To)
442     return V;
443   ValueToValueMapTy::iterator Find = ConvertedForStore.find(V);
444   if (Find != ConvertedForStore.end())
445     return Find->second;
446   if (isBufferFatPtrOrVector(From)) {
447     Value *Cast = IRB.CreatePtrToInt(V, To, Name + ".int");
448     ConvertedForStore[V] = Cast;
449     return Cast;
450   }
451   if (From->getNumContainedTypes() == 0)
452     return V;
453   // Structs, arrays, and other compound types.
454   Value *Ret = PoisonValue::get(To);
455   if (auto *AT = dyn_cast<ArrayType>(From)) {
456     Type *FromPart = AT->getArrayElementType();
457     Type *ToPart = cast<ArrayType>(To)->getElementType();
458     for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {
459       Value *Field = IRB.CreateExtractValue(V, I);
460       Value *NewField =
461           fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(I));
462       Ret = IRB.CreateInsertValue(Ret, NewField, I);
463     }
464   } else {
465     for (auto [Idx, FromPart, ToPart] :
466          enumerate(From->subtypes(), To->subtypes())) {
467       Value *Field = IRB.CreateExtractValue(V, Idx);
468       Value *NewField =
469           fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(Idx));
470       Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
471     }
472   }
473   ConvertedForStore[V] = Ret;
474   return Ret;
475 }
476 
477 Value *StoreFatPtrsAsIntsVisitor::intsToFatPtrs(Value *V, Type *From, Type *To,
478                                                 const Twine &Name) {
479   if (From == To)
480     return V;
481   if (isBufferFatPtrOrVector(To)) {
482     Value *Cast = IRB.CreateIntToPtr(V, To, Name + ".ptr");
483     return Cast;
484   }
485   if (From->getNumContainedTypes() == 0)
486     return V;
487   // Structs, arrays, and other compound types.
488   Value *Ret = PoisonValue::get(To);
489   if (auto *AT = dyn_cast<ArrayType>(From)) {
490     Type *FromPart = AT->getArrayElementType();
491     Type *ToPart = cast<ArrayType>(To)->getElementType();
492     for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {
493       Value *Field = IRB.CreateExtractValue(V, I);
494       Value *NewField =
495           intsToFatPtrs(Field, FromPart, ToPart, Name + "." + Twine(I));
496       Ret = IRB.CreateInsertValue(Ret, NewField, I);
497     }
498   } else {
499     for (auto [Idx, FromPart, ToPart] :
500          enumerate(From->subtypes(), To->subtypes())) {
501       Value *Field = IRB.CreateExtractValue(V, Idx);
502       Value *NewField =
503           intsToFatPtrs(Field, FromPart, ToPart, Name + "." + Twine(Idx));
504       Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
505     }
506   }
507   return Ret;
508 }
509 
510 bool StoreFatPtrsAsIntsVisitor::processFunction(Function &F) {
511   bool Changed = false;
512   // Iterate with make_early_inc_range so that the visitors can erase or
513   // replace the current instruction without invalidating the iteration.
514   for (Instruction &I : make_early_inc_range(instructions(F))) {
515     Changed |= visit(I);
516   }
517   ConvertedForStore.clear();
518   return Changed;
519 }
520 
521 bool StoreFatPtrsAsIntsVisitor::visitAllocaInst(AllocaInst &I) {
522   Type *Ty = I.getAllocatedType();
523   Type *NewTy = TypeMap->remapType(Ty);
524   if (Ty == NewTy)
525     return false;
526   I.setAllocatedType(NewTy);
527   return true;
528 }
529 
530 bool StoreFatPtrsAsIntsVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
531   Type *Ty = I.getSourceElementType();
532   Type *NewTy = TypeMap->remapType(Ty);
533   if (Ty == NewTy)
534     return false;
535   // We'll be rewriting the type `ptr addrspace(7)` out of existence soon, so
536   // make sure GEPs don't have different semantics with the new type.
537   I.setSourceElementType(NewTy);
538   I.setResultElementType(TypeMap->remapType(I.getResultElementType()));
539   return true;
540 }
541 
542 bool StoreFatPtrsAsIntsVisitor::visitLoadInst(LoadInst &LI) {
543   Type *Ty = LI.getType();
544   Type *IntTy = TypeMap->remapType(Ty);
545   if (Ty == IntTy)
546     return false;
547 
548   IRB.SetInsertPoint(&LI);
549   auto *NLI = cast<LoadInst>(LI.clone());
550   NLI->mutateType(IntTy);
551   NLI = IRB.Insert(NLI);
552   copyMetadataForLoad(*NLI, LI);
553   NLI->takeName(&LI);
554 
555   Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName());
556   LI.replaceAllUsesWith(CastBack);
557   LI.eraseFromParent();
558   return true;
559 }
560 
561 bool StoreFatPtrsAsIntsVisitor::visitStoreInst(StoreInst &SI) {
562   Value *V = SI.getValueOperand();
563   Type *Ty = V->getType();
564   Type *IntTy = TypeMap->remapType(Ty);
565   if (Ty == IntTy)
566     return false;
567 
568   IRB.SetInsertPoint(&SI);
569   Value *IntV = fatPtrsToInts(V, Ty, IntTy, V->getName());
570   for (auto *Dbg : at::getAssignmentMarkers(&SI))
571     Dbg->setValue(IntV);
572 
573   SI.setOperand(0, IntV);
574   return true;
575 }
576 
577 /// Return the ptr addrspace(8) and i32 (resource and offset parts) in a lowered
578 /// buffer fat pointer constant.
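/// For example, a `{ptr addrspace(8), i32} zeroinitializer` constant splits
/// into a null resource pointer and an `i32 0` offset.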
579 static std::pair<Constant *, Constant *>
580 splitLoweredFatBufferConst(Constant *C) {
581   if (auto *AZ = dyn_cast<ConstantAggregateZero>(C))
582     return std::make_pair(AZ->getStructElement(0), AZ->getStructElement(1));
583   if (auto *SC = dyn_cast<ConstantStruct>(C))
584     return std::make_pair(SC->getOperand(0), SC->getOperand(1));
585   llvm_unreachable("Conversion should've created a {p8, i32} struct");
586 }
587 
588 namespace {
589 /// Handle the remapping of ptr addrspace(7) constants.
590 class FatPtrConstMaterializer final : public ValueMaterializer {
591   BufferFatPtrToStructTypeMap *TypeMap;
592   BufferFatPtrToIntTypeMap *IntTypeMap;
593   // An internal mapper that is used to recurse into the arguments of constants.
594   // While the documentation for `ValueMapper` specifies not to use it
595   // recursively, examination of the logic in mapValue() shows that it can
596   // safely be used recursively when handling constants, as mapValue()
597   // itself does internally.
598   ValueMapper InternalMapper;
599 
600   Constant *materializeBufferFatPtrConst(Constant *C);
601 
602   const DataLayout &DL;
603 
604 public:
605   // UnderlyingMap is the value map this materializer will be filling.
606   FatPtrConstMaterializer(BufferFatPtrToStructTypeMap *TypeMap,
607                           ValueToValueMapTy &UnderlyingMap,
608                           BufferFatPtrToIntTypeMap *IntTypeMap,
609                           const DataLayout &DL)
610       : TypeMap(TypeMap), IntTypeMap(IntTypeMap),
611         InternalMapper(UnderlyingMap, RF_None, TypeMap, this), DL(DL) {}
612   virtual ~FatPtrConstMaterializer() = default;
613 
614   Value *materialize(Value *V) override;
615 };
616 } // namespace
617 
618 Constant *FatPtrConstMaterializer::materializeBufferFatPtrConst(Constant *C) {
619   Type *SrcTy = C->getType();
620   auto *NewTy = dyn_cast<StructType>(TypeMap->remapType(SrcTy));
621   if (C->isNullValue())
622     return ConstantAggregateZero::getNullValue(NewTy);
623   if (isa<PoisonValue>(C)) {
624     return ConstantStruct::get(NewTy,
625                                {PoisonValue::get(NewTy->getElementType(0)),
626                                 PoisonValue::get(NewTy->getElementType(1))});
627   }
628   if (isa<UndefValue>(C)) {
629     return ConstantStruct::get(NewTy,
630                                {UndefValue::get(NewTy->getElementType(0)),
631                                 UndefValue::get(NewTy->getElementType(1))});
632   }
633 
634   if (isa<GlobalValue>(C))
635     report_fatal_error("Global values containing ptr addrspace(7) (buffer "
636                        "fat pointer) values are not supported");
637 
638   if (auto *VC = dyn_cast<ConstantVector>(C)) {
639     if (Constant *S = VC->getSplatValue()) {
640       Constant *NewS = InternalMapper.mapConstant(*S);
641       if (!NewS)
642         return nullptr;
643       auto [Rsrc, Off] = splitLoweredFatBufferConst(NewS);
644       auto EC = VC->getType()->getElementCount();
645       return ConstantStruct::get(NewTy, {ConstantVector::getSplat(EC, Rsrc),
646                                          ConstantVector::getSplat(EC, Off)});
647     }
648     SmallVector<Constant *> Rsrcs;
649     SmallVector<Constant *> Offs;
650     for (Value *Op : VC->operand_values()) {
651       auto *NewOp = dyn_cast_or_null<Constant>(InternalMapper.mapValue(*Op));
652       if (!NewOp)
653         return nullptr;
654       auto [Rsrc, Off] = splitLoweredFatBufferConst(NewOp);
655       Rsrcs.push_back(Rsrc);
656       Offs.push_back(Off);
657     }
658     Constant *RsrcVec = ConstantVector::get(Rsrcs);
659     Constant *OffVec = ConstantVector::get(Offs);
660     return ConstantStruct::get(NewTy, {RsrcVec, OffVec});
661   }
662 
663   // Constant expressions. This code mirrors how we fix up the equivalent
664   // instructions later.
665   auto *CE = dyn_cast<ConstantExpr>(C);
666   if (!CE)
667     return nullptr;
668   if (auto *GEPO = dyn_cast<GEPOperator>(C)) {
669     Constant *RemappedPtr =
670         InternalMapper.mapConstant(*cast<Constant>(GEPO->getPointerOperand()));
671     auto [Rsrc, Off] = splitLoweredFatBufferConst(RemappedPtr);
672     Type *OffTy = Off->getType();
673     bool InBounds = GEPO->isInBounds();
674 
675     MapVector<Value *, APInt> VariableOffs;
676     APInt NewConstOffVal = APInt::getZero(BufferOffsetWidth);
677     if (!GEPO->collectOffset(DL, BufferOffsetWidth, VariableOffs,
678                              NewConstOffVal))
679       report_fatal_error(
680           "Scalable vector or unsized struct in fat pointer GEP");
681     Constant *OffAccum = nullptr;
682     // Accumulate offsets together before adding to the base in order to
683     // preserve as many of the inbounds properties as possible.
684     for (auto [Arg, Multiple] : VariableOffs) {
685       Constant *NewArg = InternalMapper.mapConstant(*cast<Constant>(Arg));
686       NewArg = ConstantFoldIntegerCast(NewArg, OffTy, /*IsSigned=*/true, DL);
687       if (!Multiple.isOne()) {
688         if (Multiple.isPowerOf2()) {
689           NewArg = ConstantExpr::getShl(
690               NewArg,
691               CE->getIntegerValue(
692                   OffTy, APInt(BufferOffsetWidth, Multiple.logBase2())),
693               /*hasNUW=*/InBounds, /*hasNSW=*/InBounds);
694         } else {
695           NewArg =
696               ConstantExpr::getMul(NewArg, CE->getIntegerValue(OffTy, Multiple),
697                                    /*hasNUW=*/InBounds, /*hasNSW=*/InBounds);
698         }
699       }
700       if (OffAccum) {
701         OffAccum = ConstantExpr::getAdd(OffAccum, NewArg, /*hasNUW=*/InBounds,
702                                         /*hasNSW=*/InBounds);
703       } else {
704         OffAccum = NewArg;
705       }
706     }
707     Constant *NewConstOff = CE->getIntegerValue(OffTy, NewConstOffVal);
708     if (OffAccum)
709       OffAccum = ConstantExpr::getAdd(OffAccum, NewConstOff,
710                                       /*hasNUW=*/InBounds, /*hasNSW=*/InBounds);
711     else
712       OffAccum = NewConstOff;
713     bool HasNonNegativeOff = false;
714     if (auto *CI = dyn_cast<ConstantInt>(OffAccum)) {
715       HasNonNegativeOff = !CI->isNegative();
716     }
717     Constant *NewOff = ConstantExpr::getAdd(
718         Off, OffAccum, /*hasNUW=*/InBounds && HasNonNegativeOff,
719         /*hasNSW=*/false);
720     return ConstantStruct::get(NewTy, {Rsrc, NewOff});
721   }
722 
723   if (auto *PI = dyn_cast<PtrToIntOperator>(CE)) {
724     Constant *Parts =
725         InternalMapper.mapConstant(*cast<Constant>(PI->getPointerOperand()));
726     auto [Rsrc, Off] = splitLoweredFatBufferConst(Parts);
727     // Here, we take advantage of the fact that ptrtoint has a built-in
728     // zero-extension behavior.
729     unsigned FatPtrWidth =
730         DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER);
731     Constant *RsrcInt = CE->getPtrToInt(Rsrc, SrcTy);
732     unsigned Width = SrcTy->getScalarSizeInBits();
733     Constant *Shift =
734         CE->getIntegerValue(SrcTy, APInt(Width, BufferOffsetWidth));
735     Constant *OffCast =
736         ConstantFoldIntegerCast(Off, SrcTy, /*IsSigned=*/false, DL);
737     Constant *RsrcHi = ConstantExpr::getShl(
738         RsrcInt, Shift, Width >= FatPtrWidth, Width > FatPtrWidth);
739     // This should be an `or`, but `or` constant expressions were recently removed.
740     Constant *Result = ConstantExpr::getAdd(RsrcHi, OffCast, true, true);
741     return Result;
742   }
743 
744   if (CE->getOpcode() == Instruction::IntToPtr) {
745     auto *Arg = cast<Constant>(CE->getOperand(0));
746     unsigned FatPtrWidth =
747         DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER);
748     unsigned RsrcPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_RESOURCE);
749     auto *WantedTy = Arg->getType()->getWithNewBitWidth(FatPtrWidth);
750     Arg = ConstantFoldIntegerCast(Arg, WantedTy, /*IsSigned=*/false, DL);
751 
752     Constant *Shift =
753         CE->getIntegerValue(WantedTy, APInt(FatPtrWidth, BufferOffsetWidth));
754     Type *RsrcIntType = WantedTy->getWithNewBitWidth(RsrcPtrWidth);
755     Type *RsrcTy = NewTy->getElementType(0);
756     Type *OffTy = WantedTy->getWithNewBitWidth(BufferOffsetWidth);
757     Constant *RsrcInt = CE->getTrunc(
758         ConstantFoldBinaryOpOperands(Instruction::LShr, Arg, Shift, DL),
759         RsrcIntType);
760     Constant *Rsrc = CE->getIntToPtr(RsrcInt, RsrcTy);
761     Constant *Off = ConstantFoldIntegerCast(Arg, OffTy, /*isSigned=*/false, DL);
762 
763     return ConstantStruct::get(NewTy, {Rsrc, Off});
764   }
765 
766   if (auto *AC = dyn_cast<AddrSpaceCastOperator>(CE)) {
767     unsigned SrcAS = AC->getSrcAddressSpace();
768     unsigned DstAS = AC->getDestAddressSpace();
769     auto *Arg = cast<Constant>(AC->getPointerOperand());
770     auto *NewArg = InternalMapper.mapConstant(*Arg);
771     if (!NewArg)
772       return nullptr;
773     if (SrcAS == AMDGPUAS::BUFFER_FAT_POINTER &&
774         DstAS == AMDGPUAS::BUFFER_FAT_POINTER)
775       return NewArg;
776     if (SrcAS == AMDGPUAS::BUFFER_RESOURCE &&
777         DstAS == AMDGPUAS::BUFFER_FAT_POINTER) {
778       auto *NullOff = CE->getNullValue(NewTy->getElementType(1));
779       return ConstantStruct::get(NewTy, {NewArg, NullOff});
780     }
781     report_fatal_error(
782         "Unsupported address space cast for a buffer fat pointer");
783   }
784   return nullptr;
785 }
786 
787 Value *FatPtrConstMaterializer::materialize(Value *V) {
788   Constant *C = dyn_cast<Constant>(V);
789   if (!C)
790     return nullptr;
791   if (auto *GEPO = dyn_cast<GEPOperator>(C)) {
792     // As a special case, adjust GEP constants that have a ptr addrspace(7) in
793     // their source types here, since the earlier local changes didn't handle
794     // this.
795     Type *SrcTy = GEPO->getSourceElementType();
796     Type *NewSrcTy = IntTypeMap->remapType(SrcTy);
797     if (SrcTy != NewSrcTy) {
798       SmallVector<Constant *> Ops;
799       Ops.reserve(GEPO->getNumOperands());
800       for (const Use &U : GEPO->operands())
801         Ops.push_back(cast<Constant>(U.get()));
802       auto *NewGEP = ConstantExpr::getGetElementPtr(
803           NewSrcTy, Ops[0], ArrayRef<Constant *>(Ops).slice(1),
804           GEPO->isInBounds(), GEPO->getInRange());
805       LLVM_DEBUG(dbgs() << "p7-getting GEP: " << *GEPO << " becomes " << *NewGEP
806                         << "\n");
807       Value *FurtherMap = materialize(NewGEP);
808       return FurtherMap ? FurtherMap : NewGEP;
809     }
810   }
811   // Structs and other types that happen to contain fat pointers get remapped
812   // by the mapValue() logic.
813   if (!isBufferFatPtrConst(C))
814     return nullptr;
815   return materializeBufferFatPtrConst(C);
816 }
817 
818 using PtrParts = std::pair<Value *, Value *>;
819 namespace {
820 // The visitor returns the resource and offset parts for an instruction if they
821 // can be computed, or (nullptr, nullptr) for cases that don't have a meaningful
822 // value mapping.
823 class SplitPtrStructs : public InstVisitor<SplitPtrStructs, PtrParts> {
824   ValueToValueMapTy RsrcParts;
825   ValueToValueMapTy OffParts;
826 
827   // Track instructions that have been rewritten into a user of the component
828   // parts of their ptr addrspace(7) input. Instructions that produced
829   // ptr addrspace(7) parts should **not** be RAUW'd before being added to this
830   // set, as that replacement will be handled in a post-visit step. However,
831   // instructions that yield values that aren't fat pointers (ex. ptrtoint)
832   // should RAUW themselves with new instructions that use the split parts
833   // of their arguments during processing.
834   DenseSet<Instruction *> SplitUsers;
835 
836   // Nodes that need a second look once we've computed the parts for all other
837   // instructions to see if, for example, we really need to phi on the resource
838   // part.
839   SmallVector<Instruction *> Conditionals;
840   // Temporary instructions produced while lowering conditionals that should be
841   // killed.
842   SmallVector<Instruction *> ConditionalTemps;
843 
844   // Subtarget info, needed for determining what cache control bits to set.
845   const TargetMachine *TM;
846   const GCNSubtarget *ST;
847 
848   IRBuilder<> IRB;
849 
850   // Copy metadata between instructions if applicable.
851   void copyMetadata(Value *Dest, Value *Src);
852 
853   // Get the resource and offset parts of the value V, inserting appropriate
854   // extractvalue calls if needed.
855   PtrParts getPtrParts(Value *V);
856 
857   // Given an instruction that could produce multiple resource parts (a PHI or
858   // select), collect the set of possible instructions that could have provided
859   // its resource parts (the `Roots`), as well as the set of
860   // conditional instructions visited during the search (`Seen`). If, after
861   // removing the root of the search from `Seen` and `Roots`, `Seen` is a subset
862   // of `Roots` and `Roots - Seen` contains one element, the resource part of
863   // that element can replace the resource part of all other elements in `Seen`.
864   void getPossibleRsrcRoots(Instruction *I, SmallPtrSetImpl<Value *> &Roots,
865                             SmallPtrSetImpl<Value *> &Seen);
866   void processConditionals();
867 
868   // If an instruction has been split into resource and offset parts,
869   // delete that instruction. If any of its uses have not themselves been split
870   // into parts (for example, an insertvalue), construct the structure
871   // that the type rewrites declared the dying instruction should produce,
872   // and use that.
873   // Also, kill the temporary extractvalue operations produced by the two-stage
874   // lowering of PHIs and conditionals.
875   void killAndReplaceSplitInstructions(SmallVectorImpl<Instruction *> &Origs);
876 
877   void setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx);
878   void insertPreMemOpFence(AtomicOrdering Order, SyncScope::ID SSID);
879   void insertPostMemOpFence(AtomicOrdering Order, SyncScope::ID SSID);
880   Value *handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr, Type *Ty,
881                           Align Alignment, AtomicOrdering Order,
882                           bool IsVolatile, SyncScope::ID SSID);
883 
884 public:
885   SplitPtrStructs(LLVMContext &Ctx, const TargetMachine *TM)
886       : TM(TM), ST(nullptr), IRB(Ctx) {}
887 
888   void processFunction(Function &F);
889 
890   PtrParts visitInstruction(Instruction &I);
891   PtrParts visitLoadInst(LoadInst &LI);
892   PtrParts visitStoreInst(StoreInst &SI);
893   PtrParts visitAtomicRMWInst(AtomicRMWInst &AI);
894   PtrParts visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI);
895   PtrParts visitGetElementPtrInst(GetElementPtrInst &GEP);
896 
897   PtrParts visitPtrToIntInst(PtrToIntInst &PI);
898   PtrParts visitIntToPtrInst(IntToPtrInst &IP);
899   PtrParts visitAddrSpaceCastInst(AddrSpaceCastInst &I);
900   PtrParts visitICmpInst(ICmpInst &Cmp);
901   PtrParts visitFreezeInst(FreezeInst &I);
902 
903   PtrParts visitExtractElementInst(ExtractElementInst &I);
904   PtrParts visitInsertElementInst(InsertElementInst &I);
905   PtrParts visitShuffleVectorInst(ShuffleVectorInst &I);
906 
907   PtrParts visitPHINode(PHINode &PHI);
908   PtrParts visitSelectInst(SelectInst &SI);
909 
910   PtrParts visitIntrinsicInst(IntrinsicInst &II);
911 };
912 } // namespace
913 
914 void SplitPtrStructs::copyMetadata(Value *Dest, Value *Src) {
915   auto *DestI = dyn_cast<Instruction>(Dest);
916   auto *SrcI = dyn_cast<Instruction>(Src);
917 
918   if (!DestI || !SrcI)
919     return;
920 
921   DestI->copyMetadata(*SrcI);
922 }
923 
924 PtrParts SplitPtrStructs::getPtrParts(Value *V) {
925   assert(isSplitFatPtr(V->getType()) && "it's not meaningful to get the parts "
926                                         "of something that wasn't rewritten");
927   auto *RsrcEntry = &RsrcParts[V];
928   auto *OffEntry = &OffParts[V];
929   if (*RsrcEntry && *OffEntry)
930     return {*RsrcEntry, *OffEntry};
931 
932   if (auto *C = dyn_cast<Constant>(V)) {
933     auto [Rsrc, Off] = splitLoweredFatBufferConst(C);
934     return {*RsrcEntry = Rsrc, *OffEntry = Off};
935   }
936 
937   IRBuilder<>::InsertPointGuard Guard(IRB);
938   if (auto *I = dyn_cast<Instruction>(V)) {
939     LLVM_DEBUG(dbgs() << "Recursing to split parts of " << *I << "\n");
940     auto [Rsrc, Off] = visit(*I);
941     if (Rsrc && Off)
942       return {*RsrcEntry = Rsrc, *OffEntry = Off};
943     // We'll be creating the new values after the relevant instruction.
944     // This instruction generates a value and so isn't a terminator.
945     IRB.SetInsertPoint(*I->getInsertionPointAfterDef());
946     IRB.SetCurrentDebugLocation(I->getDebugLoc());
947   } else if (auto *A = dyn_cast<Argument>(V)) {
948     IRB.SetInsertPointPastAllocas(A->getParent());
949     IRB.SetCurrentDebugLocation(DebugLoc());
950   }
951   Value *Rsrc = IRB.CreateExtractValue(V, 0, V->getName() + ".rsrc");
952   Value *Off = IRB.CreateExtractValue(V, 1, V->getName() + ".off");
953   return {*RsrcEntry = Rsrc, *OffEntry = Off};
954 }
955 
956 /// Returns the instruction that defines the resource part of the value V.
957 /// Note that this is not getUnderlyingObject(), since that looks through
958 /// operations like ptrmask which might modify the resource part.
959 ///
960 /// We can limit ourselves to just looking through GEPs followed by looking
961 /// through addrspacecasts because only those two operations preserve the
962 /// resource part, and because operations on an `addrspace(8)` (which is the
963 /// legal input to this addrspacecast) would produce a different resource part.
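///
/// For example (a sketch), `rsrcPartRoot(gep(addrspacecast(%v)))` is `%v`,
/// since neither operation can change the resource part.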
964 static Value *rsrcPartRoot(Value *V) {
965   while (auto *GEP = dyn_cast<GEPOperator>(V))
966     V = GEP->getPointerOperand();
967   while (auto *ASC = dyn_cast<AddrSpaceCastOperator>(V))
968     V = ASC->getPointerOperand();
969   return V;
970 }
971 
972 void SplitPtrStructs::getPossibleRsrcRoots(Instruction *I,
973                                            SmallPtrSetImpl<Value *> &Roots,
974                                            SmallPtrSetImpl<Value *> &Seen) {
975   if (auto *PHI = dyn_cast<PHINode>(I)) {
976     if (!Seen.insert(I).second)
977       return;
978     for (Value *In : PHI->incoming_values()) {
979       In = rsrcPartRoot(In);
980       Roots.insert(In);
981       if (isa<PHINode, SelectInst>(In))
982         getPossibleRsrcRoots(cast<Instruction>(In), Roots, Seen);
983     }
984   } else if (auto *SI = dyn_cast<SelectInst>(I)) {
985     if (!Seen.insert(SI).second)
986       return;
987     Value *TrueVal = rsrcPartRoot(SI->getTrueValue());
988     Value *FalseVal = rsrcPartRoot(SI->getFalseValue());
989     Roots.insert(TrueVal);
990     Roots.insert(FalseVal);
991     if (isa<PHINode, SelectInst>(TrueVal))
992       getPossibleRsrcRoots(cast<Instruction>(TrueVal), Roots, Seen);
993     if (isa<PHINode, SelectInst>(FalseVal))
994       getPossibleRsrcRoots(cast<Instruction>(FalseVal), Roots, Seen);
995   } else {
996     llvm_unreachable("getPossibleRsrcParts() only works on phi and select");
997   }
998 }
999 
1000 void SplitPtrStructs::processConditionals() {
1001   SmallDenseMap<Instruction *, Value *> FoundRsrcs;
1002   SmallPtrSet<Value *, 4> Roots;
1003   SmallPtrSet<Value *, 4> Seen;
1004   for (Instruction *I : Conditionals) {
1005     // These have to exist by now because we've visited these nodes.
1006     Value *Rsrc = RsrcParts[I];
1007     Value *Off = OffParts[I];
1008     assert(Rsrc && Off && "must have visited conditionals by now");
1009 
1010     std::optional<Value *> MaybeRsrc;
1011     auto MaybeFoundRsrc = FoundRsrcs.find(I);
1012     if (MaybeFoundRsrc != FoundRsrcs.end()) {
1013       MaybeRsrc = MaybeFoundRsrc->second;
1014     } else {
1015       IRBuilder<>::InsertPointGuard Guard(IRB);
1016       Roots.clear();
1017       Seen.clear();
1018       getPossibleRsrcRoots(I, Roots, Seen);
1019       LLVM_DEBUG(dbgs() << "Processing conditional: " << *I << "\n");
1020 #ifndef NDEBUG
1021       for (Value *V : Roots)
1022         LLVM_DEBUG(dbgs() << "Root: " << *V << "\n");
1023       for (Value *V : Seen)
1024         LLVM_DEBUG(dbgs() << "Seen: " << *V << "\n");
1025 #endif
1026       // If we are our own possible root, then we shouldn't block our
1027       // replacement with a valid incoming value.
1028       Roots.erase(I);
1029       // We don't want to block the optimization for conditionals that don't
1030       // refer to themselves but did see themselves during the traversal.
1031       Seen.erase(I);
1032 
1033       if (set_is_subset(Seen, Roots)) {
1034         auto Diff = set_difference(Roots, Seen);
1035         if (Diff.size() == 1) {
1036           Value *RootVal = *Diff.begin();
1037           // Handle the case where previous loops already looked through
1038           // an addrspacecast.
1039           if (isSplitFatPtr(RootVal->getType()))
1040             MaybeRsrc = std::get<0>(getPtrParts(RootVal));
1041           else
1042             MaybeRsrc = RootVal;
1043         }
1044       }
1045     }
1046 
1047     if (auto *PHI = dyn_cast<PHINode>(I)) {
1048       Value *NewRsrc;
1049       StructType *PHITy = cast<StructType>(PHI->getType());
1050       IRB.SetInsertPoint(*PHI->getInsertionPointAfterDef());
1051       IRB.SetCurrentDebugLocation(PHI->getDebugLoc());
1052       if (MaybeRsrc) {
1053         NewRsrc = *MaybeRsrc;
1054       } else {
1055         Type *RsrcTy = PHITy->getElementType(0);
1056         auto *RsrcPHI = IRB.CreatePHI(RsrcTy, PHI->getNumIncomingValues());
1057         RsrcPHI->takeName(Rsrc);
1058         for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {
1059           Value *VRsrc = std::get<0>(getPtrParts(V));
1060           RsrcPHI->addIncoming(VRsrc, BB);
1061         }
1062         copyMetadata(RsrcPHI, PHI);
1063         NewRsrc = RsrcPHI;
1064       }
1065 
1066       Type *OffTy = PHITy->getElementType(1);
1067       auto *NewOff = IRB.CreatePHI(OffTy, PHI->getNumIncomingValues());
1068       NewOff->takeName(Off);
1069       for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {
1070         assert(OffParts.count(V) && "An offset part had to be created by now");
1071         Value *VOff = std::get<1>(getPtrParts(V));
1072         NewOff->addIncoming(VOff, BB);
1073       }
1074       copyMetadata(NewOff, PHI);
1075 
1076       // Note: We don't eraseFromParent() the temporaries because we don't want
1077       // to put the correction maps in an inconsistent state. That'll be handled
1078       // during the rest of the killing. Also, `ValueToValueMapTy` guarantees
1079       // that references in that map will be updated as well.
1080       ConditionalTemps.push_back(cast<Instruction>(Rsrc));
1081       ConditionalTemps.push_back(cast<Instruction>(Off));
1082       Rsrc->replaceAllUsesWith(NewRsrc);
1083       Off->replaceAllUsesWith(NewOff);
1084 
1085       // Save on recomputing the cycle traversals in known-root cases.
1086       if (MaybeRsrc)
1087         for (Value *V : Seen)
1088           FoundRsrcs[cast<Instruction>(V)] = NewRsrc;
1089     } else if (auto *SI = dyn_cast<SelectInst>(I)) {
1090       if (MaybeRsrc) {
1091         ConditionalTemps.push_back(cast<Instruction>(Rsrc));
1092         Rsrc->replaceAllUsesWith(*MaybeRsrc);
1093         for (Value *V : Seen)
1094           FoundRsrcs[cast<Instruction>(V)] = *MaybeRsrc;
1095       }
1096     } else {
1097       llvm_unreachable("Only PHIs and selects go in the conditionals list");
1098     }
1099   }
1100 }
1101 
1102 void SplitPtrStructs::killAndReplaceSplitInstructions(
1103     SmallVectorImpl<Instruction *> &Origs) {
1104   for (Instruction *I : ConditionalTemps)
1105     I->eraseFromParent();
1106 
1107   for (Instruction *I : Origs) {
1108     if (!SplitUsers.contains(I))
1109       continue;
1110 
1111     SmallVector<DbgValueInst *> Dbgs;
1112     findDbgValues(Dbgs, I);
1113     for (auto *Dbg : Dbgs) {
1114       IRB.SetInsertPoint(Dbg);
1115       auto &DL = I->getModule()->getDataLayout();
1116       assert(isSplitFatPtr(I->getType()) &&
1117              "We should've RAUW'd away loads, stores, etc. at this point");
1118       auto *OffDbg = cast<DbgValueInst>(Dbg->clone());
1119       copyMetadata(OffDbg, Dbg);
1120       auto [Rsrc, Off] = getPtrParts(I);
1121 
1122       int64_t RsrcSz = DL.getTypeSizeInBits(Rsrc->getType());
1123       int64_t OffSz = DL.getTypeSizeInBits(Off->getType());
1124 
1125       std::optional<DIExpression *> RsrcExpr =
1126           DIExpression::createFragmentExpression(Dbg->getExpression(), 0,
1127                                                  RsrcSz);
1128       std::optional<DIExpression *> OffExpr =
1129           DIExpression::createFragmentExpression(Dbg->getExpression(), RsrcSz,
1130                                                  OffSz);
1131       if (OffExpr) {
1132         OffDbg->setExpression(*OffExpr);
1133         OffDbg->replaceVariableLocationOp(I, Off);
1134         IRB.Insert(OffDbg);
1135       } else {
1136         OffDbg->deleteValue();
1137       }
1138       if (RsrcExpr) {
1139         Dbg->setExpression(*RsrcExpr);
1140         Dbg->replaceVariableLocationOp(I, Rsrc);
1141       } else {
1142         Dbg->replaceVariableLocationOp(I, UndefValue::get(I->getType()));
1143       }
1144     }
1145 
1146     Value *Poison = PoisonValue::get(I->getType());
1147     I->replaceUsesWithIf(Poison, [&](const Use &U) -> bool {
1148       if (const auto *UI = dyn_cast<Instruction>(U.getUser()))
1149         return SplitUsers.contains(UI);
1150       return false;
1151     });
1152 
1153     if (I->use_empty()) {
1154       I->eraseFromParent();
1155       continue;
1156     }
1157     IRB.SetInsertPoint(*I->getInsertionPointAfterDef());
1158     IRB.SetCurrentDebugLocation(I->getDebugLoc());
1159     auto [Rsrc, Off] = getPtrParts(I);
1160     Value *Struct = PoisonValue::get(I->getType());
1161     Struct = IRB.CreateInsertValue(Struct, Rsrc, 0);
1162     Struct = IRB.CreateInsertValue(Struct, Off, 1);
1163     copyMetadata(Struct, I);
1164     Struct->takeName(I);
1165     I->replaceAllUsesWith(Struct);
1166     I->eraseFromParent();
1167   }
1168 }
1169 
1170 void SplitPtrStructs::setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx) {
1171   LLVMContext &Ctx = Intr->getContext();
1172   Intr->addParamAttr(RsrcArgIdx, Attribute::getWithAlignment(Ctx, A));
1173 }
1174 
1175 void SplitPtrStructs::insertPreMemOpFence(AtomicOrdering Order,
1176                                           SyncScope::ID SSID) {
1177   switch (Order) {
1178   case AtomicOrdering::Release:
1179   case AtomicOrdering::AcquireRelease:
1180   case AtomicOrdering::SequentiallyConsistent:
1181     IRB.CreateFence(AtomicOrdering::Release, SSID);
1182     break;
1183   default:
1184     break;
1185   }
1186 }
1187 
1188 void SplitPtrStructs::insertPostMemOpFence(AtomicOrdering Order,
1189                                            SyncScope::ID SSID) {
1190   switch (Order) {
1191   case AtomicOrdering::Acquire:
1192   case AtomicOrdering::AcquireRelease:
1193   case AtomicOrdering::SequentiallyConsistent:
1194     IRB.CreateFence(AtomicOrdering::Acquire, SSID);
1195     break;
1196   default:
1197     break;
1198   }
1199 }
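
// For example, under this scheme a `load atomic acquire` is lowered (as a
// sketch) to the raw buffer load intrinsic followed by a `fence acquire`,
// while a release-ordered store gets a `fence release` before the buffer
// store intrinsic.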
1200 
1201 Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
1202                                          Type *Ty, Align Alignment,
1203                                          AtomicOrdering Order, bool IsVolatile,
1204                                          SyncScope::ID SSID) {
1205   IRB.SetInsertPoint(I);
1206 
1207   auto [Rsrc, Off] = getPtrParts(Ptr);
1208   SmallVector<Value *, 5> Args;
1209   if (Arg)
1210     Args.push_back(Arg);
1211   Args.push_back(Rsrc);
1212   Args.push_back(Off);
1213   insertPreMemOpFence(Order, SSID);
1214   // soffset is always 0 for these cases, where we always want any offset to be
1215   // part of bounds checking and we don't know which parts of the GEP are
1216   // uniform.
1217   Args.push_back(IRB.getInt32(0));
1218 
1219   uint32_t Aux = 0;
1220   bool IsInvariant =
1221       (isa<LoadInst>(I) && I->getMetadata(LLVMContext::MD_invariant_load));
1222   bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal);
1223   // Atomic loads and stores need glc, atomic read-modify-write doesn't.
1224   bool IsOneWayAtomic =
1225       !isa<AtomicRMWInst>(I) && Order != AtomicOrdering::NotAtomic;
1226   if (IsOneWayAtomic)
1227     Aux |= AMDGPU::CPol::GLC;
1228   if (IsNonTemporal && !IsInvariant)
1229     Aux |= AMDGPU::CPol::SLC;
1230   if (isa<LoadInst>(I) && ST->getGeneration() == AMDGPUSubtarget::GFX10)
1231     Aux |= (Aux & AMDGPU::CPol::GLC ? AMDGPU::CPol::DLC : 0);
1232   if (IsVolatile)
1233     Aux |= AMDGPU::CPol::VOLATILE;
1234   Args.push_back(IRB.getInt32(Aux));

  Intrinsic::ID IID = Intrinsic::not_intrinsic;
  if (isa<LoadInst>(I))
    // TODO: Do we need to do something about atomic loads?
    IID = Intrinsic::amdgcn_raw_ptr_buffer_load;
  else if (isa<StoreInst>(I))
    IID = Intrinsic::amdgcn_raw_ptr_buffer_store;
  else if (auto *RMW = dyn_cast<AtomicRMWInst>(I)) {
    switch (RMW->getOperation()) {
    case AtomicRMWInst::Xchg:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap;
      break;
    case AtomicRMWInst::Add:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
      break;
    case AtomicRMWInst::Sub:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub;
      break;
    case AtomicRMWInst::And:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_and;
      break;
    case AtomicRMWInst::Or:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_or;
      break;
    case AtomicRMWInst::Xor:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor;
      break;
    case AtomicRMWInst::Max:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax;
      break;
    case AtomicRMWInst::Min:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin;
      break;
    case AtomicRMWInst::UMax:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax;
      break;
    case AtomicRMWInst::UMin:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin;
      break;
    case AtomicRMWInst::FAdd:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
      break;
    case AtomicRMWInst::FMax:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
      break;
    case AtomicRMWInst::FMin:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin;
      break;
    case AtomicRMWInst::FSub: {
      report_fatal_error("atomic floating point subtraction not supported for "
                         "buffer resources and should've been expanded away");
      break;
    }
    case AtomicRMWInst::Nand:
      report_fatal_error("atomic nand not supported for buffer resources and "
                         "should've been expanded away");
      break;
    case AtomicRMWInst::UIncWrap:
    case AtomicRMWInst::UDecWrap:
      report_fatal_error("wrapping increment/decrement not supported for "
                         "buffer resources and should've been expanded away");
      break;
    case AtomicRMWInst::BAD_BINOP:
      llvm_unreachable("Not sure how we got a bad binop");
    }
  }

  auto *Call = IRB.CreateIntrinsic(IID, Ty, Args);
  copyMetadata(Call, I);
  setAlign(Call, Alignment, Arg ? 1 : 0);
  Call->takeName(I);

  insertPostMemOpFence(Order, SSID);
  // The "no moving p7 directly" rewrites ensure that this load or store won't
  // itself need to be split into parts.
  SplitUsers.insert(I);
  I->replaceAllUsesWith(Call);
  return Call;
}

PtrParts SplitPtrStructs::visitInstruction(Instruction &I) {
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitLoadInst(LoadInst &LI) {
  if (!isSplitFatPtr(LI.getPointerOperandType()))
    return {nullptr, nullptr};
  handleMemoryInst(&LI, nullptr, LI.getPointerOperand(), LI.getType(),
                   LI.getAlign(), LI.getOrdering(), LI.isVolatile(),
                   LI.getSyncScopeID());
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitStoreInst(StoreInst &SI) {
  if (!isSplitFatPtr(SI.getPointerOperandType()))
    return {nullptr, nullptr};
  Value *Arg = SI.getValueOperand();
  handleMemoryInst(&SI, Arg, SI.getPointerOperand(), Arg->getType(),
                   SI.getAlign(), SI.getOrdering(), SI.isVolatile(),
                   SI.getSyncScopeID());
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitAtomicRMWInst(AtomicRMWInst &AI) {
  if (!isSplitFatPtr(AI.getPointerOperand()->getType()))
    return {nullptr, nullptr};
  Value *Arg = AI.getValOperand();
  handleMemoryInst(&AI, Arg, AI.getPointerOperand(), Arg->getType(),
                   AI.getAlign(), AI.getOrdering(), AI.isVolatile(),
                   AI.getSyncScopeID());
  return {nullptr, nullptr};
}

// Unlike load, store, and RMW, cmpxchg needs special handling to account
// for the boolean success bit in its {T, i1} result.
PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {
  Value *Ptr = AI.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&AI);

  Type *Ty = AI.getNewValOperand()->getType();
  AtomicOrdering Order = AI.getMergedOrdering();
  SyncScope::ID SSID = AI.getSyncScopeID();
  bool IsNonTemporal = AI.getMetadata(LLVMContext::MD_nontemporal);

  auto [Rsrc, Off] = getPtrParts(Ptr);
  insertPreMemOpFence(Order, SSID);

  uint32_t Aux = 0;
  if (IsNonTemporal)
    Aux |= AMDGPU::CPol::SLC;
  if (AI.isVolatile())
    Aux |= AMDGPU::CPol::VOLATILE;
  auto *Call =
      IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
                          {AI.getNewValOperand(), AI.getCompareOperand(), Rsrc,
                           Off, IRB.getInt32(0), IRB.getInt32(Aux)});
  copyMetadata(Call, &AI);
  setAlign(Call, AI.getAlign(), 2);
  Call->takeName(&AI);
  insertPostMemOpFence(Order, SSID);

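  // Reconstruct the {T, i1} result. The intrinsic returns only the old value;
  // success is recovered by comparing that value against the expected operand.
  // For a weak cmpxchg, which may fail spuriously, that comparison wouldn't be
  // a faithful success bit, so element 1 is simply left as poison.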
  Value *Res = PoisonValue::get(AI.getType());
  Res = IRB.CreateInsertValue(Res, Call, 0);
  if (!AI.isWeak()) {
    Value *Succeeded = IRB.CreateICmpEQ(Call, AI.getCompareOperand());
    Res = IRB.CreateInsertValue(Res, Succeeded, 1);
  }
  SplitUsers.insert(&AI);
  AI.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}

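// Lower a GEP on a buffer fat pointer to integer arithmetic on the offset
// part, leaving the resource part untouched. As a hypothetical sketch,
//   %q = getelementptr i32, ptr addrspace(7) %p, i32 %i
// becomes, in the split representation,
//   %1 = shl i32 %i, 2
//   %q.off = add i32 %p.off, %1
// with %q.rsrc remaining %p.rsrc.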
PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  Value *Ptr = GEP.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&GEP);

  auto [Rsrc, Off] = getPtrParts(Ptr);
  Type *OffTy = Off->getType();
  const DataLayout &DL = GEP.getModule()->getDataLayout();
  bool InBounds = GEP.isInBounds();

  // In order to call collectOffset() and thus not have to reimplement it,
  // we need the GEP's pointer operand to have ptr addrspace(7) type.
  GEP.setOperand(GEP.getPointerOperandIndex(),
                 PoisonValue::get(IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER)));
  MapVector<Value *, APInt> VariableOffs;
  APInt ConstOffVal = APInt::getZero(BufferOffsetWidth);
  if (!GEP.collectOffset(DL, BufferOffsetWidth, VariableOffs, ConstOffVal))
    report_fatal_error("Scalable vector or unsized struct in fat pointer GEP");
  GEP.setOperand(GEP.getPointerOperandIndex(), Ptr);
  Value *OffAccum = nullptr;
  // Accumulate offsets together before adding to the base in order to preserve
  // as many of the inbounds properties as possible.
  for (auto [Arg, Multiple] : VariableOffs) {
    if (auto *OffVecTy = dyn_cast<VectorType>(OffTy))
      if (!Arg->getType()->isVectorTy())
        Arg = IRB.CreateVectorSplat(OffVecTy->getElementCount(), Arg);
    Arg = IRB.CreateIntCast(Arg, OffTy, /*isSigned=*/true);
    if (!Multiple.isOne()) {
      if (Multiple.isPowerOf2())
        Arg = IRB.CreateShl(Arg, Multiple.logBase2(), "", /*HasNUW=*/InBounds,
                            /*HasNSW=*/InBounds);
      else
        Arg = IRB.CreateMul(Arg, ConstantExpr::getIntegerValue(OffTy, Multiple),
                            "", /*HasNUW=*/InBounds, /*HasNSW=*/InBounds);
    }
    if (OffAccum)
      OffAccum = IRB.CreateAdd(OffAccum, Arg, "", /*HasNUW=*/InBounds,
                               /*HasNSW=*/InBounds);
    else
      OffAccum = Arg;
  }
  if (!ConstOffVal.isZero()) {
    Constant *ConstOff = ConstantExpr::getIntegerValue(OffTy, ConstOffVal);
    if (OffAccum)
      OffAccum = IRB.CreateAdd(OffAccum, ConstOff, "", /*HasNUW=*/InBounds,
                               /*HasNSW=*/InBounds);
    else
      OffAccum = ConstOff;
  }

  if (!OffAccum) { // Constant-zero offset
    SplitUsers.insert(&GEP);
    return {Rsrc, Off};
  }

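  // nuw can only be placed on the final add when the accumulated offset is
  // known non-negative: an inbounds GEP with a negative total offset may still
  // legitimately decrease the 32-bit offset field.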
  bool HasNonNegativeOff = false;
  if (auto *CI = dyn_cast<ConstantInt>(OffAccum)) {
    HasNonNegativeOff = !CI->isNegative();
  }
  Value *NewOff;
  if (PatternMatch::match(Off, PatternMatch::is_zero())) {
    NewOff = OffAccum;
  } else {
    NewOff = IRB.CreateAdd(Off, OffAccum, "",
                           /*HasNUW=*/InBounds && HasNonNegativeOff,
                           /*HasNSW=*/false);
  }
  copyMetadata(NewOff, &GEP);
  NewOff->takeName(&GEP);
  SplitUsers.insert(&GEP);
  return {Rsrc, NewOff};
}

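// ptrtoint conceptually materializes the fat pointer's integer form,
//   (zext(rsrc) << 32) | zext(off),
// adjusted for result types narrower or wider than the full 160 bits.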
PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) {
  Value *Ptr = PI.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&PI);

  Type *ResTy = PI.getType();
  unsigned Width = ResTy->getScalarSizeInBits();

  auto [Rsrc, Off] = getPtrParts(Ptr);
  const DataLayout &DL = PI.getModule()->getDataLayout();
  unsigned FatPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER);

  Value *RsrcInt;
  if (Width <= BufferOffsetWidth)
    RsrcInt = ConstantExpr::getIntegerValue(ResTy, APInt::getZero(Width));
  else
    RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.getName() + ".rsrc");
  copyMetadata(RsrcInt, &PI);

  Value *Shl = IRB.CreateShl(
      RsrcInt,
      ConstantExpr::getIntegerValue(ResTy, APInt(Width, BufferOffsetWidth)), "",
      Width >= FatPtrWidth, Width > FatPtrWidth);
  Value *OffCast =
      IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false, PI.getName() + ".off");
  Value *Res = IRB.CreateOr(Shl, OffCast);
  Res->takeName(&PI);
  SplitUsers.insert(&PI);
  PI.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}

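// inttoptr inverts the layout above: the resource part comes from the high
// bits (int >> 32, truncated to resource-pointer width and cast back to a
// pointer) and the offset part from the low 32 bits.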
PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) {
  if (!isSplitFatPtr(IP.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&IP);
  const DataLayout &DL = IP.getModule()->getDataLayout();
  unsigned RsrcPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_RESOURCE);
  Value *Int = IP.getOperand(0);
  Type *IntTy = Int->getType();
  Type *RsrcIntTy = IntTy->getWithNewBitWidth(RsrcPtrWidth);
  unsigned Width = IntTy->getScalarSizeInBits();

  auto *RetTy = cast<StructType>(IP.getType());
  Type *RsrcTy = RetTy->getElementType(0);
  Type *OffTy = RetTy->getElementType(1);
  Value *RsrcPart = IRB.CreateLShr(
      Int,
      ConstantExpr::getIntegerValue(IntTy, APInt(Width, BufferOffsetWidth)));
  Value *RsrcInt = IRB.CreateIntCast(RsrcPart, RsrcIntTy, /*isSigned=*/false);
  Value *Rsrc = IRB.CreateIntToPtr(RsrcInt, RsrcTy, IP.getName() + ".rsrc");
  Value *Off =
      IRB.CreateIntCast(Int, OffTy, /*isSigned=*/false, IP.getName() + ".off");

  copyMetadata(Rsrc, &IP);
  SplitUsers.insert(&IP);
  return {Rsrc, Off};
}

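// An addrspacecast from a buffer resource (addrspace 8) produces a fat pointer
// to the start of the buffer: the resource itself paired with a zero offset.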
PtrParts SplitPtrStructs::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *In = I.getPointerOperand();
  // No-op casts preserve parts
  if (In->getType() == I.getType()) {
    auto [Rsrc, Off] = getPtrParts(In);
    SplitUsers.insert(&I);
    return {Rsrc, Off};
  }
  if (I.getSrcAddressSpace() != AMDGPUAS::BUFFER_RESOURCE)
    report_fatal_error("Only buffer resources (addrspace 8) can be cast to "
                       "buffer fat pointers (addrspace 7)");
  Type *OffTy = cast<StructType>(I.getType())->getElementType(1);
  Value *ZeroOff = Constant::getNullValue(OffTy);
  SplitUsers.insert(&I);
  return {In, ZeroOff};
}

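// Equality tests compare both halves: two fat pointers are equal iff both
// their resource and their offset parts are equal, and unequal if either
// part differs.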
PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) {
  Value *Lhs = Cmp.getOperand(0);
  if (!isSplitFatPtr(Lhs->getType()))
    return {nullptr, nullptr};
  Value *Rhs = Cmp.getOperand(1);
  IRB.SetInsertPoint(&Cmp);
  ICmpInst::Predicate Pred = Cmp.getPredicate();

  assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
         "Pointer comparison is only equal or unequal");
  auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);
  auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);
  Value *RsrcCmp =
      IRB.CreateICmp(Pred, LhsRsrc, RhsRsrc, Cmp.getName() + ".rsrc");
  copyMetadata(RsrcCmp, &Cmp);
  Value *OffCmp = IRB.CreateICmp(Pred, LhsOff, RhsOff, Cmp.getName() + ".off");
  copyMetadata(OffCmp, &Cmp);

  Value *Res = nullptr;
  if (Pred == ICmpInst::ICMP_EQ)
    Res = IRB.CreateAnd(RsrcCmp, OffCmp);
  else if (Pred == ICmpInst::ICMP_NE)
    Res = IRB.CreateOr(RsrcCmp, OffCmp);
  copyMetadata(Res, &Cmp);
  Res->takeName(&Cmp);
  SplitUsers.insert(&Cmp);
  Cmp.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitFreezeInst(FreezeInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  auto [Rsrc, Off] = getPtrParts(I.getOperand(0));

  Value *RsrcRes = IRB.CreateFreeze(Rsrc, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes = IRB.CreateFreeze(Off, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitExtractElementInst(ExtractElementInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *Vec = I.getVectorOperand();
  Value *Idx = I.getIndexOperand();
  auto [Rsrc, Off] = getPtrParts(Vec);

  Value *RsrcRes = IRB.CreateExtractElement(Rsrc, Idx, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes = IRB.CreateExtractElement(Off, Idx, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitInsertElementInst(InsertElementInst &I) {
  // The mutated instructions temporarily don't return vectors, and so
  // we need the generic getType() here to avoid crashes.
  if (!isSplitFatPtr(cast<Instruction>(I).getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *Vec = I.getOperand(0);
  Value *Elem = I.getOperand(1);
  Value *Idx = I.getOperand(2);
  auto [VecRsrc, VecOff] = getPtrParts(Vec);
  auto [ElemRsrc, ElemOff] = getPtrParts(Elem);

  Value *RsrcRes =
      IRB.CreateInsertElement(VecRsrc, ElemRsrc, Idx, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes =
      IRB.CreateInsertElement(VecOff, ElemOff, Idx, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitShuffleVectorInst(ShuffleVectorInst &I) {
  // Cast is needed for the same reason as insertelement's.
  if (!isSplitFatPtr(cast<Instruction>(I).getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);

  Value *V1 = I.getOperand(0);
  Value *V2 = I.getOperand(1);
  ArrayRef<int> Mask = I.getShuffleMask();
  auto [V1Rsrc, V1Off] = getPtrParts(V1);
  auto [V2Rsrc, V2Off] = getPtrParts(V2);

  Value *RsrcRes =
      IRB.CreateShuffleVector(V1Rsrc, V2Rsrc, Mask, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes =
      IRB.CreateShuffleVector(V1Off, V2Off, Mask, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitPHINode(PHINode &PHI) {
  if (!isSplitFatPtr(PHI.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(*PHI.getInsertionPointAfterDef());
  // Phi nodes will be handled in post-processing after we've visited every
  // instruction. However, instead of just returning {nullptr, nullptr},
  // we explicitly create the temporary extractvalue operations that are our
  // temporary results so that they end up at the beginning of the block with
  // the PHIs.
  Value *TmpRsrc = IRB.CreateExtractValue(&PHI, 0, PHI.getName() + ".rsrc");
  Value *TmpOff = IRB.CreateExtractValue(&PHI, 1, PHI.getName() + ".off");
  Conditionals.push_back(&PHI);
  SplitUsers.insert(&PHI);
  return {TmpRsrc, TmpOff};
}

PtrParts SplitPtrStructs::visitSelectInst(SelectInst &SI) {
  if (!isSplitFatPtr(SI.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&SI);

  Value *Cond = SI.getCondition();
  Value *True = SI.getTrueValue();
  Value *False = SI.getFalseValue();
  auto [TrueRsrc, TrueOff] = getPtrParts(True);
  auto [FalseRsrc, FalseOff] = getPtrParts(False);

  Value *RsrcRes =
      IRB.CreateSelect(Cond, TrueRsrc, FalseRsrc, SI.getName() + ".rsrc", &SI);
  copyMetadata(RsrcRes, &SI);
  Conditionals.push_back(&SI);
  Value *OffRes =
      IRB.CreateSelect(Cond, TrueOff, FalseOff, SI.getName() + ".off", &SI);
  copyMetadata(OffRes, &SI);
  SplitUsers.insert(&SI);
  return {RsrcRes, OffRes};
}

/// Returns true if this intrinsic needs to be removed when it is
/// applied to `ptr addrspace(7)` values. Calls to these intrinsics are
/// rewritten into calls to versions of that intrinsic on the resource
/// descriptor.
static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
  switch (IID) {
  default:
    return false;
  case Intrinsic::ptrmask:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
    return true;
  }
}

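// Rewrite the removable pointer intrinsics listed above onto the split parts:
// ptrmask applies to the offset only (the mask must already be offset-sized),
// while the invariant and invariant-group intrinsics apply to the resource
// part, since they describe properties of the whole underlying object.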
PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
  Intrinsic::ID IID = I.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::ptrmask: {
    Value *Ptr = I.getArgOperand(0);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    Value *Mask = I.getArgOperand(1);
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    if (Mask->getType() != Off->getType())
      report_fatal_error("offset width is not equal to index width of fat "
                         "pointer (data layout not set up correctly?)");
    Value *OffRes = IRB.CreateAnd(Off, Mask, I.getName() + ".off");
    copyMetadata(OffRes, &I);
    SplitUsers.insert(&I);
    return {Rsrc, OffRes};
  }
  // Pointer annotation intrinsics that, given their object-wide nature,
  // operate on the resource part.
  case Intrinsic::invariant_start: {
    Value *Ptr = I.getArgOperand(1);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    Type *NewTy = PointerType::get(I.getContext(), AMDGPUAS::BUFFER_RESOURCE);
    auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {I.getOperand(0), Rsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    I.replaceAllUsesWith(NewRsrc);
    return {nullptr, nullptr};
  }
  case Intrinsic::invariant_end: {
    Value *RealPtr = I.getArgOperand(2);
    if (!isSplitFatPtr(RealPtr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    Value *RealRsrc = getPtrParts(RealPtr).first;
    Value *InvPtr = I.getArgOperand(0);
    Value *Size = I.getArgOperand(1);
    Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->getType()},
                                         {InvPtr, Size, RealRsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    I.replaceAllUsesWith(NewRsrc);
    return {nullptr, nullptr};
  }
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group: {
    Value *Ptr = I.getArgOperand(0);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->getType()}, {Rsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    return {NewRsrc, Off};
  }
  }
  return {nullptr, nullptr};
}

void SplitPtrStructs::processFunction(Function &F) {
  ST = &TM->getSubtarget<GCNSubtarget>(F);
  SmallVector<Instruction *, 0> Originals;
  LLVM_DEBUG(dbgs() << "Splitting pointer structs in function: " << F.getName()
                    << "\n");
  for (Instruction &I : instructions(F))
    Originals.push_back(&I);
  for (Instruction *I : Originals) {
    auto [Rsrc, Off] = visit(I);
    assert(((Rsrc && Off) || (!Rsrc && !Off)) &&
           "Can't have a resource but no offset");
    if (Rsrc)
      RsrcParts[I] = Rsrc;
    if (Off)
      OffParts[I] = Off;
  }
  processConditionals();
  killAndReplaceSplitInstructions(Originals);

  // Clean up after ourselves to save on memory.
  RsrcParts.clear();
  OffParts.clear();
  SplitUsers.clear();
  Conditionals.clear();
  ConditionalTemps.clear();
}

namespace {
class AMDGPULowerBufferFatPointers : public ModulePass {
public:
  static char ID;

  AMDGPULowerBufferFatPointers() : ModulePass(ID) {
    initializeAMDGPULowerBufferFatPointersPass(
        *PassRegistry::getPassRegistry());
  }

  bool run(Module &M, const TargetMachine &TM);
  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // namespace

/// Returns true if there are values that have a buffer fat pointer in them,
/// which means we'll need to perform rewrites on this function. As a side
/// effect, this will populate the type remapping cache.
static bool containsBufferFatPointers(const Function &F,
                                      BufferFatPtrToStructTypeMap *TypeMap) {
  bool HasFatPointers = false;
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      HasFatPointers |= (I.getType() != TypeMap->remapType(I.getType()));
      for (const Use &U : I.operands())
        if (auto *C = dyn_cast<Constant>(U.get()))
          HasFatPointers |= isBufferFatPtrConst(C);
    }
  }
  return HasFatPointers;
}

static bool hasFatPointerInterface(const Function &F,
                                   BufferFatPtrToStructTypeMap *TypeMap) {
  Type *Ty = F.getFunctionType();
  return Ty != TypeMap->remapType(Ty);
}

/// Move the body of `OldF` into a new function, returning it.
static Function *moveFunctionAdaptingType(Function *OldF, FunctionType *NewTy,
                                          ValueToValueMapTy &CloneMap) {
  bool IsIntrinsic = OldF->isIntrinsic();
  Function *NewF =
      Function::Create(NewTy, OldF->getLinkage(), OldF->getAddressSpace());
  NewF->IsNewDbgInfoFormat = OldF->IsNewDbgInfoFormat;
  NewF->copyAttributesFrom(OldF);
  NewF->copyMetadata(OldF, 0);
  NewF->takeName(OldF);
  NewF->updateAfterNameChange();
  NewF->setDLLStorageClass(OldF->getDLLStorageClass());
  OldF->getParent()->getFunctionList().insertAfter(OldF->getIterator(), NewF);

  while (!OldF->empty()) {
    BasicBlock *BB = &OldF->front();
    BB->removeFromParent();
    BB->insertInto(NewF);
    CloneMap[BB] = BB;
    for (Instruction &I : *BB) {
      CloneMap[&I] = &I;
    }
  }

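  // Pointer-specific attributes don't make sense once an argument has been
  // remapped to the {resource, offset} struct type, so they are stripped from
  // any argument (or return value) whose type changed.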
  AttributeMask PtrOnlyAttrs;
  for (auto K :
       {Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
        Attribute::NoAlias, Attribute::NoCapture, Attribute::NoFree,
        Attribute::NonNull, Attribute::NullPointerIsValid, Attribute::ReadNone,
        Attribute::ReadOnly, Attribute::WriteOnly}) {
    PtrOnlyAttrs.addAttribute(K);
  }
  SmallVector<AttributeSet> ArgAttrs;
  AttributeList OldAttrs = OldF->getAttributes();

  for (auto [I, OldArg, NewArg] : enumerate(OldF->args(), NewF->args())) {
    CloneMap[&NewArg] = &OldArg;
    NewArg.takeName(&OldArg);
    Type *OldArgTy = OldArg.getType(), *NewArgTy = NewArg.getType();
    // Temporarily mutate type of `NewArg` to allow RAUW to work.
    NewArg.mutateType(OldArgTy);
    OldArg.replaceAllUsesWith(&NewArg);
    NewArg.mutateType(NewArgTy);

    AttributeSet ArgAttr = OldAttrs.getParamAttrs(I);
    // Intrinsics get their attributes fixed later.
    if (OldArgTy != NewArgTy && !IsIntrinsic)
      ArgAttr = ArgAttr.removeAttributes(NewF->getContext(), PtrOnlyAttrs);
    ArgAttrs.push_back(ArgAttr);
  }
  AttributeSet RetAttrs = OldAttrs.getRetAttrs();
  if (OldF->getReturnType() != NewF->getReturnType() && !IsIntrinsic)
    RetAttrs = RetAttrs.removeAttributes(NewF->getContext(), PtrOnlyAttrs);
  NewF->setAttributes(AttributeList::get(
      NewF->getContext(), OldAttrs.getFnAttrs(), RetAttrs, ArgAttrs));
  return NewF;
}

static void makeCloneInPlaceMap(Function *F, ValueToValueMapTy &CloneMap) {
  for (Argument &A : F->args())
    CloneMap[&A] = &A;
  for (BasicBlock &BB : *F) {
    CloneMap[&BB] = &BB;
    for (Instruction &I : BB)
      CloneMap[&I] = &I;
  }
}

bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
  bool Changed = false;
  const DataLayout &DL = M.getDataLayout();
  // Record the functions which need to be remapped.
  // The second element of the pair indicates whether the function has to have
  // its arguments or return types adjusted.
  SmallVector<std::pair<Function *, bool>> NeedsRemap;

  BufferFatPtrToStructTypeMap StructTM(DL);
  BufferFatPtrToIntTypeMap IntTM(DL);
  for (const GlobalVariable &GV : M.globals()) {
    if (GV.getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER)
      report_fatal_error("Global variables with a buffer fat pointer address "
                         "space (7) are not supported");
    Type *VT = GV.getValueType();
    if (VT != StructTM.remapType(VT))
      report_fatal_error("Global variables that contain buffer fat pointers "
                         "(address space 7 pointers) are unsupported. Use "
                         "buffer resource pointers (address space 8) instead.");
  }

  StoreFatPtrsAsIntsVisitor MemOpsRewrite(&IntTM, M.getContext());
  for (Function &F : M.functions()) {
    bool InterfaceChange = hasFatPointerInterface(F, &StructTM);
    bool BodyChanges = containsBufferFatPointers(F, &StructTM);
    Changed |= MemOpsRewrite.processFunction(F);
    if (InterfaceChange || BodyChanges)
      NeedsRemap.push_back(std::make_pair(&F, InterfaceChange));
  }
  if (NeedsRemap.empty())
    return Changed;

  SmallVector<Function *> NeedsPostProcess;
  SmallVector<Function *> Intrinsics;
  // Keep one big map so as to memoize constants across functions.
  ValueToValueMapTy CloneMap;
  FatPtrConstMaterializer Materializer(&StructTM, CloneMap, &IntTM, DL);

  ValueMapper LowerInFuncs(CloneMap, RF_None, &StructTM, &Materializer);
  for (auto [F, InterfaceChange] : NeedsRemap) {
    Function *NewF = F;
    if (InterfaceChange)
      NewF = moveFunctionAdaptingType(
          F, cast<FunctionType>(StructTM.remapType(F->getFunctionType())),
          CloneMap);
    else
      makeCloneInPlaceMap(F, CloneMap);
    LowerInFuncs.remapFunction(*NewF);
    if (NewF->isIntrinsic())
      Intrinsics.push_back(NewF);
    else
      NeedsPostProcess.push_back(NewF);
    if (InterfaceChange) {
      F->replaceAllUsesWith(NewF);
      F->eraseFromParent();
    }
    Changed = true;
  }
  StructTM.clear();
  IntTM.clear();
  CloneMap.clear();

  SplitPtrStructs Splitter(M.getContext(), &TM);
  for (Function *F : NeedsPostProcess)
    Splitter.processFunction(*F);
  for (Function *F : Intrinsics) {
    if (isRemovablePointerIntrinsic(F->getIntrinsicID())) {
      F->eraseFromParent();
    } else {
      std::optional<Function *> NewF = Intrinsic::remangleIntrinsicFunction(F);
      if (NewF)
        F->replaceAllUsesWith(*NewF);
    }
  }
  return Changed;
}

bool AMDGPULowerBufferFatPointers::runOnModule(Module &M) {
  TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  return run(M, TM);
}

char AMDGPULowerBufferFatPointers::ID = 0;

char &llvm::AMDGPULowerBufferFatPointersID = AMDGPULowerBufferFatPointers::ID;

void AMDGPULowerBufferFatPointers::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
}

#define PASS_DESC "Lower buffer fat pointer operations to buffer resources"
INITIALIZE_PASS_BEGIN(AMDGPULowerBufferFatPointers, DEBUG_TYPE, PASS_DESC,
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPULowerBufferFatPointers, DEBUG_TYPE, PASS_DESC, false,
                    false)
#undef PASS_DESC

ModulePass *llvm::createAMDGPULowerBufferFatPointersPass() {
  return new AMDGPULowerBufferFatPointers();
}

PreservedAnalyses
AMDGPULowerBufferFatPointersPass::run(Module &M, ModuleAnalysisManager &MA) {
  return AMDGPULowerBufferFatPointers().run(M, TM) ? PreservedAnalyses::none()
                                                   : PreservedAnalyses::all();
}