xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1*0fca6ea1SDimitry Andric //===-- AMDGPULowerBufferFatPointers.cpp ---------------------------=//
2*0fca6ea1SDimitry Andric //
3*0fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0fca6ea1SDimitry Andric //
7*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
8*0fca6ea1SDimitry Andric //
9*0fca6ea1SDimitry Andric // This pass lowers operations on buffer fat pointers (addrspace 7) to
10*0fca6ea1SDimitry Andric // operations on buffer resources (addrspace 8) and is needed for correct
11*0fca6ea1SDimitry Andric // codegen.
12*0fca6ea1SDimitry Andric //
13*0fca6ea1SDimitry Andric // # Background
14*0fca6ea1SDimitry Andric //
15*0fca6ea1SDimitry Andric // Address space 7 (the buffer fat pointer) is a 160-bit pointer that consists
16*0fca6ea1SDimitry Andric // of a 128-bit buffer descriptor and a 32-bit offset into that descriptor.
// The buffer resource part needs to be a "raw" buffer resource
18*0fca6ea1SDimitry Andric // (it must have a stride of 0 and bounds checks must be in raw buffer mode
19*0fca6ea1SDimitry Andric // or disabled).
20*0fca6ea1SDimitry Andric //
21*0fca6ea1SDimitry Andric // When these requirements are met, a buffer resource can be treated as a
22*0fca6ea1SDimitry Andric // typical (though quite wide) pointer that follows typical LLVM pointer
23*0fca6ea1SDimitry Andric // semantics. This allows the frontend to reason about such buffers (which are
24*0fca6ea1SDimitry Andric // often encountered in the context of SPIR-V kernels).
25*0fca6ea1SDimitry Andric //
26*0fca6ea1SDimitry Andric // However, because of their non-power-of-2 size, these fat pointers cannot be
27*0fca6ea1SDimitry Andric // present during translation to MIR (though this restriction may be lifted
28*0fca6ea1SDimitry Andric // during the transition to GlobalISel). Therefore, this pass is needed in order
29*0fca6ea1SDimitry Andric // to correctly implement these fat pointers.
30*0fca6ea1SDimitry Andric //
31*0fca6ea1SDimitry Andric // The resource intrinsics take the resource part (the address space 8 pointer)
32*0fca6ea1SDimitry Andric // and the offset part (the 32-bit integer) as separate arguments. In addition,
33*0fca6ea1SDimitry Andric // many users of these buffers manipulate the offset while leaving the resource
34*0fca6ea1SDimitry Andric // part alone. For these reasons, we want to typically separate the resource
35*0fca6ea1SDimitry Andric // and offset parts into separate variables, but combine them together when
36*0fca6ea1SDimitry Andric // encountering cases where this is required, such as by inserting these values
// into aggregates or moving them to memory.
38*0fca6ea1SDimitry Andric //
39*0fca6ea1SDimitry Andric // Therefore, at a high level, `ptr addrspace(7) %x` becomes `ptr addrspace(8)
40*0fca6ea1SDimitry Andric // %x.rsrc` and `i32 %x.off`, which will be combined into `{ptr addrspace(8),
41*0fca6ea1SDimitry Andric // i32} %x = {%x.rsrc, %x.off}` if needed. Similarly, `vector<Nxp7>` becomes
// `{vector<Nxp8>, vector<Nxi32>}` and its component parts.
43*0fca6ea1SDimitry Andric //
44*0fca6ea1SDimitry Andric // # Implementation
45*0fca6ea1SDimitry Andric //
46*0fca6ea1SDimitry Andric // This pass proceeds in three main phases:
47*0fca6ea1SDimitry Andric //
48*0fca6ea1SDimitry Andric // ## Rewriting loads and stores of p7
49*0fca6ea1SDimitry Andric //
// The first phase is to rewrite away all loads and stores of
// `ptr addrspace(7)`, including aggregates containing such pointers, to ones
// that use `i160`. This is handled by `StoreFatPtrsAsIntsVisitor`, which visits
// loads, stores, and allocas and, if the loaded or stored type contains
// `ptr addrspace(7)`,
54*0fca6ea1SDimitry Andric // rewrites that type to one where the p7s are replaced by i160s, copying other
55*0fca6ea1SDimitry Andric // parts of aggregates as needed. In the case of a store, each pointer is
56*0fca6ea1SDimitry Andric // `ptrtoint`d to i160 before storing, and load integers are `inttoptr`d back.
57*0fca6ea1SDimitry Andric // This same transformation is applied to vectors of pointers.
58*0fca6ea1SDimitry Andric //
// Such a transformation allows the later phases of the pass to not need
// to handle buffer fat pointers moving to and from memory, where we would
// have to handle the incompatibility between a `{Nxp8, Nxi32}` representation
// and `Nxi160` directly. Instead, that transposing action (where the vectors
// of resources and vectors of offsets are concatenated before being stored to
// memory) is handled through implementing `inttoptr` and `ptrtoint` only.
65*0fca6ea1SDimitry Andric //
// Atomic operations on `ptr addrspace(7)` values are not supported, as the
67*0fca6ea1SDimitry Andric // hardware does not include a 160-bit atomic.
68*0fca6ea1SDimitry Andric //
69*0fca6ea1SDimitry Andric // ## Type remapping
70*0fca6ea1SDimitry Andric //
71*0fca6ea1SDimitry Andric // We use a `ValueMapper` to mangle uses of [vectors of] buffer fat pointers
72*0fca6ea1SDimitry Andric // to the corresponding struct type, which has a resource part and an offset
73*0fca6ea1SDimitry Andric // part.
74*0fca6ea1SDimitry Andric //
// This uses a `BufferFatPtrToStructTypeMap` and a `FatPtrConstMaterializer`
// to do so, usually by way of `setType`ing values. Constants are handled here
// because there isn't a good way to fix them up later.
78*0fca6ea1SDimitry Andric //
79*0fca6ea1SDimitry Andric // This has the downside of leaving the IR in an invalid state (for example,
80*0fca6ea1SDimitry Andric // the instruction `getelementptr {ptr addrspace(8), i32} %p, ...` will exist),
81*0fca6ea1SDimitry Andric // but all such invalid states will be resolved by the third phase.
82*0fca6ea1SDimitry Andric //
83*0fca6ea1SDimitry Andric // Functions that don't take buffer fat pointers are modified in place. Those
84*0fca6ea1SDimitry Andric // that do take such pointers have their basic blocks moved to a new function
85*0fca6ea1SDimitry Andric // with arguments that are {ptr addrspace(8), i32} arguments and return values.
86*0fca6ea1SDimitry Andric // This phase also records intrinsics so that they can be remangled or deleted
87*0fca6ea1SDimitry Andric // later.
88*0fca6ea1SDimitry Andric //
89*0fca6ea1SDimitry Andric //
90*0fca6ea1SDimitry Andric // ## Splitting pointer structs
91*0fca6ea1SDimitry Andric //
92*0fca6ea1SDimitry Andric // The meat of this pass consists of defining semantics for operations that
93*0fca6ea1SDimitry Andric // produce or consume [vectors of] buffer fat pointers in terms of their
// resource and offset parts. This is accomplished through the `SplitPtrStructs`
95*0fca6ea1SDimitry Andric // visitor.
96*0fca6ea1SDimitry Andric //
97*0fca6ea1SDimitry Andric // In the first pass through each function that is being lowered, the splitter
98*0fca6ea1SDimitry Andric // inserts new instructions to implement the split-structures behavior, which is
99*0fca6ea1SDimitry Andric // needed for correctness and performance. It records a list of "split users",
100*0fca6ea1SDimitry Andric // instructions that are being replaced by operations on the resource and offset
101*0fca6ea1SDimitry Andric // parts.
102*0fca6ea1SDimitry Andric //
103*0fca6ea1SDimitry Andric // Split users do not necessarily need to produce parts themselves (
104*0fca6ea1SDimitry Andric // a `load float, ptr addrspace(7)` does not, for example), but, if they do not
105*0fca6ea1SDimitry Andric // generate fat buffer pointers, they must RAUW in their replacement
106*0fca6ea1SDimitry Andric // instructions during the initial visit.
107*0fca6ea1SDimitry Andric //
108*0fca6ea1SDimitry Andric // When these new instructions are created, they use the split parts recorded
109*0fca6ea1SDimitry Andric // for their initial arguments in order to generate their replacements, creating
110*0fca6ea1SDimitry Andric // a parallel set of instructions that does not refer to the original fat
111*0fca6ea1SDimitry Andric // pointer values but instead to their resource and offset components.
112*0fca6ea1SDimitry Andric //
113*0fca6ea1SDimitry Andric // Instructions, such as `extractvalue`, that produce buffer fat pointers from
114*0fca6ea1SDimitry Andric // sources that do not have split parts, have such parts generated using
115*0fca6ea1SDimitry Andric // `extractvalue`. This is also the initial handling of PHI nodes, which
116*0fca6ea1SDimitry Andric // are then cleaned up.
117*0fca6ea1SDimitry Andric //
118*0fca6ea1SDimitry Andric // ### Conditionals
119*0fca6ea1SDimitry Andric //
120*0fca6ea1SDimitry Andric // PHI nodes are initially given resource parts via `extractvalue`. However,
121*0fca6ea1SDimitry Andric // this is not an efficient rewrite of such nodes, as, in most cases, the
122*0fca6ea1SDimitry Andric // resource part in a conditional or loop remains constant throughout the loop
123*0fca6ea1SDimitry Andric // and only the offset varies. Failing to optimize away these constant resources
124*0fca6ea1SDimitry Andric // would cause additional registers to be sent around loops and might lead to
125*0fca6ea1SDimitry Andric // waterfall loops being generated for buffer operations due to the
126*0fca6ea1SDimitry Andric // "non-uniform" resource argument.
127*0fca6ea1SDimitry Andric //
128*0fca6ea1SDimitry Andric // Therefore, after all instructions have been visited, the pointer splitter
129*0fca6ea1SDimitry Andric // post-processes all encountered conditionals. Given a PHI node or select,
130*0fca6ea1SDimitry Andric // getPossibleRsrcRoots() collects all values that the resource parts of that
131*0fca6ea1SDimitry Andric // conditional's input could come from as well as collecting all conditional
132*0fca6ea1SDimitry Andric // instructions encountered during the search. If, after filtering out the
133*0fca6ea1SDimitry Andric // initial node itself, the set of encountered conditionals is a subset of the
134*0fca6ea1SDimitry Andric // potential roots and there is a single potential resource that isn't in the
135*0fca6ea1SDimitry Andric // conditional set, that value is the only possible value the resource argument
136*0fca6ea1SDimitry Andric // could have throughout the control flow.
137*0fca6ea1SDimitry Andric //
138*0fca6ea1SDimitry Andric // If that condition is met, then a PHI node can have its resource part changed
139*0fca6ea1SDimitry Andric // to the singleton value and then be replaced by a PHI on the offsets.
140*0fca6ea1SDimitry Andric // Otherwise, each PHI node is split into two, one for the resource part and one
141*0fca6ea1SDimitry Andric // for the offset part, which replace the temporary `extractvalue` instructions
142*0fca6ea1SDimitry Andric // that were added during the first pass.
143*0fca6ea1SDimitry Andric //
144*0fca6ea1SDimitry Andric // Similar logic applies to `select`, where
// `%z = select i1 %cond, ptr addrspace(7) %x, ptr addrspace(7) %y`
// can be split into `%z.rsrc = %x.rsrc` and
// `%z.off = select i1 %cond, i32 %x.off, i32 %y.off`
148*0fca6ea1SDimitry Andric // if both `%x` and `%y` have the same resource part, but two `select`
149*0fca6ea1SDimitry Andric // operations will be needed if they do not.
150*0fca6ea1SDimitry Andric //
151*0fca6ea1SDimitry Andric // ### Final processing
152*0fca6ea1SDimitry Andric //
153*0fca6ea1SDimitry Andric // After conditionals have been cleaned up, the IR for each function is
154*0fca6ea1SDimitry Andric // rewritten to remove all the old instructions that have been split up.
155*0fca6ea1SDimitry Andric //
156*0fca6ea1SDimitry Andric // Any instruction that used to produce a buffer fat pointer (and therefore now
157*0fca6ea1SDimitry Andric // produces a resource-and-offset struct after type remapping) is
158*0fca6ea1SDimitry Andric // replaced as follows:
159*0fca6ea1SDimitry Andric // 1. All debug value annotations are cloned to reflect that the resource part
160*0fca6ea1SDimitry Andric //    and offset parts are computed separately and constitute different
161*0fca6ea1SDimitry Andric //    fragments of the underlying source language variable.
162*0fca6ea1SDimitry Andric // 2. All uses that were themselves split are replaced by a `poison` of the
163*0fca6ea1SDimitry Andric //    struct type, as they will themselves be erased soon. This rule, combined
164*0fca6ea1SDimitry Andric //    with debug handling, should leave the use lists of split instructions
165*0fca6ea1SDimitry Andric //    empty in almost all cases.
166*0fca6ea1SDimitry Andric // 3. If a user of the original struct-valued result remains, the structure
167*0fca6ea1SDimitry Andric //    needed for the new types to work is constructed out of the newly-defined
168*0fca6ea1SDimitry Andric //    parts, and the original instruction is replaced by this structure
169*0fca6ea1SDimitry Andric //    before being erased. Instructions requiring this construction include
170*0fca6ea1SDimitry Andric //    `ret` and `insertvalue`.
171*0fca6ea1SDimitry Andric //
172*0fca6ea1SDimitry Andric // # Consequences
173*0fca6ea1SDimitry Andric //
174*0fca6ea1SDimitry Andric // This pass does not alter the CFG.
175*0fca6ea1SDimitry Andric //
176*0fca6ea1SDimitry Andric // Alias analysis information will become coarser, as the LLVM alias analyzer
177*0fca6ea1SDimitry Andric // cannot handle the buffer intrinsics. Specifically, while we can determine
178*0fca6ea1SDimitry Andric // that the following two loads do not alias:
179*0fca6ea1SDimitry Andric // ```
180*0fca6ea1SDimitry Andric //   %y = getelementptr i32, ptr addrspace(7) %x, i32 1
181*0fca6ea1SDimitry Andric //   %a = load i32, ptr addrspace(7) %x
182*0fca6ea1SDimitry Andric //   %b = load i32, ptr addrspace(7) %y
183*0fca6ea1SDimitry Andric // ```
184*0fca6ea1SDimitry Andric // we cannot (except through some code that runs during scheduling) determine
185*0fca6ea1SDimitry Andric // that the rewritten loads below do not alias.
186*0fca6ea1SDimitry Andric // ```
187*0fca6ea1SDimitry Andric //   %y.off = add i32 %x.off, 1
188*0fca6ea1SDimitry Andric //   %a = call @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) %x.rsrc, i32
189*0fca6ea1SDimitry Andric //     %x.off, ...)
190*0fca6ea1SDimitry Andric //   %b = call @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8)
191*0fca6ea1SDimitry Andric //     %x.rsrc, i32 %y.off, ...)
192*0fca6ea1SDimitry Andric // ```
193*0fca6ea1SDimitry Andric // However, existing alias information is preserved.
194*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
195*0fca6ea1SDimitry Andric 
196*0fca6ea1SDimitry Andric #include "AMDGPU.h"
197*0fca6ea1SDimitry Andric #include "AMDGPUTargetMachine.h"
198*0fca6ea1SDimitry Andric #include "GCNSubtarget.h"
199*0fca6ea1SDimitry Andric #include "SIDefines.h"
200*0fca6ea1SDimitry Andric #include "llvm/ADT/SetOperations.h"
201*0fca6ea1SDimitry Andric #include "llvm/ADT/SmallVector.h"
202*0fca6ea1SDimitry Andric #include "llvm/Analysis/ConstantFolding.h"
203*0fca6ea1SDimitry Andric #include "llvm/Analysis/Utils/Local.h"
204*0fca6ea1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
205*0fca6ea1SDimitry Andric #include "llvm/IR/AttributeMask.h"
206*0fca6ea1SDimitry Andric #include "llvm/IR/Constants.h"
207*0fca6ea1SDimitry Andric #include "llvm/IR/DebugInfo.h"
208*0fca6ea1SDimitry Andric #include "llvm/IR/DerivedTypes.h"
209*0fca6ea1SDimitry Andric #include "llvm/IR/IRBuilder.h"
210*0fca6ea1SDimitry Andric #include "llvm/IR/InstIterator.h"
211*0fca6ea1SDimitry Andric #include "llvm/IR/InstVisitor.h"
212*0fca6ea1SDimitry Andric #include "llvm/IR/Instructions.h"
213*0fca6ea1SDimitry Andric #include "llvm/IR/Intrinsics.h"
214*0fca6ea1SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
215*0fca6ea1SDimitry Andric #include "llvm/IR/Metadata.h"
216*0fca6ea1SDimitry Andric #include "llvm/IR/Operator.h"
217*0fca6ea1SDimitry Andric #include "llvm/IR/PatternMatch.h"
218*0fca6ea1SDimitry Andric #include "llvm/IR/ReplaceConstant.h"
219*0fca6ea1SDimitry Andric #include "llvm/InitializePasses.h"
220*0fca6ea1SDimitry Andric #include "llvm/Pass.h"
221*0fca6ea1SDimitry Andric #include "llvm/Support/AtomicOrdering.h"
222*0fca6ea1SDimitry Andric #include "llvm/Support/Debug.h"
223*0fca6ea1SDimitry Andric #include "llvm/Support/ErrorHandling.h"
224*0fca6ea1SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
225*0fca6ea1SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
226*0fca6ea1SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
227*0fca6ea1SDimitry Andric 
228*0fca6ea1SDimitry Andric #define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"
229*0fca6ea1SDimitry Andric 
230*0fca6ea1SDimitry Andric using namespace llvm;
231*0fca6ea1SDimitry Andric 
/// Width, in bits, of the integer offset half of a lowered buffer fat pointer.
/// It is the element width used when building the `{ptr addrspace(8), i32}`
/// struct types in this file and when recognizing them again afterwards.
static constexpr unsigned BufferOffsetWidth = 32;
233*0fca6ea1SDimitry Andric 
234*0fca6ea1SDimitry Andric namespace {
235*0fca6ea1SDimitry Andric /// Recursively replace instances of ptr addrspace(7) and vector<Nxptr
236*0fca6ea1SDimitry Andric /// addrspace(7)> with some other type as defined by the relevant subclass.
237*0fca6ea1SDimitry Andric class BufferFatPtrTypeLoweringBase : public ValueMapTypeRemapper {
238*0fca6ea1SDimitry Andric   DenseMap<Type *, Type *> Map;
239*0fca6ea1SDimitry Andric 
240*0fca6ea1SDimitry Andric   Type *remapTypeImpl(Type *Ty, SmallPtrSetImpl<StructType *> &Seen);
241*0fca6ea1SDimitry Andric 
242*0fca6ea1SDimitry Andric protected:
243*0fca6ea1SDimitry Andric   virtual Type *remapScalar(PointerType *PT) = 0;
244*0fca6ea1SDimitry Andric   virtual Type *remapVector(VectorType *VT) = 0;
245*0fca6ea1SDimitry Andric 
246*0fca6ea1SDimitry Andric   const DataLayout &DL;
247*0fca6ea1SDimitry Andric 
248*0fca6ea1SDimitry Andric public:
249*0fca6ea1SDimitry Andric   BufferFatPtrTypeLoweringBase(const DataLayout &DL) : DL(DL) {}
250*0fca6ea1SDimitry Andric   Type *remapType(Type *SrcTy) override;
251*0fca6ea1SDimitry Andric   void clear() { Map.clear(); }
252*0fca6ea1SDimitry Andric };
253*0fca6ea1SDimitry Andric 
254*0fca6ea1SDimitry Andric /// Remap ptr addrspace(7) to i160 and vector<Nxptr addrspace(7)> to
255*0fca6ea1SDimitry Andric /// vector<Nxi60> in order to correctly handling loading/storing these values
256*0fca6ea1SDimitry Andric /// from memory.
257*0fca6ea1SDimitry Andric class BufferFatPtrToIntTypeMap : public BufferFatPtrTypeLoweringBase {
258*0fca6ea1SDimitry Andric   using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
259*0fca6ea1SDimitry Andric 
260*0fca6ea1SDimitry Andric protected:
261*0fca6ea1SDimitry Andric   Type *remapScalar(PointerType *PT) override { return DL.getIntPtrType(PT); }
262*0fca6ea1SDimitry Andric   Type *remapVector(VectorType *VT) override { return DL.getIntPtrType(VT); }
263*0fca6ea1SDimitry Andric };
264*0fca6ea1SDimitry Andric 
265*0fca6ea1SDimitry Andric /// Remap ptr addrspace(7) to {ptr addrspace(8), i32} (the resource and offset
266*0fca6ea1SDimitry Andric /// parts of the pointer) so that we can easily rewrite operations on these
267*0fca6ea1SDimitry Andric /// values that aren't loading them from or storing them to memory.
268*0fca6ea1SDimitry Andric class BufferFatPtrToStructTypeMap : public BufferFatPtrTypeLoweringBase {
269*0fca6ea1SDimitry Andric   using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
270*0fca6ea1SDimitry Andric 
271*0fca6ea1SDimitry Andric protected:
272*0fca6ea1SDimitry Andric   Type *remapScalar(PointerType *PT) override;
273*0fca6ea1SDimitry Andric   Type *remapVector(VectorType *VT) override;
274*0fca6ea1SDimitry Andric };
275*0fca6ea1SDimitry Andric } // namespace
276*0fca6ea1SDimitry Andric 
277*0fca6ea1SDimitry Andric // This code is adapted from the type remapper in lib/Linker/IRMover.cpp
278*0fca6ea1SDimitry Andric Type *BufferFatPtrTypeLoweringBase::remapTypeImpl(
279*0fca6ea1SDimitry Andric     Type *Ty, SmallPtrSetImpl<StructType *> &Seen) {
280*0fca6ea1SDimitry Andric   Type **Entry = &Map[Ty];
281*0fca6ea1SDimitry Andric   if (*Entry)
282*0fca6ea1SDimitry Andric     return *Entry;
283*0fca6ea1SDimitry Andric   if (auto *PT = dyn_cast<PointerType>(Ty)) {
284*0fca6ea1SDimitry Andric     if (PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER) {
285*0fca6ea1SDimitry Andric       return *Entry = remapScalar(PT);
286*0fca6ea1SDimitry Andric     }
287*0fca6ea1SDimitry Andric   }
288*0fca6ea1SDimitry Andric   if (auto *VT = dyn_cast<VectorType>(Ty)) {
289*0fca6ea1SDimitry Andric     auto *PT = dyn_cast<PointerType>(VT->getElementType());
290*0fca6ea1SDimitry Andric     if (PT && PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER) {
291*0fca6ea1SDimitry Andric       return *Entry = remapVector(VT);
292*0fca6ea1SDimitry Andric     }
293*0fca6ea1SDimitry Andric     return *Entry = Ty;
294*0fca6ea1SDimitry Andric   }
295*0fca6ea1SDimitry Andric   // Whether the type is one that is structurally uniqued - that is, if it is
296*0fca6ea1SDimitry Andric   // not a named struct (the only kind of type where multiple structurally
297*0fca6ea1SDimitry Andric   // identical types that have a distinct `Type*`)
298*0fca6ea1SDimitry Andric   StructType *TyAsStruct = dyn_cast<StructType>(Ty);
299*0fca6ea1SDimitry Andric   bool IsUniqued = !TyAsStruct || TyAsStruct->isLiteral();
300*0fca6ea1SDimitry Andric   // Base case for ints, floats, opaque pointers, and so on, which don't
301*0fca6ea1SDimitry Andric   // require recursion.
302*0fca6ea1SDimitry Andric   if (Ty->getNumContainedTypes() == 0 && IsUniqued)
303*0fca6ea1SDimitry Andric     return *Entry = Ty;
304*0fca6ea1SDimitry Andric   if (!IsUniqued) {
305*0fca6ea1SDimitry Andric     // Create a dummy type for recursion purposes.
306*0fca6ea1SDimitry Andric     if (!Seen.insert(TyAsStruct).second) {
307*0fca6ea1SDimitry Andric       StructType *Placeholder = StructType::create(Ty->getContext());
308*0fca6ea1SDimitry Andric       return *Entry = Placeholder;
309*0fca6ea1SDimitry Andric     }
310*0fca6ea1SDimitry Andric   }
311*0fca6ea1SDimitry Andric   bool Changed = false;
312*0fca6ea1SDimitry Andric   SmallVector<Type *> ElementTypes(Ty->getNumContainedTypes(), nullptr);
313*0fca6ea1SDimitry Andric   for (unsigned int I = 0, E = Ty->getNumContainedTypes(); I < E; ++I) {
314*0fca6ea1SDimitry Andric     Type *OldElem = Ty->getContainedType(I);
315*0fca6ea1SDimitry Andric     Type *NewElem = remapTypeImpl(OldElem, Seen);
316*0fca6ea1SDimitry Andric     ElementTypes[I] = NewElem;
317*0fca6ea1SDimitry Andric     Changed |= (OldElem != NewElem);
318*0fca6ea1SDimitry Andric   }
319*0fca6ea1SDimitry Andric   // Recursive calls to remapTypeImpl() may have invalidated pointer.
320*0fca6ea1SDimitry Andric   Entry = &Map[Ty];
321*0fca6ea1SDimitry Andric   if (!Changed) {
322*0fca6ea1SDimitry Andric     return *Entry = Ty;
323*0fca6ea1SDimitry Andric   }
324*0fca6ea1SDimitry Andric   if (auto *ArrTy = dyn_cast<ArrayType>(Ty))
325*0fca6ea1SDimitry Andric     return *Entry = ArrayType::get(ElementTypes[0], ArrTy->getNumElements());
326*0fca6ea1SDimitry Andric   if (auto *FnTy = dyn_cast<FunctionType>(Ty))
327*0fca6ea1SDimitry Andric     return *Entry = FunctionType::get(ElementTypes[0],
328*0fca6ea1SDimitry Andric                                       ArrayRef(ElementTypes).slice(1),
329*0fca6ea1SDimitry Andric                                       FnTy->isVarArg());
330*0fca6ea1SDimitry Andric   if (auto *STy = dyn_cast<StructType>(Ty)) {
331*0fca6ea1SDimitry Andric     // Genuine opaque types don't have a remapping.
332*0fca6ea1SDimitry Andric     if (STy->isOpaque())
333*0fca6ea1SDimitry Andric       return *Entry = Ty;
334*0fca6ea1SDimitry Andric     bool IsPacked = STy->isPacked();
335*0fca6ea1SDimitry Andric     if (IsUniqued)
336*0fca6ea1SDimitry Andric       return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked);
337*0fca6ea1SDimitry Andric     SmallString<16> Name(STy->getName());
338*0fca6ea1SDimitry Andric     STy->setName("");
339*0fca6ea1SDimitry Andric     Type **RecursionEntry = &Map[Ty];
340*0fca6ea1SDimitry Andric     if (*RecursionEntry) {
341*0fca6ea1SDimitry Andric       auto *Placeholder = cast<StructType>(*RecursionEntry);
342*0fca6ea1SDimitry Andric       Placeholder->setBody(ElementTypes, IsPacked);
343*0fca6ea1SDimitry Andric       Placeholder->setName(Name);
344*0fca6ea1SDimitry Andric       return *Entry = Placeholder;
345*0fca6ea1SDimitry Andric     }
346*0fca6ea1SDimitry Andric     return *Entry = StructType::create(Ty->getContext(), ElementTypes, Name,
347*0fca6ea1SDimitry Andric                                        IsPacked);
348*0fca6ea1SDimitry Andric   }
349*0fca6ea1SDimitry Andric   llvm_unreachable("Unknown type of type that contains elements");
350*0fca6ea1SDimitry Andric }
351*0fca6ea1SDimitry Andric 
352*0fca6ea1SDimitry Andric Type *BufferFatPtrTypeLoweringBase::remapType(Type *SrcTy) {
353*0fca6ea1SDimitry Andric   SmallPtrSet<StructType *, 2> Visited;
354*0fca6ea1SDimitry Andric   return remapTypeImpl(SrcTy, Visited);
355*0fca6ea1SDimitry Andric }
356*0fca6ea1SDimitry Andric 
357*0fca6ea1SDimitry Andric Type *BufferFatPtrToStructTypeMap::remapScalar(PointerType *PT) {
358*0fca6ea1SDimitry Andric   LLVMContext &Ctx = PT->getContext();
359*0fca6ea1SDimitry Andric   return StructType::get(PointerType::get(Ctx, AMDGPUAS::BUFFER_RESOURCE),
360*0fca6ea1SDimitry Andric                          IntegerType::get(Ctx, BufferOffsetWidth));
361*0fca6ea1SDimitry Andric }
362*0fca6ea1SDimitry Andric 
363*0fca6ea1SDimitry Andric Type *BufferFatPtrToStructTypeMap::remapVector(VectorType *VT) {
364*0fca6ea1SDimitry Andric   ElementCount EC = VT->getElementCount();
365*0fca6ea1SDimitry Andric   LLVMContext &Ctx = VT->getContext();
366*0fca6ea1SDimitry Andric   Type *RsrcVec =
367*0fca6ea1SDimitry Andric       VectorType::get(PointerType::get(Ctx, AMDGPUAS::BUFFER_RESOURCE), EC);
368*0fca6ea1SDimitry Andric   Type *OffVec = VectorType::get(IntegerType::get(Ctx, BufferOffsetWidth), EC);
369*0fca6ea1SDimitry Andric   return StructType::get(RsrcVec, OffVec);
370*0fca6ea1SDimitry Andric }
371*0fca6ea1SDimitry Andric 
372*0fca6ea1SDimitry Andric static bool isBufferFatPtrOrVector(Type *Ty) {
373*0fca6ea1SDimitry Andric   if (auto *PT = dyn_cast<PointerType>(Ty->getScalarType()))
374*0fca6ea1SDimitry Andric     return PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER;
375*0fca6ea1SDimitry Andric   return false;
376*0fca6ea1SDimitry Andric }
377*0fca6ea1SDimitry Andric 
378*0fca6ea1SDimitry Andric // True if the type is {ptr addrspace(8), i32} or a struct containing vectors of
379*0fca6ea1SDimitry Andric // those types. Used to quickly skip instructions we don't need to process.
380*0fca6ea1SDimitry Andric static bool isSplitFatPtr(Type *Ty) {
381*0fca6ea1SDimitry Andric   auto *ST = dyn_cast<StructType>(Ty);
382*0fca6ea1SDimitry Andric   if (!ST)
383*0fca6ea1SDimitry Andric     return false;
384*0fca6ea1SDimitry Andric   if (!ST->isLiteral() || ST->getNumElements() != 2)
385*0fca6ea1SDimitry Andric     return false;
386*0fca6ea1SDimitry Andric   auto *MaybeRsrc =
387*0fca6ea1SDimitry Andric       dyn_cast<PointerType>(ST->getElementType(0)->getScalarType());
388*0fca6ea1SDimitry Andric   auto *MaybeOff =
389*0fca6ea1SDimitry Andric       dyn_cast<IntegerType>(ST->getElementType(1)->getScalarType());
390*0fca6ea1SDimitry Andric   return MaybeRsrc && MaybeOff &&
391*0fca6ea1SDimitry Andric          MaybeRsrc->getAddressSpace() == AMDGPUAS::BUFFER_RESOURCE &&
392*0fca6ea1SDimitry Andric          MaybeOff->getBitWidth() == BufferOffsetWidth;
393*0fca6ea1SDimitry Andric }
394*0fca6ea1SDimitry Andric 
395*0fca6ea1SDimitry Andric // True if the result type or any argument types are buffer fat pointers.
396*0fca6ea1SDimitry Andric static bool isBufferFatPtrConst(Constant *C) {
397*0fca6ea1SDimitry Andric   Type *T = C->getType();
398*0fca6ea1SDimitry Andric   return isBufferFatPtrOrVector(T) || any_of(C->operands(), [](const Use &U) {
399*0fca6ea1SDimitry Andric            return isBufferFatPtrOrVector(U.get()->getType());
400*0fca6ea1SDimitry Andric          });
401*0fca6ea1SDimitry Andric }
402*0fca6ea1SDimitry Andric 
403*0fca6ea1SDimitry Andric namespace {
404*0fca6ea1SDimitry Andric /// Convert [vectors of] buffer fat pointers to integers when they are read from
405*0fca6ea1SDimitry Andric /// or stored to memory. This ensures that these pointers will have the same
406*0fca6ea1SDimitry Andric /// memory layout as before they are lowered, even though they will no longer
407*0fca6ea1SDimitry Andric /// have their previous layout in registers/in the program (they'll be broken
408*0fca6ea1SDimitry Andric /// down into resource and offset parts). This has the downside of imposing
409*0fca6ea1SDimitry Andric /// marshalling costs when reading or storing these values, but since placing
410*0fca6ea1SDimitry Andric /// such pointers into memory is an uncommon operation at best, we feel that
411*0fca6ea1SDimitry Andric /// this cost is acceptable for better performance in the common case.
412*0fca6ea1SDimitry Andric class StoreFatPtrsAsIntsVisitor
413*0fca6ea1SDimitry Andric     : public InstVisitor<StoreFatPtrsAsIntsVisitor, bool> {
414*0fca6ea1SDimitry Andric   BufferFatPtrToIntTypeMap *TypeMap;
415*0fca6ea1SDimitry Andric 
416*0fca6ea1SDimitry Andric   ValueToValueMapTy ConvertedForStore;
417*0fca6ea1SDimitry Andric 
418*0fca6ea1SDimitry Andric   IRBuilder<> IRB;
419*0fca6ea1SDimitry Andric 
420*0fca6ea1SDimitry Andric   // Convert all the buffer fat pointers within the input value to inttegers
421*0fca6ea1SDimitry Andric   // so that it can be stored in memory.
422*0fca6ea1SDimitry Andric   Value *fatPtrsToInts(Value *V, Type *From, Type *To, const Twine &Name);
423*0fca6ea1SDimitry Andric   // Convert all the i160s that need to be buffer fat pointers (as specified)
424*0fca6ea1SDimitry Andric   // by the To type) into those pointers to preserve the semantics of the rest
425*0fca6ea1SDimitry Andric   // of the program.
426*0fca6ea1SDimitry Andric   Value *intsToFatPtrs(Value *V, Type *From, Type *To, const Twine &Name);
427*0fca6ea1SDimitry Andric 
428*0fca6ea1SDimitry Andric public:
429*0fca6ea1SDimitry Andric   StoreFatPtrsAsIntsVisitor(BufferFatPtrToIntTypeMap *TypeMap, LLVMContext &Ctx)
430*0fca6ea1SDimitry Andric       : TypeMap(TypeMap), IRB(Ctx) {}
431*0fca6ea1SDimitry Andric   bool processFunction(Function &F);
432*0fca6ea1SDimitry Andric 
433*0fca6ea1SDimitry Andric   bool visitInstruction(Instruction &I) { return false; }
434*0fca6ea1SDimitry Andric   bool visitAllocaInst(AllocaInst &I);
435*0fca6ea1SDimitry Andric   bool visitLoadInst(LoadInst &LI);
436*0fca6ea1SDimitry Andric   bool visitStoreInst(StoreInst &SI);
437*0fca6ea1SDimitry Andric   bool visitGetElementPtrInst(GetElementPtrInst &I);
438*0fca6ea1SDimitry Andric };
439*0fca6ea1SDimitry Andric } // namespace
440*0fca6ea1SDimitry Andric 
441*0fca6ea1SDimitry Andric Value *StoreFatPtrsAsIntsVisitor::fatPtrsToInts(Value *V, Type *From, Type *To,
442*0fca6ea1SDimitry Andric                                                 const Twine &Name) {
443*0fca6ea1SDimitry Andric   if (From == To)
444*0fca6ea1SDimitry Andric     return V;
445*0fca6ea1SDimitry Andric   ValueToValueMapTy::iterator Find = ConvertedForStore.find(V);
446*0fca6ea1SDimitry Andric   if (Find != ConvertedForStore.end())
447*0fca6ea1SDimitry Andric     return Find->second;
448*0fca6ea1SDimitry Andric   if (isBufferFatPtrOrVector(From)) {
449*0fca6ea1SDimitry Andric     Value *Cast = IRB.CreatePtrToInt(V, To, Name + ".int");
450*0fca6ea1SDimitry Andric     ConvertedForStore[V] = Cast;
451*0fca6ea1SDimitry Andric     return Cast;
452*0fca6ea1SDimitry Andric   }
453*0fca6ea1SDimitry Andric   if (From->getNumContainedTypes() == 0)
454*0fca6ea1SDimitry Andric     return V;
455*0fca6ea1SDimitry Andric   // Structs, arrays, and other compound types.
456*0fca6ea1SDimitry Andric   Value *Ret = PoisonValue::get(To);
457*0fca6ea1SDimitry Andric   if (auto *AT = dyn_cast<ArrayType>(From)) {
458*0fca6ea1SDimitry Andric     Type *FromPart = AT->getArrayElementType();
459*0fca6ea1SDimitry Andric     Type *ToPart = cast<ArrayType>(To)->getElementType();
460*0fca6ea1SDimitry Andric     for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {
461*0fca6ea1SDimitry Andric       Value *Field = IRB.CreateExtractValue(V, I);
462*0fca6ea1SDimitry Andric       Value *NewField =
463*0fca6ea1SDimitry Andric           fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(I));
464*0fca6ea1SDimitry Andric       Ret = IRB.CreateInsertValue(Ret, NewField, I);
465*0fca6ea1SDimitry Andric     }
466*0fca6ea1SDimitry Andric   } else {
467*0fca6ea1SDimitry Andric     for (auto [Idx, FromPart, ToPart] :
468*0fca6ea1SDimitry Andric          enumerate(From->subtypes(), To->subtypes())) {
469*0fca6ea1SDimitry Andric       Value *Field = IRB.CreateExtractValue(V, Idx);
470*0fca6ea1SDimitry Andric       Value *NewField =
471*0fca6ea1SDimitry Andric           fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(Idx));
472*0fca6ea1SDimitry Andric       Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
473*0fca6ea1SDimitry Andric     }
474*0fca6ea1SDimitry Andric   }
475*0fca6ea1SDimitry Andric   ConvertedForStore[V] = Ret;
476*0fca6ea1SDimitry Andric   return Ret;
477*0fca6ea1SDimitry Andric }
478*0fca6ea1SDimitry Andric 
479*0fca6ea1SDimitry Andric Value *StoreFatPtrsAsIntsVisitor::intsToFatPtrs(Value *V, Type *From, Type *To,
480*0fca6ea1SDimitry Andric                                                 const Twine &Name) {
481*0fca6ea1SDimitry Andric   if (From == To)
482*0fca6ea1SDimitry Andric     return V;
483*0fca6ea1SDimitry Andric   if (isBufferFatPtrOrVector(To)) {
484*0fca6ea1SDimitry Andric     Value *Cast = IRB.CreateIntToPtr(V, To, Name + ".ptr");
485*0fca6ea1SDimitry Andric     return Cast;
486*0fca6ea1SDimitry Andric   }
487*0fca6ea1SDimitry Andric   if (From->getNumContainedTypes() == 0)
488*0fca6ea1SDimitry Andric     return V;
489*0fca6ea1SDimitry Andric   // Structs, arrays, and other compound types.
490*0fca6ea1SDimitry Andric   Value *Ret = PoisonValue::get(To);
491*0fca6ea1SDimitry Andric   if (auto *AT = dyn_cast<ArrayType>(From)) {
492*0fca6ea1SDimitry Andric     Type *FromPart = AT->getArrayElementType();
493*0fca6ea1SDimitry Andric     Type *ToPart = cast<ArrayType>(To)->getElementType();
494*0fca6ea1SDimitry Andric     for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {
495*0fca6ea1SDimitry Andric       Value *Field = IRB.CreateExtractValue(V, I);
496*0fca6ea1SDimitry Andric       Value *NewField =
497*0fca6ea1SDimitry Andric           intsToFatPtrs(Field, FromPart, ToPart, Name + "." + Twine(I));
498*0fca6ea1SDimitry Andric       Ret = IRB.CreateInsertValue(Ret, NewField, I);
499*0fca6ea1SDimitry Andric     }
500*0fca6ea1SDimitry Andric   } else {
501*0fca6ea1SDimitry Andric     for (auto [Idx, FromPart, ToPart] :
502*0fca6ea1SDimitry Andric          enumerate(From->subtypes(), To->subtypes())) {
503*0fca6ea1SDimitry Andric       Value *Field = IRB.CreateExtractValue(V, Idx);
504*0fca6ea1SDimitry Andric       Value *NewField =
505*0fca6ea1SDimitry Andric           intsToFatPtrs(Field, FromPart, ToPart, Name + "." + Twine(Idx));
506*0fca6ea1SDimitry Andric       Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
507*0fca6ea1SDimitry Andric     }
508*0fca6ea1SDimitry Andric   }
509*0fca6ea1SDimitry Andric   return Ret;
510*0fca6ea1SDimitry Andric }
511*0fca6ea1SDimitry Andric 
512*0fca6ea1SDimitry Andric bool StoreFatPtrsAsIntsVisitor::processFunction(Function &F) {
513*0fca6ea1SDimitry Andric   bool Changed = false;
514*0fca6ea1SDimitry Andric   // The visitors will mutate GEPs and allocas, but will push loads and stores
515*0fca6ea1SDimitry Andric   // to the worklist to avoid invalidation.
516*0fca6ea1SDimitry Andric   for (Instruction &I : make_early_inc_range(instructions(F))) {
517*0fca6ea1SDimitry Andric     Changed |= visit(I);
518*0fca6ea1SDimitry Andric   }
519*0fca6ea1SDimitry Andric   ConvertedForStore.clear();
520*0fca6ea1SDimitry Andric   return Changed;
521*0fca6ea1SDimitry Andric }
522*0fca6ea1SDimitry Andric 
523*0fca6ea1SDimitry Andric bool StoreFatPtrsAsIntsVisitor::visitAllocaInst(AllocaInst &I) {
524*0fca6ea1SDimitry Andric   Type *Ty = I.getAllocatedType();
525*0fca6ea1SDimitry Andric   Type *NewTy = TypeMap->remapType(Ty);
526*0fca6ea1SDimitry Andric   if (Ty == NewTy)
527*0fca6ea1SDimitry Andric     return false;
528*0fca6ea1SDimitry Andric   I.setAllocatedType(NewTy);
529*0fca6ea1SDimitry Andric   return true;
530*0fca6ea1SDimitry Andric }
531*0fca6ea1SDimitry Andric 
532*0fca6ea1SDimitry Andric bool StoreFatPtrsAsIntsVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
533*0fca6ea1SDimitry Andric   Type *Ty = I.getSourceElementType();
534*0fca6ea1SDimitry Andric   Type *NewTy = TypeMap->remapType(Ty);
535*0fca6ea1SDimitry Andric   if (Ty == NewTy)
536*0fca6ea1SDimitry Andric     return false;
537*0fca6ea1SDimitry Andric   // We'll be rewriting the type `ptr addrspace(7)` out of existence soon, so
538*0fca6ea1SDimitry Andric   // make sure GEPs don't have different semantics with the new type.
539*0fca6ea1SDimitry Andric   I.setSourceElementType(NewTy);
540*0fca6ea1SDimitry Andric   I.setResultElementType(TypeMap->remapType(I.getResultElementType()));
541*0fca6ea1SDimitry Andric   return true;
542*0fca6ea1SDimitry Andric }
543*0fca6ea1SDimitry Andric 
544*0fca6ea1SDimitry Andric bool StoreFatPtrsAsIntsVisitor::visitLoadInst(LoadInst &LI) {
545*0fca6ea1SDimitry Andric   Type *Ty = LI.getType();
546*0fca6ea1SDimitry Andric   Type *IntTy = TypeMap->remapType(Ty);
547*0fca6ea1SDimitry Andric   if (Ty == IntTy)
548*0fca6ea1SDimitry Andric     return false;
549*0fca6ea1SDimitry Andric 
550*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&LI);
551*0fca6ea1SDimitry Andric   auto *NLI = cast<LoadInst>(LI.clone());
552*0fca6ea1SDimitry Andric   NLI->mutateType(IntTy);
553*0fca6ea1SDimitry Andric   NLI = IRB.Insert(NLI);
554*0fca6ea1SDimitry Andric   copyMetadataForLoad(*NLI, LI);
555*0fca6ea1SDimitry Andric   NLI->takeName(&LI);
556*0fca6ea1SDimitry Andric 
557*0fca6ea1SDimitry Andric   Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName());
558*0fca6ea1SDimitry Andric   LI.replaceAllUsesWith(CastBack);
559*0fca6ea1SDimitry Andric   LI.eraseFromParent();
560*0fca6ea1SDimitry Andric   return true;
561*0fca6ea1SDimitry Andric }
562*0fca6ea1SDimitry Andric 
563*0fca6ea1SDimitry Andric bool StoreFatPtrsAsIntsVisitor::visitStoreInst(StoreInst &SI) {
564*0fca6ea1SDimitry Andric   Value *V = SI.getValueOperand();
565*0fca6ea1SDimitry Andric   Type *Ty = V->getType();
566*0fca6ea1SDimitry Andric   Type *IntTy = TypeMap->remapType(Ty);
567*0fca6ea1SDimitry Andric   if (Ty == IntTy)
568*0fca6ea1SDimitry Andric     return false;
569*0fca6ea1SDimitry Andric 
570*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&SI);
571*0fca6ea1SDimitry Andric   Value *IntV = fatPtrsToInts(V, Ty, IntTy, V->getName());
572*0fca6ea1SDimitry Andric   for (auto *Dbg : at::getAssignmentMarkers(&SI))
573*0fca6ea1SDimitry Andric     Dbg->setValue(IntV);
574*0fca6ea1SDimitry Andric 
575*0fca6ea1SDimitry Andric   SI.setOperand(0, IntV);
576*0fca6ea1SDimitry Andric   return true;
577*0fca6ea1SDimitry Andric }
578*0fca6ea1SDimitry Andric 
579*0fca6ea1SDimitry Andric /// Return the ptr addrspace(8) and i32 (resource and offset parts) in a lowered
580*0fca6ea1SDimitry Andric /// buffer fat pointer constant.
581*0fca6ea1SDimitry Andric static std::pair<Constant *, Constant *>
582*0fca6ea1SDimitry Andric splitLoweredFatBufferConst(Constant *C) {
583*0fca6ea1SDimitry Andric   assert(isSplitFatPtr(C->getType()) && "Not a split fat buffer pointer");
584*0fca6ea1SDimitry Andric   return std::make_pair(C->getAggregateElement(0u), C->getAggregateElement(1u));
585*0fca6ea1SDimitry Andric }
586*0fca6ea1SDimitry Andric 
587*0fca6ea1SDimitry Andric namespace {
588*0fca6ea1SDimitry Andric /// Handle the remapping of ptr addrspace(7) constants.
589*0fca6ea1SDimitry Andric class FatPtrConstMaterializer final : public ValueMaterializer {
590*0fca6ea1SDimitry Andric   BufferFatPtrToStructTypeMap *TypeMap;
591*0fca6ea1SDimitry Andric   // An internal mapper that is used to recurse into the arguments of constants.
592*0fca6ea1SDimitry Andric   // While the documentation for `ValueMapper` specifies not to use it
593*0fca6ea1SDimitry Andric   // recursively, examination of the logic in mapValue() shows that it can
594*0fca6ea1SDimitry Andric   // safely be used recursively when handling constants, like it does in its own
595*0fca6ea1SDimitry Andric   // logic.
596*0fca6ea1SDimitry Andric   ValueMapper InternalMapper;
597*0fca6ea1SDimitry Andric 
598*0fca6ea1SDimitry Andric   Constant *materializeBufferFatPtrConst(Constant *C);
599*0fca6ea1SDimitry Andric 
600*0fca6ea1SDimitry Andric public:
601*0fca6ea1SDimitry Andric   // UnderlyingMap is the value map this materializer will be filling.
602*0fca6ea1SDimitry Andric   FatPtrConstMaterializer(BufferFatPtrToStructTypeMap *TypeMap,
603*0fca6ea1SDimitry Andric                           ValueToValueMapTy &UnderlyingMap)
604*0fca6ea1SDimitry Andric       : TypeMap(TypeMap),
605*0fca6ea1SDimitry Andric         InternalMapper(UnderlyingMap, RF_None, TypeMap, this) {}
606*0fca6ea1SDimitry Andric   virtual ~FatPtrConstMaterializer() = default;
607*0fca6ea1SDimitry Andric 
608*0fca6ea1SDimitry Andric   Value *materialize(Value *V) override;
609*0fca6ea1SDimitry Andric };
610*0fca6ea1SDimitry Andric } // namespace
611*0fca6ea1SDimitry Andric 
612*0fca6ea1SDimitry Andric Constant *FatPtrConstMaterializer::materializeBufferFatPtrConst(Constant *C) {
613*0fca6ea1SDimitry Andric   Type *SrcTy = C->getType();
614*0fca6ea1SDimitry Andric   auto *NewTy = dyn_cast<StructType>(TypeMap->remapType(SrcTy));
615*0fca6ea1SDimitry Andric   if (C->isNullValue())
616*0fca6ea1SDimitry Andric     return ConstantAggregateZero::getNullValue(NewTy);
617*0fca6ea1SDimitry Andric   if (isa<PoisonValue>(C)) {
618*0fca6ea1SDimitry Andric     return ConstantStruct::get(NewTy,
619*0fca6ea1SDimitry Andric                                {PoisonValue::get(NewTy->getElementType(0)),
620*0fca6ea1SDimitry Andric                                 PoisonValue::get(NewTy->getElementType(1))});
621*0fca6ea1SDimitry Andric   }
622*0fca6ea1SDimitry Andric   if (isa<UndefValue>(C)) {
623*0fca6ea1SDimitry Andric     return ConstantStruct::get(NewTy,
624*0fca6ea1SDimitry Andric                                {UndefValue::get(NewTy->getElementType(0)),
625*0fca6ea1SDimitry Andric                                 UndefValue::get(NewTy->getElementType(1))});
626*0fca6ea1SDimitry Andric   }
627*0fca6ea1SDimitry Andric 
628*0fca6ea1SDimitry Andric   if (auto *VC = dyn_cast<ConstantVector>(C)) {
629*0fca6ea1SDimitry Andric     if (Constant *S = VC->getSplatValue()) {
630*0fca6ea1SDimitry Andric       Constant *NewS = InternalMapper.mapConstant(*S);
631*0fca6ea1SDimitry Andric       if (!NewS)
632*0fca6ea1SDimitry Andric         return nullptr;
633*0fca6ea1SDimitry Andric       auto [Rsrc, Off] = splitLoweredFatBufferConst(NewS);
634*0fca6ea1SDimitry Andric       auto EC = VC->getType()->getElementCount();
635*0fca6ea1SDimitry Andric       return ConstantStruct::get(NewTy, {ConstantVector::getSplat(EC, Rsrc),
636*0fca6ea1SDimitry Andric                                          ConstantVector::getSplat(EC, Off)});
637*0fca6ea1SDimitry Andric     }
638*0fca6ea1SDimitry Andric     SmallVector<Constant *> Rsrcs;
639*0fca6ea1SDimitry Andric     SmallVector<Constant *> Offs;
640*0fca6ea1SDimitry Andric     for (Value *Op : VC->operand_values()) {
641*0fca6ea1SDimitry Andric       auto *NewOp = dyn_cast_or_null<Constant>(InternalMapper.mapValue(*Op));
642*0fca6ea1SDimitry Andric       if (!NewOp)
643*0fca6ea1SDimitry Andric         return nullptr;
644*0fca6ea1SDimitry Andric       auto [Rsrc, Off] = splitLoweredFatBufferConst(NewOp);
645*0fca6ea1SDimitry Andric       Rsrcs.push_back(Rsrc);
646*0fca6ea1SDimitry Andric       Offs.push_back(Off);
647*0fca6ea1SDimitry Andric     }
648*0fca6ea1SDimitry Andric     Constant *RsrcVec = ConstantVector::get(Rsrcs);
649*0fca6ea1SDimitry Andric     Constant *OffVec = ConstantVector::get(Offs);
650*0fca6ea1SDimitry Andric     return ConstantStruct::get(NewTy, {RsrcVec, OffVec});
651*0fca6ea1SDimitry Andric   }
652*0fca6ea1SDimitry Andric 
653*0fca6ea1SDimitry Andric   if (isa<GlobalValue>(C))
654*0fca6ea1SDimitry Andric     report_fatal_error("Global values containing ptr addrspace(7) (buffer "
655*0fca6ea1SDimitry Andric                        "fat pointer) values are not supported");
656*0fca6ea1SDimitry Andric 
657*0fca6ea1SDimitry Andric   if (isa<ConstantExpr>(C))
658*0fca6ea1SDimitry Andric     report_fatal_error("Constant exprs containing ptr addrspace(7) (buffer "
659*0fca6ea1SDimitry Andric                        "fat pointer) values should have been expanded earlier");
660*0fca6ea1SDimitry Andric 
661*0fca6ea1SDimitry Andric   return nullptr;
662*0fca6ea1SDimitry Andric }
663*0fca6ea1SDimitry Andric 
664*0fca6ea1SDimitry Andric Value *FatPtrConstMaterializer::materialize(Value *V) {
665*0fca6ea1SDimitry Andric   Constant *C = dyn_cast<Constant>(V);
666*0fca6ea1SDimitry Andric   if (!C)
667*0fca6ea1SDimitry Andric     return nullptr;
668*0fca6ea1SDimitry Andric   // Structs and other types that happen to contain fat pointers get remapped
669*0fca6ea1SDimitry Andric   // by the mapValue() logic.
670*0fca6ea1SDimitry Andric   if (!isBufferFatPtrConst(C))
671*0fca6ea1SDimitry Andric     return nullptr;
672*0fca6ea1SDimitry Andric   return materializeBufferFatPtrConst(C);
673*0fca6ea1SDimitry Andric }
674*0fca6ea1SDimitry Andric 
675*0fca6ea1SDimitry Andric using PtrParts = std::pair<Value *, Value *>;
676*0fca6ea1SDimitry Andric namespace {
677*0fca6ea1SDimitry Andric // The visitor returns the resource and offset parts for an instruction if they
678*0fca6ea1SDimitry Andric // can be computed, or (nullptr, nullptr) for cases that don't have a meaningful
679*0fca6ea1SDimitry Andric // value mapping.
680*0fca6ea1SDimitry Andric class SplitPtrStructs : public InstVisitor<SplitPtrStructs, PtrParts> {
681*0fca6ea1SDimitry Andric   ValueToValueMapTy RsrcParts;
682*0fca6ea1SDimitry Andric   ValueToValueMapTy OffParts;
683*0fca6ea1SDimitry Andric 
684*0fca6ea1SDimitry Andric   // Track instructions that have been rewritten into a user of the component
685*0fca6ea1SDimitry Andric   // parts of their ptr addrspace(7) input. Instructions that produced
686*0fca6ea1SDimitry Andric   // ptr addrspace(7) parts should **not** be RAUW'd before being added to this
687*0fca6ea1SDimitry Andric   // set, as that replacement will be handled in a post-visit step. However,
688*0fca6ea1SDimitry Andric   // instructions that yield values that aren't fat pointers (ex. ptrtoint)
689*0fca6ea1SDimitry Andric   // should RAUW themselves with new instructions that use the split parts
690*0fca6ea1SDimitry Andric   // of their arguments during processing.
691*0fca6ea1SDimitry Andric   DenseSet<Instruction *> SplitUsers;
692*0fca6ea1SDimitry Andric 
693*0fca6ea1SDimitry Andric   // Nodes that need a second look once we've computed the parts for all other
694*0fca6ea1SDimitry Andric   // instructions to see if, for example, we really need to phi on the resource
695*0fca6ea1SDimitry Andric   // part.
696*0fca6ea1SDimitry Andric   SmallVector<Instruction *> Conditionals;
697*0fca6ea1SDimitry Andric   // Temporary instructions produced while lowering conditionals that should be
698*0fca6ea1SDimitry Andric   // killed.
699*0fca6ea1SDimitry Andric   SmallVector<Instruction *> ConditionalTemps;
700*0fca6ea1SDimitry Andric 
701*0fca6ea1SDimitry Andric   // Subtarget info, needed for determining what cache control bits to set.
702*0fca6ea1SDimitry Andric   const TargetMachine *TM;
703*0fca6ea1SDimitry Andric   const GCNSubtarget *ST = nullptr;
704*0fca6ea1SDimitry Andric 
705*0fca6ea1SDimitry Andric   IRBuilder<> IRB;
706*0fca6ea1SDimitry Andric 
707*0fca6ea1SDimitry Andric   // Copy metadata between instructions if applicable.
708*0fca6ea1SDimitry Andric   void copyMetadata(Value *Dest, Value *Src);
709*0fca6ea1SDimitry Andric 
710*0fca6ea1SDimitry Andric   // Get the resource and offset parts of the value V, inserting appropriate
711*0fca6ea1SDimitry Andric   // extractvalue calls if needed.
712*0fca6ea1SDimitry Andric   PtrParts getPtrParts(Value *V);
713*0fca6ea1SDimitry Andric 
714*0fca6ea1SDimitry Andric   // Given an instruction that could produce multiple resource parts (a PHI or
715*0fca6ea1SDimitry Andric   // select), collect the set of possible instructions that could have provided
716*0fca6ea1SDimitry Andric   // its resource parts  that it could have (the `Roots`) and the set of
717*0fca6ea1SDimitry Andric   // conditional instructions visited during the search (`Seen`). If, after
718*0fca6ea1SDimitry Andric   // removing the root of the search from `Seen` and `Roots`, `Seen` is a subset
719*0fca6ea1SDimitry Andric   // of `Roots` and `Roots - Seen` contains one element, the resource part of
720*0fca6ea1SDimitry Andric   // that element can replace the resource part of all other elements in `Seen`.
721*0fca6ea1SDimitry Andric   void getPossibleRsrcRoots(Instruction *I, SmallPtrSetImpl<Value *> &Roots,
722*0fca6ea1SDimitry Andric                             SmallPtrSetImpl<Value *> &Seen);
723*0fca6ea1SDimitry Andric   void processConditionals();
724*0fca6ea1SDimitry Andric 
725*0fca6ea1SDimitry Andric   // If an instruction hav been split into resource and offset parts,
726*0fca6ea1SDimitry Andric   // delete that instruction. If any of its uses have not themselves been split
727*0fca6ea1SDimitry Andric   // into parts (for example, an insertvalue), construct the structure
728*0fca6ea1SDimitry Andric   // that the type rewrites declared should be produced by the dying instruction
729*0fca6ea1SDimitry Andric   // and use that.
730*0fca6ea1SDimitry Andric   // Also, kill the temporary extractvalue operations produced by the two-stage
731*0fca6ea1SDimitry Andric   // lowering of PHIs and conditionals.
732*0fca6ea1SDimitry Andric   void killAndReplaceSplitInstructions(SmallVectorImpl<Instruction *> &Origs);
733*0fca6ea1SDimitry Andric 
734*0fca6ea1SDimitry Andric   void setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx);
735*0fca6ea1SDimitry Andric   void insertPreMemOpFence(AtomicOrdering Order, SyncScope::ID SSID);
736*0fca6ea1SDimitry Andric   void insertPostMemOpFence(AtomicOrdering Order, SyncScope::ID SSID);
737*0fca6ea1SDimitry Andric   Value *handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr, Type *Ty,
738*0fca6ea1SDimitry Andric                           Align Alignment, AtomicOrdering Order,
739*0fca6ea1SDimitry Andric                           bool IsVolatile, SyncScope::ID SSID);
740*0fca6ea1SDimitry Andric 
741*0fca6ea1SDimitry Andric public:
742*0fca6ea1SDimitry Andric   SplitPtrStructs(LLVMContext &Ctx, const TargetMachine *TM)
743*0fca6ea1SDimitry Andric       : TM(TM), IRB(Ctx) {}
744*0fca6ea1SDimitry Andric 
745*0fca6ea1SDimitry Andric   void processFunction(Function &F);
746*0fca6ea1SDimitry Andric 
747*0fca6ea1SDimitry Andric   PtrParts visitInstruction(Instruction &I);
748*0fca6ea1SDimitry Andric   PtrParts visitLoadInst(LoadInst &LI);
749*0fca6ea1SDimitry Andric   PtrParts visitStoreInst(StoreInst &SI);
750*0fca6ea1SDimitry Andric   PtrParts visitAtomicRMWInst(AtomicRMWInst &AI);
751*0fca6ea1SDimitry Andric   PtrParts visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI);
752*0fca6ea1SDimitry Andric   PtrParts visitGetElementPtrInst(GetElementPtrInst &GEP);
753*0fca6ea1SDimitry Andric 
754*0fca6ea1SDimitry Andric   PtrParts visitPtrToIntInst(PtrToIntInst &PI);
755*0fca6ea1SDimitry Andric   PtrParts visitIntToPtrInst(IntToPtrInst &IP);
756*0fca6ea1SDimitry Andric   PtrParts visitAddrSpaceCastInst(AddrSpaceCastInst &I);
757*0fca6ea1SDimitry Andric   PtrParts visitICmpInst(ICmpInst &Cmp);
758*0fca6ea1SDimitry Andric   PtrParts visitFreezeInst(FreezeInst &I);
759*0fca6ea1SDimitry Andric 
760*0fca6ea1SDimitry Andric   PtrParts visitExtractElementInst(ExtractElementInst &I);
761*0fca6ea1SDimitry Andric   PtrParts visitInsertElementInst(InsertElementInst &I);
762*0fca6ea1SDimitry Andric   PtrParts visitShuffleVectorInst(ShuffleVectorInst &I);
763*0fca6ea1SDimitry Andric 
764*0fca6ea1SDimitry Andric   PtrParts visitPHINode(PHINode &PHI);
765*0fca6ea1SDimitry Andric   PtrParts visitSelectInst(SelectInst &SI);
766*0fca6ea1SDimitry Andric 
767*0fca6ea1SDimitry Andric   PtrParts visitIntrinsicInst(IntrinsicInst &II);
768*0fca6ea1SDimitry Andric };
769*0fca6ea1SDimitry Andric } // namespace
770*0fca6ea1SDimitry Andric 
771*0fca6ea1SDimitry Andric void SplitPtrStructs::copyMetadata(Value *Dest, Value *Src) {
772*0fca6ea1SDimitry Andric   auto *DestI = dyn_cast<Instruction>(Dest);
773*0fca6ea1SDimitry Andric   auto *SrcI = dyn_cast<Instruction>(Src);
774*0fca6ea1SDimitry Andric 
775*0fca6ea1SDimitry Andric   if (!DestI || !SrcI)
776*0fca6ea1SDimitry Andric     return;
777*0fca6ea1SDimitry Andric 
778*0fca6ea1SDimitry Andric   DestI->copyMetadata(*SrcI);
779*0fca6ea1SDimitry Andric }
780*0fca6ea1SDimitry Andric 
781*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::getPtrParts(Value *V) {
782*0fca6ea1SDimitry Andric   assert(isSplitFatPtr(V->getType()) && "it's not meaningful to get the parts "
783*0fca6ea1SDimitry Andric                                         "of something that wasn't rewritten");
784*0fca6ea1SDimitry Andric   auto *RsrcEntry = &RsrcParts[V];
785*0fca6ea1SDimitry Andric   auto *OffEntry = &OffParts[V];
786*0fca6ea1SDimitry Andric   if (*RsrcEntry && *OffEntry)
787*0fca6ea1SDimitry Andric     return {*RsrcEntry, *OffEntry};
788*0fca6ea1SDimitry Andric 
789*0fca6ea1SDimitry Andric   if (auto *C = dyn_cast<Constant>(V)) {
790*0fca6ea1SDimitry Andric     auto [Rsrc, Off] = splitLoweredFatBufferConst(C);
791*0fca6ea1SDimitry Andric     return {*RsrcEntry = Rsrc, *OffEntry = Off};
792*0fca6ea1SDimitry Andric   }
793*0fca6ea1SDimitry Andric 
794*0fca6ea1SDimitry Andric   IRBuilder<>::InsertPointGuard Guard(IRB);
795*0fca6ea1SDimitry Andric   if (auto *I = dyn_cast<Instruction>(V)) {
796*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "Recursing to split parts of " << *I << "\n");
797*0fca6ea1SDimitry Andric     auto [Rsrc, Off] = visit(*I);
798*0fca6ea1SDimitry Andric     if (Rsrc && Off)
799*0fca6ea1SDimitry Andric       return {*RsrcEntry = Rsrc, *OffEntry = Off};
800*0fca6ea1SDimitry Andric     // We'll be creating the new values after the relevant instruction.
801*0fca6ea1SDimitry Andric     // This instruction generates a value and so isn't a terminator.
802*0fca6ea1SDimitry Andric     IRB.SetInsertPoint(*I->getInsertionPointAfterDef());
803*0fca6ea1SDimitry Andric     IRB.SetCurrentDebugLocation(I->getDebugLoc());
804*0fca6ea1SDimitry Andric   } else if (auto *A = dyn_cast<Argument>(V)) {
805*0fca6ea1SDimitry Andric     IRB.SetInsertPointPastAllocas(A->getParent());
806*0fca6ea1SDimitry Andric     IRB.SetCurrentDebugLocation(DebugLoc());
807*0fca6ea1SDimitry Andric   }
808*0fca6ea1SDimitry Andric   Value *Rsrc = IRB.CreateExtractValue(V, 0, V->getName() + ".rsrc");
809*0fca6ea1SDimitry Andric   Value *Off = IRB.CreateExtractValue(V, 1, V->getName() + ".off");
810*0fca6ea1SDimitry Andric   return {*RsrcEntry = Rsrc, *OffEntry = Off};
811*0fca6ea1SDimitry Andric }
812*0fca6ea1SDimitry Andric 
813*0fca6ea1SDimitry Andric /// Returns the instruction that defines the resource part of the value V.
814*0fca6ea1SDimitry Andric /// Note that this is not getUnderlyingObject(), since that looks through
815*0fca6ea1SDimitry Andric /// operations like ptrmask which might modify the resource part.
816*0fca6ea1SDimitry Andric ///
817*0fca6ea1SDimitry Andric /// We can limit ourselves to just looking through GEPs followed by looking
818*0fca6ea1SDimitry Andric /// through addrspacecasts because only those two operations preserve the
819*0fca6ea1SDimitry Andric /// resource part, and because operations on an `addrspace(8)` (which is the
820*0fca6ea1SDimitry Andric /// legal input to this addrspacecast) would produce a different resource part.
821*0fca6ea1SDimitry Andric static Value *rsrcPartRoot(Value *V) {
822*0fca6ea1SDimitry Andric   while (auto *GEP = dyn_cast<GEPOperator>(V))
823*0fca6ea1SDimitry Andric     V = GEP->getPointerOperand();
824*0fca6ea1SDimitry Andric   while (auto *ASC = dyn_cast<AddrSpaceCastOperator>(V))
825*0fca6ea1SDimitry Andric     V = ASC->getPointerOperand();
826*0fca6ea1SDimitry Andric   return V;
827*0fca6ea1SDimitry Andric }
828*0fca6ea1SDimitry Andric 
829*0fca6ea1SDimitry Andric void SplitPtrStructs::getPossibleRsrcRoots(Instruction *I,
830*0fca6ea1SDimitry Andric                                            SmallPtrSetImpl<Value *> &Roots,
831*0fca6ea1SDimitry Andric                                            SmallPtrSetImpl<Value *> &Seen) {
832*0fca6ea1SDimitry Andric   if (auto *PHI = dyn_cast<PHINode>(I)) {
833*0fca6ea1SDimitry Andric     if (!Seen.insert(I).second)
834*0fca6ea1SDimitry Andric       return;
835*0fca6ea1SDimitry Andric     for (Value *In : PHI->incoming_values()) {
836*0fca6ea1SDimitry Andric       In = rsrcPartRoot(In);
837*0fca6ea1SDimitry Andric       Roots.insert(In);
838*0fca6ea1SDimitry Andric       if (isa<PHINode, SelectInst>(In))
839*0fca6ea1SDimitry Andric         getPossibleRsrcRoots(cast<Instruction>(In), Roots, Seen);
840*0fca6ea1SDimitry Andric     }
841*0fca6ea1SDimitry Andric   } else if (auto *SI = dyn_cast<SelectInst>(I)) {
842*0fca6ea1SDimitry Andric     if (!Seen.insert(SI).second)
843*0fca6ea1SDimitry Andric       return;
844*0fca6ea1SDimitry Andric     Value *TrueVal = rsrcPartRoot(SI->getTrueValue());
845*0fca6ea1SDimitry Andric     Value *FalseVal = rsrcPartRoot(SI->getFalseValue());
846*0fca6ea1SDimitry Andric     Roots.insert(TrueVal);
847*0fca6ea1SDimitry Andric     Roots.insert(FalseVal);
848*0fca6ea1SDimitry Andric     if (isa<PHINode, SelectInst>(TrueVal))
849*0fca6ea1SDimitry Andric       getPossibleRsrcRoots(cast<Instruction>(TrueVal), Roots, Seen);
850*0fca6ea1SDimitry Andric     if (isa<PHINode, SelectInst>(FalseVal))
851*0fca6ea1SDimitry Andric       getPossibleRsrcRoots(cast<Instruction>(FalseVal), Roots, Seen);
852*0fca6ea1SDimitry Andric   } else {
853*0fca6ea1SDimitry Andric     llvm_unreachable("getPossibleRsrcParts() only works on phi and select");
854*0fca6ea1SDimitry Andric   }
855*0fca6ea1SDimitry Andric }
856*0fca6ea1SDimitry Andric 
857*0fca6ea1SDimitry Andric void SplitPtrStructs::processConditionals() {
858*0fca6ea1SDimitry Andric   SmallDenseMap<Instruction *, Value *> FoundRsrcs;
859*0fca6ea1SDimitry Andric   SmallPtrSet<Value *, 4> Roots;
860*0fca6ea1SDimitry Andric   SmallPtrSet<Value *, 4> Seen;
861*0fca6ea1SDimitry Andric   for (Instruction *I : Conditionals) {
862*0fca6ea1SDimitry Andric     // These have to exist by now because we've visited these nodes.
863*0fca6ea1SDimitry Andric     Value *Rsrc = RsrcParts[I];
864*0fca6ea1SDimitry Andric     Value *Off = OffParts[I];
865*0fca6ea1SDimitry Andric     assert(Rsrc && Off && "must have visited conditionals by now");
866*0fca6ea1SDimitry Andric 
867*0fca6ea1SDimitry Andric     std::optional<Value *> MaybeRsrc;
868*0fca6ea1SDimitry Andric     auto MaybeFoundRsrc = FoundRsrcs.find(I);
869*0fca6ea1SDimitry Andric     if (MaybeFoundRsrc != FoundRsrcs.end()) {
870*0fca6ea1SDimitry Andric       MaybeRsrc = MaybeFoundRsrc->second;
871*0fca6ea1SDimitry Andric     } else {
872*0fca6ea1SDimitry Andric       IRBuilder<>::InsertPointGuard Guard(IRB);
873*0fca6ea1SDimitry Andric       Roots.clear();
874*0fca6ea1SDimitry Andric       Seen.clear();
875*0fca6ea1SDimitry Andric       getPossibleRsrcRoots(I, Roots, Seen);
876*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "Processing conditional: " << *I << "\n");
877*0fca6ea1SDimitry Andric #ifndef NDEBUG
878*0fca6ea1SDimitry Andric       for (Value *V : Roots)
879*0fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "Root: " << *V << "\n");
880*0fca6ea1SDimitry Andric       for (Value *V : Seen)
881*0fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "Seen: " << *V << "\n");
882*0fca6ea1SDimitry Andric #endif
883*0fca6ea1SDimitry Andric       // If we are our own possible root, then we shouldn't block our
884*0fca6ea1SDimitry Andric       // replacement with a valid incoming value.
885*0fca6ea1SDimitry Andric       Roots.erase(I);
886*0fca6ea1SDimitry Andric       // We don't want to block the optimization for conditionals that don't
887*0fca6ea1SDimitry Andric       // refer to themselves but did see themselves during the traversal.
888*0fca6ea1SDimitry Andric       Seen.erase(I);
889*0fca6ea1SDimitry Andric 
890*0fca6ea1SDimitry Andric       if (set_is_subset(Seen, Roots)) {
891*0fca6ea1SDimitry Andric         auto Diff = set_difference(Roots, Seen);
892*0fca6ea1SDimitry Andric         if (Diff.size() == 1) {
893*0fca6ea1SDimitry Andric           Value *RootVal = *Diff.begin();
894*0fca6ea1SDimitry Andric           // Handle the case where previous loops already looked through
895*0fca6ea1SDimitry Andric           // an addrspacecast.
896*0fca6ea1SDimitry Andric           if (isSplitFatPtr(RootVal->getType()))
897*0fca6ea1SDimitry Andric             MaybeRsrc = std::get<0>(getPtrParts(RootVal));
898*0fca6ea1SDimitry Andric           else
899*0fca6ea1SDimitry Andric             MaybeRsrc = RootVal;
900*0fca6ea1SDimitry Andric         }
901*0fca6ea1SDimitry Andric       }
902*0fca6ea1SDimitry Andric     }
903*0fca6ea1SDimitry Andric 
904*0fca6ea1SDimitry Andric     if (auto *PHI = dyn_cast<PHINode>(I)) {
905*0fca6ea1SDimitry Andric       Value *NewRsrc;
906*0fca6ea1SDimitry Andric       StructType *PHITy = cast<StructType>(PHI->getType());
907*0fca6ea1SDimitry Andric       IRB.SetInsertPoint(*PHI->getInsertionPointAfterDef());
908*0fca6ea1SDimitry Andric       IRB.SetCurrentDebugLocation(PHI->getDebugLoc());
909*0fca6ea1SDimitry Andric       if (MaybeRsrc) {
910*0fca6ea1SDimitry Andric         NewRsrc = *MaybeRsrc;
911*0fca6ea1SDimitry Andric       } else {
912*0fca6ea1SDimitry Andric         Type *RsrcTy = PHITy->getElementType(0);
913*0fca6ea1SDimitry Andric         auto *RsrcPHI = IRB.CreatePHI(RsrcTy, PHI->getNumIncomingValues());
914*0fca6ea1SDimitry Andric         RsrcPHI->takeName(Rsrc);
915*0fca6ea1SDimitry Andric         for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {
916*0fca6ea1SDimitry Andric           Value *VRsrc = std::get<0>(getPtrParts(V));
917*0fca6ea1SDimitry Andric           RsrcPHI->addIncoming(VRsrc, BB);
918*0fca6ea1SDimitry Andric         }
919*0fca6ea1SDimitry Andric         copyMetadata(RsrcPHI, PHI);
920*0fca6ea1SDimitry Andric         NewRsrc = RsrcPHI;
921*0fca6ea1SDimitry Andric       }
922*0fca6ea1SDimitry Andric 
923*0fca6ea1SDimitry Andric       Type *OffTy = PHITy->getElementType(1);
924*0fca6ea1SDimitry Andric       auto *NewOff = IRB.CreatePHI(OffTy, PHI->getNumIncomingValues());
925*0fca6ea1SDimitry Andric       NewOff->takeName(Off);
926*0fca6ea1SDimitry Andric       for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {
927*0fca6ea1SDimitry Andric         assert(OffParts.count(V) && "An offset part had to be created by now");
928*0fca6ea1SDimitry Andric         Value *VOff = std::get<1>(getPtrParts(V));
929*0fca6ea1SDimitry Andric         NewOff->addIncoming(VOff, BB);
930*0fca6ea1SDimitry Andric       }
931*0fca6ea1SDimitry Andric       copyMetadata(NewOff, PHI);
932*0fca6ea1SDimitry Andric 
933*0fca6ea1SDimitry Andric       // Note: We don't eraseFromParent() the temporaries because we don't want
934*0fca6ea1SDimitry Andric       // to put the corrections maps in an inconstent state. That'll be handed
935*0fca6ea1SDimitry Andric       // during the rest of the killing. Also, `ValueToValueMapTy` guarantees
936*0fca6ea1SDimitry Andric       // that references in that map will be updated as well.
937*0fca6ea1SDimitry Andric       ConditionalTemps.push_back(cast<Instruction>(Rsrc));
938*0fca6ea1SDimitry Andric       ConditionalTemps.push_back(cast<Instruction>(Off));
939*0fca6ea1SDimitry Andric       Rsrc->replaceAllUsesWith(NewRsrc);
940*0fca6ea1SDimitry Andric       Off->replaceAllUsesWith(NewOff);
941*0fca6ea1SDimitry Andric 
942*0fca6ea1SDimitry Andric       // Save on recomputing the cycle traversals in known-root cases.
943*0fca6ea1SDimitry Andric       if (MaybeRsrc)
944*0fca6ea1SDimitry Andric         for (Value *V : Seen)
945*0fca6ea1SDimitry Andric           FoundRsrcs[cast<Instruction>(V)] = NewRsrc;
946*0fca6ea1SDimitry Andric     } else if (isa<SelectInst>(I)) {
947*0fca6ea1SDimitry Andric       if (MaybeRsrc) {
948*0fca6ea1SDimitry Andric         ConditionalTemps.push_back(cast<Instruction>(Rsrc));
949*0fca6ea1SDimitry Andric         Rsrc->replaceAllUsesWith(*MaybeRsrc);
950*0fca6ea1SDimitry Andric         for (Value *V : Seen)
951*0fca6ea1SDimitry Andric           FoundRsrcs[cast<Instruction>(V)] = *MaybeRsrc;
952*0fca6ea1SDimitry Andric       }
953*0fca6ea1SDimitry Andric     } else {
954*0fca6ea1SDimitry Andric       llvm_unreachable("Only PHIs and selects go in the conditionals list");
955*0fca6ea1SDimitry Andric     }
956*0fca6ea1SDimitry Andric   }
957*0fca6ea1SDimitry Andric }
958*0fca6ea1SDimitry Andric 
959*0fca6ea1SDimitry Andric void SplitPtrStructs::killAndReplaceSplitInstructions(
960*0fca6ea1SDimitry Andric     SmallVectorImpl<Instruction *> &Origs) {
961*0fca6ea1SDimitry Andric   for (Instruction *I : ConditionalTemps)
962*0fca6ea1SDimitry Andric     I->eraseFromParent();
963*0fca6ea1SDimitry Andric 
964*0fca6ea1SDimitry Andric   for (Instruction *I : Origs) {
965*0fca6ea1SDimitry Andric     if (!SplitUsers.contains(I))
966*0fca6ea1SDimitry Andric       continue;
967*0fca6ea1SDimitry Andric 
968*0fca6ea1SDimitry Andric     SmallVector<DbgValueInst *> Dbgs;
969*0fca6ea1SDimitry Andric     findDbgValues(Dbgs, I);
970*0fca6ea1SDimitry Andric     for (auto *Dbg : Dbgs) {
971*0fca6ea1SDimitry Andric       IRB.SetInsertPoint(Dbg);
972*0fca6ea1SDimitry Andric       auto &DL = I->getDataLayout();
973*0fca6ea1SDimitry Andric       assert(isSplitFatPtr(I->getType()) &&
974*0fca6ea1SDimitry Andric              "We should've RAUW'd away loads, stores, etc. at this point");
975*0fca6ea1SDimitry Andric       auto *OffDbg = cast<DbgValueInst>(Dbg->clone());
976*0fca6ea1SDimitry Andric       copyMetadata(OffDbg, Dbg);
977*0fca6ea1SDimitry Andric       auto [Rsrc, Off] = getPtrParts(I);
978*0fca6ea1SDimitry Andric 
979*0fca6ea1SDimitry Andric       int64_t RsrcSz = DL.getTypeSizeInBits(Rsrc->getType());
980*0fca6ea1SDimitry Andric       int64_t OffSz = DL.getTypeSizeInBits(Off->getType());
981*0fca6ea1SDimitry Andric 
982*0fca6ea1SDimitry Andric       std::optional<DIExpression *> RsrcExpr =
983*0fca6ea1SDimitry Andric           DIExpression::createFragmentExpression(Dbg->getExpression(), 0,
984*0fca6ea1SDimitry Andric                                                  RsrcSz);
985*0fca6ea1SDimitry Andric       std::optional<DIExpression *> OffExpr =
986*0fca6ea1SDimitry Andric           DIExpression::createFragmentExpression(Dbg->getExpression(), RsrcSz,
987*0fca6ea1SDimitry Andric                                                  OffSz);
988*0fca6ea1SDimitry Andric       if (OffExpr) {
989*0fca6ea1SDimitry Andric         OffDbg->setExpression(*OffExpr);
990*0fca6ea1SDimitry Andric         OffDbg->replaceVariableLocationOp(I, Off);
991*0fca6ea1SDimitry Andric         IRB.Insert(OffDbg);
992*0fca6ea1SDimitry Andric       } else {
993*0fca6ea1SDimitry Andric         OffDbg->deleteValue();
994*0fca6ea1SDimitry Andric       }
995*0fca6ea1SDimitry Andric       if (RsrcExpr) {
996*0fca6ea1SDimitry Andric         Dbg->setExpression(*RsrcExpr);
997*0fca6ea1SDimitry Andric         Dbg->replaceVariableLocationOp(I, Rsrc);
998*0fca6ea1SDimitry Andric       } else {
999*0fca6ea1SDimitry Andric         Dbg->replaceVariableLocationOp(I, UndefValue::get(I->getType()));
1000*0fca6ea1SDimitry Andric       }
1001*0fca6ea1SDimitry Andric     }
1002*0fca6ea1SDimitry Andric 
1003*0fca6ea1SDimitry Andric     Value *Poison = PoisonValue::get(I->getType());
1004*0fca6ea1SDimitry Andric     I->replaceUsesWithIf(Poison, [&](const Use &U) -> bool {
1005*0fca6ea1SDimitry Andric       if (const auto *UI = dyn_cast<Instruction>(U.getUser()))
1006*0fca6ea1SDimitry Andric         return SplitUsers.contains(UI);
1007*0fca6ea1SDimitry Andric       return false;
1008*0fca6ea1SDimitry Andric     });
1009*0fca6ea1SDimitry Andric 
1010*0fca6ea1SDimitry Andric     if (I->use_empty()) {
1011*0fca6ea1SDimitry Andric       I->eraseFromParent();
1012*0fca6ea1SDimitry Andric       continue;
1013*0fca6ea1SDimitry Andric     }
1014*0fca6ea1SDimitry Andric     IRB.SetInsertPoint(*I->getInsertionPointAfterDef());
1015*0fca6ea1SDimitry Andric     IRB.SetCurrentDebugLocation(I->getDebugLoc());
1016*0fca6ea1SDimitry Andric     auto [Rsrc, Off] = getPtrParts(I);
1017*0fca6ea1SDimitry Andric     Value *Struct = PoisonValue::get(I->getType());
1018*0fca6ea1SDimitry Andric     Struct = IRB.CreateInsertValue(Struct, Rsrc, 0);
1019*0fca6ea1SDimitry Andric     Struct = IRB.CreateInsertValue(Struct, Off, 1);
1020*0fca6ea1SDimitry Andric     copyMetadata(Struct, I);
1021*0fca6ea1SDimitry Andric     Struct->takeName(I);
1022*0fca6ea1SDimitry Andric     I->replaceAllUsesWith(Struct);
1023*0fca6ea1SDimitry Andric     I->eraseFromParent();
1024*0fca6ea1SDimitry Andric   }
1025*0fca6ea1SDimitry Andric }
1026*0fca6ea1SDimitry Andric 
1027*0fca6ea1SDimitry Andric void SplitPtrStructs::setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx) {
1028*0fca6ea1SDimitry Andric   LLVMContext &Ctx = Intr->getContext();
1029*0fca6ea1SDimitry Andric   Intr->addParamAttr(RsrcArgIdx, Attribute::getWithAlignment(Ctx, A));
1030*0fca6ea1SDimitry Andric }
1031*0fca6ea1SDimitry Andric 
1032*0fca6ea1SDimitry Andric void SplitPtrStructs::insertPreMemOpFence(AtomicOrdering Order,
1033*0fca6ea1SDimitry Andric                                           SyncScope::ID SSID) {
1034*0fca6ea1SDimitry Andric   switch (Order) {
1035*0fca6ea1SDimitry Andric   case AtomicOrdering::Release:
1036*0fca6ea1SDimitry Andric   case AtomicOrdering::AcquireRelease:
1037*0fca6ea1SDimitry Andric   case AtomicOrdering::SequentiallyConsistent:
1038*0fca6ea1SDimitry Andric     IRB.CreateFence(AtomicOrdering::Release, SSID);
1039*0fca6ea1SDimitry Andric     break;
1040*0fca6ea1SDimitry Andric   default:
1041*0fca6ea1SDimitry Andric     break;
1042*0fca6ea1SDimitry Andric   }
1043*0fca6ea1SDimitry Andric }
1044*0fca6ea1SDimitry Andric 
1045*0fca6ea1SDimitry Andric void SplitPtrStructs::insertPostMemOpFence(AtomicOrdering Order,
1046*0fca6ea1SDimitry Andric                                            SyncScope::ID SSID) {
1047*0fca6ea1SDimitry Andric   switch (Order) {
1048*0fca6ea1SDimitry Andric   case AtomicOrdering::Acquire:
1049*0fca6ea1SDimitry Andric   case AtomicOrdering::AcquireRelease:
1050*0fca6ea1SDimitry Andric   case AtomicOrdering::SequentiallyConsistent:
1051*0fca6ea1SDimitry Andric     IRB.CreateFence(AtomicOrdering::Acquire, SSID);
1052*0fca6ea1SDimitry Andric     break;
1053*0fca6ea1SDimitry Andric   default:
1054*0fca6ea1SDimitry Andric     break;
1055*0fca6ea1SDimitry Andric   }
1056*0fca6ea1SDimitry Andric }
1057*0fca6ea1SDimitry Andric 
1058*0fca6ea1SDimitry Andric Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
1059*0fca6ea1SDimitry Andric                                          Type *Ty, Align Alignment,
1060*0fca6ea1SDimitry Andric                                          AtomicOrdering Order, bool IsVolatile,
1061*0fca6ea1SDimitry Andric                                          SyncScope::ID SSID) {
1062*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(I);
1063*0fca6ea1SDimitry Andric 
1064*0fca6ea1SDimitry Andric   auto [Rsrc, Off] = getPtrParts(Ptr);
1065*0fca6ea1SDimitry Andric   SmallVector<Value *, 5> Args;
1066*0fca6ea1SDimitry Andric   if (Arg)
1067*0fca6ea1SDimitry Andric     Args.push_back(Arg);
1068*0fca6ea1SDimitry Andric   Args.push_back(Rsrc);
1069*0fca6ea1SDimitry Andric   Args.push_back(Off);
1070*0fca6ea1SDimitry Andric   insertPreMemOpFence(Order, SSID);
1071*0fca6ea1SDimitry Andric   // soffset is always 0 for these cases, where we always want any offset to be
1072*0fca6ea1SDimitry Andric   // part of bounds checking and we don't know which parts of the GEPs is
1073*0fca6ea1SDimitry Andric   // uniform.
1074*0fca6ea1SDimitry Andric   Args.push_back(IRB.getInt32(0));
1075*0fca6ea1SDimitry Andric 
1076*0fca6ea1SDimitry Andric   uint32_t Aux = 0;
1077*0fca6ea1SDimitry Andric   bool IsInvariant =
1078*0fca6ea1SDimitry Andric       (isa<LoadInst>(I) && I->getMetadata(LLVMContext::MD_invariant_load));
1079*0fca6ea1SDimitry Andric   bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal);
1080*0fca6ea1SDimitry Andric   // Atomic loads and stores need glc, atomic read-modify-write doesn't.
1081*0fca6ea1SDimitry Andric   bool IsOneWayAtomic =
1082*0fca6ea1SDimitry Andric       !isa<AtomicRMWInst>(I) && Order != AtomicOrdering::NotAtomic;
1083*0fca6ea1SDimitry Andric   if (IsOneWayAtomic)
1084*0fca6ea1SDimitry Andric     Aux |= AMDGPU::CPol::GLC;
1085*0fca6ea1SDimitry Andric   if (IsNonTemporal && !IsInvariant)
1086*0fca6ea1SDimitry Andric     Aux |= AMDGPU::CPol::SLC;
1087*0fca6ea1SDimitry Andric   if (isa<LoadInst>(I) && ST->getGeneration() == AMDGPUSubtarget::GFX10)
1088*0fca6ea1SDimitry Andric     Aux |= (Aux & AMDGPU::CPol::GLC ? AMDGPU::CPol::DLC : 0);
1089*0fca6ea1SDimitry Andric   if (IsVolatile)
1090*0fca6ea1SDimitry Andric     Aux |= AMDGPU::CPol::VOLATILE;
1091*0fca6ea1SDimitry Andric   Args.push_back(IRB.getInt32(Aux));
1092*0fca6ea1SDimitry Andric 
1093*0fca6ea1SDimitry Andric   Intrinsic::ID IID = Intrinsic::not_intrinsic;
1094*0fca6ea1SDimitry Andric   if (isa<LoadInst>(I))
1095*0fca6ea1SDimitry Andric     IID = Order == AtomicOrdering::NotAtomic
1096*0fca6ea1SDimitry Andric               ? Intrinsic::amdgcn_raw_ptr_buffer_load
1097*0fca6ea1SDimitry Andric               : Intrinsic::amdgcn_raw_ptr_atomic_buffer_load;
1098*0fca6ea1SDimitry Andric   else if (isa<StoreInst>(I))
1099*0fca6ea1SDimitry Andric     IID = Intrinsic::amdgcn_raw_ptr_buffer_store;
1100*0fca6ea1SDimitry Andric   else if (auto *RMW = dyn_cast<AtomicRMWInst>(I)) {
1101*0fca6ea1SDimitry Andric     switch (RMW->getOperation()) {
1102*0fca6ea1SDimitry Andric     case AtomicRMWInst::Xchg:
1103*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap;
1104*0fca6ea1SDimitry Andric       break;
1105*0fca6ea1SDimitry Andric     case AtomicRMWInst::Add:
1106*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
1107*0fca6ea1SDimitry Andric       break;
1108*0fca6ea1SDimitry Andric     case AtomicRMWInst::Sub:
1109*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub;
1110*0fca6ea1SDimitry Andric       break;
1111*0fca6ea1SDimitry Andric     case AtomicRMWInst::And:
1112*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_and;
1113*0fca6ea1SDimitry Andric       break;
1114*0fca6ea1SDimitry Andric     case AtomicRMWInst::Or:
1115*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_or;
1116*0fca6ea1SDimitry Andric       break;
1117*0fca6ea1SDimitry Andric     case AtomicRMWInst::Xor:
1118*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor;
1119*0fca6ea1SDimitry Andric       break;
1120*0fca6ea1SDimitry Andric     case AtomicRMWInst::Max:
1121*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax;
1122*0fca6ea1SDimitry Andric       break;
1123*0fca6ea1SDimitry Andric     case AtomicRMWInst::Min:
1124*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin;
1125*0fca6ea1SDimitry Andric       break;
1126*0fca6ea1SDimitry Andric     case AtomicRMWInst::UMax:
1127*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax;
1128*0fca6ea1SDimitry Andric       break;
1129*0fca6ea1SDimitry Andric     case AtomicRMWInst::UMin:
1130*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin;
1131*0fca6ea1SDimitry Andric       break;
1132*0fca6ea1SDimitry Andric     case AtomicRMWInst::FAdd:
1133*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
1134*0fca6ea1SDimitry Andric       break;
1135*0fca6ea1SDimitry Andric     case AtomicRMWInst::FMax:
1136*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
1137*0fca6ea1SDimitry Andric       break;
1138*0fca6ea1SDimitry Andric     case AtomicRMWInst::FMin:
1139*0fca6ea1SDimitry Andric       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin;
1140*0fca6ea1SDimitry Andric       break;
1141*0fca6ea1SDimitry Andric     case AtomicRMWInst::FSub: {
1142*0fca6ea1SDimitry Andric       report_fatal_error("atomic floating point subtraction not supported for "
1143*0fca6ea1SDimitry Andric                          "buffer resources and should've been expanded away");
1144*0fca6ea1SDimitry Andric       break;
1145*0fca6ea1SDimitry Andric     }
1146*0fca6ea1SDimitry Andric     case AtomicRMWInst::Nand:
1147*0fca6ea1SDimitry Andric       report_fatal_error("atomic nand not supported for buffer resources and "
1148*0fca6ea1SDimitry Andric                          "should've been expanded away");
1149*0fca6ea1SDimitry Andric       break;
1150*0fca6ea1SDimitry Andric     case AtomicRMWInst::UIncWrap:
1151*0fca6ea1SDimitry Andric     case AtomicRMWInst::UDecWrap:
1152*0fca6ea1SDimitry Andric       report_fatal_error("wrapping increment/decrement not supported for "
1153*0fca6ea1SDimitry Andric                          "buffer resources and should've ben expanded away");
1154*0fca6ea1SDimitry Andric       break;
1155*0fca6ea1SDimitry Andric     case AtomicRMWInst::BAD_BINOP:
1156*0fca6ea1SDimitry Andric       llvm_unreachable("Not sure how we got a bad binop");
1157*0fca6ea1SDimitry Andric     }
1158*0fca6ea1SDimitry Andric   }
1159*0fca6ea1SDimitry Andric 
1160*0fca6ea1SDimitry Andric   auto *Call = IRB.CreateIntrinsic(IID, Ty, Args);
1161*0fca6ea1SDimitry Andric   copyMetadata(Call, I);
1162*0fca6ea1SDimitry Andric   setAlign(Call, Alignment, Arg ? 1 : 0);
1163*0fca6ea1SDimitry Andric   Call->takeName(I);
1164*0fca6ea1SDimitry Andric 
1165*0fca6ea1SDimitry Andric   insertPostMemOpFence(Order, SSID);
1166*0fca6ea1SDimitry Andric   // The "no moving p7 directly" rewrites ensure that this load or store won't
1167*0fca6ea1SDimitry Andric   // itself need to be split into parts.
1168*0fca6ea1SDimitry Andric   SplitUsers.insert(I);
1169*0fca6ea1SDimitry Andric   I->replaceAllUsesWith(Call);
1170*0fca6ea1SDimitry Andric   return Call;
1171*0fca6ea1SDimitry Andric }
1172*0fca6ea1SDimitry Andric 
1173*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitInstruction(Instruction &I) {
1174*0fca6ea1SDimitry Andric   return {nullptr, nullptr};
1175*0fca6ea1SDimitry Andric }
1176*0fca6ea1SDimitry Andric 
1177*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitLoadInst(LoadInst &LI) {
1178*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(LI.getPointerOperandType()))
1179*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1180*0fca6ea1SDimitry Andric   handleMemoryInst(&LI, nullptr, LI.getPointerOperand(), LI.getType(),
1181*0fca6ea1SDimitry Andric                    LI.getAlign(), LI.getOrdering(), LI.isVolatile(),
1182*0fca6ea1SDimitry Andric                    LI.getSyncScopeID());
1183*0fca6ea1SDimitry Andric   return {nullptr, nullptr};
1184*0fca6ea1SDimitry Andric }
1185*0fca6ea1SDimitry Andric 
1186*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitStoreInst(StoreInst &SI) {
1187*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(SI.getPointerOperandType()))
1188*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1189*0fca6ea1SDimitry Andric   Value *Arg = SI.getValueOperand();
1190*0fca6ea1SDimitry Andric   handleMemoryInst(&SI, Arg, SI.getPointerOperand(), Arg->getType(),
1191*0fca6ea1SDimitry Andric                    SI.getAlign(), SI.getOrdering(), SI.isVolatile(),
1192*0fca6ea1SDimitry Andric                    SI.getSyncScopeID());
1193*0fca6ea1SDimitry Andric   return {nullptr, nullptr};
1194*0fca6ea1SDimitry Andric }
1195*0fca6ea1SDimitry Andric 
1196*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitAtomicRMWInst(AtomicRMWInst &AI) {
1197*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(AI.getPointerOperand()->getType()))
1198*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1199*0fca6ea1SDimitry Andric   Value *Arg = AI.getValOperand();
1200*0fca6ea1SDimitry Andric   handleMemoryInst(&AI, Arg, AI.getPointerOperand(), Arg->getType(),
1201*0fca6ea1SDimitry Andric                    AI.getAlign(), AI.getOrdering(), AI.isVolatile(),
1202*0fca6ea1SDimitry Andric                    AI.getSyncScopeID());
1203*0fca6ea1SDimitry Andric   return {nullptr, nullptr};
1204*0fca6ea1SDimitry Andric }
1205*0fca6ea1SDimitry Andric 
1206*0fca6ea1SDimitry Andric // Unlike load, store, and RMW, cmpxchg needs special handling to account
1207*0fca6ea1SDimitry Andric // for the boolean argument.
1208*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {
1209*0fca6ea1SDimitry Andric   Value *Ptr = AI.getPointerOperand();
1210*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(Ptr->getType()))
1211*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1212*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&AI);
1213*0fca6ea1SDimitry Andric 
1214*0fca6ea1SDimitry Andric   Type *Ty = AI.getNewValOperand()->getType();
1215*0fca6ea1SDimitry Andric   AtomicOrdering Order = AI.getMergedOrdering();
1216*0fca6ea1SDimitry Andric   SyncScope::ID SSID = AI.getSyncScopeID();
1217*0fca6ea1SDimitry Andric   bool IsNonTemporal = AI.getMetadata(LLVMContext::MD_nontemporal);
1218*0fca6ea1SDimitry Andric 
1219*0fca6ea1SDimitry Andric   auto [Rsrc, Off] = getPtrParts(Ptr);
1220*0fca6ea1SDimitry Andric   insertPreMemOpFence(Order, SSID);
1221*0fca6ea1SDimitry Andric 
1222*0fca6ea1SDimitry Andric   uint32_t Aux = 0;
1223*0fca6ea1SDimitry Andric   if (IsNonTemporal)
1224*0fca6ea1SDimitry Andric     Aux |= AMDGPU::CPol::SLC;
1225*0fca6ea1SDimitry Andric   if (AI.isVolatile())
1226*0fca6ea1SDimitry Andric     Aux |= AMDGPU::CPol::VOLATILE;
1227*0fca6ea1SDimitry Andric   auto *Call =
1228*0fca6ea1SDimitry Andric       IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
1229*0fca6ea1SDimitry Andric                           {AI.getNewValOperand(), AI.getCompareOperand(), Rsrc,
1230*0fca6ea1SDimitry Andric                            Off, IRB.getInt32(0), IRB.getInt32(Aux)});
1231*0fca6ea1SDimitry Andric   copyMetadata(Call, &AI);
1232*0fca6ea1SDimitry Andric   setAlign(Call, AI.getAlign(), 2);
1233*0fca6ea1SDimitry Andric   Call->takeName(&AI);
1234*0fca6ea1SDimitry Andric   insertPostMemOpFence(Order, SSID);
1235*0fca6ea1SDimitry Andric 
1236*0fca6ea1SDimitry Andric   Value *Res = PoisonValue::get(AI.getType());
1237*0fca6ea1SDimitry Andric   Res = IRB.CreateInsertValue(Res, Call, 0);
1238*0fca6ea1SDimitry Andric   if (!AI.isWeak()) {
1239*0fca6ea1SDimitry Andric     Value *Succeeded = IRB.CreateICmpEQ(Call, AI.getCompareOperand());
1240*0fca6ea1SDimitry Andric     Res = IRB.CreateInsertValue(Res, Succeeded, 1);
1241*0fca6ea1SDimitry Andric   }
1242*0fca6ea1SDimitry Andric   SplitUsers.insert(&AI);
1243*0fca6ea1SDimitry Andric   AI.replaceAllUsesWith(Res);
1244*0fca6ea1SDimitry Andric   return {nullptr, nullptr};
1245*0fca6ea1SDimitry Andric }
1246*0fca6ea1SDimitry Andric 
1247*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
1248*0fca6ea1SDimitry Andric   using namespace llvm::PatternMatch;
1249*0fca6ea1SDimitry Andric   Value *Ptr = GEP.getPointerOperand();
1250*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(Ptr->getType()))
1251*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1252*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&GEP);
1253*0fca6ea1SDimitry Andric 
1254*0fca6ea1SDimitry Andric   auto [Rsrc, Off] = getPtrParts(Ptr);
1255*0fca6ea1SDimitry Andric   const DataLayout &DL = GEP.getDataLayout();
1256*0fca6ea1SDimitry Andric   bool InBounds = GEP.isInBounds();
1257*0fca6ea1SDimitry Andric 
1258*0fca6ea1SDimitry Andric   // In order to call emitGEPOffset() and thus not have to reimplement it,
1259*0fca6ea1SDimitry Andric   // we need the GEP result to have ptr addrspace(7) type.
1260*0fca6ea1SDimitry Andric   Type *FatPtrTy = IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER);
1261*0fca6ea1SDimitry Andric   if (auto *VT = dyn_cast<VectorType>(Off->getType()))
1262*0fca6ea1SDimitry Andric     FatPtrTy = VectorType::get(FatPtrTy, VT->getElementCount());
1263*0fca6ea1SDimitry Andric   GEP.mutateType(FatPtrTy);
1264*0fca6ea1SDimitry Andric   Value *OffAccum = emitGEPOffset(&IRB, DL, &GEP);
1265*0fca6ea1SDimitry Andric   GEP.mutateType(Ptr->getType());
1266*0fca6ea1SDimitry Andric   if (match(OffAccum, m_Zero())) { // Constant-zero offset
1267*0fca6ea1SDimitry Andric     SplitUsers.insert(&GEP);
1268*0fca6ea1SDimitry Andric     return {Rsrc, Off};
1269*0fca6ea1SDimitry Andric   }
1270*0fca6ea1SDimitry Andric 
1271*0fca6ea1SDimitry Andric   bool HasNonNegativeOff = false;
1272*0fca6ea1SDimitry Andric   if (auto *CI = dyn_cast<ConstantInt>(OffAccum)) {
1273*0fca6ea1SDimitry Andric     HasNonNegativeOff = !CI->isNegative();
1274*0fca6ea1SDimitry Andric   }
1275*0fca6ea1SDimitry Andric   Value *NewOff;
1276*0fca6ea1SDimitry Andric   if (match(Off, m_Zero())) {
1277*0fca6ea1SDimitry Andric     NewOff = OffAccum;
1278*0fca6ea1SDimitry Andric   } else {
1279*0fca6ea1SDimitry Andric     NewOff = IRB.CreateAdd(Off, OffAccum, "",
1280*0fca6ea1SDimitry Andric                            /*hasNUW=*/InBounds && HasNonNegativeOff,
1281*0fca6ea1SDimitry Andric                            /*hasNSW=*/false);
1282*0fca6ea1SDimitry Andric   }
1283*0fca6ea1SDimitry Andric   copyMetadata(NewOff, &GEP);
1284*0fca6ea1SDimitry Andric   NewOff->takeName(&GEP);
1285*0fca6ea1SDimitry Andric   SplitUsers.insert(&GEP);
1286*0fca6ea1SDimitry Andric   return {Rsrc, NewOff};
1287*0fca6ea1SDimitry Andric }
1288*0fca6ea1SDimitry Andric 
1289*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) {
1290*0fca6ea1SDimitry Andric   Value *Ptr = PI.getPointerOperand();
1291*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(Ptr->getType()))
1292*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1293*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&PI);
1294*0fca6ea1SDimitry Andric 
1295*0fca6ea1SDimitry Andric   Type *ResTy = PI.getType();
1296*0fca6ea1SDimitry Andric   unsigned Width = ResTy->getScalarSizeInBits();
1297*0fca6ea1SDimitry Andric 
1298*0fca6ea1SDimitry Andric   auto [Rsrc, Off] = getPtrParts(Ptr);
1299*0fca6ea1SDimitry Andric   const DataLayout &DL = PI.getDataLayout();
1300*0fca6ea1SDimitry Andric   unsigned FatPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER);
1301*0fca6ea1SDimitry Andric 
1302*0fca6ea1SDimitry Andric   Value *Res;
1303*0fca6ea1SDimitry Andric   if (Width <= BufferOffsetWidth) {
1304*0fca6ea1SDimitry Andric     Res = IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false,
1305*0fca6ea1SDimitry Andric                             PI.getName() + ".off");
1306*0fca6ea1SDimitry Andric   } else {
1307*0fca6ea1SDimitry Andric     Value *RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.getName() + ".rsrc");
1308*0fca6ea1SDimitry Andric     Value *Shl = IRB.CreateShl(
1309*0fca6ea1SDimitry Andric         RsrcInt,
1310*0fca6ea1SDimitry Andric         ConstantExpr::getIntegerValue(ResTy, APInt(Width, BufferOffsetWidth)),
1311*0fca6ea1SDimitry Andric         "", Width >= FatPtrWidth, Width > FatPtrWidth);
1312*0fca6ea1SDimitry Andric     Value *OffCast = IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false,
1313*0fca6ea1SDimitry Andric                                        PI.getName() + ".off");
1314*0fca6ea1SDimitry Andric     Res = IRB.CreateOr(Shl, OffCast);
1315*0fca6ea1SDimitry Andric   }
1316*0fca6ea1SDimitry Andric 
1317*0fca6ea1SDimitry Andric   copyMetadata(Res, &PI);
1318*0fca6ea1SDimitry Andric   Res->takeName(&PI);
1319*0fca6ea1SDimitry Andric   SplitUsers.insert(&PI);
1320*0fca6ea1SDimitry Andric   PI.replaceAllUsesWith(Res);
1321*0fca6ea1SDimitry Andric   return {nullptr, nullptr};
1322*0fca6ea1SDimitry Andric }
1323*0fca6ea1SDimitry Andric 
1324*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) {
1325*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(IP.getType()))
1326*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1327*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&IP);
1328*0fca6ea1SDimitry Andric   const DataLayout &DL = IP.getDataLayout();
1329*0fca6ea1SDimitry Andric   unsigned RsrcPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_RESOURCE);
1330*0fca6ea1SDimitry Andric   Value *Int = IP.getOperand(0);
1331*0fca6ea1SDimitry Andric   Type *IntTy = Int->getType();
1332*0fca6ea1SDimitry Andric   Type *RsrcIntTy = IntTy->getWithNewBitWidth(RsrcPtrWidth);
1333*0fca6ea1SDimitry Andric   unsigned Width = IntTy->getScalarSizeInBits();
1334*0fca6ea1SDimitry Andric 
1335*0fca6ea1SDimitry Andric   auto *RetTy = cast<StructType>(IP.getType());
1336*0fca6ea1SDimitry Andric   Type *RsrcTy = RetTy->getElementType(0);
1337*0fca6ea1SDimitry Andric   Type *OffTy = RetTy->getElementType(1);
1338*0fca6ea1SDimitry Andric   Value *RsrcPart = IRB.CreateLShr(
1339*0fca6ea1SDimitry Andric       Int,
1340*0fca6ea1SDimitry Andric       ConstantExpr::getIntegerValue(IntTy, APInt(Width, BufferOffsetWidth)));
1341*0fca6ea1SDimitry Andric   Value *RsrcInt = IRB.CreateIntCast(RsrcPart, RsrcIntTy, /*isSigned=*/false);
1342*0fca6ea1SDimitry Andric   Value *Rsrc = IRB.CreateIntToPtr(RsrcInt, RsrcTy, IP.getName() + ".rsrc");
1343*0fca6ea1SDimitry Andric   Value *Off =
1344*0fca6ea1SDimitry Andric       IRB.CreateIntCast(Int, OffTy, /*IsSigned=*/false, IP.getName() + ".off");
1345*0fca6ea1SDimitry Andric 
1346*0fca6ea1SDimitry Andric   copyMetadata(Rsrc, &IP);
1347*0fca6ea1SDimitry Andric   SplitUsers.insert(&IP);
1348*0fca6ea1SDimitry Andric   return {Rsrc, Off};
1349*0fca6ea1SDimitry Andric }
1350*0fca6ea1SDimitry Andric 
1351*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
1352*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(I.getType()))
1353*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1354*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&I);
1355*0fca6ea1SDimitry Andric   Value *In = I.getPointerOperand();
1356*0fca6ea1SDimitry Andric   // No-op casts preserve parts
1357*0fca6ea1SDimitry Andric   if (In->getType() == I.getType()) {
1358*0fca6ea1SDimitry Andric     auto [Rsrc, Off] = getPtrParts(In);
1359*0fca6ea1SDimitry Andric     SplitUsers.insert(&I);
1360*0fca6ea1SDimitry Andric     return {Rsrc, Off};
1361*0fca6ea1SDimitry Andric   }
1362*0fca6ea1SDimitry Andric   if (I.getSrcAddressSpace() != AMDGPUAS::BUFFER_RESOURCE)
1363*0fca6ea1SDimitry Andric     report_fatal_error("Only buffer resources (addrspace 8) can be cast to "
1364*0fca6ea1SDimitry Andric                        "buffer fat pointers (addrspace 7)");
1365*0fca6ea1SDimitry Andric   Type *OffTy = cast<StructType>(I.getType())->getElementType(1);
1366*0fca6ea1SDimitry Andric   Value *ZeroOff = Constant::getNullValue(OffTy);
1367*0fca6ea1SDimitry Andric   SplitUsers.insert(&I);
1368*0fca6ea1SDimitry Andric   return {In, ZeroOff};
1369*0fca6ea1SDimitry Andric }
1370*0fca6ea1SDimitry Andric 
1371*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) {
1372*0fca6ea1SDimitry Andric   Value *Lhs = Cmp.getOperand(0);
1373*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(Lhs->getType()))
1374*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1375*0fca6ea1SDimitry Andric   Value *Rhs = Cmp.getOperand(1);
1376*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&Cmp);
1377*0fca6ea1SDimitry Andric   ICmpInst::Predicate Pred = Cmp.getPredicate();
1378*0fca6ea1SDimitry Andric 
1379*0fca6ea1SDimitry Andric   assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
1380*0fca6ea1SDimitry Andric          "Pointer comparison is only equal or unequal");
1381*0fca6ea1SDimitry Andric   auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);
1382*0fca6ea1SDimitry Andric   auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);
1383*0fca6ea1SDimitry Andric   Value *RsrcCmp =
1384*0fca6ea1SDimitry Andric       IRB.CreateICmp(Pred, LhsRsrc, RhsRsrc, Cmp.getName() + ".rsrc");
1385*0fca6ea1SDimitry Andric   copyMetadata(RsrcCmp, &Cmp);
1386*0fca6ea1SDimitry Andric   Value *OffCmp = IRB.CreateICmp(Pred, LhsOff, RhsOff, Cmp.getName() + ".off");
1387*0fca6ea1SDimitry Andric   copyMetadata(OffCmp, &Cmp);
1388*0fca6ea1SDimitry Andric 
1389*0fca6ea1SDimitry Andric   Value *Res = nullptr;
1390*0fca6ea1SDimitry Andric   if (Pred == ICmpInst::ICMP_EQ)
1391*0fca6ea1SDimitry Andric     Res = IRB.CreateAnd(RsrcCmp, OffCmp);
1392*0fca6ea1SDimitry Andric   else if (Pred == ICmpInst::ICMP_NE)
1393*0fca6ea1SDimitry Andric     Res = IRB.CreateOr(RsrcCmp, OffCmp);
1394*0fca6ea1SDimitry Andric   copyMetadata(Res, &Cmp);
1395*0fca6ea1SDimitry Andric   Res->takeName(&Cmp);
1396*0fca6ea1SDimitry Andric   SplitUsers.insert(&Cmp);
1397*0fca6ea1SDimitry Andric   Cmp.replaceAllUsesWith(Res);
1398*0fca6ea1SDimitry Andric   return {nullptr, nullptr};
1399*0fca6ea1SDimitry Andric }
1400*0fca6ea1SDimitry Andric 
1401*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitFreezeInst(FreezeInst &I) {
1402*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(I.getType()))
1403*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1404*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&I);
1405*0fca6ea1SDimitry Andric   auto [Rsrc, Off] = getPtrParts(I.getOperand(0));
1406*0fca6ea1SDimitry Andric 
1407*0fca6ea1SDimitry Andric   Value *RsrcRes = IRB.CreateFreeze(Rsrc, I.getName() + ".rsrc");
1408*0fca6ea1SDimitry Andric   copyMetadata(RsrcRes, &I);
1409*0fca6ea1SDimitry Andric   Value *OffRes = IRB.CreateFreeze(Off, I.getName() + ".off");
1410*0fca6ea1SDimitry Andric   copyMetadata(OffRes, &I);
1411*0fca6ea1SDimitry Andric   SplitUsers.insert(&I);
1412*0fca6ea1SDimitry Andric   return {RsrcRes, OffRes};
1413*0fca6ea1SDimitry Andric }
1414*0fca6ea1SDimitry Andric 
1415*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitExtractElementInst(ExtractElementInst &I) {
1416*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(I.getType()))
1417*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1418*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&I);
1419*0fca6ea1SDimitry Andric   Value *Vec = I.getVectorOperand();
1420*0fca6ea1SDimitry Andric   Value *Idx = I.getIndexOperand();
1421*0fca6ea1SDimitry Andric   auto [Rsrc, Off] = getPtrParts(Vec);
1422*0fca6ea1SDimitry Andric 
1423*0fca6ea1SDimitry Andric   Value *RsrcRes = IRB.CreateExtractElement(Rsrc, Idx, I.getName() + ".rsrc");
1424*0fca6ea1SDimitry Andric   copyMetadata(RsrcRes, &I);
1425*0fca6ea1SDimitry Andric   Value *OffRes = IRB.CreateExtractElement(Off, Idx, I.getName() + ".off");
1426*0fca6ea1SDimitry Andric   copyMetadata(OffRes, &I);
1427*0fca6ea1SDimitry Andric   SplitUsers.insert(&I);
1428*0fca6ea1SDimitry Andric   return {RsrcRes, OffRes};
1429*0fca6ea1SDimitry Andric }
1430*0fca6ea1SDimitry Andric 
1431*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitInsertElementInst(InsertElementInst &I) {
1432*0fca6ea1SDimitry Andric   // The mutated instructions temporarily don't return vectors, and so
1433*0fca6ea1SDimitry Andric   // we need the generic getType() here to avoid crashes.
1434*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(cast<Instruction>(I).getType()))
1435*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1436*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&I);
1437*0fca6ea1SDimitry Andric   Value *Vec = I.getOperand(0);
1438*0fca6ea1SDimitry Andric   Value *Elem = I.getOperand(1);
1439*0fca6ea1SDimitry Andric   Value *Idx = I.getOperand(2);
1440*0fca6ea1SDimitry Andric   auto [VecRsrc, VecOff] = getPtrParts(Vec);
1441*0fca6ea1SDimitry Andric   auto [ElemRsrc, ElemOff] = getPtrParts(Elem);
1442*0fca6ea1SDimitry Andric 
1443*0fca6ea1SDimitry Andric   Value *RsrcRes =
1444*0fca6ea1SDimitry Andric       IRB.CreateInsertElement(VecRsrc, ElemRsrc, Idx, I.getName() + ".rsrc");
1445*0fca6ea1SDimitry Andric   copyMetadata(RsrcRes, &I);
1446*0fca6ea1SDimitry Andric   Value *OffRes =
1447*0fca6ea1SDimitry Andric       IRB.CreateInsertElement(VecOff, ElemOff, Idx, I.getName() + ".off");
1448*0fca6ea1SDimitry Andric   copyMetadata(OffRes, &I);
1449*0fca6ea1SDimitry Andric   SplitUsers.insert(&I);
1450*0fca6ea1SDimitry Andric   return {RsrcRes, OffRes};
1451*0fca6ea1SDimitry Andric }
1452*0fca6ea1SDimitry Andric 
1453*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitShuffleVectorInst(ShuffleVectorInst &I) {
1454*0fca6ea1SDimitry Andric   // Cast is needed for the same reason as insertelement's.
1455*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(cast<Instruction>(I).getType()))
1456*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1457*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&I);
1458*0fca6ea1SDimitry Andric 
1459*0fca6ea1SDimitry Andric   Value *V1 = I.getOperand(0);
1460*0fca6ea1SDimitry Andric   Value *V2 = I.getOperand(1);
1461*0fca6ea1SDimitry Andric   ArrayRef<int> Mask = I.getShuffleMask();
1462*0fca6ea1SDimitry Andric   auto [V1Rsrc, V1Off] = getPtrParts(V1);
1463*0fca6ea1SDimitry Andric   auto [V2Rsrc, V2Off] = getPtrParts(V2);
1464*0fca6ea1SDimitry Andric 
1465*0fca6ea1SDimitry Andric   Value *RsrcRes =
1466*0fca6ea1SDimitry Andric       IRB.CreateShuffleVector(V1Rsrc, V2Rsrc, Mask, I.getName() + ".rsrc");
1467*0fca6ea1SDimitry Andric   copyMetadata(RsrcRes, &I);
1468*0fca6ea1SDimitry Andric   Value *OffRes =
1469*0fca6ea1SDimitry Andric       IRB.CreateShuffleVector(V1Off, V2Off, Mask, I.getName() + ".off");
1470*0fca6ea1SDimitry Andric   copyMetadata(OffRes, &I);
1471*0fca6ea1SDimitry Andric   SplitUsers.insert(&I);
1472*0fca6ea1SDimitry Andric   return {RsrcRes, OffRes};
1473*0fca6ea1SDimitry Andric }
1474*0fca6ea1SDimitry Andric 
1475*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitPHINode(PHINode &PHI) {
1476*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(PHI.getType()))
1477*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1478*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(*PHI.getInsertionPointAfterDef());
1479*0fca6ea1SDimitry Andric   // Phi nodes will be handled in post-processing after we've visited every
1480*0fca6ea1SDimitry Andric   // instruction. However, instead of just returning {nullptr, nullptr},
1481*0fca6ea1SDimitry Andric   // we explicitly create the temporary extractvalue operations that are our
1482*0fca6ea1SDimitry Andric   // temporary results so that they end up at the beginning of the block with
1483*0fca6ea1SDimitry Andric   // the PHIs.
1484*0fca6ea1SDimitry Andric   Value *TmpRsrc = IRB.CreateExtractValue(&PHI, 0, PHI.getName() + ".rsrc");
1485*0fca6ea1SDimitry Andric   Value *TmpOff = IRB.CreateExtractValue(&PHI, 1, PHI.getName() + ".off");
1486*0fca6ea1SDimitry Andric   Conditionals.push_back(&PHI);
1487*0fca6ea1SDimitry Andric   SplitUsers.insert(&PHI);
1488*0fca6ea1SDimitry Andric   return {TmpRsrc, TmpOff};
1489*0fca6ea1SDimitry Andric }
1490*0fca6ea1SDimitry Andric 
1491*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitSelectInst(SelectInst &SI) {
1492*0fca6ea1SDimitry Andric   if (!isSplitFatPtr(SI.getType()))
1493*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1494*0fca6ea1SDimitry Andric   IRB.SetInsertPoint(&SI);
1495*0fca6ea1SDimitry Andric 
1496*0fca6ea1SDimitry Andric   Value *Cond = SI.getCondition();
1497*0fca6ea1SDimitry Andric   Value *True = SI.getTrueValue();
1498*0fca6ea1SDimitry Andric   Value *False = SI.getFalseValue();
1499*0fca6ea1SDimitry Andric   auto [TrueRsrc, TrueOff] = getPtrParts(True);
1500*0fca6ea1SDimitry Andric   auto [FalseRsrc, FalseOff] = getPtrParts(False);
1501*0fca6ea1SDimitry Andric 
1502*0fca6ea1SDimitry Andric   Value *RsrcRes =
1503*0fca6ea1SDimitry Andric       IRB.CreateSelect(Cond, TrueRsrc, FalseRsrc, SI.getName() + ".rsrc", &SI);
1504*0fca6ea1SDimitry Andric   copyMetadata(RsrcRes, &SI);
1505*0fca6ea1SDimitry Andric   Conditionals.push_back(&SI);
1506*0fca6ea1SDimitry Andric   Value *OffRes =
1507*0fca6ea1SDimitry Andric       IRB.CreateSelect(Cond, TrueOff, FalseOff, SI.getName() + ".off", &SI);
1508*0fca6ea1SDimitry Andric   copyMetadata(OffRes, &SI);
1509*0fca6ea1SDimitry Andric   SplitUsers.insert(&SI);
1510*0fca6ea1SDimitry Andric   return {RsrcRes, OffRes};
1511*0fca6ea1SDimitry Andric }
1512*0fca6ea1SDimitry Andric 
1513*0fca6ea1SDimitry Andric /// Returns true if this intrinsic needs to be removed when it is
1514*0fca6ea1SDimitry Andric /// applied to `ptr addrspace(7)` values. Calls to these intrinsics are
1515*0fca6ea1SDimitry Andric /// rewritten into calls to versions of that intrinsic on the resource
1516*0fca6ea1SDimitry Andric /// descriptor.
1517*0fca6ea1SDimitry Andric static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
1518*0fca6ea1SDimitry Andric   switch (IID) {
1519*0fca6ea1SDimitry Andric   default:
1520*0fca6ea1SDimitry Andric     return false;
1521*0fca6ea1SDimitry Andric   case Intrinsic::ptrmask:
1522*0fca6ea1SDimitry Andric   case Intrinsic::invariant_start:
1523*0fca6ea1SDimitry Andric   case Intrinsic::invariant_end:
1524*0fca6ea1SDimitry Andric   case Intrinsic::launder_invariant_group:
1525*0fca6ea1SDimitry Andric   case Intrinsic::strip_invariant_group:
1526*0fca6ea1SDimitry Andric     return true;
1527*0fca6ea1SDimitry Andric   }
1528*0fca6ea1SDimitry Andric }
1529*0fca6ea1SDimitry Andric 
1530*0fca6ea1SDimitry Andric PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
1531*0fca6ea1SDimitry Andric   Intrinsic::ID IID = I.getIntrinsicID();
1532*0fca6ea1SDimitry Andric   switch (IID) {
1533*0fca6ea1SDimitry Andric   default:
1534*0fca6ea1SDimitry Andric     break;
1535*0fca6ea1SDimitry Andric   case Intrinsic::ptrmask: {
1536*0fca6ea1SDimitry Andric     Value *Ptr = I.getArgOperand(0);
1537*0fca6ea1SDimitry Andric     if (!isSplitFatPtr(Ptr->getType()))
1538*0fca6ea1SDimitry Andric       return {nullptr, nullptr};
1539*0fca6ea1SDimitry Andric     Value *Mask = I.getArgOperand(1);
1540*0fca6ea1SDimitry Andric     IRB.SetInsertPoint(&I);
1541*0fca6ea1SDimitry Andric     auto [Rsrc, Off] = getPtrParts(Ptr);
1542*0fca6ea1SDimitry Andric     if (Mask->getType() != Off->getType())
1543*0fca6ea1SDimitry Andric       report_fatal_error("offset width is not equal to index width of fat "
1544*0fca6ea1SDimitry Andric                          "pointer (data layout not set up correctly?)");
1545*0fca6ea1SDimitry Andric     Value *OffRes = IRB.CreateAnd(Off, Mask, I.getName() + ".off");
1546*0fca6ea1SDimitry Andric     copyMetadata(OffRes, &I);
1547*0fca6ea1SDimitry Andric     SplitUsers.insert(&I);
1548*0fca6ea1SDimitry Andric     return {Rsrc, OffRes};
1549*0fca6ea1SDimitry Andric   }
1550*0fca6ea1SDimitry Andric   // Pointer annotation intrinsics that, given their object-wide nature
1551*0fca6ea1SDimitry Andric   // operate on the resource part.
1552*0fca6ea1SDimitry Andric   case Intrinsic::invariant_start: {
1553*0fca6ea1SDimitry Andric     Value *Ptr = I.getArgOperand(1);
1554*0fca6ea1SDimitry Andric     if (!isSplitFatPtr(Ptr->getType()))
1555*0fca6ea1SDimitry Andric       return {nullptr, nullptr};
1556*0fca6ea1SDimitry Andric     IRB.SetInsertPoint(&I);
1557*0fca6ea1SDimitry Andric     auto [Rsrc, Off] = getPtrParts(Ptr);
1558*0fca6ea1SDimitry Andric     Type *NewTy = PointerType::get(I.getContext(), AMDGPUAS::BUFFER_RESOURCE);
1559*0fca6ea1SDimitry Andric     auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {I.getOperand(0), Rsrc});
1560*0fca6ea1SDimitry Andric     copyMetadata(NewRsrc, &I);
1561*0fca6ea1SDimitry Andric     NewRsrc->takeName(&I);
1562*0fca6ea1SDimitry Andric     SplitUsers.insert(&I);
1563*0fca6ea1SDimitry Andric     I.replaceAllUsesWith(NewRsrc);
1564*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1565*0fca6ea1SDimitry Andric   }
1566*0fca6ea1SDimitry Andric   case Intrinsic::invariant_end: {
1567*0fca6ea1SDimitry Andric     Value *RealPtr = I.getArgOperand(2);
1568*0fca6ea1SDimitry Andric     if (!isSplitFatPtr(RealPtr->getType()))
1569*0fca6ea1SDimitry Andric       return {nullptr, nullptr};
1570*0fca6ea1SDimitry Andric     IRB.SetInsertPoint(&I);
1571*0fca6ea1SDimitry Andric     Value *RealRsrc = getPtrParts(RealPtr).first;
1572*0fca6ea1SDimitry Andric     Value *InvPtr = I.getArgOperand(0);
1573*0fca6ea1SDimitry Andric     Value *Size = I.getArgOperand(1);
1574*0fca6ea1SDimitry Andric     Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->getType()},
1575*0fca6ea1SDimitry Andric                                          {InvPtr, Size, RealRsrc});
1576*0fca6ea1SDimitry Andric     copyMetadata(NewRsrc, &I);
1577*0fca6ea1SDimitry Andric     NewRsrc->takeName(&I);
1578*0fca6ea1SDimitry Andric     SplitUsers.insert(&I);
1579*0fca6ea1SDimitry Andric     I.replaceAllUsesWith(NewRsrc);
1580*0fca6ea1SDimitry Andric     return {nullptr, nullptr};
1581*0fca6ea1SDimitry Andric   }
1582*0fca6ea1SDimitry Andric   case Intrinsic::launder_invariant_group:
1583*0fca6ea1SDimitry Andric   case Intrinsic::strip_invariant_group: {
1584*0fca6ea1SDimitry Andric     Value *Ptr = I.getArgOperand(0);
1585*0fca6ea1SDimitry Andric     if (!isSplitFatPtr(Ptr->getType()))
1586*0fca6ea1SDimitry Andric       return {nullptr, nullptr};
1587*0fca6ea1SDimitry Andric     IRB.SetInsertPoint(&I);
1588*0fca6ea1SDimitry Andric     auto [Rsrc, Off] = getPtrParts(Ptr);
1589*0fca6ea1SDimitry Andric     Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->getType()}, {Rsrc});
1590*0fca6ea1SDimitry Andric     copyMetadata(NewRsrc, &I);
1591*0fca6ea1SDimitry Andric     NewRsrc->takeName(&I);
1592*0fca6ea1SDimitry Andric     SplitUsers.insert(&I);
1593*0fca6ea1SDimitry Andric     return {NewRsrc, Off};
1594*0fca6ea1SDimitry Andric   }
1595*0fca6ea1SDimitry Andric   }
1596*0fca6ea1SDimitry Andric   return {nullptr, nullptr};
1597*0fca6ea1SDimitry Andric }
1598*0fca6ea1SDimitry Andric 
1599*0fca6ea1SDimitry Andric void SplitPtrStructs::processFunction(Function &F) {
1600*0fca6ea1SDimitry Andric   ST = &TM->getSubtarget<GCNSubtarget>(F);
1601*0fca6ea1SDimitry Andric   SmallVector<Instruction *, 0> Originals;
1602*0fca6ea1SDimitry Andric   LLVM_DEBUG(dbgs() << "Splitting pointer structs in function: " << F.getName()
1603*0fca6ea1SDimitry Andric                     << "\n");
1604*0fca6ea1SDimitry Andric   for (Instruction &I : instructions(F))
1605*0fca6ea1SDimitry Andric     Originals.push_back(&I);
1606*0fca6ea1SDimitry Andric   for (Instruction *I : Originals) {
1607*0fca6ea1SDimitry Andric     auto [Rsrc, Off] = visit(I);
1608*0fca6ea1SDimitry Andric     assert(((Rsrc && Off) || (!Rsrc && !Off)) &&
1609*0fca6ea1SDimitry Andric            "Can't have a resource but no offset");
1610*0fca6ea1SDimitry Andric     if (Rsrc)
1611*0fca6ea1SDimitry Andric       RsrcParts[I] = Rsrc;
1612*0fca6ea1SDimitry Andric     if (Off)
1613*0fca6ea1SDimitry Andric       OffParts[I] = Off;
1614*0fca6ea1SDimitry Andric   }
1615*0fca6ea1SDimitry Andric   processConditionals();
1616*0fca6ea1SDimitry Andric   killAndReplaceSplitInstructions(Originals);
1617*0fca6ea1SDimitry Andric 
1618*0fca6ea1SDimitry Andric   // Clean up after ourselves to save on memory.
1619*0fca6ea1SDimitry Andric   RsrcParts.clear();
1620*0fca6ea1SDimitry Andric   OffParts.clear();
1621*0fca6ea1SDimitry Andric   SplitUsers.clear();
1622*0fca6ea1SDimitry Andric   Conditionals.clear();
1623*0fca6ea1SDimitry Andric   ConditionalTemps.clear();
1624*0fca6ea1SDimitry Andric }
1625*0fca6ea1SDimitry Andric 
1626*0fca6ea1SDimitry Andric namespace {
1627*0fca6ea1SDimitry Andric class AMDGPULowerBufferFatPointers : public ModulePass {
1628*0fca6ea1SDimitry Andric public:
1629*0fca6ea1SDimitry Andric   static char ID;
1630*0fca6ea1SDimitry Andric 
1631*0fca6ea1SDimitry Andric   AMDGPULowerBufferFatPointers() : ModulePass(ID) {
1632*0fca6ea1SDimitry Andric     initializeAMDGPULowerBufferFatPointersPass(
1633*0fca6ea1SDimitry Andric         *PassRegistry::getPassRegistry());
1634*0fca6ea1SDimitry Andric   }
1635*0fca6ea1SDimitry Andric 
1636*0fca6ea1SDimitry Andric   bool run(Module &M, const TargetMachine &TM);
1637*0fca6ea1SDimitry Andric   bool runOnModule(Module &M) override;
1638*0fca6ea1SDimitry Andric 
1639*0fca6ea1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
1640*0fca6ea1SDimitry Andric };
1641*0fca6ea1SDimitry Andric } // namespace
1642*0fca6ea1SDimitry Andric 
1643*0fca6ea1SDimitry Andric /// Returns true if there are values that have a buffer fat pointer in them,
1644*0fca6ea1SDimitry Andric /// which means we'll need to perform rewrites on this function. As a side
1645*0fca6ea1SDimitry Andric /// effect, this will populate the type remapping cache.
1646*0fca6ea1SDimitry Andric static bool containsBufferFatPointers(const Function &F,
1647*0fca6ea1SDimitry Andric                                       BufferFatPtrToStructTypeMap *TypeMap) {
1648*0fca6ea1SDimitry Andric   bool HasFatPointers = false;
1649*0fca6ea1SDimitry Andric   for (const BasicBlock &BB : F)
1650*0fca6ea1SDimitry Andric     for (const Instruction &I : BB)
1651*0fca6ea1SDimitry Andric       HasFatPointers |= (I.getType() != TypeMap->remapType(I.getType()));
1652*0fca6ea1SDimitry Andric   return HasFatPointers;
1653*0fca6ea1SDimitry Andric }
1654*0fca6ea1SDimitry Andric 
1655*0fca6ea1SDimitry Andric static bool hasFatPointerInterface(const Function &F,
1656*0fca6ea1SDimitry Andric                                    BufferFatPtrToStructTypeMap *TypeMap) {
1657*0fca6ea1SDimitry Andric   Type *Ty = F.getFunctionType();
1658*0fca6ea1SDimitry Andric   return Ty != TypeMap->remapType(Ty);
1659*0fca6ea1SDimitry Andric }
1660*0fca6ea1SDimitry Andric 
1661*0fca6ea1SDimitry Andric /// Move the body of `OldF` into a new function, returning it.
1662*0fca6ea1SDimitry Andric static Function *moveFunctionAdaptingType(Function *OldF, FunctionType *NewTy,
1663*0fca6ea1SDimitry Andric                                           ValueToValueMapTy &CloneMap) {
1664*0fca6ea1SDimitry Andric   bool IsIntrinsic = OldF->isIntrinsic();
1665*0fca6ea1SDimitry Andric   Function *NewF =
1666*0fca6ea1SDimitry Andric       Function::Create(NewTy, OldF->getLinkage(), OldF->getAddressSpace());
1667*0fca6ea1SDimitry Andric   NewF->IsNewDbgInfoFormat = OldF->IsNewDbgInfoFormat;
1668*0fca6ea1SDimitry Andric   NewF->copyAttributesFrom(OldF);
1669*0fca6ea1SDimitry Andric   NewF->copyMetadata(OldF, 0);
1670*0fca6ea1SDimitry Andric   NewF->takeName(OldF);
1671*0fca6ea1SDimitry Andric   NewF->updateAfterNameChange();
1672*0fca6ea1SDimitry Andric   NewF->setDLLStorageClass(OldF->getDLLStorageClass());
1673*0fca6ea1SDimitry Andric   OldF->getParent()->getFunctionList().insertAfter(OldF->getIterator(), NewF);
1674*0fca6ea1SDimitry Andric 
1675*0fca6ea1SDimitry Andric   while (!OldF->empty()) {
1676*0fca6ea1SDimitry Andric     BasicBlock *BB = &OldF->front();
1677*0fca6ea1SDimitry Andric     BB->removeFromParent();
1678*0fca6ea1SDimitry Andric     BB->insertInto(NewF);
1679*0fca6ea1SDimitry Andric     CloneMap[BB] = BB;
1680*0fca6ea1SDimitry Andric     for (Instruction &I : *BB) {
1681*0fca6ea1SDimitry Andric       CloneMap[&I] = &I;
1682*0fca6ea1SDimitry Andric     }
1683*0fca6ea1SDimitry Andric   }
1684*0fca6ea1SDimitry Andric 
1685*0fca6ea1SDimitry Andric   AttributeMask PtrOnlyAttrs;
1686*0fca6ea1SDimitry Andric   for (auto K :
1687*0fca6ea1SDimitry Andric        {Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
1688*0fca6ea1SDimitry Andric         Attribute::NoAlias, Attribute::NoCapture, Attribute::NoFree,
1689*0fca6ea1SDimitry Andric         Attribute::NonNull, Attribute::NullPointerIsValid, Attribute::ReadNone,
1690*0fca6ea1SDimitry Andric         Attribute::ReadOnly, Attribute::WriteOnly}) {
1691*0fca6ea1SDimitry Andric     PtrOnlyAttrs.addAttribute(K);
1692*0fca6ea1SDimitry Andric   }
1693*0fca6ea1SDimitry Andric   SmallVector<AttributeSet> ArgAttrs;
1694*0fca6ea1SDimitry Andric   AttributeList OldAttrs = OldF->getAttributes();
1695*0fca6ea1SDimitry Andric 
1696*0fca6ea1SDimitry Andric   for (auto [I, OldArg, NewArg] : enumerate(OldF->args(), NewF->args())) {
1697*0fca6ea1SDimitry Andric     CloneMap[&NewArg] = &OldArg;
1698*0fca6ea1SDimitry Andric     NewArg.takeName(&OldArg);
1699*0fca6ea1SDimitry Andric     Type *OldArgTy = OldArg.getType(), *NewArgTy = NewArg.getType();
1700*0fca6ea1SDimitry Andric     // Temporarily mutate type of `NewArg` to allow RAUW to work.
1701*0fca6ea1SDimitry Andric     NewArg.mutateType(OldArgTy);
1702*0fca6ea1SDimitry Andric     OldArg.replaceAllUsesWith(&NewArg);
1703*0fca6ea1SDimitry Andric     NewArg.mutateType(NewArgTy);
1704*0fca6ea1SDimitry Andric 
1705*0fca6ea1SDimitry Andric     AttributeSet ArgAttr = OldAttrs.getParamAttrs(I);
1706*0fca6ea1SDimitry Andric     // Intrinsics get their attributes fixed later.
1707*0fca6ea1SDimitry Andric     if (OldArgTy != NewArgTy && !IsIntrinsic)
1708*0fca6ea1SDimitry Andric       ArgAttr = ArgAttr.removeAttributes(NewF->getContext(), PtrOnlyAttrs);
1709*0fca6ea1SDimitry Andric     ArgAttrs.push_back(ArgAttr);
1710*0fca6ea1SDimitry Andric   }
1711*0fca6ea1SDimitry Andric   AttributeSet RetAttrs = OldAttrs.getRetAttrs();
1712*0fca6ea1SDimitry Andric   if (OldF->getReturnType() != NewF->getReturnType() && !IsIntrinsic)
1713*0fca6ea1SDimitry Andric     RetAttrs = RetAttrs.removeAttributes(NewF->getContext(), PtrOnlyAttrs);
1714*0fca6ea1SDimitry Andric   NewF->setAttributes(AttributeList::get(
1715*0fca6ea1SDimitry Andric       NewF->getContext(), OldAttrs.getFnAttrs(), RetAttrs, ArgAttrs));
1716*0fca6ea1SDimitry Andric   return NewF;
1717*0fca6ea1SDimitry Andric }
1718*0fca6ea1SDimitry Andric 
1719*0fca6ea1SDimitry Andric static void makeCloneInPraceMap(Function *F, ValueToValueMapTy &CloneMap) {
1720*0fca6ea1SDimitry Andric   for (Argument &A : F->args())
1721*0fca6ea1SDimitry Andric     CloneMap[&A] = &A;
1722*0fca6ea1SDimitry Andric   for (BasicBlock &BB : *F) {
1723*0fca6ea1SDimitry Andric     CloneMap[&BB] = &BB;
1724*0fca6ea1SDimitry Andric     for (Instruction &I : BB)
1725*0fca6ea1SDimitry Andric       CloneMap[&I] = &I;
1726*0fca6ea1SDimitry Andric   }
1727*0fca6ea1SDimitry Andric }
1728*0fca6ea1SDimitry Andric 
1729*0fca6ea1SDimitry Andric bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
1730*0fca6ea1SDimitry Andric   bool Changed = false;
1731*0fca6ea1SDimitry Andric   const DataLayout &DL = M.getDataLayout();
1732*0fca6ea1SDimitry Andric   // Record the functions which need to be remapped.
1733*0fca6ea1SDimitry Andric   // The second element of the pair indicates whether the function has to have
1734*0fca6ea1SDimitry Andric   // its arguments or return types adjusted.
1735*0fca6ea1SDimitry Andric   SmallVector<std::pair<Function *, bool>> NeedsRemap;
1736*0fca6ea1SDimitry Andric 
1737*0fca6ea1SDimitry Andric   BufferFatPtrToStructTypeMap StructTM(DL);
1738*0fca6ea1SDimitry Andric   BufferFatPtrToIntTypeMap IntTM(DL);
1739*0fca6ea1SDimitry Andric   for (const GlobalVariable &GV : M.globals()) {
1740*0fca6ea1SDimitry Andric     if (GV.getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER)
1741*0fca6ea1SDimitry Andric       report_fatal_error("Global variables with a buffer fat pointer address "
1742*0fca6ea1SDimitry Andric                          "space (7) are not supported");
1743*0fca6ea1SDimitry Andric     Type *VT = GV.getValueType();
1744*0fca6ea1SDimitry Andric     if (VT != StructTM.remapType(VT))
1745*0fca6ea1SDimitry Andric       report_fatal_error("Global variables that contain buffer fat pointers "
1746*0fca6ea1SDimitry Andric                          "(address space 7 pointers) are unsupported. Use "
1747*0fca6ea1SDimitry Andric                          "buffer resource pointers (address space 8) instead.");
1748*0fca6ea1SDimitry Andric   }
1749*0fca6ea1SDimitry Andric 
1750*0fca6ea1SDimitry Andric   {
1751*0fca6ea1SDimitry Andric     // Collect all constant exprs and aggregates referenced by any function.
1752*0fca6ea1SDimitry Andric     SmallVector<Constant *, 8> Worklist;
1753*0fca6ea1SDimitry Andric     for (Function &F : M.functions())
1754*0fca6ea1SDimitry Andric       for (Instruction &I : instructions(F))
1755*0fca6ea1SDimitry Andric         for (Value *Op : I.operands())
1756*0fca6ea1SDimitry Andric           if (isa<ConstantExpr>(Op) || isa<ConstantAggregate>(Op))
1757*0fca6ea1SDimitry Andric             Worklist.push_back(cast<Constant>(Op));
1758*0fca6ea1SDimitry Andric 
1759*0fca6ea1SDimitry Andric     // Recursively look for any referenced buffer pointer constants.
1760*0fca6ea1SDimitry Andric     SmallPtrSet<Constant *, 8> Visited;
1761*0fca6ea1SDimitry Andric     SetVector<Constant *> BufferFatPtrConsts;
1762*0fca6ea1SDimitry Andric     while (!Worklist.empty()) {
1763*0fca6ea1SDimitry Andric       Constant *C = Worklist.pop_back_val();
1764*0fca6ea1SDimitry Andric       if (!Visited.insert(C).second)
1765*0fca6ea1SDimitry Andric         continue;
1766*0fca6ea1SDimitry Andric       if (isBufferFatPtrOrVector(C->getType()))
1767*0fca6ea1SDimitry Andric         BufferFatPtrConsts.insert(C);
1768*0fca6ea1SDimitry Andric       for (Value *Op : C->operands())
1769*0fca6ea1SDimitry Andric         if (isa<ConstantExpr>(Op) || isa<ConstantAggregate>(Op))
1770*0fca6ea1SDimitry Andric           Worklist.push_back(cast<Constant>(Op));
1771*0fca6ea1SDimitry Andric     }
1772*0fca6ea1SDimitry Andric 
1773*0fca6ea1SDimitry Andric     // Expand all constant expressions using fat buffer pointers to
1774*0fca6ea1SDimitry Andric     // instructions.
1775*0fca6ea1SDimitry Andric     Changed |= convertUsersOfConstantsToInstructions(
1776*0fca6ea1SDimitry Andric         BufferFatPtrConsts.getArrayRef(), /*RestrictToFunc=*/nullptr,
1777*0fca6ea1SDimitry Andric         /*RemoveDeadConstants=*/false, /*IncludeSelf=*/true);
1778*0fca6ea1SDimitry Andric   }
1779*0fca6ea1SDimitry Andric 
1780*0fca6ea1SDimitry Andric   StoreFatPtrsAsIntsVisitor MemOpsRewrite(&IntTM, M.getContext());
1781*0fca6ea1SDimitry Andric   for (Function &F : M.functions()) {
1782*0fca6ea1SDimitry Andric     bool InterfaceChange = hasFatPointerInterface(F, &StructTM);
1783*0fca6ea1SDimitry Andric     bool BodyChanges = containsBufferFatPointers(F, &StructTM);
1784*0fca6ea1SDimitry Andric     Changed |= MemOpsRewrite.processFunction(F);
1785*0fca6ea1SDimitry Andric     if (InterfaceChange || BodyChanges)
1786*0fca6ea1SDimitry Andric       NeedsRemap.push_back(std::make_pair(&F, InterfaceChange));
1787*0fca6ea1SDimitry Andric   }
1788*0fca6ea1SDimitry Andric   if (NeedsRemap.empty())
1789*0fca6ea1SDimitry Andric     return Changed;
1790*0fca6ea1SDimitry Andric 
1791*0fca6ea1SDimitry Andric   SmallVector<Function *> NeedsPostProcess;
1792*0fca6ea1SDimitry Andric   SmallVector<Function *> Intrinsics;
1793*0fca6ea1SDimitry Andric   // Keep one big map so as to memoize constants across functions.
1794*0fca6ea1SDimitry Andric   ValueToValueMapTy CloneMap;
1795*0fca6ea1SDimitry Andric   FatPtrConstMaterializer Materializer(&StructTM, CloneMap);
1796*0fca6ea1SDimitry Andric 
1797*0fca6ea1SDimitry Andric   ValueMapper LowerInFuncs(CloneMap, RF_None, &StructTM, &Materializer);
1798*0fca6ea1SDimitry Andric   for (auto [F, InterfaceChange] : NeedsRemap) {
1799*0fca6ea1SDimitry Andric     Function *NewF = F;
1800*0fca6ea1SDimitry Andric     if (InterfaceChange)
1801*0fca6ea1SDimitry Andric       NewF = moveFunctionAdaptingType(
1802*0fca6ea1SDimitry Andric           F, cast<FunctionType>(StructTM.remapType(F->getFunctionType())),
1803*0fca6ea1SDimitry Andric           CloneMap);
1804*0fca6ea1SDimitry Andric     else
1805*0fca6ea1SDimitry Andric       makeCloneInPraceMap(F, CloneMap);
1806*0fca6ea1SDimitry Andric     LowerInFuncs.remapFunction(*NewF);
1807*0fca6ea1SDimitry Andric     if (NewF->isIntrinsic())
1808*0fca6ea1SDimitry Andric       Intrinsics.push_back(NewF);
1809*0fca6ea1SDimitry Andric     else
1810*0fca6ea1SDimitry Andric       NeedsPostProcess.push_back(NewF);
1811*0fca6ea1SDimitry Andric     if (InterfaceChange) {
1812*0fca6ea1SDimitry Andric       F->replaceAllUsesWith(NewF);
1813*0fca6ea1SDimitry Andric       F->eraseFromParent();
1814*0fca6ea1SDimitry Andric     }
1815*0fca6ea1SDimitry Andric     Changed = true;
1816*0fca6ea1SDimitry Andric   }
1817*0fca6ea1SDimitry Andric   StructTM.clear();
1818*0fca6ea1SDimitry Andric   IntTM.clear();
1819*0fca6ea1SDimitry Andric   CloneMap.clear();
1820*0fca6ea1SDimitry Andric 
1821*0fca6ea1SDimitry Andric   SplitPtrStructs Splitter(M.getContext(), &TM);
1822*0fca6ea1SDimitry Andric   for (Function *F : NeedsPostProcess)
1823*0fca6ea1SDimitry Andric     Splitter.processFunction(*F);
1824*0fca6ea1SDimitry Andric   for (Function *F : Intrinsics) {
1825*0fca6ea1SDimitry Andric     if (isRemovablePointerIntrinsic(F->getIntrinsicID())) {
1826*0fca6ea1SDimitry Andric       F->eraseFromParent();
1827*0fca6ea1SDimitry Andric     } else {
1828*0fca6ea1SDimitry Andric       std::optional<Function *> NewF = Intrinsic::remangleIntrinsicFunction(F);
1829*0fca6ea1SDimitry Andric       if (NewF)
1830*0fca6ea1SDimitry Andric         F->replaceAllUsesWith(*NewF);
1831*0fca6ea1SDimitry Andric     }
1832*0fca6ea1SDimitry Andric   }
1833*0fca6ea1SDimitry Andric   return Changed;
1834*0fca6ea1SDimitry Andric }
1835*0fca6ea1SDimitry Andric 
1836*0fca6ea1SDimitry Andric bool AMDGPULowerBufferFatPointers::runOnModule(Module &M) {
1837*0fca6ea1SDimitry Andric   TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
1838*0fca6ea1SDimitry Andric   const TargetMachine &TM = TPC.getTM<TargetMachine>();
1839*0fca6ea1SDimitry Andric   return run(M, TM);
1840*0fca6ea1SDimitry Andric }
1841*0fca6ea1SDimitry Andric 
1842*0fca6ea1SDimitry Andric char AMDGPULowerBufferFatPointers::ID = 0;
1843*0fca6ea1SDimitry Andric 
1844*0fca6ea1SDimitry Andric char &llvm::AMDGPULowerBufferFatPointersID = AMDGPULowerBufferFatPointers::ID;
1845*0fca6ea1SDimitry Andric 
1846*0fca6ea1SDimitry Andric void AMDGPULowerBufferFatPointers::getAnalysisUsage(AnalysisUsage &AU) const {
1847*0fca6ea1SDimitry Andric   AU.addRequired<TargetPassConfig>();
1848*0fca6ea1SDimitry Andric }
1849*0fca6ea1SDimitry Andric 
1850*0fca6ea1SDimitry Andric #define PASS_DESC "Lower buffer fat pointer operations to buffer resources"
1851*0fca6ea1SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPULowerBufferFatPointers, DEBUG_TYPE, PASS_DESC,
1852*0fca6ea1SDimitry Andric                       false, false)
1853*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1854*0fca6ea1SDimitry Andric INITIALIZE_PASS_END(AMDGPULowerBufferFatPointers, DEBUG_TYPE, PASS_DESC, false,
1855*0fca6ea1SDimitry Andric                     false)
1856*0fca6ea1SDimitry Andric #undef PASS_DESC
1857*0fca6ea1SDimitry Andric 
1858*0fca6ea1SDimitry Andric ModulePass *llvm::createAMDGPULowerBufferFatPointersPass() {
1859*0fca6ea1SDimitry Andric   return new AMDGPULowerBufferFatPointers();
1860*0fca6ea1SDimitry Andric }
1861*0fca6ea1SDimitry Andric 
1862*0fca6ea1SDimitry Andric PreservedAnalyses
1863*0fca6ea1SDimitry Andric AMDGPULowerBufferFatPointersPass::run(Module &M, ModuleAnalysisManager &MA) {
1864*0fca6ea1SDimitry Andric   return AMDGPULowerBufferFatPointers().run(M, TM) ? PreservedAnalyses::none()
1865*0fca6ea1SDimitry Andric                                                    : PreservedAnalyses::all();
1866*0fca6ea1SDimitry Andric }
1867