//===-- AMDGPULowerBufferFatPointers.cpp ---------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers operations on buffer fat pointers (addrspace 7) to
// operations on buffer resources (addrspace 8) and is needed for correct
// codegen.
//
// # Background
//
// Address space 7 (the buffer fat pointer) is a 160-bit pointer that consists
// of a 128-bit buffer descriptor and a 32-bit offset into that descriptor.
// The buffer resource part needs to be a "raw" buffer resource (it must have
// a stride of 0 and bounds checks must be in raw buffer mode or disabled).
//
// When these requirements are met, a buffer resource can be treated as a
// typical (though quite wide) pointer that follows typical LLVM pointer
// semantics. This allows the frontend to reason about such buffers (which are
// often encountered in the context of SPIR-V kernels).
//
// However, because of their non-power-of-2 size, these fat pointers cannot be
// present during translation to MIR (though this restriction may be lifted
// during the transition to GlobalISel). Therefore, this pass is needed in
// order to correctly implement these fat pointers.
//
// The resource intrinsics take the resource part (the address space 8 pointer)
// and the offset part (the 32-bit integer) as separate arguments. In addition,
// many users of these buffers manipulate the offset while leaving the resource
// part alone. For these reasons, we want to typically separate the resource
// and offset parts into separate variables, but combine them together when
// encountering cases where this is required, such as by inserting these values
// into aggregates or moving them to memory.
//
// Therefore, at a high level, `ptr addrspace(7) %x` becomes `ptr addrspace(8)
// %x.rsrc` and `i32 %x.off`, which will be combined into `{ptr addrspace(8),
// i32} %x = {%x.rsrc, %x.off}` if needed. Similarly, `vector<Nxp7>` becomes
// `{vector<Nxp8>, vector<Nxi32>}` and its component parts.
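//
// For example (a sketch only; the value names here are hypothetical), when a
// fat pointer must be inserted into an aggregate, the combined struct form is
// rebuilt from the two parts:
// ```
// %agg = insertvalue {i32, ptr addrspace(7)} %in, ptr addrspace(7) %x, 1
// ```
// conceptually becomes
// ```
// %x.s.0 = insertvalue {ptr addrspace(8), i32} poison, ptr addrspace(8) %x.rsrc, 0
// %x.s = insertvalue {ptr addrspace(8), i32} %x.s.0, i32 %x.off, 1
// %agg = insertvalue {i32, {ptr addrspace(8), i32}} %in, {ptr addrspace(8), i32} %x.s, 1
// ```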
//
// # Implementation
//
// This pass proceeds in three main phases:
//
// ## Rewriting loads and stores of p7
//
// The first phase is to rewrite away all loads and stores of
// `ptr addrspace(7)`, including aggregates containing such pointers, to ones
// that use `i160`. This is handled by `StoreFatPtrsAsIntsVisitor`, which
// visits loads, stores, and allocas and, if the loaded or stored type contains
// `ptr addrspace(7)`, rewrites that type to one where the p7s are replaced by
// i160s, copying other parts of aggregates as needed. In the case of a store,
// each pointer is `ptrtoint`d to i160 before storing, and loaded integers are
// `inttoptr`d back. This same transformation is applied to vectors of
// pointers.
//
// Such a transformation allows the later phases of the pass to not need
// to handle buffer fat pointers moving to and from memory, where we would
// have to handle the incompatibility between a `{Nxp8, Nxi32}` representation
// and `Nxi160` directly. Instead, that transposing action (where the vectors
// of resources and vectors of offsets are concatenated before being stored to
// memory) is handled through implementing `inttoptr` and `ptrtoint` only.
//
// Atomic operations on `ptr addrspace(7)` values are not supported, as the
// hardware does not include a 160-bit atomic.
//
// ## Type remapping
//
// We use a `ValueMapper` to mangle uses of [vectors of] buffer fat pointers
// to the corresponding struct type, which has a resource part and an offset
// part.
//
// This uses a `BufferFatPtrToStructTypeMap` and a `FatPtrConstMaterializer`
// to do so, usually by way of `setType`ing values. Constants are handled here
// because there isn't a good way to fix them up later.
//
// This has the downside of leaving the IR in an invalid state (for example,
// the instruction `getelementptr {ptr addrspace(8), i32} %p, ...` will exist),
// but all such invalid states will be resolved by the third phase.
//
// Functions that don't take buffer fat pointers are modified in place. Those
// that do take such pointers have their basic blocks moved to a new function
// with arguments that are {ptr addrspace(8), i32} arguments and return values.
// This phase also records intrinsics so that they can be remangled or deleted
// later.
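//
// As a rough sketch of that rewrite (the function and value names here are
// illustrative, not taken from any particular input), a function such as
// ```
// define float @f(ptr addrspace(7) %p) {
//   %v = load float, ptr addrspace(7) %p
//   ret float %v
// }
// ```
// is moved into a new function whose signature uses the struct form,
// ```
// define float @f({ptr addrspace(8), i32} %p) { ... }
// ```
// with the body then rewritten by the splitting phase described below.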
//
//
// ## Splitting pointer structs
//
// The meat of this pass consists of defining semantics for operations that
// produce or consume [vectors of] buffer fat pointers in terms of their
// resource and offset parts. This is accomplished through the
// `SplitPtrStructs` visitor.
//
// In the first pass through each function that is being lowered, the splitter
// inserts new instructions to implement the split-structures behavior, which
// is needed for correctness and performance. It records a list of "split
// users", instructions that are being replaced by operations on the resource
// and offset parts.
//
// Split users do not necessarily need to produce parts themselves (a
// `load float, ptr addrspace(7)` does not, for example), but, if they do not
// generate fat buffer pointers, they must RAUW in their replacement
// instructions during the initial visit.
//
// When these new instructions are created, they use the split parts recorded
// for their initial arguments in order to generate their replacements,
// creating a parallel set of instructions that does not refer to the original
// fat pointer values but instead to their resource and offset components.
//
// Instructions, such as `extractvalue`, that produce buffer fat pointers from
// sources that do not have split parts, have such parts generated using
// `extractvalue`. This is also the initial handling of PHI nodes, which
// are then cleaned up.
//
// ### Conditionals
//
// PHI nodes are initially given resource parts via `extractvalue`. However,
// this is not an efficient rewrite of such nodes, as, in most cases, the
// resource part in a conditional or loop remains constant throughout the loop
// and only the offset varies. Failing to optimize away these constant
// resources would cause additional registers to be sent around loops and
// might lead to waterfall loops being generated for buffer operations due to
// the "non-uniform" resource argument.
//
// Therefore, after all instructions have been visited, the pointer splitter
// post-processes all encountered conditionals.
// Given a PHI node or select, getPossibleRsrcRoots() collects all values that
// the resource parts of that conditional's input could come from, as well as
// collecting all conditional instructions encountered during the search. If,
// after filtering out the initial node itself, the set of encountered
// conditionals is a subset of the potential roots and there is a single
// potential resource that isn't in the conditional set, that value is the only
// possible value the resource argument could have throughout the control flow.
//
// If that condition is met, then a PHI node can have its resource part changed
// to the singleton value and then be replaced by a PHI on the offsets.
// Otherwise, each PHI node is split into two, one for the resource part and
// one for the offset part, which replace the temporary `extractvalue`
// instructions that were added during the first pass.
//
// Similar logic applies to `select`, where
// `%z = select i1 %cond, ptr addrspace(7) %x, ptr addrspace(7) %y`
// can be split into `%z.rsrc = %x.rsrc` and
// `%z.off = select i1 %cond, i32 %x.off, i32 %y.off`
// if both `%x` and `%y` have the same resource part, but two `select`
// operations will be needed if they do not.
//
// ### Final processing
//
// After conditionals have been cleaned up, the IR for each function is
// rewritten to remove all the old instructions that have been split up.
//
// Any instruction that used to produce a buffer fat pointer (and therefore now
// produces a resource-and-offset struct after type remapping) is
// replaced as follows:
// 1. All debug value annotations are cloned to reflect that the resource part
//    and offset parts are computed separately and constitute different
//    fragments of the underlying source language variable.
// 2. All uses that were themselves split are replaced by a `poison` of the
//    struct type, as they will themselves be erased soon. This rule, combined
//    with debug handling, should leave the use lists of split instructions
//    empty in almost all cases.
// 3. If a user of the original struct-valued result remains, the structure
//    needed for the new types to work is constructed out of the newly-defined
//    parts, and the original instruction is replaced by this structure
//    before being erased. Instructions requiring this construction include
//    `ret` and `insertvalue`.
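//
// As a rough sketch of the conditional handling (block and value names are
// illustrative), a loop that strides a fat pointer forward, such as
// ```
// loop:
//   %p = phi ptr addrspace(7) [ %base, %entry ], [ %p.next, %loop ]
//   %p.next = getelementptr i32, ptr addrspace(7) %p, i32 1
// ```
// ends up with a PHI on the offset only, since the resource part is
// loop-invariant:
// ```
// loop:
//   %p.off = phi i32 [ %base.off, %entry ], [ %p.next.off, %loop ]
//   %p.next.off = add i32 %p.off, 4
// ```
// with `%base.rsrc` used directly wherever the resource part of `%p` is
// needed.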
//
// # Consequences
//
// This pass does not alter the CFG.
//
// Alias analysis information will become coarser, as the LLVM alias analyzer
// cannot handle the buffer intrinsics. Specifically, while we can determine
// that the following two loads do not alias:
// ```
// %y = getelementptr i32, ptr addrspace(7) %x, i32 1
// %a = load i32, ptr addrspace(7) %x
// %b = load i32, ptr addrspace(7) %y
// ```
// we cannot (except through some code that runs during scheduling) determine
// that the rewritten loads below do not alias.
// ```
// %y.off = add i32 %x.off, 1
// %a = call @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) %x.rsrc, i32
//     %x.off, ...)
// %b = call @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8)
//     %x.rsrc, i32 %y.off, ...)
// ```
// However, existing alias information is preserved.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

#define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"

using namespace llvm;

static constexpr unsigned BufferOffsetWidth = 32;

namespace {
/// Recursively replace instances of ptr addrspace(7) and vector<Nxptr
/// addrspace(7)> with some other type as defined by the relevant subclass.
class BufferFatPtrTypeLoweringBase : public ValueMapTypeRemapper {
  DenseMap<Type *, Type *> Map;

  Type *remapTypeImpl(Type *Ty, SmallPtrSetImpl<StructType *> &Seen);

protected:
  virtual Type *remapScalar(PointerType *PT) = 0;
  virtual Type *remapVector(VectorType *VT) = 0;

  const DataLayout &DL;

public:
  BufferFatPtrTypeLoweringBase(const DataLayout &DL) : DL(DL) {}
  Type *remapType(Type *SrcTy) override;
  void clear() { Map.clear(); }
};

/// Remap ptr addrspace(7) to i160 and vector<Nxptr addrspace(7)> to
/// vector<Nxi160> in order to correctly handle loading/storing these values
/// from memory.
class BufferFatPtrToIntTypeMap : public BufferFatPtrTypeLoweringBase {
  using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;

protected:
  Type *remapScalar(PointerType *PT) override { return DL.getIntPtrType(PT); }
  Type *remapVector(VectorType *VT) override { return DL.getIntPtrType(VT); }
};

/// Remap ptr addrspace(7) to {ptr addrspace(8), i32} (the resource and offset
/// parts of the pointer) so that we can easily rewrite operations on these
/// values that aren't loading them from or storing them to memory.
class BufferFatPtrToStructTypeMap : public BufferFatPtrTypeLoweringBase {
  using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;

protected:
  Type *remapScalar(PointerType *PT) override;
  Type *remapVector(VectorType *VT) override;
};
} // namespace

// This code is adapted from the type remapper in lib/Linker/IRMover.cpp
Type *BufferFatPtrTypeLoweringBase::remapTypeImpl(
    Type *Ty, SmallPtrSetImpl<StructType *> &Seen) {
  Type **Entry = &Map[Ty];
  if (*Entry)
    return *Entry;
  if (auto *PT = dyn_cast<PointerType>(Ty)) {
    if (PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER) {
      return *Entry = remapScalar(PT);
    }
  }
  if (auto *VT = dyn_cast<VectorType>(Ty)) {
    auto *PT = dyn_cast<PointerType>(VT->getElementType());
    if (PT && PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER) {
      return *Entry = remapVector(VT);
    }
    return *Entry = Ty;
  }
  // Whether the type is one that is structurally uniqued - that is, whether it
  // is not a named struct (the only kind of type for which structurally
  // identical instances can have distinct `Type *`s).
  StructType *TyAsStruct = dyn_cast<StructType>(Ty);
  bool IsUniqued = !TyAsStruct || TyAsStruct->isLiteral();
  // Base case for ints, floats, opaque pointers, and so on, which don't
  // require recursion.
  if (Ty->getNumContainedTypes() == 0 && IsUniqued)
    return *Entry = Ty;
  if (!IsUniqued) {
    // Create a dummy type for recursion purposes.
    if (!Seen.insert(TyAsStruct).second) {
      StructType *Placeholder = StructType::create(Ty->getContext());
      return *Entry = Placeholder;
    }
  }
  bool Changed = false;
  SmallVector<Type *> ElementTypes(Ty->getNumContainedTypes(), nullptr);
  for (unsigned int I = 0, E = Ty->getNumContainedTypes(); I < E; ++I) {
    Type *OldElem = Ty->getContainedType(I);
    Type *NewElem = remapTypeImpl(OldElem, Seen);
    ElementTypes[I] = NewElem;
    Changed |= (OldElem != NewElem);
  }
  // Recursive calls to remapTypeImpl() may have invalidated the pointer.
  Entry = &Map[Ty];
  if (!Changed) {
    return *Entry = Ty;
  }
  if (auto *ArrTy = dyn_cast<ArrayType>(Ty))
    return *Entry = ArrayType::get(ElementTypes[0], ArrTy->getNumElements());
  if (auto *FnTy = dyn_cast<FunctionType>(Ty))
    return *Entry = FunctionType::get(ElementTypes[0],
                                      ArrayRef(ElementTypes).slice(1),
                                      FnTy->isVarArg());
  if (auto *STy = dyn_cast<StructType>(Ty)) {
    // Genuine opaque types don't have a remapping.
    if (STy->isOpaque())
      return *Entry = Ty;
    bool IsPacked = STy->isPacked();
    if (IsUniqued)
      return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked);
    SmallString<16> Name(STy->getName());
    STy->setName("");
    Type **RecursionEntry = &Map[Ty];
    if (*RecursionEntry) {
      auto *Placeholder = cast<StructType>(*RecursionEntry);
      Placeholder->setBody(ElementTypes, IsPacked);
      Placeholder->setName(Name);
      return *Entry = Placeholder;
    }
    return *Entry = StructType::create(Ty->getContext(), ElementTypes, Name,
                                       IsPacked);
  }
  llvm_unreachable("Unknown type of type that contains elements");
}

Type *BufferFatPtrTypeLoweringBase::remapType(Type *SrcTy) {
  SmallPtrSet<StructType *, 2> Visited;
  return remapTypeImpl(SrcTy, Visited);
}

Type *BufferFatPtrToStructTypeMap::remapScalar(PointerType *PT) {
  LLVMContext &Ctx = PT->getContext();
  return StructType::get(PointerType::get(Ctx, AMDGPUAS::BUFFER_RESOURCE),
                         IntegerType::get(Ctx, BufferOffsetWidth));
}

Type *BufferFatPtrToStructTypeMap::remapVector(VectorType *VT) {
  ElementCount EC = VT->getElementCount();
  LLVMContext &Ctx = VT->getContext();
  Type *RsrcVec =
      VectorType::get(PointerType::get(Ctx, AMDGPUAS::BUFFER_RESOURCE), EC);
  Type *OffVec = VectorType::get(IntegerType::get(Ctx, BufferOffsetWidth), EC);
  return StructType::get(RsrcVec, OffVec);
}

static bool isBufferFatPtrOrVector(Type *Ty) {
  if (auto *PT = dyn_cast<PointerType>(Ty->getScalarType()))
    return PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER;
  return false;
}

// True if the type is {ptr addrspace(8), i32} or a struct containing vectors
// of those types. Used to quickly skip instructions we don't need to process.
static bool isSplitFatPtr(Type *Ty) {
  auto *ST = dyn_cast<StructType>(Ty);
  if (!ST)
    return false;
  if (!ST->isLiteral() || ST->getNumElements() != 2)
    return false;
  auto *MaybeRsrc =
      dyn_cast<PointerType>(ST->getElementType(0)->getScalarType());
  auto *MaybeOff =
      dyn_cast<IntegerType>(ST->getElementType(1)->getScalarType());
  return MaybeRsrc && MaybeOff &&
         MaybeRsrc->getAddressSpace() == AMDGPUAS::BUFFER_RESOURCE &&
         MaybeOff->getBitWidth() == BufferOffsetWidth;
}

// True if the result type or any argument types are buffer fat pointers.
static bool isBufferFatPtrConst(Constant *C) {
  Type *T = C->getType();
  return isBufferFatPtrOrVector(T) || any_of(C->operands(), [](const Use &U) {
           return isBufferFatPtrOrVector(U.get()->getType());
         });
}

namespace {
/// Convert [vectors of] buffer fat pointers to integers when they are read
/// from or stored to memory. This ensures that these pointers will have the
/// same memory layout as before they are lowered, even though they will no
/// longer have their previous layout in registers/in the program (they'll be
/// broken down into resource and offset parts). This has the downside of
/// imposing marshalling costs when reading or storing these values, but since
/// placing such pointers into memory is an uncommon operation at best, we feel
/// that this cost is acceptable for better performance in the common case.
class StoreFatPtrsAsIntsVisitor
    : public InstVisitor<StoreFatPtrsAsIntsVisitor, bool> {
  BufferFatPtrToIntTypeMap *TypeMap;

  ValueToValueMapTy ConvertedForStore;

  IRBuilder<> IRB;

  // Convert all the buffer fat pointers within the input value to integers
  // so that it can be stored in memory.
  Value *fatPtrsToInts(Value *V, Type *From, Type *To, const Twine &Name);
  // Convert all the i160s that need to be buffer fat pointers (as specified
  // by the To type) into those pointers to preserve the semantics of the rest
  // of the program.
  Value *intsToFatPtrs(Value *V, Type *From, Type *To, const Twine &Name);

public:
  StoreFatPtrsAsIntsVisitor(BufferFatPtrToIntTypeMap *TypeMap, LLVMContext &Ctx)
      : TypeMap(TypeMap), IRB(Ctx) {}
  bool processFunction(Function &F);

  bool visitInstruction(Instruction &I) { return false; }
  bool visitAllocaInst(AllocaInst &I);
  bool visitLoadInst(LoadInst &LI);
  bool visitStoreInst(StoreInst &SI);
  bool visitGetElementPtrInst(GetElementPtrInst &I);
};
} // namespace

Value *StoreFatPtrsAsIntsVisitor::fatPtrsToInts(Value *V, Type *From, Type *To,
                                                const Twine &Name) {
  if (From == To)
    return V;
  ValueToValueMapTy::iterator Find = ConvertedForStore.find(V);
  if (Find != ConvertedForStore.end())
    return Find->second;
  if (isBufferFatPtrOrVector(From)) {
    Value *Cast = IRB.CreatePtrToInt(V, To, Name + ".int");
    ConvertedForStore[V] = Cast;
    return Cast;
  }
  if (From->getNumContainedTypes() == 0)
    return V;
  // Structs, arrays, and other compound types.
  Value *Ret = PoisonValue::get(To);
  if (auto *AT = dyn_cast<ArrayType>(From)) {
    Type *FromPart = AT->getArrayElementType();
    Type *ToPart = cast<ArrayType>(To)->getElementType();
    for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {
      Value *Field = IRB.CreateExtractValue(V, I);
      Value *NewField =
          fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(I));
      Ret = IRB.CreateInsertValue(Ret, NewField, I);
    }
  } else {
    for (auto [Idx, FromPart, ToPart] :
         enumerate(From->subtypes(), To->subtypes())) {
      Value *Field = IRB.CreateExtractValue(V, Idx);
      Value *NewField =
          fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(Idx));
      Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
    }
  }
  ConvertedForStore[V] = Ret;
  return Ret;
}

Value *StoreFatPtrsAsIntsVisitor::intsToFatPtrs(Value *V, Type *From, Type *To,
                                                const Twine &Name) {
  if (From == To)
    return V;
  if (isBufferFatPtrOrVector(To)) {
    Value *Cast = IRB.CreateIntToPtr(V, To, Name + ".ptr");
    return Cast;
  }
  if (From->getNumContainedTypes() == 0)
    return V;
  // Structs, arrays, and other compound types.
  Value *Ret = PoisonValue::get(To);
  if (auto *AT = dyn_cast<ArrayType>(From)) {
    Type *FromPart = AT->getArrayElementType();
    Type *ToPart = cast<ArrayType>(To)->getElementType();
    for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {
      Value *Field = IRB.CreateExtractValue(V, I);
      Value *NewField =
          intsToFatPtrs(Field, FromPart, ToPart, Name + "." + Twine(I));
      Ret = IRB.CreateInsertValue(Ret, NewField, I);
    }
  } else {
    for (auto [Idx, FromPart, ToPart] :
         enumerate(From->subtypes(), To->subtypes())) {
      Value *Field = IRB.CreateExtractValue(V, Idx);
      Value *NewField =
          intsToFatPtrs(Field, FromPart, ToPart, Name + "." + Twine(Idx));
      Ret = IRB.CreateInsertValue(Ret, NewField, Idx);
    }
  }
  return Ret;
}

bool StoreFatPtrsAsIntsVisitor::processFunction(Function &F) {
  bool Changed = false;
  // The visitors will mutate GEPs and allocas, but will push loads and stores
  // to the worklist to avoid invalidation.
  for (Instruction &I : make_early_inc_range(instructions(F))) {
    Changed |= visit(I);
  }
  ConvertedForStore.clear();
  return Changed;
}

bool StoreFatPtrsAsIntsVisitor::visitAllocaInst(AllocaInst &I) {
  Type *Ty = I.getAllocatedType();
  Type *NewTy = TypeMap->remapType(Ty);
  if (Ty == NewTy)
    return false;
  I.setAllocatedType(NewTy);
  return true;
}

bool StoreFatPtrsAsIntsVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
  Type *Ty = I.getSourceElementType();
  Type *NewTy = TypeMap->remapType(Ty);
  if (Ty == NewTy)
    return false;
  // We'll be rewriting the type `ptr addrspace(7)` out of existence soon, so
  // make sure GEPs don't have different semantics with the new type.
  I.setSourceElementType(NewTy);
  I.setResultElementType(TypeMap->remapType(I.getResultElementType()));
  return true;
}

bool StoreFatPtrsAsIntsVisitor::visitLoadInst(LoadInst &LI) {
  Type *Ty = LI.getType();
  Type *IntTy = TypeMap->remapType(Ty);
  if (Ty == IntTy)
    return false;

  IRB.SetInsertPoint(&LI);
  auto *NLI = cast<LoadInst>(LI.clone());
  NLI->mutateType(IntTy);
  NLI = IRB.Insert(NLI);
  copyMetadataForLoad(*NLI, LI);
  NLI->takeName(&LI);

  Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName());
  LI.replaceAllUsesWith(CastBack);
  LI.eraseFromParent();
  return true;
}

bool StoreFatPtrsAsIntsVisitor::visitStoreInst(StoreInst &SI) {
  Value *V = SI.getValueOperand();
  Type *Ty = V->getType();
  Type *IntTy = TypeMap->remapType(Ty);
  if (Ty == IntTy)
    return false;

  IRB.SetInsertPoint(&SI);
  Value *IntV = fatPtrsToInts(V, Ty, IntTy, V->getName());
  for (auto *Dbg : at::getAssignmentMarkers(&SI))
    Dbg->setValue(IntV);

  SI.setOperand(0, IntV);
  return true;
}

/// Return the ptr addrspace(8) and i32 (resource and offset parts) in a
/// lowered buffer fat pointer constant.
static std::pair<Constant *, Constant *>
splitLoweredFatBufferConst(Constant *C) {
  assert(isSplitFatPtr(C->getType()) && "Not a split fat buffer pointer");
  return std::make_pair(C->getAggregateElement(0u), C->getAggregateElement(1u));
}

namespace {
/// Handle the remapping of ptr addrspace(7) constants.
class FatPtrConstMaterializer final : public ValueMaterializer {
  BufferFatPtrToStructTypeMap *TypeMap;
  // An internal mapper that is used to recurse into the arguments of
  // constants. While the documentation for `ValueMapper` specifies not to use
  // it recursively, examination of the logic in mapValue() shows that it can
  // safely be used recursively when handling constants, like it does in its
  // own logic.
  ValueMapper InternalMapper;

  Constant *materializeBufferFatPtrConst(Constant *C);

public:
  // UnderlyingMap is the value map this materializer will be filling.
  FatPtrConstMaterializer(BufferFatPtrToStructTypeMap *TypeMap,
                          ValueToValueMapTy &UnderlyingMap)
      : TypeMap(TypeMap),
        InternalMapper(UnderlyingMap, RF_None, TypeMap, this) {}
  virtual ~FatPtrConstMaterializer() = default;

  Value *materialize(Value *V) override;
};
} // namespace

Constant *FatPtrConstMaterializer::materializeBufferFatPtrConst(Constant *C) {
  Type *SrcTy = C->getType();
  auto *NewTy = dyn_cast<StructType>(TypeMap->remapType(SrcTy));
  if (C->isNullValue())
    return ConstantAggregateZero::getNullValue(NewTy);
  if (isa<PoisonValue>(C)) {
    return ConstantStruct::get(NewTy,
                               {PoisonValue::get(NewTy->getElementType(0)),
                                PoisonValue::get(NewTy->getElementType(1))});
  }
  if (isa<UndefValue>(C)) {
    return ConstantStruct::get(NewTy,
                               {UndefValue::get(NewTy->getElementType(0)),
                                UndefValue::get(NewTy->getElementType(1))});
  }

  if (auto *VC = dyn_cast<ConstantVector>(C)) {
    if (Constant *S = VC->getSplatValue()) {
      Constant *NewS = InternalMapper.mapConstant(*S);
      if (!NewS)
        return nullptr;
      auto [Rsrc, Off] = splitLoweredFatBufferConst(NewS);
      auto EC = VC->getType()->getElementCount();
      return ConstantStruct::get(NewTy,
                                 {ConstantVector::getSplat(EC, Rsrc),
                                  ConstantVector::getSplat(EC, Off)});
    }
    SmallVector<Constant *> Rsrcs;
    SmallVector<Constant *> Offs;
    for (Value *Op : VC->operand_values()) {
      auto *NewOp = dyn_cast_or_null<Constant>(InternalMapper.mapValue(*Op));
      if (!NewOp)
        return nullptr;
      auto [Rsrc, Off] = splitLoweredFatBufferConst(NewOp);
      Rsrcs.push_back(Rsrc);
      Offs.push_back(Off);
    }
    Constant *RsrcVec = ConstantVector::get(Rsrcs);
    Constant *OffVec = ConstantVector::get(Offs);
    return ConstantStruct::get(NewTy, {RsrcVec, OffVec});
  }

  if (isa<GlobalValue>(C))
    report_fatal_error("Global values containing ptr addrspace(7) (buffer "
                       "fat pointer) values are not supported");

  if (isa<ConstantExpr>(C))
    report_fatal_error("Constant exprs containing ptr addrspace(7) (buffer "
                       "fat pointer) values should have been expanded earlier");

  return nullptr;
}

Value *FatPtrConstMaterializer::materialize(Value *V) {
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return nullptr;
  // Structs and other types that happen to contain fat pointers get remapped
  // by the mapValue() logic.
  if (!isBufferFatPtrConst(C))
    return nullptr;
  return materializeBufferFatPtrConst(C);
}

using PtrParts = std::pair<Value *, Value *>;
namespace {
// The visitor returns the resource and offset parts for an instruction if
// they can be computed, or (nullptr, nullptr) for cases that don't have a
// meaningful value mapping.
class SplitPtrStructs : public InstVisitor<SplitPtrStructs, PtrParts> {
  ValueToValueMapTy RsrcParts;
  ValueToValueMapTy OffParts;

  // Track instructions that have been rewritten into a user of the component
  // parts of their ptr addrspace(7) input. Instructions that produced
  // ptr addrspace(7) parts should **not** be RAUW'd before being added to this
  // set, as that replacement will be handled in a post-visit step. However,
  // instructions that yield values that aren't fat pointers (ex. ptrtoint)
  // should RAUW themselves with new instructions that use the split parts
  // of their arguments during processing.
  DenseSet<Instruction *> SplitUsers;

  // Nodes that need a second look once we've computed the parts for all other
  // instructions to see if, for example, we really need to phi on the resource
  // part.
  SmallVector<Instruction *> Conditionals;
  // Temporary instructions produced while lowering conditionals that should be
  // killed.
  SmallVector<Instruction *> ConditionalTemps;

  // Subtarget info, needed for determining what cache control bits to set.
  const TargetMachine *TM;
  const GCNSubtarget *ST = nullptr;

  IRBuilder<> IRB;

  // Copy metadata between instructions if applicable.
  void copyMetadata(Value *Dest, Value *Src);

  // Get the resource and offset parts of the value V, inserting appropriate
  // extractvalue calls if needed.
  PtrParts getPtrParts(Value *V);

  // Given an instruction that could produce multiple resource parts (a PHI or
  // select), collect the set of possible instructions that could have provided
  // its resource parts (the `Roots`) and the set of conditional instructions
  // visited during the search (`Seen`). If, after removing the root of the
  // search from `Seen` and `Roots`, `Seen` is a subset of `Roots` and
  // `Roots - Seen` contains one element, the resource part of that element can
  // replace the resource part of all other elements in `Seen`.
  void getPossibleRsrcRoots(Instruction *I, SmallPtrSetImpl<Value *> &Roots,
                            SmallPtrSetImpl<Value *> &Seen);
  void processConditionals();

  // If an instruction has been split into resource and offset parts,
  // delete that instruction. If any of its uses have not themselves been split
  // into parts (for example, an insertvalue), construct the structure
  // that the type rewrites declared should be produced by the dying
  // instruction and use that.
  // Also, kill the temporary extractvalue operations produced by the two-stage
  // lowering of PHIs and conditionals.
  void killAndReplaceSplitInstructions(SmallVectorImpl<Instruction *> &Origs);

  void setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx);
  void insertPreMemOpFence(AtomicOrdering Order, SyncScope::ID SSID);
  void insertPostMemOpFence(AtomicOrdering Order, SyncScope::ID SSID);
  Value *handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr, Type *Ty,
                          Align Alignment, AtomicOrdering Order,
                          bool IsVolatile, SyncScope::ID SSID);

public:
  SplitPtrStructs(LLVMContext &Ctx, const TargetMachine *TM)
      : TM(TM), IRB(Ctx) {}

  void processFunction(Function &F);

  PtrParts visitInstruction(Instruction &I);
  PtrParts visitLoadInst(LoadInst &LI);
  PtrParts visitStoreInst(StoreInst &SI);
  PtrParts visitAtomicRMWInst(AtomicRMWInst &AI);
  PtrParts visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI);
  PtrParts visitGetElementPtrInst(GetElementPtrInst &GEP);

  PtrParts visitPtrToIntInst(PtrToIntInst &PI);
  PtrParts visitIntToPtrInst(IntToPtrInst &IP);
  PtrParts visitAddrSpaceCastInst(AddrSpaceCastInst &I);
  PtrParts visitICmpInst(ICmpInst &Cmp);
  PtrParts visitFreezeInst(FreezeInst &I);

  PtrParts visitExtractElementInst(ExtractElementInst &I);
  PtrParts visitInsertElementInst(InsertElementInst &I);
  PtrParts visitShuffleVectorInst(ShuffleVectorInst &I);

  PtrParts visitPHINode(PHINode &PHI);
  PtrParts visitSelectInst(SelectInst &SI);

  PtrParts visitIntrinsicInst(IntrinsicInst &II);
};
} // namespace

void SplitPtrStructs::copyMetadata(Value *Dest, Value *Src) {
  auto *DestI = dyn_cast<Instruction>(Dest);
  auto *SrcI = dyn_cast<Instruction>(Src);

  if (!DestI || !SrcI)
    return;

  DestI->copyMetadata(*SrcI);
}

PtrParts SplitPtrStructs::getPtrParts(Value *V) {
  assert(isSplitFatPtr(V->getType()) && "it's not meaningful to get the parts "
                                        "of something that wasn't rewritten");
  auto *RsrcEntry = &RsrcParts[V];
  auto *OffEntry = &OffParts[V];
  if (*RsrcEntry && *OffEntry)
    return {*RsrcEntry, *OffEntry};

  if (auto *C = dyn_cast<Constant>(V)) {
    auto [Rsrc, Off] = splitLoweredFatBufferConst(C);
    return {*RsrcEntry = Rsrc, *OffEntry = Off};
  }

  IRBuilder<>::InsertPointGuard Guard(IRB);
  if (auto *I = dyn_cast<Instruction>(V)) {
    LLVM_DEBUG(dbgs() << "Recursing to split parts of " << *I << "\n");
    auto [Rsrc, Off] = visit(*I);
    if (Rsrc && Off)
      return {*RsrcEntry = Rsrc, *OffEntry = Off};
    // We'll be creating the new values after the relevant instruction.
    // This instruction generates a value and so isn't a terminator.
    IRB.SetInsertPoint(*I->getInsertionPointAfterDef());
    IRB.SetCurrentDebugLocation(I->getDebugLoc());
  } else if (auto *A = dyn_cast<Argument>(V)) {
    IRB.SetInsertPointPastAllocas(A->getParent());
    IRB.SetCurrentDebugLocation(DebugLoc());
  }
  Value *Rsrc = IRB.CreateExtractValue(V, 0, V->getName() + ".rsrc");
  Value *Off = IRB.CreateExtractValue(V, 1, V->getName() + ".off");
  return {*RsrcEntry = Rsrc, *OffEntry = Off};
}

/// Returns the instruction that defines the resource part of the value V.
/// Note that this is not getUnderlyingObject(), since that looks through
/// operations like ptrmask which might modify the resource part.
///
/// We can limit ourselves to just looking through GEPs followed by looking
/// through addrspacecasts because only those two operations preserve the
/// resource part, and because operations on an `addrspace(8)` (which is the
/// legal input to this addrspacecast) would produce a different resource part.
static Value *rsrcPartRoot(Value *V) {
  while (auto *GEP = dyn_cast<GEPOperator>(V))
    V = GEP->getPointerOperand();
  while (auto *ASC = dyn_cast<AddrSpaceCastOperator>(V))
    V = ASC->getPointerOperand();
  return V;
}

void SplitPtrStructs::getPossibleRsrcRoots(Instruction *I,
                                           SmallPtrSetImpl<Value *> &Roots,
                                           SmallPtrSetImpl<Value *> &Seen) {
  if (auto *PHI = dyn_cast<PHINode>(I)) {
    if (!Seen.insert(I).second)
      return;
    for (Value *In : PHI->incoming_values()) {
      In = rsrcPartRoot(In);
      Roots.insert(In);
      if (isa<PHINode, SelectInst>(In))
        getPossibleRsrcRoots(cast<Instruction>(In), Roots, Seen);
    }
  } else if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (!Seen.insert(SI).second)
      return;
    Value *TrueVal = rsrcPartRoot(SI->getTrueValue());
    Value *FalseVal = rsrcPartRoot(SI->getFalseValue());
    Roots.insert(TrueVal);
    Roots.insert(FalseVal);
    if (isa<PHINode, SelectInst>(TrueVal))
      getPossibleRsrcRoots(cast<Instruction>(TrueVal), Roots, Seen);
    if (isa<PHINode, SelectInst>(FalseVal))
      getPossibleRsrcRoots(cast<Instruction>(FalseVal), Roots, Seen);
  } else {
    llvm_unreachable("getPossibleRsrcParts() only works on phi and select");
  }
}

void SplitPtrStructs::processConditionals() {
  SmallDenseMap<Instruction *, Value *> FoundRsrcs;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 4> Seen;
  for (Instruction *I : Conditionals) {
    // These have to exist by now because we've visited these nodes.
    Value *Rsrc = RsrcParts[I];
    Value *Off = OffParts[I];
    assert(Rsrc && Off && "must have visited conditionals by now");

    std::optional<Value *> MaybeRsrc;
    auto MaybeFoundRsrc = FoundRsrcs.find(I);
    if (MaybeFoundRsrc != FoundRsrcs.end()) {
      MaybeRsrc = MaybeFoundRsrc->second;
    } else {
      IRBuilder<>::InsertPointGuard Guard(IRB);
      Roots.clear();
      Seen.clear();
      getPossibleRsrcRoots(I, Roots, Seen);
      LLVM_DEBUG(dbgs() << "Processing conditional: " << *I << "\n");
#ifndef NDEBUG
      for (Value *V : Roots)
        LLVM_DEBUG(dbgs() << "Root: " << *V << "\n");
      for (Value *V : Seen)
        LLVM_DEBUG(dbgs() << "Seen: " << *V << "\n");
#endif
      // If we are our own possible root, then we shouldn't block our
      // replacement with a valid incoming value.
      Roots.erase(I);
      // We don't want to block the optimization for conditionals that don't
      // refer to themselves but did see themselves during the traversal.
      Seen.erase(I);

      if (set_is_subset(Seen, Roots)) {
        auto Diff = set_difference(Roots, Seen);
        if (Diff.size() == 1) {
          Value *RootVal = *Diff.begin();
          // Handle the case where previous loops already looked through
          // an addrspacecast.
          if (isSplitFatPtr(RootVal->getType()))
            MaybeRsrc = std::get<0>(getPtrParts(RootVal));
          else
            MaybeRsrc = RootVal;
        }
      }
    }

    if (auto *PHI = dyn_cast<PHINode>(I)) {
      Value *NewRsrc;
      StructType *PHITy = cast<StructType>(PHI->getType());
      IRB.SetInsertPoint(*PHI->getInsertionPointAfterDef());
      IRB.SetCurrentDebugLocation(PHI->getDebugLoc());
      if (MaybeRsrc) {
        NewRsrc = *MaybeRsrc;
      } else {
        Type *RsrcTy = PHITy->getElementType(0);
        auto *RsrcPHI = IRB.CreatePHI(RsrcTy, PHI->getNumIncomingValues());
        RsrcPHI->takeName(Rsrc);
        for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {
          Value *VRsrc = std::get<0>(getPtrParts(V));
          RsrcPHI->addIncoming(VRsrc, BB);
        }
        copyMetadata(RsrcPHI, PHI);
        NewRsrc = RsrcPHI;
      }

      Type *OffTy = PHITy->getElementType(1);
      auto *NewOff = IRB.CreatePHI(OffTy, PHI->getNumIncomingValues());
      NewOff->takeName(Off);
      for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {
        assert(OffParts.count(V) && "An offset part had to be created by now");
        Value *VOff = std::get<1>(getPtrParts(V));
        NewOff->addIncoming(VOff, BB);
      }
      copyMetadata(NewOff, PHI);

      // Note: We don't eraseFromParent() the temporaries because we don't
      // want to put the correction maps in an inconsistent state. That'll be
      // handled during the rest of the killing. Also, `ValueToValueMapTy`
      // guarantees that references in that map will be updated as well.
      ConditionalTemps.push_back(cast<Instruction>(Rsrc));
      ConditionalTemps.push_back(cast<Instruction>(Off));
      Rsrc->replaceAllUsesWith(NewRsrc);
      Off->replaceAllUsesWith(NewOff);

      // Save on recomputing the cycle traversals in known-root cases.
      if (MaybeRsrc)
        for (Value *V : Seen)
          FoundRsrcs[cast<Instruction>(V)] = NewRsrc;
    } else if (isa<SelectInst>(I)) {
      if (MaybeRsrc) {
        ConditionalTemps.push_back(cast<Instruction>(Rsrc));
        Rsrc->replaceAllUsesWith(*MaybeRsrc);
        for (Value *V : Seen)
          FoundRsrcs[cast<Instruction>(V)] = *MaybeRsrc;
      }
    } else {
      llvm_unreachable("Only PHIs and selects go in the conditionals list");
    }
  }
}

void SplitPtrStructs::killAndReplaceSplitInstructions(
    SmallVectorImpl<Instruction *> &Origs) {
  for (Instruction *I : ConditionalTemps)
    I->eraseFromParent();

  for (Instruction *I : Origs) {
    if (!SplitUsers.contains(I))
      continue;

    SmallVector<DbgValueInst *> Dbgs;
    findDbgValues(Dbgs, I);
    for (auto *Dbg : Dbgs) {
      IRB.SetInsertPoint(Dbg);
      auto &DL = I->getDataLayout();
      assert(isSplitFatPtr(I->getType()) &&
             "We should've RAUW'd away loads, stores, etc. at this point");
      auto *OffDbg = cast<DbgValueInst>(Dbg->clone());
      copyMetadata(OffDbg, Dbg);
      auto [Rsrc, Off] = getPtrParts(I);

      int64_t RsrcSz = DL.getTypeSizeInBits(Rsrc->getType());
      int64_t OffSz = DL.getTypeSizeInBits(Off->getType());

      std::optional<DIExpression *> RsrcExpr =
          DIExpression::createFragmentExpression(Dbg->getExpression(), 0,
                                                 RsrcSz);
      std::optional<DIExpression *> OffExpr =
          DIExpression::createFragmentExpression(Dbg->getExpression(), RsrcSz,
                                                 OffSz);
      if (OffExpr) {
        OffDbg->setExpression(*OffExpr);
        OffDbg->replaceVariableLocationOp(I, Off);
        IRB.Insert(OffDbg);
      } else {
        OffDbg->deleteValue();
      }
      if (RsrcExpr) {
        Dbg->setExpression(*RsrcExpr);
        Dbg->replaceVariableLocationOp(I, Rsrc);
      } else {
        Dbg->replaceVariableLocationOp(I, UndefValue::get(I->getType()));
      }
    }

    Value *Poison = PoisonValue::get(I->getType());
    I->replaceUsesWithIf(Poison, [&](const Use &U) -> bool {
      if (const auto *UI = dyn_cast<Instruction>(U.getUser()))
        return SplitUsers.contains(UI);
      return false;
    });

    if (I->use_empty()) {
      I->eraseFromParent();
      continue;
    }
    IRB.SetInsertPoint(*I->getInsertionPointAfterDef());
    IRB.SetCurrentDebugLocation(I->getDebugLoc());
    auto [Rsrc, Off] = getPtrParts(I);
    Value *Struct = PoisonValue::get(I->getType());
    Struct = IRB.CreateInsertValue(Struct, Rsrc, 0);
    Struct = IRB.CreateInsertValue(Struct, Off, 1);
    copyMetadata(Struct, I);
    Struct->takeName(I);
    I->replaceAllUsesWith(Struct);
    I->eraseFromParent();
  }
}

void SplitPtrStructs::setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx) {
  LLVMContext &Ctx = Intr->getContext();
  Intr->addParamAttr(RsrcArgIdx, Attribute::getWithAlignment(Ctx, A));
}

void SplitPtrStructs::insertPreMemOpFence(AtomicOrdering Order,
                                          SyncScope::ID SSID) {
  switch (Order) {
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    IRB.CreateFence(AtomicOrdering::Release, SSID);
    break;
  default:
    break;
  }
}

void SplitPtrStructs::insertPostMemOpFence(AtomicOrdering Order,
                                           SyncScope::ID SSID) {
  switch (Order) {
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    IRB.CreateFence(AtomicOrdering::Acquire, SSID);
    break;
  default:
    break;
  }
}

Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
                                         Type *Ty, Align Alignment,
                                         AtomicOrdering Order, bool IsVolatile,
                                         SyncScope::ID SSID) {
  IRB.SetInsertPoint(I);

  auto [Rsrc, Off] = getPtrParts(Ptr);
  SmallVector<Value *, 5> Args;
  if (Arg)
    Args.push_back(Arg);
  Args.push_back(Rsrc);
  Args.push_back(Off);
  insertPreMemOpFence(Order, SSID);
  // soffset is always 0 for these cases, where we always want any offset to be
  // part of bounds checking and we don't know which parts of the GEPs are
  // uniform.
  Args.push_back(IRB.getInt32(0));

  uint32_t Aux = 0;
  bool IsInvariant =
      (isa<LoadInst>(I) && I->getMetadata(LLVMContext::MD_invariant_load));
  bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal);
  // Atomic loads and stores need glc, atomic read-modify-write doesn't.
  bool IsOneWayAtomic =
      !isa<AtomicRMWInst>(I) && Order != AtomicOrdering::NotAtomic;
  if (IsOneWayAtomic)
    Aux |= AMDGPU::CPol::GLC;
  if (IsNonTemporal && !IsInvariant)
    Aux |= AMDGPU::CPol::SLC;
  if (isa<LoadInst>(I) && ST->getGeneration() == AMDGPUSubtarget::GFX10)
    Aux |= (Aux & AMDGPU::CPol::GLC ? AMDGPU::CPol::DLC : 0);
  if (IsVolatile)
    Aux |= AMDGPU::CPol::VOLATILE;
  Args.push_back(IRB.getInt32(Aux));

  Intrinsic::ID IID = Intrinsic::not_intrinsic;
  if (isa<LoadInst>(I))
    IID = Order == AtomicOrdering::NotAtomic
              ? Intrinsic::amdgcn_raw_ptr_buffer_load
              : Intrinsic::amdgcn_raw_ptr_atomic_buffer_load;
  else if (isa<StoreInst>(I))
    IID = Intrinsic::amdgcn_raw_ptr_buffer_store;
  else if (auto *RMW = dyn_cast<AtomicRMWInst>(I)) {
    switch (RMW->getOperation()) {
    case AtomicRMWInst::Xchg:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap;
      break;
    case AtomicRMWInst::Add:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
      break;
    case AtomicRMWInst::Sub:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub;
      break;
    case AtomicRMWInst::And:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_and;
      break;
    case AtomicRMWInst::Or:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_or;
      break;
    case AtomicRMWInst::Xor:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor;
      break;
    case AtomicRMWInst::Max:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax;
      break;
    case AtomicRMWInst::Min:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin;
      break;
    case AtomicRMWInst::UMax:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax;
      break;
    case AtomicRMWInst::UMin:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin;
      break;
    case AtomicRMWInst::FAdd:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
      break;
    case AtomicRMWInst::FMax:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
      break;
    case AtomicRMWInst::FMin:
      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin;
      break;
    case AtomicRMWInst::FSub: {
      report_fatal_error("atomic floating point subtraction not supported for "
                         "buffer resources and should've been expanded away");
      break;
    }
    case AtomicRMWInst::Nand:
      report_fatal_error("atomic nand not supported for buffer resources and "
                         "should've been expanded away");
      break;
    case AtomicRMWInst::UIncWrap:
    case AtomicRMWInst::UDecWrap:
      report_fatal_error("wrapping increment/decrement not supported for "
                         "buffer resources and should've been expanded away");
      break;
    case AtomicRMWInst::BAD_BINOP:
      llvm_unreachable("Not sure how we got a bad binop");
    }
  }

  auto *Call = IRB.CreateIntrinsic(IID, Ty, Args);
  copyMetadata(Call, I);
  setAlign(Call, Alignment, Arg ? 1 : 0);
  Call->takeName(I);

  insertPostMemOpFence(Order, SSID);
  // The "no moving p7 directly" rewrites ensure that this load or store won't
  // itself need to be split into parts.
  SplitUsers.insert(I);
  I->replaceAllUsesWith(Call);
  return Call;
}

PtrParts SplitPtrStructs::visitInstruction(Instruction &I) {
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitLoadInst(LoadInst &LI) {
  if (!isSplitFatPtr(LI.getPointerOperandType()))
    return {nullptr, nullptr};
  handleMemoryInst(&LI, nullptr, LI.getPointerOperand(), LI.getType(),
                   LI.getAlign(), LI.getOrdering(), LI.isVolatile(),
                   LI.getSyncScopeID());
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitStoreInst(StoreInst &SI) {
  if (!isSplitFatPtr(SI.getPointerOperandType()))
    return {nullptr, nullptr};
  Value *Arg = SI.getValueOperand();
  handleMemoryInst(&SI, Arg, SI.getPointerOperand(), Arg->getType(),
                   SI.getAlign(), SI.getOrdering(), SI.isVolatile(),
                   SI.getSyncScopeID());
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitAtomicRMWInst(AtomicRMWInst &AI) {
  if (!isSplitFatPtr(AI.getPointerOperand()->getType()))
    return {nullptr, nullptr};
  Value *Arg = AI.getValOperand();
  handleMemoryInst(&AI, Arg, AI.getPointerOperand(), Arg->getType(),
                   AI.getAlign(), AI.getOrdering(), AI.isVolatile(),
                   AI.getSyncScopeID());
  return {nullptr, nullptr};
}

// Unlike load, store, and RMW, cmpxchg needs special handling to account
// for the boolean success result it returns.
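//
// Illustrative sketch (added for exposition; value names are hypothetical):
//   %old = cmpxchg ptr addrspace(7) %p, i32 %cmp, i32 %new seq_cst seq_cst
// becomes, roughly,
//   %v = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(
//            i32 %new, i32 %cmp, ptr addrspace(8) %p.rsrc, i32 %p.off,
//            i32 0, i32 %aux)
// followed, for non-weak exchanges, by an icmp eq against %cmp to rebuild the
// {i32, i1} result, with fences inserted around the call as the ordering
// requires.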
PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {
  Value *Ptr = AI.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&AI);

  Type *Ty = AI.getNewValOperand()->getType();
  AtomicOrdering Order = AI.getMergedOrdering();
  SyncScope::ID SSID = AI.getSyncScopeID();
  bool IsNonTemporal = AI.getMetadata(LLVMContext::MD_nontemporal);

  auto [Rsrc, Off] = getPtrParts(Ptr);
  insertPreMemOpFence(Order, SSID);

  uint32_t Aux = 0;
  if (IsNonTemporal)
    Aux |= AMDGPU::CPol::SLC;
  if (AI.isVolatile())
    Aux |= AMDGPU::CPol::VOLATILE;
  auto *Call =
      IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
                          {AI.getNewValOperand(), AI.getCompareOperand(), Rsrc,
                           Off, IRB.getInt32(0), IRB.getInt32(Aux)});
  copyMetadata(Call, &AI);
  setAlign(Call, AI.getAlign(), 2);
  Call->takeName(&AI);
  insertPostMemOpFence(Order, SSID);

  Value *Res = PoisonValue::get(AI.getType());
  Res = IRB.CreateInsertValue(Res, Call, 0);
  if (!AI.isWeak()) {
    Value *Succeeded = IRB.CreateICmpEQ(Call, AI.getCompareOperand());
    Res = IRB.CreateInsertValue(Res, Succeeded, 1);
  }
  SplitUsers.insert(&AI);
  AI.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  using namespace llvm::PatternMatch;
  Value *Ptr = GEP.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&GEP);

  auto [Rsrc, Off] = getPtrParts(Ptr);
  const DataLayout &DL = GEP.getDataLayout();
  bool InBounds = GEP.isInBounds();

  // In order to call emitGEPOffset() and thus not have to reimplement it,
  // we need the GEP result to have ptr addrspace(7) type.
  Type *FatPtrTy = IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER);
  if (auto *VT = dyn_cast<VectorType>(Off->getType()))
    FatPtrTy = VectorType::get(FatPtrTy, VT->getElementCount());
  GEP.mutateType(FatPtrTy);
  Value *OffAccum = emitGEPOffset(&IRB, DL, &GEP);
  GEP.mutateType(Ptr->getType());
  if (match(OffAccum, m_Zero())) { // Constant-zero offset
    SplitUsers.insert(&GEP);
    return {Rsrc, Off};
  }

  bool HasNonNegativeOff = false;
  if (auto *CI = dyn_cast<ConstantInt>(OffAccum)) {
    HasNonNegativeOff = !CI->isNegative();
  }
  Value *NewOff;
  if (match(Off, m_Zero())) {
    NewOff = OffAccum;
  } else {
    NewOff = IRB.CreateAdd(Off, OffAccum, "",
                           /*hasNUW=*/InBounds && HasNonNegativeOff,
                           /*hasNSW=*/false);
  }
  copyMetadata(NewOff, &GEP);
  NewOff->takeName(&GEP);
  SplitUsers.insert(&GEP);
  return {Rsrc, NewOff};
}

PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) {
  Value *Ptr = PI.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&PI);

  Type *ResTy = PI.getType();
  unsigned Width = ResTy->getScalarSizeInBits();

  auto [Rsrc, Off] = getPtrParts(Ptr);
  const DataLayout &DL = PI.getDataLayout();
  unsigned FatPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER);

  Value *Res;
  if (Width <= BufferOffsetWidth) {
    Res = IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false,
                            PI.getName() + ".off");
  } else {
    Value *RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.getName() + ".rsrc");
    Value *Shl = IRB.CreateShl(
        RsrcInt,
        ConstantExpr::getIntegerValue(ResTy, APInt(Width, BufferOffsetWidth)),
        "", Width >= FatPtrWidth, Width > FatPtrWidth);
    Value *OffCast = IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false,
                                       PI.getName() + ".off");
    Res = IRB.CreateOr(Shl, OffCast);
  }

  copyMetadata(Res, &PI);
  Res->takeName(&PI);
  SplitUsers.insert(&PI);
  PI.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) {
  if (!isSplitFatPtr(IP.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&IP);
  const DataLayout &DL = IP.getDataLayout();
  unsigned RsrcPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_RESOURCE);
  Value *Int = IP.getOperand(0);
  Type *IntTy = Int->getType();
  Type *RsrcIntTy = IntTy->getWithNewBitWidth(RsrcPtrWidth);
  unsigned Width = IntTy->getScalarSizeInBits();

  auto *RetTy = cast<StructType>(IP.getType());
  Type *RsrcTy = RetTy->getElementType(0);
  Type *OffTy = RetTy->getElementType(1);
  Value *RsrcPart = IRB.CreateLShr(
      Int,
      ConstantExpr::getIntegerValue(IntTy, APInt(Width, BufferOffsetWidth)));
  Value *RsrcInt = IRB.CreateIntCast(RsrcPart, RsrcIntTy, /*isSigned=*/false);
  Value *Rsrc = IRB.CreateIntToPtr(RsrcInt, RsrcTy, IP.getName() + ".rsrc");
  Value *Off =
      IRB.CreateIntCast(Int, OffTy, /*IsSigned=*/false, IP.getName() + ".off");

  copyMetadata(Rsrc, &IP);
  SplitUsers.insert(&IP);
  return {Rsrc, Off};
}

PtrParts SplitPtrStructs::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *In = I.getPointerOperand();
  // No-op casts preserve parts
  if (In->getType() == I.getType()) {
    auto [Rsrc, Off] = getPtrParts(In);
    SplitUsers.insert(&I);
    return {Rsrc, Off};
  }
  if (I.getSrcAddressSpace() != AMDGPUAS::BUFFER_RESOURCE)
    report_fatal_error("Only buffer resources (addrspace 8) can be cast to "
                       "buffer fat pointers (addrspace 7)");
  Type *OffTy = cast<StructType>(I.getType())->getElementType(1);
  Value *ZeroOff = Constant::getNullValue(OffTy);
  SplitUsers.insert(&I);
  return {In, ZeroOff};
}

PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) {
  Value *Lhs = Cmp.getOperand(0);
  if (!isSplitFatPtr(Lhs->getType()))
    return {nullptr, nullptr};
  Value *Rhs = Cmp.getOperand(1);
  IRB.SetInsertPoint(&Cmp);
  ICmpInst::Predicate Pred = Cmp.getPredicate();

  assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
         "Pointer comparison is only equal or unequal");
  auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);
  auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);
  Value *RsrcCmp =
      IRB.CreateICmp(Pred, LhsRsrc, RhsRsrc, Cmp.getName() + ".rsrc");
  copyMetadata(RsrcCmp, &Cmp);
  Value *OffCmp = IRB.CreateICmp(Pred, LhsOff, RhsOff, Cmp.getName() + ".off");
  copyMetadata(OffCmp, &Cmp);

  Value *Res = nullptr;
  if (Pred == ICmpInst::ICMP_EQ)
    Res = IRB.CreateAnd(RsrcCmp, OffCmp);
  else if (Pred == ICmpInst::ICMP_NE)
    Res = IRB.CreateOr(RsrcCmp, OffCmp);
  copyMetadata(Res, &Cmp);
  Res->takeName(&Cmp);
  SplitUsers.insert(&Cmp);
  Cmp.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitFreezeInst(FreezeInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  auto [Rsrc, Off] = getPtrParts(I.getOperand(0));

  Value *RsrcRes = IRB.CreateFreeze(Rsrc, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes = IRB.CreateFreeze(Off, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitExtractElementInst(ExtractElementInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *Vec = I.getVectorOperand();
  Value *Idx = I.getIndexOperand();
  auto [Rsrc, Off] = getPtrParts(Vec);

  Value *RsrcRes = IRB.CreateExtractElement(Rsrc, Idx, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes = IRB.CreateExtractElement(Off, Idx, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitInsertElementInst(InsertElementInst &I) {
  // The mutated instructions temporarily don't return vectors, and so
  // we need the generic getType() here to avoid crashes.
  if (!isSplitFatPtr(cast<Instruction>(I).getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *Vec = I.getOperand(0);
  Value *Elem = I.getOperand(1);
  Value *Idx = I.getOperand(2);
  auto [VecRsrc, VecOff] = getPtrParts(Vec);
  auto [ElemRsrc, ElemOff] = getPtrParts(Elem);

  Value *RsrcRes =
      IRB.CreateInsertElement(VecRsrc, ElemRsrc, Idx, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes =
      IRB.CreateInsertElement(VecOff, ElemOff, Idx, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitShuffleVectorInst(ShuffleVectorInst &I) {
  // Cast is needed for the same reason as insertelement's.
  if (!isSplitFatPtr(cast<Instruction>(I).getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);

  Value *V1 = I.getOperand(0);
  Value *V2 = I.getOperand(1);
  ArrayRef<int> Mask = I.getShuffleMask();
  auto [V1Rsrc, V1Off] = getPtrParts(V1);
  auto [V2Rsrc, V2Off] = getPtrParts(V2);

  Value *RsrcRes =
      IRB.CreateShuffleVector(V1Rsrc, V2Rsrc, Mask, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes =
      IRB.CreateShuffleVector(V1Off, V2Off, Mask, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitPHINode(PHINode &PHI) {
  if (!isSplitFatPtr(PHI.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(*PHI.getInsertionPointAfterDef());
  // Phi nodes will be handled in post-processing after we've visited every
  // instruction. However, instead of just returning {nullptr, nullptr},
  // we explicitly create the temporary extractvalue operations that are our
  // temporary results so that they end up at the beginning of the block with
  // the PHIs.
  Value *TmpRsrc = IRB.CreateExtractValue(&PHI, 0, PHI.getName() + ".rsrc");
  Value *TmpOff = IRB.CreateExtractValue(&PHI, 1, PHI.getName() + ".off");
  Conditionals.push_back(&PHI);
  SplitUsers.insert(&PHI);
  return {TmpRsrc, TmpOff};
}

PtrParts SplitPtrStructs::visitSelectInst(SelectInst &SI) {
  if (!isSplitFatPtr(SI.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&SI);

  Value *Cond = SI.getCondition();
  Value *True = SI.getTrueValue();
  Value *False = SI.getFalseValue();
  auto [TrueRsrc, TrueOff] = getPtrParts(True);
  auto [FalseRsrc, FalseOff] = getPtrParts(False);

  Value *RsrcRes =
      IRB.CreateSelect(Cond, TrueRsrc, FalseRsrc, SI.getName() + ".rsrc", &SI);
  copyMetadata(RsrcRes, &SI);
  Conditionals.push_back(&SI);
  Value *OffRes =
      IRB.CreateSelect(Cond, TrueOff, FalseOff, SI.getName() + ".off", &SI);
  copyMetadata(OffRes, &SI);
  SplitUsers.insert(&SI);
  return {RsrcRes, OffRes};
}

/// Returns true if this intrinsic needs to be removed when it is
/// applied to `ptr addrspace(7)` values. Calls to these intrinsics are
/// rewritten into calls to versions of that intrinsic on the resource
/// descriptor.
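///
/// Illustrative sketch (added for exposition; value names are hypothetical):
/// a call such as
///   %masked = call ptr addrspace(7) @llvm.ptrmask.p7.i32(
///                 ptr addrspace(7) %p, i32 %mask)
/// is handled by and-ing %mask into the offset part while keeping the
/// resource part unchanged, after which the now-unused p7 declaration of the
/// intrinsic can simply be erased.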
static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
  switch (IID) {
  default:
    return false;
  case Intrinsic::ptrmask:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
    return true;
  }
}

PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
  Intrinsic::ID IID = I.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::ptrmask: {
    Value *Ptr = I.getArgOperand(0);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    Value *Mask = I.getArgOperand(1);
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    if (Mask->getType() != Off->getType())
      report_fatal_error("offset width is not equal to index width of fat "
                         "pointer (data layout not set up correctly?)");
    Value *OffRes = IRB.CreateAnd(Off, Mask, I.getName() + ".off");
    copyMetadata(OffRes, &I);
    SplitUsers.insert(&I);
    return {Rsrc, OffRes};
  }
  // Pointer annotation intrinsics that, given their object-wide nature,
  // operate on the resource part.
  case Intrinsic::invariant_start: {
    Value *Ptr = I.getArgOperand(1);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    Type *NewTy = PointerType::get(I.getContext(), AMDGPUAS::BUFFER_RESOURCE);
    auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {I.getOperand(0), Rsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    I.replaceAllUsesWith(NewRsrc);
    return {nullptr, nullptr};
  }
  case Intrinsic::invariant_end: {
    Value *RealPtr = I.getArgOperand(2);
    if (!isSplitFatPtr(RealPtr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    Value *RealRsrc = getPtrParts(RealPtr).first;
    Value *InvPtr = I.getArgOperand(0);
    Value *Size = I.getArgOperand(1);
    Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->getType()},
                                         {InvPtr, Size, RealRsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    I.replaceAllUsesWith(NewRsrc);
    return {nullptr, nullptr};
  }
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group: {
    Value *Ptr = I.getArgOperand(0);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->getType()}, {Rsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    return {NewRsrc, Off};
  }
  }
  return {nullptr, nullptr};
}

void SplitPtrStructs::processFunction(Function &F) {
  ST = &TM->getSubtarget<GCNSubtarget>(F);
  SmallVector<Instruction *, 0> Originals;
  LLVM_DEBUG(dbgs() << "Splitting pointer structs in function: " << F.getName()
                    << "\n");
  for (Instruction &I : instructions(F))
    Originals.push_back(&I);
  for (Instruction *I : Originals) {
    auto [Rsrc, Off] = visit(I);
    assert(((Rsrc && Off) || (!Rsrc && !Off)) &&
           "Can't have a resource but no offset");
    if (Rsrc)
      RsrcParts[I] = Rsrc;
    if (Off)
      OffParts[I] = Off;
  }
  processConditionals();
  killAndReplaceSplitInstructions(Originals);

  // Clean up after ourselves to save on memory.
  RsrcParts.clear();
  OffParts.clear();
  SplitUsers.clear();
  Conditionals.clear();
  ConditionalTemps.clear();
}

namespace {
class AMDGPULowerBufferFatPointers : public ModulePass {
public:
  static char ID;

  AMDGPULowerBufferFatPointers() : ModulePass(ID) {
    initializeAMDGPULowerBufferFatPointersPass(
        *PassRegistry::getPassRegistry());
  }

  bool run(Module &M, const TargetMachine &TM);
  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // namespace

/// Returns true if there are values that have a buffer fat pointer in them,
/// which means we'll need to perform rewrites on this function. As a side
/// effect, this will populate the type remapping cache.
static bool containsBufferFatPointers(const Function &F,
                                      BufferFatPtrToStructTypeMap *TypeMap) {
  bool HasFatPointers = false;
  for (const BasicBlock &BB : F)
    for (const Instruction &I : BB)
      HasFatPointers |= (I.getType() != TypeMap->remapType(I.getType()));
  return HasFatPointers;
}

static bool hasFatPointerInterface(const Function &F,
                                   BufferFatPtrToStructTypeMap *TypeMap) {
  Type *Ty = F.getFunctionType();
  return Ty != TypeMap->remapType(Ty);
}

/// Move the body of `OldF` into a new function, returning it.
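///
/// Illustrative sketch (added for exposition; the function name is
/// hypothetical): a function like
///   define ptr addrspace(7) @f(ptr addrspace(7) %p)
/// is replaced by a new function whose remapped type is roughly
///   define { ptr addrspace(8), i32 } @f({ ptr addrspace(8), i32 } %p)
/// with the original basic blocks moved (not cloned) into it and
/// pointer-specific parameter attributes dropped wherever the argument or
/// return type changed.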
static Function *moveFunctionAdaptingType(Function *OldF, FunctionType *NewTy,
                                          ValueToValueMapTy &CloneMap) {
  bool IsIntrinsic = OldF->isIntrinsic();
  Function *NewF =
      Function::Create(NewTy, OldF->getLinkage(), OldF->getAddressSpace());
  NewF->IsNewDbgInfoFormat = OldF->IsNewDbgInfoFormat;
  NewF->copyAttributesFrom(OldF);
  NewF->copyMetadata(OldF, 0);
  NewF->takeName(OldF);
  NewF->updateAfterNameChange();
  NewF->setDLLStorageClass(OldF->getDLLStorageClass());
  OldF->getParent()->getFunctionList().insertAfter(OldF->getIterator(), NewF);

  while (!OldF->empty()) {
    BasicBlock *BB = &OldF->front();
    BB->removeFromParent();
    BB->insertInto(NewF);
    CloneMap[BB] = BB;
    for (Instruction &I : *BB) {
      CloneMap[&I] = &I;
    }
  }

  AttributeMask PtrOnlyAttrs;
  for (auto K :
       {Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
        Attribute::NoAlias, Attribute::NoCapture, Attribute::NoFree,
        Attribute::NonNull, Attribute::NullPointerIsValid, Attribute::ReadNone,
        Attribute::ReadOnly, Attribute::WriteOnly}) {
    PtrOnlyAttrs.addAttribute(K);
  }
  SmallVector<AttributeSet> ArgAttrs;
  AttributeList OldAttrs = OldF->getAttributes();

  for (auto [I, OldArg, NewArg] : enumerate(OldF->args(), NewF->args())) {
    CloneMap[&NewArg] = &OldArg;
    NewArg.takeName(&OldArg);
    Type *OldArgTy = OldArg.getType(), *NewArgTy = NewArg.getType();
    // Temporarily mutate type of `NewArg` to allow RAUW to work.
    NewArg.mutateType(OldArgTy);
    OldArg.replaceAllUsesWith(&NewArg);
    NewArg.mutateType(NewArgTy);

    AttributeSet ArgAttr = OldAttrs.getParamAttrs(I);
    // Intrinsics get their attributes fixed later.
    if (OldArgTy != NewArgTy && !IsIntrinsic)
      ArgAttr = ArgAttr.removeAttributes(NewF->getContext(), PtrOnlyAttrs);
    ArgAttrs.push_back(ArgAttr);
  }
  AttributeSet RetAttrs = OldAttrs.getRetAttrs();
  if (OldF->getReturnType() != NewF->getReturnType() && !IsIntrinsic)
    RetAttrs = RetAttrs.removeAttributes(NewF->getContext(), PtrOnlyAttrs);
  NewF->setAttributes(AttributeList::get(
      NewF->getContext(), OldAttrs.getFnAttrs(), RetAttrs, ArgAttrs));
  return NewF;
}

static void makeCloneInPlaceMap(Function *F, ValueToValueMapTy &CloneMap) {
  for (Argument &A : F->args())
    CloneMap[&A] = &A;
  for (BasicBlock &BB : *F) {
    CloneMap[&BB] = &BB;
    for (Instruction &I : BB)
      CloneMap[&I] = &I;
  }
}

bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
  bool Changed = false;
  const DataLayout &DL = M.getDataLayout();
  // Record the functions which need to be remapped.
  // The second element of the pair indicates whether the function has to have
  // its arguments or return types adjusted.
  SmallVector<std::pair<Function *, bool>> NeedsRemap;

  BufferFatPtrToStructTypeMap StructTM(DL);
  BufferFatPtrToIntTypeMap IntTM(DL);
  for (const GlobalVariable &GV : M.globals()) {
    if (GV.getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER)
      report_fatal_error("Global variables with a buffer fat pointer address "
                         "space (7) are not supported");
    Type *VT = GV.getValueType();
    if (VT != StructTM.remapType(VT))
      report_fatal_error("Global variables that contain buffer fat pointers "
                         "(address space 7 pointers) are unsupported. Use "
                         "buffer resource pointers (address space 8) instead.");
  }

  {
    // Collect all constant exprs and aggregates referenced by any function.
    SmallVector<Constant *, 8> Worklist;
    for (Function &F : M.functions())
      for (Instruction &I : instructions(F))
        for (Value *Op : I.operands())
          if (isa<ConstantExpr>(Op) || isa<ConstantAggregate>(Op))
            Worklist.push_back(cast<Constant>(Op));

    // Recursively look for any referenced buffer pointer constants.
    SmallPtrSet<Constant *, 8> Visited;
    SetVector<Constant *> BufferFatPtrConsts;
    while (!Worklist.empty()) {
      Constant *C = Worklist.pop_back_val();
      if (!Visited.insert(C).second)
        continue;
      if (isBufferFatPtrOrVector(C->getType()))
        BufferFatPtrConsts.insert(C);
      for (Value *Op : C->operands())
        if (isa<ConstantExpr>(Op) || isa<ConstantAggregate>(Op))
          Worklist.push_back(cast<Constant>(Op));
    }

    // Expand all constant expressions using fat buffer pointers to
    // instructions.
    Changed |= convertUsersOfConstantsToInstructions(
        BufferFatPtrConsts.getArrayRef(), /*RestrictToFunc=*/nullptr,
        /*RemoveDeadConstants=*/false, /*IncludeSelf=*/true);
  }

  StoreFatPtrsAsIntsVisitor MemOpsRewrite(&IntTM, M.getContext());
  for (Function &F : M.functions()) {
    bool InterfaceChange = hasFatPointerInterface(F, &StructTM);
    bool BodyChanges = containsBufferFatPointers(F, &StructTM);
    Changed |= MemOpsRewrite.processFunction(F);
    if (InterfaceChange || BodyChanges)
      NeedsRemap.push_back(std::make_pair(&F, InterfaceChange));
  }
  if (NeedsRemap.empty())
    return Changed;

  SmallVector<Function *> NeedsPostProcess;
  SmallVector<Function *> Intrinsics;
  // Keep one big map so as to memoize constants across functions.
  ValueToValueMapTy CloneMap;
  FatPtrConstMaterializer Materializer(&StructTM, CloneMap);

  ValueMapper LowerInFuncs(CloneMap, RF_None, &StructTM, &Materializer);
  for (auto [F, InterfaceChange] : NeedsRemap) {
    Function *NewF = F;
    if (InterfaceChange)
      NewF = moveFunctionAdaptingType(
          F, cast<FunctionType>(StructTM.remapType(F->getFunctionType())),
          CloneMap);
    else
      makeCloneInPlaceMap(F, CloneMap);
    LowerInFuncs.remapFunction(*NewF);
    if (NewF->isIntrinsic())
      Intrinsics.push_back(NewF);
    else
      NeedsPostProcess.push_back(NewF);
    if (InterfaceChange) {
      F->replaceAllUsesWith(NewF);
      F->eraseFromParent();
    }
    Changed = true;
  }
  StructTM.clear();
  IntTM.clear();
  CloneMap.clear();

  SplitPtrStructs Splitter(M.getContext(), &TM);
  for (Function *F : NeedsPostProcess)
    Splitter.processFunction(*F);
  for (Function *F : Intrinsics) {
    if (isRemovablePointerIntrinsic(F->getIntrinsicID())) {
      F->eraseFromParent();
    } else {
      std::optional<Function *> NewF = Intrinsic::remangleIntrinsicFunction(F);
      if (NewF)
        F->replaceAllUsesWith(*NewF);
    }
  }
  return Changed;
}

bool AMDGPULowerBufferFatPointers::runOnModule(Module &M) {
  TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  return run(M, TM);
}

char AMDGPULowerBufferFatPointers::ID = 0;

char &llvm::AMDGPULowerBufferFatPointersID = AMDGPULowerBufferFatPointers::ID;

void AMDGPULowerBufferFatPointers::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
}

#define PASS_DESC "Lower buffer fat pointer operations to buffer resources"
INITIALIZE_PASS_BEGIN(AMDGPULowerBufferFatPointers, DEBUG_TYPE, PASS_DESC,
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPULowerBufferFatPointers, DEBUG_TYPE, PASS_DESC, false,
                    false)
#undef PASS_DESC

ModulePass *llvm::createAMDGPULowerBufferFatPointersPass() {
  return new AMDGPULowerBufferFatPointers();
}

PreservedAnalyses
AMDGPULowerBufferFatPointersPass::run(Module &M, ModuleAnalysisManager &MA) {
  return AMDGPULowerBufferFatPointers().run(M, TM) ? PreservedAnalyses::none()
                                                   : PreservedAnalyses::all();
}
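
// Illustrative usage sketch (added for exposition): assuming the new-pass-
// manager registration uses the name "amdgpu-lower-buffer-fat-pointers" in
// the AMDGPU pass registry, the lowering can be exercised in isolation with
// something along the lines of
//   opt -mtriple=amdgcn-- -passes=amdgpu-lower-buffer-fat-pointers in.ll -S
// which rewrites all `ptr addrspace(7)` operations in `in.ll` into resource
// plus offset form before instruction selection would otherwise see them.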