//===-- AMDGPULowerBufferFatPointers.cpp ---------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers operations on buffer fat pointers (addrspace 7) to
// operations on buffer resources (addrspace 8) and is needed for correct
// codegen.
//
// # Background
//
// Address space 7 (the buffer fat pointer) is a 160-bit pointer that consists
// of a 128-bit buffer descriptor and a 32-bit offset into that descriptor.
// The buffer resource part needs to be a "raw" buffer resource (it must have
// a stride of 0 and bounds checks must be in raw buffer mode or disabled).
//
// When these requirements are met, a buffer resource can be treated as a
// typical (though quite wide) pointer that follows typical LLVM pointer
// semantics. This allows the frontend to reason about such buffers (which are
// often encountered in the context of SPIR-V kernels).
//
// However, because of their non-power-of-2 size, these fat pointers cannot be
// present during translation to MIR (though this restriction may be lifted
// during the transition to GlobalISel). Therefore, this pass is needed in
// order to correctly implement these fat pointers.
//
// The resource intrinsics take the resource part (the address space 8 pointer)
// and the offset part (the 32-bit integer) as separate arguments. In addition,
// many users of these buffers manipulate the offset while leaving the resource
// part alone. For these reasons, we typically want to keep the resource and
// offset parts in separate variables, but combine them together when
// encountering cases where this is required, such as when inserting these
// values into aggregates or moving them to memory.
//
// Therefore, at a high level, `ptr addrspace(7) %x` becomes `ptr addrspace(8)
// %x.rsrc` and `i32 %x.off`, which will be combined into `{ptr addrspace(8),
// i32} %x = {%x.rsrc, %x.off}` if needed. Similarly, `vector<Nxp7>` becomes
// `{vector<Nxp8>, vector<Nxi32>}` and its component parts.
//
// # Implementation
//
// This pass proceeds in three main phases:
//
// ## Rewriting loads and stores of p7
//
// The first phase is to rewrite away all loads and stores of
// `ptr addrspace(7)`, including aggregates containing such pointers, to ones
// that use `i160`. This is handled by `StoreFatPtrsAsIntsVisitor`, which
// visits loads, stores, and allocas and, if the loaded or stored type contains
// `ptr addrspace(7)`, rewrites that type to one where the p7s are replaced by
// i160s, copying other parts of aggregates as needed. In the case of a store,
// each pointer is `ptrtoint`d to i160 before storing, and loaded integers are
// `inttoptr`d back. This same transformation is applied to vectors of
// pointers.
//
// Such a transformation allows the later phases of the pass to not need to
// handle buffer fat pointers moving to and from memory, where we would have
// to handle the incompatibility between a `{Nxp8, Nxi32}` representation and
// `Nxi160` directly. Instead, that transposing action (where the vectors of
// resources and vectors of offsets are concatenated before being stored to
// memory) is handled by implementing only `inttoptr` and `ptrtoint`.
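//
// For example (a sketch, with value names invented for illustration):
// ```
// store ptr addrspace(7) %p, ptr %slot
// %q = load ptr addrspace(7), ptr %slot
// ```
// becomes, after this phase,
// ```
// %p.int = ptrtoint ptr addrspace(7) %p to i160
// store i160 %p.int, ptr %slot
// %q.int = load i160, ptr %slot
// %q = inttoptr i160 %q.int to ptr addrspace(7)
// ```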
//
// Atomic operations on `ptr addrspace(7)` values are not supported, as the
// hardware does not include a 160-bit atomic.
//
// ## Type remapping
//
// We use a `ValueMapper` to mangle uses of [vectors of] buffer fat pointers
// to the corresponding struct type, which has a resource part and an offset
// part.
//
// This is done using a `BufferFatPtrToStructTypeMap` and a
// `FatPtrConstMaterializer`, usually by way of `setType`ing values. Constants
// are handled here because there isn't a good way to fix them up later.
//
// This has the downside of leaving the IR in an invalid state (for example,
// the instruction `getelementptr {ptr addrspace(8), i32} %p, ...` will exist),
// but all such invalid states will be resolved by the third phase.
//
// Functions that don't take buffer fat pointers are modified in place. Those
// that do take such pointers have their basic blocks moved to a new function
// whose arguments and return values are {ptr addrspace(8), i32} instead of
// ptr addrspace(7). This phase also records intrinsics so that they can be
// remangled or deleted later.
//
// ## Splitting pointer structs
//
// The meat of this pass consists of defining semantics for operations that
// produce or consume [vectors of] buffer fat pointers in terms of their
// resource and offset parts. This is accomplished through the
// `SplitPtrStructs` visitor.
//
// In the first pass through each function that is being lowered, the splitter
// inserts new instructions to implement the split-structures behavior, which
// is needed for correctness and performance. It records a list of "split
// users", instructions that are being replaced by operations on the resource
// and offset parts.
//
// Split users do not necessarily need to produce parts themselves (a
// `load float, ptr addrspace(7)` does not, for example), but, if they do not
// generate fat buffer pointers, they must RAUW in their replacement
// instructions during the initial visit.
//
// When these new instructions are created, they use the split parts recorded
// for their initial arguments in order to generate their replacements,
// creating a parallel set of instructions that does not refer to the original
// fat pointer values but instead to their resource and offset components.
//
// Instructions, such as `extractvalue`, that produce buffer fat pointers from
// sources that do not have split parts, have such parts generated using
// `extractvalue`. This is also the initial handling of PHI nodes, which are
// then cleaned up.
//
// ### Conditionals
//
// PHI nodes are initially given resource parts via `extractvalue`. However,
// this is not an efficient rewrite of such nodes, as, in most cases, the
// resource part in a conditional or loop remains constant throughout the loop
// and only the offset varies. Failing to optimize away these constant
// resources would cause additional registers to be sent around loops and
// might lead to waterfall loops being generated for buffer operations due to
// the "non-uniform" resource argument.
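//
// For example (a hypothetical loop, with names invented for illustration),
// a pointer-increment PHI such as
// ```
// loop:
//   %p = phi ptr addrspace(7) [ %base, %entry ], [ %p.next, %loop ]
//   ...
//   %p.next = getelementptr i32, ptr addrspace(7) %p, i32 1
// ```
// should ideally become a PHI over only the 32-bit offsets, with every
// iteration reusing `%base.rsrc` as the resource part:
// ```
// loop:
//   %p.off = phi i32 [ %base.off, %entry ], [ %p.next.off, %loop ]
//   ...
//   %p.next.off = add i32 %p.off, 4
// ```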
//
// Therefore, after all instructions have been visited, the pointer splitter
// post-processes all encountered conditionals. Given a PHI node or select,
// getPossibleRsrcRoots() collects all values that the resource parts of that
// conditional's input could come from, as well as collecting all conditional
// instructions encountered during the search. If, after filtering out the
// initial node itself, the set of encountered conditionals is a subset of the
// potential roots and there is a single potential resource that isn't in the
// conditional set, that value is the only possible value the resource
// argument could have throughout the control flow.
//
// If that condition is met, then a PHI node can have its resource part changed
// to the singleton value and then be replaced by a PHI on the offsets.
// Otherwise, each PHI node is split into two, one for the resource part and
// one for the offset part, which replace the temporary `extractvalue`
// instructions that were added during the first pass.
//
// Similar logic applies to `select`, where
// `%z = select i1 %cond, ptr addrspace(7) %x, ptr addrspace(7) %y`
// can be split into `%z.rsrc = %x.rsrc` and
// `%z.off = select i1 %cond, i32 %x.off, i32 %y.off`
// if both `%x` and `%y` have the same resource part, but two `select`
// operations will be needed if they do not.
//
// ### Final processing
//
// After conditionals have been cleaned up, the IR for each function is
// rewritten to remove all the old instructions that have been split up.
//
// Any instruction that used to produce a buffer fat pointer (and therefore now
// produces a resource-and-offset struct after type remapping) is
// replaced as follows:
// 1. All debug value annotations are cloned to reflect that the resource part
//    and offset parts are computed separately and constitute different
//    fragments of the underlying source language variable.
// 2. All uses that were themselves split are replaced by a `poison` of the
//    struct type, as they will themselves be erased soon. This rule, combined
//    with debug handling, should leave the use lists of split instructions
//    empty in almost all cases.
// 3. If a user of the original struct-valued result remains, the structure
//    needed for the new types to work is constructed out of the newly-defined
//    parts, and the original instruction is replaced by this structure
//    before being erased. Instructions requiring this construction include
//    `ret` and `insertvalue`.
//
// # Consequences
//
// This pass does not alter the CFG.
//
// Alias analysis information will become coarser, as the LLVM alias analyzer
// cannot handle the buffer intrinsics. Specifically, while we can determine
// that the following two loads do not alias:
// ```
// %y = getelementptr i32, ptr addrspace(7) %x, i32 1
// %a = load i32, ptr addrspace(7) %x
// %b = load i32, ptr addrspace(7) %y
// ```
// we cannot (except through some code that runs during scheduling) determine
// that the rewritten loads below do not alias.
// ```
// %y.off = add i32 %x.off, 1
// %a = call @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) %x.rsrc, i32
//     %x.off, ...)
// %b = call @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8)
//     %x.rsrc, i32 %y.off, ...)
// ```
// However, existing alias information is preserved.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

#define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"

using namespace llvm;

static constexpr unsigned BufferOffsetWidth = 32;

namespace {
/// Recursively replace instances of ptr addrspace(7) and vector<Nxptr
/// addrspace(7)> with some other type as defined by the relevant subclass.
class BufferFatPtrTypeLoweringBase : public ValueMapTypeRemapper {
  DenseMap<Type *, Type *> Map;

  Type *remapTypeImpl(Type *Ty, SmallPtrSetImpl<StructType *> &Seen);

protected:
  virtual Type *remapScalar(PointerType *PT) = 0;
  virtual Type *remapVector(VectorType *VT) = 0;

  const DataLayout &DL;

public:
  BufferFatPtrTypeLoweringBase(const DataLayout &DL) : DL(DL) {}
  Type *remapType(Type *SrcTy) override;
  void clear() { Map.clear(); }
};

/// Remap ptr addrspace(7) to i160 and vector<Nxptr addrspace(7)> to
/// vector<Nxi160> in order to correctly handle loading/storing these values
/// from memory.
class BufferFatPtrToIntTypeMap : public BufferFatPtrTypeLoweringBase {
  using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;

protected:
  Type *remapScalar(PointerType *PT) override { return DL.getIntPtrType(PT); }
  Type *remapVector(VectorType *VT) override { return DL.getIntPtrType(VT); }
};

/// Remap ptr addrspace(7) to {ptr addrspace(8), i32} (the resource and offset
/// parts of the pointer) so that we can easily rewrite operations on these
/// values that aren't loading them from or storing them to memory.
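/// For example, `ptr addrspace(7)` becomes `{ptr addrspace(8), i32}` and
/// `<4 x ptr addrspace(7)>` becomes `{<4 x ptr addrspace(8)>, <4 x i32>}`.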
class BufferFatPtrToStructTypeMap : public BufferFatPtrTypeLoweringBase {
  using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;

protected:
  Type *remapScalar(PointerType *PT) override;
  Type *remapVector(VectorType *VT) override;
};
} // namespace

// This code is adapted from the type remapper in lib/Linker/IRMover.cpp
Type *BufferFatPtrTypeLoweringBase::remapTypeImpl(
    Type *Ty, SmallPtrSetImpl<StructType *> &Seen) {
  Type **Entry = &Map[Ty];
  if (*Entry)
    return *Entry;
  if (auto *PT = dyn_cast<PointerType>(Ty)) {
    if (PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER) {
      return *Entry = remapScalar(PT);
    }
  }
  if (auto *VT = dyn_cast<VectorType>(Ty)) {
    auto *PT = dyn_cast<PointerType>(VT->getElementType());
    if (PT && PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER) {
      return *Entry = remapVector(VT);
    }
    return *Entry = Ty;
  }
  // Whether the type is one that is structurally uniqued - that is, whether it
  // is not a named struct (the only kind of type where multiple structurally
  // identical types can have distinct `Type*`s).
  StructType *TyAsStruct = dyn_cast<StructType>(Ty);
  bool IsUniqued = !TyAsStruct || TyAsStruct->isLiteral();
  // Base case for ints, floats, opaque pointers, and so on, which don't
  // require recursion.
  if (Ty->getNumContainedTypes() == 0 && IsUniqued)
    return *Entry = Ty;
  if (!IsUniqued) {
    // Create a dummy type for recursion purposes.
    if (!Seen.insert(TyAsStruct).second) {
      StructType *Placeholder = StructType::create(Ty->getContext());
      return *Entry = Placeholder;
    }
  }
  bool Changed = false;
  SmallVector<Type *> ElementTypes(Ty->getNumContainedTypes(), nullptr);
  for (unsigned int I = 0, E = Ty->getNumContainedTypes(); I < E; ++I) {
    Type *OldElem = Ty->getContainedType(I);
    Type *NewElem = remapTypeImpl(OldElem, Seen);
    ElementTypes[I] = NewElem;
    Changed |= (OldElem != NewElem);
  }
  // Recursive calls to remapTypeImpl() may have invalidated the pointer.
  Entry = &Map[Ty];
  if (!Changed) {
    return *Entry = Ty;
  }
  if (auto *ArrTy = dyn_cast<ArrayType>(Ty))
    return *Entry = ArrayType::get(ElementTypes[0], ArrTy->getNumElements());
  if (auto *FnTy = dyn_cast<FunctionType>(Ty))
    return *Entry = FunctionType::get(ElementTypes[0],
                                      ArrayRef(ElementTypes).slice(1),
                                      FnTy->isVarArg());
  if (auto *STy = dyn_cast<StructType>(Ty)) {
    // Genuine opaque types don't have a remapping.
330 if (STy->isOpaque()) 331 return *Entry = Ty; 332 bool IsPacked = STy->isPacked(); 333 if (IsUniqued) 334 return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked); 335 SmallString<16> Name(STy->getName()); 336 STy->setName(""); 337 Type **RecursionEntry = &Map[Ty]; 338 if (*RecursionEntry) { 339 auto *Placeholder = cast<StructType>(*RecursionEntry); 340 Placeholder->setBody(ElementTypes, IsPacked); 341 Placeholder->setName(Name); 342 return *Entry = Placeholder; 343 } 344 return *Entry = StructType::create(Ty->getContext(), ElementTypes, Name, 345 IsPacked); 346 } 347 llvm_unreachable("Unknown type of type that contains elements"); 348 } 349 350 Type *BufferFatPtrTypeLoweringBase::remapType(Type *SrcTy) { 351 SmallPtrSet<StructType *, 2> Visited; 352 return remapTypeImpl(SrcTy, Visited); 353 } 354 355 Type *BufferFatPtrToStructTypeMap::remapScalar(PointerType *PT) { 356 LLVMContext &Ctx = PT->getContext(); 357 return StructType::get(PointerType::get(Ctx, AMDGPUAS::BUFFER_RESOURCE), 358 IntegerType::get(Ctx, BufferOffsetWidth)); 359 } 360 361 Type *BufferFatPtrToStructTypeMap::remapVector(VectorType *VT) { 362 ElementCount EC = VT->getElementCount(); 363 LLVMContext &Ctx = VT->getContext(); 364 Type *RsrcVec = 365 VectorType::get(PointerType::get(Ctx, AMDGPUAS::BUFFER_RESOURCE), EC); 366 Type *OffVec = VectorType::get(IntegerType::get(Ctx, BufferOffsetWidth), EC); 367 return StructType::get(RsrcVec, OffVec); 368 } 369 370 static bool isBufferFatPtrOrVector(Type *Ty) { 371 if (auto *PT = dyn_cast<PointerType>(Ty->getScalarType())) 372 return PT->getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER; 373 return false; 374 } 375 376 // True if the type is {ptr addrspace(8), i32} or a struct containing vectors of 377 // those types. Used to quickly skip instructions we don't need to process. 378 static bool isSplitFatPtr(Type *Ty) { 379 auto *ST = dyn_cast<StructType>(Ty); 380 if (!ST) 381 return false; 382 if (!ST->isLiteral() || ST->getNumElements() != 2) 383 return false; 384 auto *MaybeRsrc = 385 dyn_cast<PointerType>(ST->getElementType(0)->getScalarType()); 386 auto *MaybeOff = 387 dyn_cast<IntegerType>(ST->getElementType(1)->getScalarType()); 388 return MaybeRsrc && MaybeOff && 389 MaybeRsrc->getAddressSpace() == AMDGPUAS::BUFFER_RESOURCE && 390 MaybeOff->getBitWidth() == BufferOffsetWidth; 391 } 392 393 // True if the result type or any argument types are buffer fat pointers. 394 static bool isBufferFatPtrConst(Constant *C) { 395 Type *T = C->getType(); 396 return isBufferFatPtrOrVector(T) || any_of(C->operands(), [](const Use &U) { 397 return isBufferFatPtrOrVector(U.get()->getType()); 398 }); 399 } 400 401 namespace { 402 /// Convert [vectors of] buffer fat pointers to integers when they are read from 403 /// or stored to memory. This ensures that these pointers will have the same 404 /// memory layout as before they are lowered, even though they will no longer 405 /// have their previous layout in registers/in the program (they'll be broken 406 /// down into resource and offset parts). This has the downside of imposing 407 /// marshalling costs when reading or storing these values, but since placing 408 /// such pointers into memory is an uncommon operation at best, we feel that 409 /// this cost is acceptable for better performance in the common case. 
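/// As a rough sketch (value names here are invented for illustration), storing
/// a `{ptr addrspace(7), i32}` value `%v` becomes something like:
///   %v.0 = extractvalue {ptr addrspace(7), i32} %v, 0
///   %v.0.int = ptrtoint ptr addrspace(7) %v.0 to i160
///   %s.0 = insertvalue {i160, i32} poison, i160 %v.0.int, 0
///   %v.1 = extractvalue {ptr addrspace(7), i32} %v, 1
///   %s.1 = insertvalue {i160, i32} %s.0, i32 %v.1, 1
///   store {i160, i32} %s.1, ptr %slot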
410 class StoreFatPtrsAsIntsVisitor 411 : public InstVisitor<StoreFatPtrsAsIntsVisitor, bool> { 412 BufferFatPtrToIntTypeMap *TypeMap; 413 414 ValueToValueMapTy ConvertedForStore; 415 416 IRBuilder<> IRB; 417 418 // Convert all the buffer fat pointers within the input value to inttegers 419 // so that it can be stored in memory. 420 Value *fatPtrsToInts(Value *V, Type *From, Type *To, const Twine &Name); 421 // Convert all the i160s that need to be buffer fat pointers (as specified) 422 // by the To type) into those pointers to preserve the semantics of the rest 423 // of the program. 424 Value *intsToFatPtrs(Value *V, Type *From, Type *To, const Twine &Name); 425 426 public: 427 StoreFatPtrsAsIntsVisitor(BufferFatPtrToIntTypeMap *TypeMap, LLVMContext &Ctx) 428 : TypeMap(TypeMap), IRB(Ctx) {} 429 bool processFunction(Function &F); 430 431 bool visitInstruction(Instruction &I) { return false; } 432 bool visitAllocaInst(AllocaInst &I); 433 bool visitLoadInst(LoadInst &LI); 434 bool visitStoreInst(StoreInst &SI); 435 bool visitGetElementPtrInst(GetElementPtrInst &I); 436 }; 437 } // namespace 438 439 Value *StoreFatPtrsAsIntsVisitor::fatPtrsToInts(Value *V, Type *From, Type *To, 440 const Twine &Name) { 441 if (From == To) 442 return V; 443 ValueToValueMapTy::iterator Find = ConvertedForStore.find(V); 444 if (Find != ConvertedForStore.end()) 445 return Find->second; 446 if (isBufferFatPtrOrVector(From)) { 447 Value *Cast = IRB.CreatePtrToInt(V, To, Name + ".int"); 448 ConvertedForStore[V] = Cast; 449 return Cast; 450 } 451 if (From->getNumContainedTypes() == 0) 452 return V; 453 // Structs, arrays, and other compound types. 454 Value *Ret = PoisonValue::get(To); 455 if (auto *AT = dyn_cast<ArrayType>(From)) { 456 Type *FromPart = AT->getArrayElementType(); 457 Type *ToPart = cast<ArrayType>(To)->getElementType(); 458 for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) { 459 Value *Field = IRB.CreateExtractValue(V, I); 460 Value *NewField = 461 fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(I)); 462 Ret = IRB.CreateInsertValue(Ret, NewField, I); 463 } 464 } else { 465 for (auto [Idx, FromPart, ToPart] : 466 enumerate(From->subtypes(), To->subtypes())) { 467 Value *Field = IRB.CreateExtractValue(V, Idx); 468 Value *NewField = 469 fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(Idx)); 470 Ret = IRB.CreateInsertValue(Ret, NewField, Idx); 471 } 472 } 473 ConvertedForStore[V] = Ret; 474 return Ret; 475 } 476 477 Value *StoreFatPtrsAsIntsVisitor::intsToFatPtrs(Value *V, Type *From, Type *To, 478 const Twine &Name) { 479 if (From == To) 480 return V; 481 if (isBufferFatPtrOrVector(To)) { 482 Value *Cast = IRB.CreateIntToPtr(V, To, Name + ".ptr"); 483 return Cast; 484 } 485 if (From->getNumContainedTypes() == 0) 486 return V; 487 // Structs, arrays, and other compound types. 488 Value *Ret = PoisonValue::get(To); 489 if (auto *AT = dyn_cast<ArrayType>(From)) { 490 Type *FromPart = AT->getArrayElementType(); 491 Type *ToPart = cast<ArrayType>(To)->getElementType(); 492 for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) { 493 Value *Field = IRB.CreateExtractValue(V, I); 494 Value *NewField = 495 intsToFatPtrs(Field, FromPart, ToPart, Name + "." + Twine(I)); 496 Ret = IRB.CreateInsertValue(Ret, NewField, I); 497 } 498 } else { 499 for (auto [Idx, FromPart, ToPart] : 500 enumerate(From->subtypes(), To->subtypes())) { 501 Value *Field = IRB.CreateExtractValue(V, Idx); 502 Value *NewField = 503 intsToFatPtrs(Field, FromPart, ToPart, Name + "." 
+ Twine(Idx)); 504 Ret = IRB.CreateInsertValue(Ret, NewField, Idx); 505 } 506 } 507 return Ret; 508 } 509 510 bool StoreFatPtrsAsIntsVisitor::processFunction(Function &F) { 511 bool Changed = false; 512 // The visitors will mutate GEPs and allocas, but will push loads and stores 513 // to the worklist to avoid invalidation. 514 for (Instruction &I : make_early_inc_range(instructions(F))) { 515 Changed |= visit(I); 516 } 517 ConvertedForStore.clear(); 518 return Changed; 519 } 520 521 bool StoreFatPtrsAsIntsVisitor::visitAllocaInst(AllocaInst &I) { 522 Type *Ty = I.getAllocatedType(); 523 Type *NewTy = TypeMap->remapType(Ty); 524 if (Ty == NewTy) 525 return false; 526 I.setAllocatedType(NewTy); 527 return true; 528 } 529 530 bool StoreFatPtrsAsIntsVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { 531 Type *Ty = I.getSourceElementType(); 532 Type *NewTy = TypeMap->remapType(Ty); 533 if (Ty == NewTy) 534 return false; 535 // We'll be rewriting the type `ptr addrspace(7)` out of existence soon, so 536 // make sure GEPs don't have different semantics with the new type. 537 I.setSourceElementType(NewTy); 538 I.setResultElementType(TypeMap->remapType(I.getResultElementType())); 539 return true; 540 } 541 542 bool StoreFatPtrsAsIntsVisitor::visitLoadInst(LoadInst &LI) { 543 Type *Ty = LI.getType(); 544 Type *IntTy = TypeMap->remapType(Ty); 545 if (Ty == IntTy) 546 return false; 547 548 IRB.SetInsertPoint(&LI); 549 auto *NLI = cast<LoadInst>(LI.clone()); 550 NLI->mutateType(IntTy); 551 NLI = IRB.Insert(NLI); 552 copyMetadataForLoad(*NLI, LI); 553 NLI->takeName(&LI); 554 555 Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName()); 556 LI.replaceAllUsesWith(CastBack); 557 LI.eraseFromParent(); 558 return true; 559 } 560 561 bool StoreFatPtrsAsIntsVisitor::visitStoreInst(StoreInst &SI) { 562 Value *V = SI.getValueOperand(); 563 Type *Ty = V->getType(); 564 Type *IntTy = TypeMap->remapType(Ty); 565 if (Ty == IntTy) 566 return false; 567 568 IRB.SetInsertPoint(&SI); 569 Value *IntV = fatPtrsToInts(V, Ty, IntTy, V->getName()); 570 for (auto *Dbg : at::getAssignmentMarkers(&SI)) 571 Dbg->setValue(IntV); 572 573 SI.setOperand(0, IntV); 574 return true; 575 } 576 577 /// Return the ptr addrspace(8) and i32 (resource and offset parts) in a lowered 578 /// buffer fat pointer constant. 579 static std::pair<Constant *, Constant *> 580 splitLoweredFatBufferConst(Constant *C) { 581 if (auto *AZ = dyn_cast<ConstantAggregateZero>(C)) 582 return std::make_pair(AZ->getStructElement(0), AZ->getStructElement(1)); 583 if (auto *SC = dyn_cast<ConstantStruct>(C)) 584 return std::make_pair(SC->getOperand(0), SC->getOperand(1)); 585 llvm_unreachable("Conversion should've created a {p8, i32} struct"); 586 } 587 588 namespace { 589 /// Handle the remapping of ptr addrspace(7) constants. 590 class FatPtrConstMaterializer final : public ValueMaterializer { 591 BufferFatPtrToStructTypeMap *TypeMap; 592 BufferFatPtrToIntTypeMap *IntTypeMap; 593 // An internal mapper that is used to recurse into the arguments of constants. 594 // While the documentation for `ValueMapper` specifies not to use it 595 // recursively, examination of the logic in mapValue() shows that it can 596 // safely be used recursively when handling constants, like it does in its own 597 // logic. 598 ValueMapper InternalMapper; 599 600 Constant *materializeBufferFatPtrConst(Constant *C); 601 602 const DataLayout &DL; 603 604 public: 605 // UnderlyingMap is the value map this materializer will be filling. 
606 FatPtrConstMaterializer(BufferFatPtrToStructTypeMap *TypeMap, 607 ValueToValueMapTy &UnderlyingMap, 608 BufferFatPtrToIntTypeMap *IntTypeMap, 609 const DataLayout &DL) 610 : TypeMap(TypeMap), IntTypeMap(IntTypeMap), 611 InternalMapper(UnderlyingMap, RF_None, TypeMap, this), DL(DL) {} 612 virtual ~FatPtrConstMaterializer() = default; 613 614 Value *materialize(Value *V) override; 615 }; 616 } // namespace 617 618 Constant *FatPtrConstMaterializer::materializeBufferFatPtrConst(Constant *C) { 619 Type *SrcTy = C->getType(); 620 auto *NewTy = dyn_cast<StructType>(TypeMap->remapType(SrcTy)); 621 if (C->isNullValue()) 622 return ConstantAggregateZero::getNullValue(NewTy); 623 if (isa<PoisonValue>(C)) { 624 return ConstantStruct::get(NewTy, 625 {PoisonValue::get(NewTy->getElementType(0)), 626 PoisonValue::get(NewTy->getElementType(1))}); 627 } 628 if (isa<UndefValue>(C)) { 629 return ConstantStruct::get(NewTy, 630 {UndefValue::get(NewTy->getElementType(0)), 631 UndefValue::get(NewTy->getElementType(1))}); 632 } 633 634 if (isa<GlobalValue>(C)) 635 report_fatal_error("Global values containing ptr addrspace(7) (buffer " 636 "fat pointer) values are not supported"); 637 638 if (auto *VC = dyn_cast<ConstantVector>(C)) { 639 if (Constant *S = VC->getSplatValue()) { 640 Constant *NewS = InternalMapper.mapConstant(*S); 641 if (!NewS) 642 return nullptr; 643 auto [Rsrc, Off] = splitLoweredFatBufferConst(NewS); 644 auto EC = VC->getType()->getElementCount(); 645 return ConstantStruct::get(NewTy, {ConstantVector::getSplat(EC, Rsrc), 646 ConstantVector::getSplat(EC, Off)}); 647 } 648 SmallVector<Constant *> Rsrcs; 649 SmallVector<Constant *> Offs; 650 for (Value *Op : VC->operand_values()) { 651 auto *NewOp = dyn_cast_or_null<Constant>(InternalMapper.mapValue(*Op)); 652 if (!NewOp) 653 return nullptr; 654 auto [Rsrc, Off] = splitLoweredFatBufferConst(NewOp); 655 Rsrcs.push_back(Rsrc); 656 Offs.push_back(Off); 657 } 658 Constant *RsrcVec = ConstantVector::get(Rsrcs); 659 Constant *OffVec = ConstantVector::get(Offs); 660 return ConstantStruct::get(NewTy, {RsrcVec, OffVec}); 661 } 662 663 // Constant expressions. This code mirrors how we fix up the equivalent 664 // instructions later. 665 auto *CE = dyn_cast<ConstantExpr>(C); 666 if (!CE) 667 return nullptr; 668 if (auto *GEPO = dyn_cast<GEPOperator>(C)) { 669 Constant *RemappedPtr = 670 InternalMapper.mapConstant(*cast<Constant>(GEPO->getPointerOperand())); 671 auto [Rsrc, Off] = splitLoweredFatBufferConst(RemappedPtr); 672 Type *OffTy = Off->getType(); 673 bool InBounds = GEPO->isInBounds(); 674 675 MapVector<Value *, APInt> VariableOffs; 676 APInt NewConstOffVal = APInt::getZero(BufferOffsetWidth); 677 if (!GEPO->collectOffset(DL, BufferOffsetWidth, VariableOffs, 678 NewConstOffVal)) 679 report_fatal_error( 680 "Scalable vector or unsized struct in fat pointer GEP"); 681 Constant *OffAccum = nullptr; 682 // Accumulate offsets together before adding to the base in order to 683 // preserve as many of the inbounds properties as possible. 
684 for (auto [Arg, Multiple] : VariableOffs) { 685 Constant *NewArg = InternalMapper.mapConstant(*cast<Constant>(Arg)); 686 NewArg = ConstantFoldIntegerCast(NewArg, OffTy, /*IsSigned=*/true, DL); 687 if (!Multiple.isOne()) { 688 if (Multiple.isPowerOf2()) { 689 NewArg = ConstantExpr::getShl( 690 NewArg, 691 CE->getIntegerValue( 692 OffTy, APInt(BufferOffsetWidth, Multiple.logBase2())), 693 /*hasNUW=*/InBounds, /*HasNSW=*/InBounds); 694 } else { 695 NewArg = 696 ConstantExpr::getMul(NewArg, CE->getIntegerValue(OffTy, Multiple), 697 /*hasNUW=*/InBounds, /*hasNSW=*/InBounds); 698 } 699 } 700 if (OffAccum) { 701 OffAccum = ConstantExpr::getAdd(OffAccum, NewArg, /*hasNUW=*/InBounds, 702 /*hasNSW=*/InBounds); 703 } else { 704 OffAccum = NewArg; 705 } 706 } 707 Constant *NewConstOff = CE->getIntegerValue(OffTy, NewConstOffVal); 708 if (OffAccum) 709 OffAccum = ConstantExpr::getAdd(OffAccum, NewConstOff, 710 /*hasNUW=*/InBounds, /*hasNSW=*/InBounds); 711 else 712 OffAccum = NewConstOff; 713 bool HasNonNegativeOff = false; 714 if (auto *CI = dyn_cast<ConstantInt>(OffAccum)) { 715 HasNonNegativeOff = !CI->isNegative(); 716 } 717 Constant *NewOff = ConstantExpr::getAdd( 718 Off, OffAccum, /*hasNUW=*/InBounds && HasNonNegativeOff, 719 /*hasNSW=*/false); 720 return ConstantStruct::get(NewTy, {Rsrc, NewOff}); 721 } 722 723 if (auto *PI = dyn_cast<PtrToIntOperator>(CE)) { 724 Constant *Parts = 725 InternalMapper.mapConstant(*cast<Constant>(PI->getPointerOperand())); 726 auto [Rsrc, Off] = splitLoweredFatBufferConst(Parts); 727 // Here, we take advantage of the fact that ptrtoint has a built-in 728 // zero-extension behavior. 729 unsigned FatPtrWidth = 730 DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER); 731 Constant *RsrcInt = CE->getPtrToInt(Rsrc, SrcTy); 732 unsigned Width = SrcTy->getScalarSizeInBits(); 733 Constant *Shift = 734 CE->getIntegerValue(SrcTy, APInt(Width, BufferOffsetWidth)); 735 Constant *OffCast = 736 ConstantFoldIntegerCast(Off, SrcTy, /*IsSigned=*/false, DL); 737 Constant *RsrcHi = ConstantExpr::getShl( 738 RsrcInt, Shift, Width >= FatPtrWidth, Width > FatPtrWidth); 739 // This should be an or, but those got recently removed. 
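    // When the result type is wide enough for the shift not to overflow,
    // RsrcHi has zeros in its low BufferOffsetWidth bits and OffCast is a
    // zero-extended 32-bit value, so the add cannot carry and computes the
    // same bits an or would.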
740 Constant *Result = ConstantExpr::getAdd(RsrcHi, OffCast, true, true); 741 return Result; 742 } 743 744 if (CE->getOpcode() == Instruction::IntToPtr) { 745 auto *Arg = cast<Constant>(CE->getOperand(0)); 746 unsigned FatPtrWidth = 747 DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER); 748 unsigned RsrcPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_RESOURCE); 749 auto *WantedTy = Arg->getType()->getWithNewBitWidth(FatPtrWidth); 750 Arg = ConstantFoldIntegerCast(Arg, WantedTy, /*IsSigned=*/false, DL); 751 752 Constant *Shift = 753 CE->getIntegerValue(WantedTy, APInt(FatPtrWidth, BufferOffsetWidth)); 754 Type *RsrcIntType = WantedTy->getWithNewBitWidth(RsrcPtrWidth); 755 Type *RsrcTy = NewTy->getElementType(0); 756 Type *OffTy = WantedTy->getWithNewBitWidth(BufferOffsetWidth); 757 Constant *RsrcInt = CE->getTrunc( 758 ConstantFoldBinaryOpOperands(Instruction::LShr, Arg, Shift, DL), 759 RsrcIntType); 760 Constant *Rsrc = CE->getIntToPtr(RsrcInt, RsrcTy); 761 Constant *Off = ConstantFoldIntegerCast(Arg, OffTy, /*isSigned=*/false, DL); 762 763 return ConstantStruct::get(NewTy, {Rsrc, Off}); 764 } 765 766 if (auto *AC = dyn_cast<AddrSpaceCastOperator>(CE)) { 767 unsigned SrcAS = AC->getSrcAddressSpace(); 768 unsigned DstAS = AC->getDestAddressSpace(); 769 auto *Arg = cast<Constant>(AC->getPointerOperand()); 770 auto *NewArg = InternalMapper.mapConstant(*Arg); 771 if (!NewArg) 772 return nullptr; 773 if (SrcAS == AMDGPUAS::BUFFER_FAT_POINTER && 774 DstAS == AMDGPUAS::BUFFER_FAT_POINTER) 775 return NewArg; 776 if (SrcAS == AMDGPUAS::BUFFER_RESOURCE && 777 DstAS == AMDGPUAS::BUFFER_FAT_POINTER) { 778 auto *NullOff = CE->getNullValue(NewTy->getElementType(1)); 779 return ConstantStruct::get(NewTy, {NewArg, NullOff}); 780 } 781 report_fatal_error( 782 "Unsupported address space cast for a buffer fat pointer"); 783 } 784 return nullptr; 785 } 786 787 Value *FatPtrConstMaterializer::materialize(Value *V) { 788 Constant *C = dyn_cast<Constant>(V); 789 if (!C) 790 return nullptr; 791 if (auto *GEPO = dyn_cast<GEPOperator>(C)) { 792 // As a special case, adjust GEP constants that have a ptr addrspace(7) in 793 // their source types here, since the earlier local changes didn't handle 794 // htis. 795 Type *SrcTy = GEPO->getSourceElementType(); 796 Type *NewSrcTy = IntTypeMap->remapType(SrcTy); 797 if (SrcTy != NewSrcTy) { 798 SmallVector<Constant *> Ops; 799 Ops.reserve(GEPO->getNumOperands()); 800 for (const Use &U : GEPO->operands()) 801 Ops.push_back(cast<Constant>(U.get())); 802 auto *NewGEP = ConstantExpr::getGetElementPtr( 803 NewSrcTy, Ops[0], ArrayRef<Constant *>(Ops).slice(1), 804 GEPO->isInBounds(), GEPO->getInRange()); 805 LLVM_DEBUG(dbgs() << "p7-getting GEP: " << *GEPO << " becomes " << *NewGEP 806 << "\n"); 807 Value *FurtherMap = materialize(NewGEP); 808 return FurtherMap ? FurtherMap : NewGEP; 809 } 810 } 811 // Structs and other types that happen to contain fat pointers get remapped 812 // by the mapValue() logic. 813 if (!isBufferFatPtrConst(C)) 814 return nullptr; 815 return materializeBufferFatPtrConst(C); 816 } 817 818 using PtrParts = std::pair<Value *, Value *>; 819 namespace { 820 // The visitor returns the resource and offset parts for an instruction if they 821 // can be computed, or (nullptr, nullptr) for cases that don't have a meaningful 822 // value mapping. 
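// For example, visiting a GEP on a fat pointer returns the (unchanged)
// resource part alongside an updated offset, while visiting a load returns
// (nullptr, nullptr) because the load RAUWs itself with a call to a buffer
// intrinsic and does not produce a fat pointer of its own.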
class SplitPtrStructs : public InstVisitor<SplitPtrStructs, PtrParts> {
  ValueToValueMapTy RsrcParts;
  ValueToValueMapTy OffParts;

  // Track instructions that have been rewritten into a user of the component
  // parts of their ptr addrspace(7) input. Instructions that produce
  // ptr addrspace(7) values should **not** be RAUW'd before being added to
  // this set, as that replacement will be handled in a post-visit step.
  // However, instructions that yield values that aren't fat pointers (ex.
  // ptrtoint) should RAUW themselves with new instructions that use the split
  // parts of their arguments during processing.
  DenseSet<Instruction *> SplitUsers;

  // Nodes that need a second look once we've computed the parts for all other
  // instructions to see if, for example, we really need to phi on the resource
  // part.
  SmallVector<Instruction *> Conditionals;
  // Temporary instructions produced while lowering conditionals that should be
  // killed.
  SmallVector<Instruction *> ConditionalTemps;

  // Subtarget info, needed for determining what cache control bits to set.
  const TargetMachine *TM;
  const GCNSubtarget *ST;

  IRBuilder<> IRB;

  // Copy metadata between instructions if applicable.
  void copyMetadata(Value *Dest, Value *Src);

  // Get the resource and offset parts of the value V, inserting appropriate
  // extractvalue calls if needed.
  PtrParts getPtrParts(Value *V);

  // Given an instruction that could produce multiple resource parts (a PHI or
  // select), collect the set of values that its resource part could have come
  // from (the `Roots`) and the set of conditional instructions visited during
  // the search (`Seen`). If, after removing the root of the search from `Seen`
  // and `Roots`, `Seen` is a subset of `Roots` and `Roots - Seen` contains one
  // element, the resource part of that element can replace the resource part
  // of all other elements in `Seen`.
  void getPossibleRsrcRoots(Instruction *I, SmallPtrSetImpl<Value *> &Roots,
                            SmallPtrSetImpl<Value *> &Seen);
  void processConditionals();

  // If an instruction has been split into resource and offset parts,
  // delete that instruction. If any of its uses have not themselves been split
  // into parts (for example, an insertvalue), construct the struct value that,
  // according to the type rewrites, the dying instruction should have
  // produced, and use that instead.
  // Also, kill the temporary extractvalue operations produced by the two-stage
  // lowering of PHIs and conditionals.
875 void killAndReplaceSplitInstructions(SmallVectorImpl<Instruction *> &Origs); 876 877 void setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx); 878 void insertPreMemOpFence(AtomicOrdering Order, SyncScope::ID SSID); 879 void insertPostMemOpFence(AtomicOrdering Order, SyncScope::ID SSID); 880 Value *handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr, Type *Ty, 881 Align Alignment, AtomicOrdering Order, 882 bool IsVolatile, SyncScope::ID SSID); 883 884 public: 885 SplitPtrStructs(LLVMContext &Ctx, const TargetMachine *TM) 886 : TM(TM), ST(nullptr), IRB(Ctx) {} 887 888 void processFunction(Function &F); 889 890 PtrParts visitInstruction(Instruction &I); 891 PtrParts visitLoadInst(LoadInst &LI); 892 PtrParts visitStoreInst(StoreInst &SI); 893 PtrParts visitAtomicRMWInst(AtomicRMWInst &AI); 894 PtrParts visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI); 895 PtrParts visitGetElementPtrInst(GetElementPtrInst &GEP); 896 897 PtrParts visitPtrToIntInst(PtrToIntInst &PI); 898 PtrParts visitIntToPtrInst(IntToPtrInst &IP); 899 PtrParts visitAddrSpaceCastInst(AddrSpaceCastInst &I); 900 PtrParts visitICmpInst(ICmpInst &Cmp); 901 PtrParts visitFreezeInst(FreezeInst &I); 902 903 PtrParts visitExtractElementInst(ExtractElementInst &I); 904 PtrParts visitInsertElementInst(InsertElementInst &I); 905 PtrParts visitShuffleVectorInst(ShuffleVectorInst &I); 906 907 PtrParts visitPHINode(PHINode &PHI); 908 PtrParts visitSelectInst(SelectInst &SI); 909 910 PtrParts visitIntrinsicInst(IntrinsicInst &II); 911 }; 912 } // namespace 913 914 void SplitPtrStructs::copyMetadata(Value *Dest, Value *Src) { 915 auto *DestI = dyn_cast<Instruction>(Dest); 916 auto *SrcI = dyn_cast<Instruction>(Src); 917 918 if (!DestI || !SrcI) 919 return; 920 921 DestI->copyMetadata(*SrcI); 922 } 923 924 PtrParts SplitPtrStructs::getPtrParts(Value *V) { 925 assert(isSplitFatPtr(V->getType()) && "it's not meaningful to get the parts " 926 "of something that wasn't rewritten"); 927 auto *RsrcEntry = &RsrcParts[V]; 928 auto *OffEntry = &OffParts[V]; 929 if (*RsrcEntry && *OffEntry) 930 return {*RsrcEntry, *OffEntry}; 931 932 if (auto *C = dyn_cast<Constant>(V)) { 933 auto [Rsrc, Off] = splitLoweredFatBufferConst(C); 934 return {*RsrcEntry = Rsrc, *OffEntry = Off}; 935 } 936 937 IRBuilder<>::InsertPointGuard Guard(IRB); 938 if (auto *I = dyn_cast<Instruction>(V)) { 939 LLVM_DEBUG(dbgs() << "Recursing to split parts of " << *I << "\n"); 940 auto [Rsrc, Off] = visit(*I); 941 if (Rsrc && Off) 942 return {*RsrcEntry = Rsrc, *OffEntry = Off}; 943 // We'll be creating the new values after the relevant instruction. 944 // This instruction generates a value and so isn't a terminator. 945 IRB.SetInsertPoint(*I->getInsertionPointAfterDef()); 946 IRB.SetCurrentDebugLocation(I->getDebugLoc()); 947 } else if (auto *A = dyn_cast<Argument>(V)) { 948 IRB.SetInsertPointPastAllocas(A->getParent()); 949 IRB.SetCurrentDebugLocation(DebugLoc()); 950 } 951 Value *Rsrc = IRB.CreateExtractValue(V, 0, V->getName() + ".rsrc"); 952 Value *Off = IRB.CreateExtractValue(V, 1, V->getName() + ".off"); 953 return {*RsrcEntry = Rsrc, *OffEntry = Off}; 954 } 955 956 /// Returns the instruction that defines the resource part of the value V. 957 /// Note that this is not getUnderlyingObject(), since that looks through 958 /// operations like ptrmask which might modify the resource part. 
959 /// 960 /// We can limit ourselves to just looking through GEPs followed by looking 961 /// through addrspacecasts because only those two operations preserve the 962 /// resource part, and because operations on an `addrspace(8)` (which is the 963 /// legal input to this addrspacecast) would produce a different resource part. 964 static Value *rsrcPartRoot(Value *V) { 965 while (auto *GEP = dyn_cast<GEPOperator>(V)) 966 V = GEP->getPointerOperand(); 967 while (auto *ASC = dyn_cast<AddrSpaceCastOperator>(V)) 968 V = ASC->getPointerOperand(); 969 return V; 970 } 971 972 void SplitPtrStructs::getPossibleRsrcRoots(Instruction *I, 973 SmallPtrSetImpl<Value *> &Roots, 974 SmallPtrSetImpl<Value *> &Seen) { 975 if (auto *PHI = dyn_cast<PHINode>(I)) { 976 if (!Seen.insert(I).second) 977 return; 978 for (Value *In : PHI->incoming_values()) { 979 In = rsrcPartRoot(In); 980 Roots.insert(In); 981 if (isa<PHINode, SelectInst>(In)) 982 getPossibleRsrcRoots(cast<Instruction>(In), Roots, Seen); 983 } 984 } else if (auto *SI = dyn_cast<SelectInst>(I)) { 985 if (!Seen.insert(SI).second) 986 return; 987 Value *TrueVal = rsrcPartRoot(SI->getTrueValue()); 988 Value *FalseVal = rsrcPartRoot(SI->getFalseValue()); 989 Roots.insert(TrueVal); 990 Roots.insert(FalseVal); 991 if (isa<PHINode, SelectInst>(TrueVal)) 992 getPossibleRsrcRoots(cast<Instruction>(TrueVal), Roots, Seen); 993 if (isa<PHINode, SelectInst>(FalseVal)) 994 getPossibleRsrcRoots(cast<Instruction>(FalseVal), Roots, Seen); 995 } else { 996 llvm_unreachable("getPossibleRsrcParts() only works on phi and select"); 997 } 998 } 999 1000 void SplitPtrStructs::processConditionals() { 1001 SmallDenseMap<Instruction *, Value *> FoundRsrcs; 1002 SmallPtrSet<Value *, 4> Roots; 1003 SmallPtrSet<Value *, 4> Seen; 1004 for (Instruction *I : Conditionals) { 1005 // These have to exist by now because we've visited these nodes. 1006 Value *Rsrc = RsrcParts[I]; 1007 Value *Off = OffParts[I]; 1008 assert(Rsrc && Off && "must have visited conditionals by now"); 1009 1010 std::optional<Value *> MaybeRsrc; 1011 auto MaybeFoundRsrc = FoundRsrcs.find(I); 1012 if (MaybeFoundRsrc != FoundRsrcs.end()) { 1013 MaybeRsrc = MaybeFoundRsrc->second; 1014 } else { 1015 IRBuilder<>::InsertPointGuard Guard(IRB); 1016 Roots.clear(); 1017 Seen.clear(); 1018 getPossibleRsrcRoots(I, Roots, Seen); 1019 LLVM_DEBUG(dbgs() << "Processing conditional: " << *I << "\n"); 1020 #ifndef NDEBUG 1021 for (Value *V : Roots) 1022 LLVM_DEBUG(dbgs() << "Root: " << *V << "\n"); 1023 for (Value *V : Seen) 1024 LLVM_DEBUG(dbgs() << "Seen: " << *V << "\n"); 1025 #endif 1026 // If we are our own possible root, then we shouldn't block our 1027 // replacement with a valid incoming value. 1028 Roots.erase(I); 1029 // We don't want to block the optimization for conditionals that don't 1030 // refer to themselves but did see themselves during the traversal. 1031 Seen.erase(I); 1032 1033 if (set_is_subset(Seen, Roots)) { 1034 auto Diff = set_difference(Roots, Seen); 1035 if (Diff.size() == 1) { 1036 Value *RootVal = *Diff.begin(); 1037 // Handle the case where previous loops already looked through 1038 // an addrspacecast. 
1039 if (isSplitFatPtr(RootVal->getType())) 1040 MaybeRsrc = std::get<0>(getPtrParts(RootVal)); 1041 else 1042 MaybeRsrc = RootVal; 1043 } 1044 } 1045 } 1046 1047 if (auto *PHI = dyn_cast<PHINode>(I)) { 1048 Value *NewRsrc; 1049 StructType *PHITy = cast<StructType>(PHI->getType()); 1050 IRB.SetInsertPoint(*PHI->getInsertionPointAfterDef()); 1051 IRB.SetCurrentDebugLocation(PHI->getDebugLoc()); 1052 if (MaybeRsrc) { 1053 NewRsrc = *MaybeRsrc; 1054 } else { 1055 Type *RsrcTy = PHITy->getElementType(0); 1056 auto *RsrcPHI = IRB.CreatePHI(RsrcTy, PHI->getNumIncomingValues()); 1057 RsrcPHI->takeName(Rsrc); 1058 for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) { 1059 Value *VRsrc = std::get<0>(getPtrParts(V)); 1060 RsrcPHI->addIncoming(VRsrc, BB); 1061 } 1062 copyMetadata(RsrcPHI, PHI); 1063 NewRsrc = RsrcPHI; 1064 } 1065 1066 Type *OffTy = PHITy->getElementType(1); 1067 auto *NewOff = IRB.CreatePHI(OffTy, PHI->getNumIncomingValues()); 1068 NewOff->takeName(Off); 1069 for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) { 1070 assert(OffParts.count(V) && "An offset part had to be created by now"); 1071 Value *VOff = std::get<1>(getPtrParts(V)); 1072 NewOff->addIncoming(VOff, BB); 1073 } 1074 copyMetadata(NewOff, PHI); 1075 1076 // Note: We don't eraseFromParent() the temporaries because we don't want 1077 // to put the corrections maps in an inconstent state. That'll be handed 1078 // during the rest of the killing. Also, `ValueToValueMapTy` guarantees 1079 // that references in that map will be updated as well. 1080 ConditionalTemps.push_back(cast<Instruction>(Rsrc)); 1081 ConditionalTemps.push_back(cast<Instruction>(Off)); 1082 Rsrc->replaceAllUsesWith(NewRsrc); 1083 Off->replaceAllUsesWith(NewOff); 1084 1085 // Save on recomputing the cycle traversals in known-root cases. 1086 if (MaybeRsrc) 1087 for (Value *V : Seen) 1088 FoundRsrcs[cast<Instruction>(V)] = NewRsrc; 1089 } else if (auto *SI = dyn_cast<SelectInst>(I)) { 1090 if (MaybeRsrc) { 1091 ConditionalTemps.push_back(cast<Instruction>(Rsrc)); 1092 Rsrc->replaceAllUsesWith(*MaybeRsrc); 1093 for (Value *V : Seen) 1094 FoundRsrcs[cast<Instruction>(V)] = *MaybeRsrc; 1095 } 1096 } else { 1097 llvm_unreachable("Only PHIs and selects go in the conditionals list"); 1098 } 1099 } 1100 } 1101 1102 void SplitPtrStructs::killAndReplaceSplitInstructions( 1103 SmallVectorImpl<Instruction *> &Origs) { 1104 for (Instruction *I : ConditionalTemps) 1105 I->eraseFromParent(); 1106 1107 for (Instruction *I : Origs) { 1108 if (!SplitUsers.contains(I)) 1109 continue; 1110 1111 SmallVector<DbgValueInst *> Dbgs; 1112 findDbgValues(Dbgs, I); 1113 for (auto *Dbg : Dbgs) { 1114 IRB.SetInsertPoint(Dbg); 1115 auto &DL = I->getModule()->getDataLayout(); 1116 assert(isSplitFatPtr(I->getType()) && 1117 "We should've RAUW'd away loads, stores, etc. 
at this point"); 1118 auto *OffDbg = cast<DbgValueInst>(Dbg->clone()); 1119 copyMetadata(OffDbg, Dbg); 1120 auto [Rsrc, Off] = getPtrParts(I); 1121 1122 int64_t RsrcSz = DL.getTypeSizeInBits(Rsrc->getType()); 1123 int64_t OffSz = DL.getTypeSizeInBits(Off->getType()); 1124 1125 std::optional<DIExpression *> RsrcExpr = 1126 DIExpression::createFragmentExpression(Dbg->getExpression(), 0, 1127 RsrcSz); 1128 std::optional<DIExpression *> OffExpr = 1129 DIExpression::createFragmentExpression(Dbg->getExpression(), RsrcSz, 1130 OffSz); 1131 if (OffExpr) { 1132 OffDbg->setExpression(*OffExpr); 1133 OffDbg->replaceVariableLocationOp(I, Off); 1134 IRB.Insert(OffDbg); 1135 } else { 1136 OffDbg->deleteValue(); 1137 } 1138 if (RsrcExpr) { 1139 Dbg->setExpression(*RsrcExpr); 1140 Dbg->replaceVariableLocationOp(I, Rsrc); 1141 } else { 1142 Dbg->replaceVariableLocationOp(I, UndefValue::get(I->getType())); 1143 } 1144 } 1145 1146 Value *Poison = PoisonValue::get(I->getType()); 1147 I->replaceUsesWithIf(Poison, [&](const Use &U) -> bool { 1148 if (const auto *UI = dyn_cast<Instruction>(U.getUser())) 1149 return SplitUsers.contains(UI); 1150 return false; 1151 }); 1152 1153 if (I->use_empty()) { 1154 I->eraseFromParent(); 1155 continue; 1156 } 1157 IRB.SetInsertPoint(*I->getInsertionPointAfterDef()); 1158 IRB.SetCurrentDebugLocation(I->getDebugLoc()); 1159 auto [Rsrc, Off] = getPtrParts(I); 1160 Value *Struct = PoisonValue::get(I->getType()); 1161 Struct = IRB.CreateInsertValue(Struct, Rsrc, 0); 1162 Struct = IRB.CreateInsertValue(Struct, Off, 1); 1163 copyMetadata(Struct, I); 1164 Struct->takeName(I); 1165 I->replaceAllUsesWith(Struct); 1166 I->eraseFromParent(); 1167 } 1168 } 1169 1170 void SplitPtrStructs::setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx) { 1171 LLVMContext &Ctx = Intr->getContext(); 1172 Intr->addParamAttr(RsrcArgIdx, Attribute::getWithAlignment(Ctx, A)); 1173 } 1174 1175 void SplitPtrStructs::insertPreMemOpFence(AtomicOrdering Order, 1176 SyncScope::ID SSID) { 1177 switch (Order) { 1178 case AtomicOrdering::Release: 1179 case AtomicOrdering::AcquireRelease: 1180 case AtomicOrdering::SequentiallyConsistent: 1181 IRB.CreateFence(AtomicOrdering::Release, SSID); 1182 break; 1183 default: 1184 break; 1185 } 1186 } 1187 1188 void SplitPtrStructs::insertPostMemOpFence(AtomicOrdering Order, 1189 SyncScope::ID SSID) { 1190 switch (Order) { 1191 case AtomicOrdering::Acquire: 1192 case AtomicOrdering::AcquireRelease: 1193 case AtomicOrdering::SequentiallyConsistent: 1194 IRB.CreateFence(AtomicOrdering::Acquire, SSID); 1195 break; 1196 default: 1197 break; 1198 } 1199 } 1200 1201 Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr, 1202 Type *Ty, Align Alignment, 1203 AtomicOrdering Order, bool IsVolatile, 1204 SyncScope::ID SSID) { 1205 IRB.SetInsertPoint(I); 1206 1207 auto [Rsrc, Off] = getPtrParts(Ptr); 1208 SmallVector<Value *, 5> Args; 1209 if (Arg) 1210 Args.push_back(Arg); 1211 Args.push_back(Rsrc); 1212 Args.push_back(Off); 1213 insertPreMemOpFence(Order, SSID); 1214 // soffset is always 0 for these cases, where we always want any offset to be 1215 // part of bounds checking and we don't know which parts of the GEPs is 1216 // uniform. 1217 Args.push_back(IRB.getInt32(0)); 1218 1219 uint32_t Aux = 0; 1220 bool IsInvariant = 1221 (isa<LoadInst>(I) && I->getMetadata(LLVMContext::MD_invariant_load)); 1222 bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal); 1223 // Atomic loads and stores need glc, atomic read-modify-write doesn't. 
1224 bool IsOneWayAtomic = 1225 !isa<AtomicRMWInst>(I) && Order != AtomicOrdering::NotAtomic; 1226 if (IsOneWayAtomic) 1227 Aux |= AMDGPU::CPol::GLC; 1228 if (IsNonTemporal && !IsInvariant) 1229 Aux |= AMDGPU::CPol::SLC; 1230 if (isa<LoadInst>(I) && ST->getGeneration() == AMDGPUSubtarget::GFX10) 1231 Aux |= (Aux & AMDGPU::CPol::GLC ? AMDGPU::CPol::DLC : 0); 1232 if (IsVolatile) 1233 Aux |= AMDGPU::CPol::VOLATILE; 1234 Args.push_back(IRB.getInt32(Aux)); 1235 1236 Intrinsic::ID IID = Intrinsic::not_intrinsic; 1237 if (isa<LoadInst>(I)) 1238 // TODO: Do we need to do something about atomic loads? 1239 IID = Intrinsic::amdgcn_raw_ptr_buffer_load; 1240 else if (isa<StoreInst>(I)) 1241 IID = Intrinsic::amdgcn_raw_ptr_buffer_store; 1242 else if (auto *RMW = dyn_cast<AtomicRMWInst>(I)) { 1243 switch (RMW->getOperation()) { 1244 case AtomicRMWInst::Xchg: 1245 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap; 1246 break; 1247 case AtomicRMWInst::Add: 1248 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add; 1249 break; 1250 case AtomicRMWInst::Sub: 1251 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub; 1252 break; 1253 case AtomicRMWInst::And: 1254 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_and; 1255 break; 1256 case AtomicRMWInst::Or: 1257 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_or; 1258 break; 1259 case AtomicRMWInst::Xor: 1260 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor; 1261 break; 1262 case AtomicRMWInst::Max: 1263 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax; 1264 break; 1265 case AtomicRMWInst::Min: 1266 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin; 1267 break; 1268 case AtomicRMWInst::UMax: 1269 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax; 1270 break; 1271 case AtomicRMWInst::UMin: 1272 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin; 1273 break; 1274 case AtomicRMWInst::FAdd: 1275 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd; 1276 break; 1277 case AtomicRMWInst::FMax: 1278 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax; 1279 break; 1280 case AtomicRMWInst::FMin: 1281 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin; 1282 break; 1283 case AtomicRMWInst::FSub: { 1284 report_fatal_error("atomic floating point subtraction not supported for " 1285 "buffer resources and should've been expanded away"); 1286 break; 1287 } 1288 case AtomicRMWInst::Nand: 1289 report_fatal_error("atomic nand not supported for buffer resources and " 1290 "should've been expanded away"); 1291 break; 1292 case AtomicRMWInst::UIncWrap: 1293 case AtomicRMWInst::UDecWrap: 1294 report_fatal_error("wrapping increment/decrement not supported for " 1295 "buffer resources and should've ben expanded away"); 1296 break; 1297 case AtomicRMWInst::BAD_BINOP: 1298 llvm_unreachable("Not sure how we got a bad binop"); 1299 } 1300 } 1301 1302 auto *Call = IRB.CreateIntrinsic(IID, Ty, Args); 1303 copyMetadata(Call, I); 1304 setAlign(Call, Alignment, Arg ? 1 : 0); 1305 Call->takeName(I); 1306 1307 insertPostMemOpFence(Order, SSID); 1308 // The "no moving p7 directly" rewrites ensure that this load or store won't 1309 // itself need to be split into parts. 
1310 SplitUsers.insert(I); 1311 I->replaceAllUsesWith(Call); 1312 return Call; 1313 } 1314 1315 PtrParts SplitPtrStructs::visitInstruction(Instruction &I) { 1316 return {nullptr, nullptr}; 1317 } 1318 1319 PtrParts SplitPtrStructs::visitLoadInst(LoadInst &LI) { 1320 if (!isSplitFatPtr(LI.getPointerOperandType())) 1321 return {nullptr, nullptr}; 1322 handleMemoryInst(&LI, nullptr, LI.getPointerOperand(), LI.getType(), 1323 LI.getAlign(), LI.getOrdering(), LI.isVolatile(), 1324 LI.getSyncScopeID()); 1325 return {nullptr, nullptr}; 1326 } 1327 1328 PtrParts SplitPtrStructs::visitStoreInst(StoreInst &SI) { 1329 if (!isSplitFatPtr(SI.getPointerOperandType())) 1330 return {nullptr, nullptr}; 1331 Value *Arg = SI.getValueOperand(); 1332 handleMemoryInst(&SI, Arg, SI.getPointerOperand(), Arg->getType(), 1333 SI.getAlign(), SI.getOrdering(), SI.isVolatile(), 1334 SI.getSyncScopeID()); 1335 return {nullptr, nullptr}; 1336 } 1337 1338 PtrParts SplitPtrStructs::visitAtomicRMWInst(AtomicRMWInst &AI) { 1339 if (!isSplitFatPtr(AI.getPointerOperand()->getType())) 1340 return {nullptr, nullptr}; 1341 Value *Arg = AI.getValOperand(); 1342 handleMemoryInst(&AI, Arg, AI.getPointerOperand(), Arg->getType(), 1343 AI.getAlign(), AI.getOrdering(), AI.isVolatile(), 1344 AI.getSyncScopeID()); 1345 return {nullptr, nullptr}; 1346 } 1347 1348 // Unlike load, store, and RMW, cmpxchg needs special handling to account 1349 // for the boolean argument. 1350 PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) { 1351 Value *Ptr = AI.getPointerOperand(); 1352 if (!isSplitFatPtr(Ptr->getType())) 1353 return {nullptr, nullptr}; 1354 IRB.SetInsertPoint(&AI); 1355 1356 Type *Ty = AI.getNewValOperand()->getType(); 1357 AtomicOrdering Order = AI.getMergedOrdering(); 1358 SyncScope::ID SSID = AI.getSyncScopeID(); 1359 bool IsNonTemporal = AI.getMetadata(LLVMContext::MD_nontemporal); 1360 1361 auto [Rsrc, Off] = getPtrParts(Ptr); 1362 insertPreMemOpFence(Order, SSID); 1363 1364 uint32_t Aux = 0; 1365 if (IsNonTemporal) 1366 Aux |= AMDGPU::CPol::SLC; 1367 if (AI.isVolatile()) 1368 Aux |= AMDGPU::CPol::VOLATILE; 1369 auto *Call = 1370 IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty, 1371 {AI.getNewValOperand(), AI.getCompareOperand(), Rsrc, 1372 Off, IRB.getInt32(0), IRB.getInt32(Aux)}); 1373 copyMetadata(Call, &AI); 1374 setAlign(Call, AI.getAlign(), 2); 1375 Call->takeName(&AI); 1376 insertPostMemOpFence(Order, SSID); 1377 1378 Value *Res = PoisonValue::get(AI.getType()); 1379 Res = IRB.CreateInsertValue(Res, Call, 0); 1380 if (!AI.isWeak()) { 1381 Value *Succeeded = IRB.CreateICmpEQ(Call, AI.getCompareOperand()); 1382 Res = IRB.CreateInsertValue(Res, Succeeded, 1); 1383 } 1384 SplitUsers.insert(&AI); 1385 AI.replaceAllUsesWith(Res); 1386 return {nullptr, nullptr}; 1387 } 1388 1389 PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) { 1390 Value *Ptr = GEP.getPointerOperand(); 1391 if (!isSplitFatPtr(Ptr->getType())) 1392 return {nullptr, nullptr}; 1393 IRB.SetInsertPoint(&GEP); 1394 1395 auto [Rsrc, Off] = getPtrParts(Ptr); 1396 Type *OffTy = Off->getType(); 1397 const DataLayout &DL = GEP.getModule()->getDataLayout(); 1398 bool InBounds = GEP.isInBounds(); 1399 1400 // In order to call collectOffset() and thus not have to reimplement it, 1401 // we need the GEP's pointer operand to have ptr addrspace(7) type 1402 GEP.setOperand(GEP.getPointerOperandIndex(), 1403 PoisonValue::get(IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER))); 1404 MapVector<Value *, APInt> 
  MapVector<Value *, APInt> VariableOffs;
  APInt ConstOffVal = APInt::getZero(BufferOffsetWidth);
  if (!GEP.collectOffset(DL, BufferOffsetWidth, VariableOffs, ConstOffVal))
    report_fatal_error("Scalable vector or unsized struct in fat pointer GEP");
  GEP.setOperand(GEP.getPointerOperandIndex(), Ptr);
  Value *OffAccum = nullptr;
  // Accumulate offsets together before adding to the base in order to preserve
  // as many of the inbounds properties as possible.
  for (auto [Arg, Multiple] : VariableOffs) {
    if (auto *OffVecTy = dyn_cast<VectorType>(OffTy))
      if (!Arg->getType()->isVectorTy())
        Arg = IRB.CreateVectorSplat(OffVecTy->getElementCount(), Arg);
    Arg = IRB.CreateIntCast(Arg, OffTy, /*isSigned=*/true);
    if (!Multiple.isOne()) {
      if (Multiple.isPowerOf2())
        Arg = IRB.CreateShl(Arg, Multiple.logBase2(), "", /*HasNUW=*/InBounds,
                            /*HasNSW=*/InBounds);
      else
        Arg = IRB.CreateMul(Arg, ConstantExpr::getIntegerValue(OffTy, Multiple),
                            "", /*HasNUW=*/InBounds, /*HasNSW=*/InBounds);
    }
    if (OffAccum)
      OffAccum = IRB.CreateAdd(OffAccum, Arg, "", /*HasNUW=*/InBounds,
                               /*HasNSW=*/InBounds);
    else
      OffAccum = Arg;
  }
  if (!ConstOffVal.isZero()) {
    Constant *ConstOff = ConstantExpr::getIntegerValue(OffTy, ConstOffVal);
    if (OffAccum)
      OffAccum = IRB.CreateAdd(OffAccum, ConstOff, "", /*HasNUW=*/InBounds,
                               /*HasNSW=*/InBounds);
    else
      OffAccum = ConstOff;
  }

  if (!OffAccum) { // Constant-zero offset
    SplitUsers.insert(&GEP);
    return {Rsrc, Off};
  }

  bool HasNonNegativeOff = false;
  if (auto *CI = dyn_cast<ConstantInt>(OffAccum)) {
    HasNonNegativeOff = !CI->isNegative();
  }
  Value *NewOff;
  if (PatternMatch::match(Off, PatternMatch::is_zero())) {
    NewOff = OffAccum;
  } else {
    NewOff = IRB.CreateAdd(Off, OffAccum, "",
                           /*HasNUW=*/InBounds && HasNonNegativeOff,
                           /*HasNSW=*/false);
  }
  copyMetadata(NewOff, &GEP);
  NewOff->takeName(&GEP);
  SplitUsers.insert(&GEP);
  return {Rsrc, NewOff};
}

PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) {
  Value *Ptr = PI.getPointerOperand();
  if (!isSplitFatPtr(Ptr->getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&PI);

  Type *ResTy = PI.getType();
  unsigned Width = ResTy->getScalarSizeInBits();

  auto [Rsrc, Off] = getPtrParts(Ptr);
  const DataLayout &DL = PI.getModule()->getDataLayout();
  unsigned FatPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER);

  Value *RsrcInt;
  if (Width <= BufferOffsetWidth)
    RsrcInt = ConstantExpr::getIntegerValue(ResTy, APInt::getZero(Width));
  else
    RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.getName() + ".rsrc");
  copyMetadata(RsrcInt, &PI);

  Value *Shl = IRB.CreateShl(
      RsrcInt,
      ConstantExpr::getIntegerValue(ResTy, APInt(Width, BufferOffsetWidth)), "",
      Width >= FatPtrWidth, Width > FatPtrWidth);
  Value *OffCast =
      IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false, PI.getName() + ".off");
  Value *Res = IRB.CreateOr(Shl, OffCast);
  Res->takeName(&PI);
  SplitUsers.insert(&PI);
  PI.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}
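// A fat pointer's integer form is, conceptually, (ptrtoint(rsrc) << 32) | off.
// ptrtoint (above) builds that value; inttoptr (below) inverts it, e.g.
// (schematically) `inttoptr i160 %v to ptr addrspace(7)` splits into
//   rsrc = inttoptr of the high 128 bits (%v >> 32), off = trunc %v to i32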
PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) {
  if (!isSplitFatPtr(IP.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&IP);
  const DataLayout &DL = IP.getModule()->getDataLayout();
  unsigned RsrcPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_RESOURCE);
  Value *Int = IP.getOperand(0);
  Type *IntTy = Int->getType();
  Type *RsrcIntTy = IntTy->getWithNewBitWidth(RsrcPtrWidth);
  unsigned Width = IntTy->getScalarSizeInBits();

  auto *RetTy = cast<StructType>(IP.getType());
  Type *RsrcTy = RetTy->getElementType(0);
  Type *OffTy = RetTy->getElementType(1);
  Value *RsrcPart = IRB.CreateLShr(
      Int,
      ConstantExpr::getIntegerValue(IntTy, APInt(Width, BufferOffsetWidth)));
  Value *RsrcInt = IRB.CreateIntCast(RsrcPart, RsrcIntTy, /*isSigned=*/false);
  Value *Rsrc = IRB.CreateIntToPtr(RsrcInt, RsrcTy, IP.getName() + ".rsrc");
  Value *Off =
      IRB.CreateIntCast(Int, OffTy, /*isSigned=*/false, IP.getName() + ".off");

  copyMetadata(Rsrc, &IP);
  SplitUsers.insert(&IP);
  return {Rsrc, Off};
}

PtrParts SplitPtrStructs::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *In = I.getPointerOperand();
  // No-op casts preserve parts.
  if (In->getType() == I.getType()) {
    auto [Rsrc, Off] = getPtrParts(In);
    SplitUsers.insert(&I);
    return {Rsrc, Off};
  }
  if (I.getSrcAddressSpace() != AMDGPUAS::BUFFER_RESOURCE)
    report_fatal_error("Only buffer resources (addrspace 8) can be cast to "
                       "buffer fat pointers (addrspace 7)");
  Type *OffTy = cast<StructType>(I.getType())->getElementType(1);
  Value *ZeroOff = Constant::getNullValue(OffTy);
  SplitUsers.insert(&I);
  return {In, ZeroOff};
}

PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) {
  Value *Lhs = Cmp.getOperand(0);
  if (!isSplitFatPtr(Lhs->getType()))
    return {nullptr, nullptr};
  Value *Rhs = Cmp.getOperand(1);
  IRB.SetInsertPoint(&Cmp);
  ICmpInst::Predicate Pred = Cmp.getPredicate();

  assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
         "Pointer comparison is only equal or unequal");
  auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);
  auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);
  Value *RsrcCmp =
      IRB.CreateICmp(Pred, LhsRsrc, RhsRsrc, Cmp.getName() + ".rsrc");
  copyMetadata(RsrcCmp, &Cmp);
  Value *OffCmp = IRB.CreateICmp(Pred, LhsOff, RhsOff, Cmp.getName() + ".off");
  copyMetadata(OffCmp, &Cmp);

  Value *Res = nullptr;
  if (Pred == ICmpInst::ICMP_EQ)
    Res = IRB.CreateAnd(RsrcCmp, OffCmp);
  else if (Pred == ICmpInst::ICMP_NE)
    Res = IRB.CreateOr(RsrcCmp, OffCmp);
  copyMetadata(Res, &Cmp);
  Res->takeName(&Cmp);
  SplitUsers.insert(&Cmp);
  Cmp.replaceAllUsesWith(Res);
  return {nullptr, nullptr};
}

PtrParts SplitPtrStructs::visitFreezeInst(FreezeInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  auto [Rsrc, Off] = getPtrParts(I.getOperand(0));

  Value *RsrcRes = IRB.CreateFreeze(Rsrc, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes = IRB.CreateFreeze(Off, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}
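// The vector manipulation instructions below (extractelement, insertelement,
// and shufflevector) lower elementwise: the same operation is applied, in
// parallel, to the vector of resource parts and the vector of offset parts.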
PtrParts SplitPtrStructs::visitExtractElementInst(ExtractElementInst &I) {
  if (!isSplitFatPtr(I.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *Vec = I.getVectorOperand();
  Value *Idx = I.getIndexOperand();
  auto [Rsrc, Off] = getPtrParts(Vec);

  Value *RsrcRes = IRB.CreateExtractElement(Rsrc, Idx, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes = IRB.CreateExtractElement(Off, Idx, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitInsertElementInst(InsertElementInst &I) {
  // The mutated instructions temporarily don't return vectors, and so
  // we need the generic getType() here to avoid crashes.
  if (!isSplitFatPtr(cast<Instruction>(I).getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);
  Value *Vec = I.getOperand(0);
  Value *Elem = I.getOperand(1);
  Value *Idx = I.getOperand(2);
  auto [VecRsrc, VecOff] = getPtrParts(Vec);
  auto [ElemRsrc, ElemOff] = getPtrParts(Elem);

  Value *RsrcRes =
      IRB.CreateInsertElement(VecRsrc, ElemRsrc, Idx, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes =
      IRB.CreateInsertElement(VecOff, ElemOff, Idx, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}

PtrParts SplitPtrStructs::visitShuffleVectorInst(ShuffleVectorInst &I) {
  // Cast is needed for the same reason as insertelement's.
  if (!isSplitFatPtr(cast<Instruction>(I).getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&I);

  Value *V1 = I.getOperand(0);
  Value *V2 = I.getOperand(1);
  ArrayRef<int> Mask = I.getShuffleMask();
  auto [V1Rsrc, V1Off] = getPtrParts(V1);
  auto [V2Rsrc, V2Off] = getPtrParts(V2);

  Value *RsrcRes =
      IRB.CreateShuffleVector(V1Rsrc, V2Rsrc, Mask, I.getName() + ".rsrc");
  copyMetadata(RsrcRes, &I);
  Value *OffRes =
      IRB.CreateShuffleVector(V1Off, V2Off, Mask, I.getName() + ".off");
  copyMetadata(OffRes, &I);
  SplitUsers.insert(&I);
  return {RsrcRes, OffRes};
}
PtrParts SplitPtrStructs::visitPHINode(PHINode &PHI) {
  if (!isSplitFatPtr(PHI.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(*PHI.getInsertionPointAfterDef());
  // Phi nodes will be handled in post-processing after we've visited every
  // instruction. However, instead of just returning {nullptr, nullptr},
  // we explicitly create the temporary extractvalue operations that are our
  // temporary results so that they end up at the beginning of the block with
  // the PHIs.
  Value *TmpRsrc = IRB.CreateExtractValue(&PHI, 0, PHI.getName() + ".rsrc");
  Value *TmpOff = IRB.CreateExtractValue(&PHI, 1, PHI.getName() + ".off");
  Conditionals.push_back(&PHI);
  SplitUsers.insert(&PHI);
  return {TmpRsrc, TmpOff};
}

PtrParts SplitPtrStructs::visitSelectInst(SelectInst &SI) {
  if (!isSplitFatPtr(SI.getType()))
    return {nullptr, nullptr};
  IRB.SetInsertPoint(&SI);

  Value *Cond = SI.getCondition();
  Value *True = SI.getTrueValue();
  Value *False = SI.getFalseValue();
  auto [TrueRsrc, TrueOff] = getPtrParts(True);
  auto [FalseRsrc, FalseOff] = getPtrParts(False);

  Value *RsrcRes =
      IRB.CreateSelect(Cond, TrueRsrc, FalseRsrc, SI.getName() + ".rsrc", &SI);
  copyMetadata(RsrcRes, &SI);
  Conditionals.push_back(&SI);
  Value *OffRes =
      IRB.CreateSelect(Cond, TrueOff, FalseOff, SI.getName() + ".off", &SI);
  copyMetadata(OffRes, &SI);
  SplitUsers.insert(&SI);
  return {RsrcRes, OffRes};
}

/// Returns true if this intrinsic needs to be removed when it is
/// applied to `ptr addrspace(7)` values. Calls to these intrinsics are
/// rewritten into calls to versions of that intrinsic on the resource
/// descriptor.
static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
  switch (IID) {
  default:
    return false;
  case Intrinsic::ptrmask:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
    return true;
  }
}

PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
  Intrinsic::ID IID = I.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::ptrmask: {
    Value *Ptr = I.getArgOperand(0);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    Value *Mask = I.getArgOperand(1);
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    if (Mask->getType() != Off->getType())
      report_fatal_error("offset width is not equal to index width of fat "
                         "pointer (data layout not set up correctly?)");
    Value *OffRes = IRB.CreateAnd(Off, Mask, I.getName() + ".off");
    copyMetadata(OffRes, &I);
    SplitUsers.insert(&I);
    return {Rsrc, OffRes};
  }
  // Pointer annotation intrinsics that, given their object-wide nature,
  // operate on the resource part.
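  // For example (schematically), a call to
  //   llvm.invariant.start.p7(i64 %size, ptr addrspace(7) %p)
  // is rewritten to
  //   llvm.invariant.start.p8(i64 %size, ptr addrspace(8) %p.rsrc)
  // since the offset has no bearing on object-wide properties.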
  case Intrinsic::invariant_start: {
    Value *Ptr = I.getArgOperand(1);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    Type *NewTy = PointerType::get(I.getContext(), AMDGPUAS::BUFFER_RESOURCE);
    auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {I.getOperand(0), Rsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    I.replaceAllUsesWith(NewRsrc);
    return {nullptr, nullptr};
  }
  case Intrinsic::invariant_end: {
    Value *RealPtr = I.getArgOperand(2);
    if (!isSplitFatPtr(RealPtr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    Value *RealRsrc = getPtrParts(RealPtr).first;
    Value *InvPtr = I.getArgOperand(0);
    Value *Size = I.getArgOperand(1);
    Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->getType()},
                                         {InvPtr, Size, RealRsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    I.replaceAllUsesWith(NewRsrc);
    return {nullptr, nullptr};
  }
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group: {
    Value *Ptr = I.getArgOperand(0);
    if (!isSplitFatPtr(Ptr->getType()))
      return {nullptr, nullptr};
    IRB.SetInsertPoint(&I);
    auto [Rsrc, Off] = getPtrParts(Ptr);
    Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->getType()}, {Rsrc});
    copyMetadata(NewRsrc, &I);
    NewRsrc->takeName(&I);
    SplitUsers.insert(&I);
    return {NewRsrc, Off};
  }
  }
  return {nullptr, nullptr};
}

void SplitPtrStructs::processFunction(Function &F) {
  ST = &TM->getSubtarget<GCNSubtarget>(F);
  SmallVector<Instruction *, 0> Originals;
  LLVM_DEBUG(dbgs() << "Splitting pointer structs in function: " << F.getName()
                    << "\n");
  for (Instruction &I : instructions(F))
    Originals.push_back(&I);
  for (Instruction *I : Originals) {
    auto [Rsrc, Off] = visit(I);
    assert(((Rsrc && Off) || (!Rsrc && !Off)) &&
           "Can't have a resource but no offset");
    if (Rsrc)
      RsrcParts[I] = Rsrc;
    if (Off)
      OffParts[I] = Off;
  }
  processConditionals();
  killAndReplaceSplitInstructions(Originals);

  // Clean up after ourselves to save on memory.
  RsrcParts.clear();
  OffParts.clear();
  SplitUsers.clear();
  Conditionals.clear();
  ConditionalTemps.clear();
}

namespace {
class AMDGPULowerBufferFatPointers : public ModulePass {
public:
  static char ID;

  AMDGPULowerBufferFatPointers() : ModulePass(ID) {
    initializeAMDGPULowerBufferFatPointersPass(
        *PassRegistry::getPassRegistry());
  }

  bool run(Module &M, const TargetMachine &TM);
  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // namespace

/// Returns true if there are values that have a buffer fat pointer in them,
/// which means we'll need to perform rewrites on this function. As a side
/// effect, this will populate the type remapping cache.
static bool containsBufferFatPointers(const Function &F,
                                      BufferFatPtrToStructTypeMap *TypeMap) {
  bool HasFatPointers = false;
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      HasFatPointers |= (I.getType() != TypeMap->remapType(I.getType()));
      for (const Use &U : I.operands())
        if (auto *C = dyn_cast<Constant>(U.get()))
          HasFatPointers |= isBufferFatPtrConst(C);
    }
  }
  return HasFatPointers;
}

/// Returns true if the function's signature mentions a buffer fat pointer,
/// which means its arguments or return type will need to be rewritten.
static bool hasFatPointerInterface(const Function &F,
                                   BufferFatPtrToStructTypeMap *TypeMap) {
  Type *Ty = F.getFunctionType();
  return Ty != TypeMap->remapType(Ty);
}

/// Move the body of `OldF` into a new function, returning it.
static Function *moveFunctionAdaptingType(Function *OldF, FunctionType *NewTy,
                                          ValueToValueMapTy &CloneMap) {
  bool IsIntrinsic = OldF->isIntrinsic();
  Function *NewF =
      Function::Create(NewTy, OldF->getLinkage(), OldF->getAddressSpace());
  NewF->IsNewDbgInfoFormat = OldF->IsNewDbgInfoFormat;
  NewF->copyAttributesFrom(OldF);
  NewF->copyMetadata(OldF, 0);
  NewF->takeName(OldF);
  NewF->updateAfterNameChange();
  NewF->setDLLStorageClass(OldF->getDLLStorageClass());
  OldF->getParent()->getFunctionList().insertAfter(OldF->getIterator(), NewF);

  while (!OldF->empty()) {
    BasicBlock *BB = &OldF->front();
    BB->removeFromParent();
    BB->insertInto(NewF);
    CloneMap[BB] = BB;
    for (Instruction &I : *BB) {
      CloneMap[&I] = &I;
    }
  }

  AttributeMask PtrOnlyAttrs;
  for (auto K :
       {Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
        Attribute::NoAlias, Attribute::NoCapture, Attribute::NoFree,
        Attribute::NonNull, Attribute::NullPointerIsValid, Attribute::ReadNone,
        Attribute::ReadOnly, Attribute::WriteOnly}) {
    PtrOnlyAttrs.addAttribute(K);
  }
  SmallVector<AttributeSet> ArgAttrs;
  AttributeList OldAttrs = OldF->getAttributes();

  for (auto [I, OldArg, NewArg] : enumerate(OldF->args(), NewF->args())) {
    CloneMap[&NewArg] = &OldArg;
    NewArg.takeName(&OldArg);
    Type *OldArgTy = OldArg.getType(), *NewArgTy = NewArg.getType();
    // Temporarily mutate type of `NewArg` to allow RAUW to work.
    NewArg.mutateType(OldArgTy);
    OldArg.replaceAllUsesWith(&NewArg);
    NewArg.mutateType(NewArgTy);

    AttributeSet ArgAttr = OldAttrs.getParamAttrs(I);
    // Intrinsics get their attributes fixed later.
    if (OldArgTy != NewArgTy && !IsIntrinsic)
      ArgAttr = ArgAttr.removeAttributes(NewF->getContext(), PtrOnlyAttrs);
    ArgAttrs.push_back(ArgAttr);
  }
  AttributeSet RetAttrs = OldAttrs.getRetAttrs();
  if (OldF->getReturnType() != NewF->getReturnType() && !IsIntrinsic)
    RetAttrs = RetAttrs.removeAttributes(NewF->getContext(), PtrOnlyAttrs);
  NewF->setAttributes(AttributeList::get(
      NewF->getContext(), OldAttrs.getFnAttrs(), RetAttrs, ArgAttrs));
  return NewF;
}

/// Populate `CloneMap` as an identity map for a function that is being
/// remapped in place (its interface does not change).
static void makeCloneInPlaceMap(Function *F, ValueToValueMapTy &CloneMap) {
  for (Argument &A : F->args())
    CloneMap[&A] = &A;
  for (BasicBlock &BB : *F) {
    CloneMap[&BB] = &BB;
    for (Instruction &I : BB)
      CloneMap[&I] = &I;
  }
}
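// Top-level driver for the lowering: rewrites loads and stores of fat
// pointers to use integers, remaps fat pointer types to {resource, offset}
// structs (moving function bodies into new functions when interfaces change),
// and finally splits the resulting struct values into their component parts.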
bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
  bool Changed = false;
  const DataLayout &DL = M.getDataLayout();
  // Record the functions which need to be remapped.
  // The second element of the pair indicates whether the function has to have
  // its arguments or return types adjusted.
  SmallVector<std::pair<Function *, bool>> NeedsRemap;

  BufferFatPtrToStructTypeMap StructTM(DL);
  BufferFatPtrToIntTypeMap IntTM(DL);
  for (const GlobalVariable &GV : M.globals()) {
    if (GV.getAddressSpace() == AMDGPUAS::BUFFER_FAT_POINTER)
      report_fatal_error("Global variables with a buffer fat pointer address "
                         "space (7) are not supported");
    Type *VT = GV.getValueType();
    if (VT != StructTM.remapType(VT))
      report_fatal_error("Global variables that contain buffer fat pointers "
                         "(address space 7 pointers) are unsupported. Use "
                         "buffer resource pointers (address space 8) instead.");
  }

  StoreFatPtrsAsIntsVisitor MemOpsRewrite(&IntTM, M.getContext());
  for (Function &F : M.functions()) {
    bool InterfaceChange = hasFatPointerInterface(F, &StructTM);
    bool BodyChanges = containsBufferFatPointers(F, &StructTM);
    Changed |= MemOpsRewrite.processFunction(F);
    if (InterfaceChange || BodyChanges)
      NeedsRemap.push_back(std::make_pair(&F, InterfaceChange));
  }
  if (NeedsRemap.empty())
    return Changed;

  SmallVector<Function *> NeedsPostProcess;
  SmallVector<Function *> Intrinsics;
  // Keep one big map so as to memoize constants across functions.
  ValueToValueMapTy CloneMap;
  FatPtrConstMaterializer Materializer(&StructTM, CloneMap, &IntTM, DL);

  ValueMapper LowerInFuncs(CloneMap, RF_None, &StructTM, &Materializer);
  for (auto [F, InterfaceChange] : NeedsRemap) {
    Function *NewF = F;
    if (InterfaceChange)
      NewF = moveFunctionAdaptingType(
          F, cast<FunctionType>(StructTM.remapType(F->getFunctionType())),
          CloneMap);
    else
      makeCloneInPlaceMap(F, CloneMap);
    LowerInFuncs.remapFunction(*NewF);
    if (NewF->isIntrinsic())
      Intrinsics.push_back(NewF);
    else
      NeedsPostProcess.push_back(NewF);
    if (InterfaceChange) {
      F->replaceAllUsesWith(NewF);
      F->eraseFromParent();
    }
    Changed = true;
  }
  StructTM.clear();
  IntTM.clear();
  CloneMap.clear();

  SplitPtrStructs Splitter(M.getContext(), &TM);
  for (Function *F : NeedsPostProcess)
    Splitter.processFunction(*F);
  for (Function *F : Intrinsics) {
    if (isRemovablePointerIntrinsic(F->getIntrinsicID())) {
      F->eraseFromParent();
    } else {
      std::optional<Function *> NewF = Intrinsic::remangleIntrinsicFunction(F);
      if (NewF)
        F->replaceAllUsesWith(*NewF);
    }
  }
  return Changed;
}

bool AMDGPULowerBufferFatPointers::runOnModule(Module &M) {
  TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  return run(M, TM);
}

char AMDGPULowerBufferFatPointers::ID = 0;

char &llvm::AMDGPULowerBufferFatPointersID = AMDGPULowerBufferFatPointers::ID;

void AMDGPULowerBufferFatPointers::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
}

#define PASS_DESC "Lower buffer fat pointer operations to buffer resources"
INITIALIZE_PASS_BEGIN(AMDGPULowerBufferFatPointers, DEBUG_TYPE, PASS_DESC,
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPULowerBufferFatPointers, DEBUG_TYPE, PASS_DESC, false,
                    false)
#undef PASS_DESC
ModulePass *llvm::createAMDGPULowerBufferFatPointersPass() {
  return new AMDGPULowerBufferFatPointers();
}

PreservedAnalyses
AMDGPULowerBufferFatPointersPass::run(Module &M, ModuleAnalysisManager &MA) {
  return AMDGPULowerBufferFatPointers().run(M, TM) ? PreservedAnalyses::none()
                                                   : PreservedAnalyses::all();
}