//===- InferAddressSpaces.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __global__ and __shared__). Knowing the space of a memory access
// allows CUDA compilers to emit faster PTX loads and stores. For example, a
// load from shared memory can be translated to `ld.shared`, which is roughly
// 10% faster than a generic `ld` on an NVIDIA Tesla K40c.
//
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
// compilers must infer the memory space of an address expression from
// type-qualified variables.
//
// LLVM IR uses non-zero (so-called) specific address spaces to represent memory
// spaces (e.g. addrspace(3) means shared memory). The Clang frontend
// places only type-qualified variables in specific address spaces, and then
// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
// (the so-called generic address space) for other instructions to use.
//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
// redirected to %0 (the generic version of @a).
//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX
// codegen is able to emit ld.shared.f32 for %v.
//
// Address space inference works in two steps. First, it uses a data-flow
// analysis to infer as many generic pointers as possible to point to only one
// specific address space. In the above example, it can prove that %1 only
// points to addrspace(3). This algorithm was published in
//   CUDA: Compiling and optimizing for a GPU platform
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
//   ICCS 2012
//
// Then, address space inference replaces all refinable generic pointers with
// equivalent specific pointers.
//
// The major challenge of implementing this optimization is handling PHINodes,
// which may create loops in the data flow graph. This brings two complications.
//
// First, the data flow analysis in Step 1 needs to be circular. For example,
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi [ %generic.input, %y2 ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
// proving %y specific requires proving both %generic.input and %y2 specific,
// but proving %y2 specific circles back to %y. To address this complication,
// the data flow analysis operates on a lattice:
//   uninitialized > specific address spaces > generic.
// All address expressions (our implementation only considers phi, bitcast,
// addrspacecast, and getelementptr) start with the uninitialized address space.
// The monotone transfer function moves the address space of a pointer down a
// lattice path from uninitialized to specific and then to generic. A join
// operation of two different specific address spaces pushes the expression down
// to the generic address space. The analysis completes once it reaches a fixed
// point.
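//
// As an illustration of the join (this example is not from the original
// write-up; it assumes the NVPTX numbering where addrspace(1) is global and
// addrspace(3) is shared):
//     %g = addrspacecast float addrspace(1)* %a to float*
//     %s = addrspacecast float addrspace(3)* %b to float*
//     %p = phi float* [ %g, %bb0 ], [ %s, %bb1 ]
// %p starts uninitialized, joins addrspace(1) from %g, and then joins
// addrspace(3) from %s; because the two specific spaces differ, %p is pushed
// down to generic and is left unchanged by the rewrite.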
//
// Second, IR rewriting in Step 2 also needs to be circular. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this complication,
// we break these cycles using "undef" placeholders. When converting an
// instruction `I` to a new address space, if its operand `Op` is not converted
// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
// For instance, our algorithm first converts %y to
//   %y' = phi float addrspace(3)* [ %input, undef ]
// Then, it converts %y2 to
//   %y2' = getelementptr %y', 1
// Finally, it fixes the undef in %y' so that
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>

#define DEBUG_TYPE "infer-address-spaces"

using namespace llvm;

static const unsigned UninitializedAddressSpace =
    std::numeric_limits<unsigned>::max();

namespace {

using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;

/// InferAddressSpaces
class InferAddressSpaces : public FunctionPass {
  /// Target-specific address space whose uses should be replaced if
  /// possible.
  unsigned FlatAddrSpace;

public:
  static char ID;

  InferAddressSpaces() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;

private:
  // Returns the new address space of V if updated; otherwise, returns None.
  Optional<unsigned>
  updateAddressSpace(const Value &V,
                     const ValueToAddrSpaceMapTy &InferredAddrSpace) const;

  // Tries to infer the specific address space of each address expression in
  // Postorder.
  void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                          ValueToAddrSpaceMapTy *InferredAddrSpace) const;

  bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;

  // Changes the flat address expressions in function F to point to specific
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all flat expressions in the use-def graph of function F.
  bool rewriteWithNewAddressSpaces(
      const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
      const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;

  void appendsFlatAddressExpressionToPostorderStack(
      Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
      DenseSet<Value *> &Visited) const;

  bool rewriteIntrinsicOperands(IntrinsicInst *II,
                                Value *OldV, Value *NewV) const;
  void collectRewritableIntrinsicOperands(
      IntrinsicInst *II,
      std::vector<std::pair<Value *, bool>> &PostorderStack,
      DenseSet<Value *> &Visited) const;

  std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;

  Value *cloneValueWithNewAddressSpace(
      Value *V, unsigned NewAddrSpace,
      const ValueToValueMapTy &ValueWithNewAddrSpace,
      SmallVectorImpl<const Use *> *UndefUsesToFix) const;
  unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
};

} // end anonymous namespace

char InferAddressSpaces::ID = 0;

namespace llvm {

void initializeInferAddressSpacesPass(PassRegistry &);

} // end namespace llvm

INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                false, false)

// Returns true if V is an address expression.
// TODO: Currently, we only consider phi, bitcast, addrspacecast,
// getelementptr, and select operators.
static bool isAddressExpression(const Value &V) {
  if (!isa<Operator>(V))
    return false;

  switch (cast<Operator>(V).getOpcode()) {
  case Instruction::PHI:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    return true;
  default:
    return false;
  }
}
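
// For illustration (these IR lines are not from the original file), typical
// address expressions handled here include:
//   %p = getelementptr float, float* %base, i64 %i
//   %q = select i1 %c, float* %p, float* %r
//   %f = addrspacecast float addrspace(3)* %s to float*
// whereas a load or store that merely consumes a pointer is not itself an
// address expression.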

// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
  const Operator &Op = cast<Operator>(V);
  switch (Op.getOpcode()) {
  case Instruction::PHI: {
    auto IncomingValues = cast<PHINode>(Op).incoming_values();
    return SmallVector<Value *, 2>(IncomingValues.begin(),
                                   IncomingValues.end());
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return {Op.getOperand(0)};
  case Instruction::Select:
    return {Op.getOperand(1), Op.getOperand(2)};
  default:
    llvm_unreachable("Unexpected instruction type.");
  }
}

// TODO: Move logic to TTI?
bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const {
  Module *M = II->getParent()->getParent()->getParent();

  switch (II->getIntrinsicID()) {
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec:
  case Intrinsic::amdgcn_ds_fadd:
  case Intrinsic::amdgcn_ds_fmin:
  case Intrinsic::amdgcn_ds_fmax: {
    const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
    if (!IsVolatile || !IsVolatile->isZero())
      return false;

    LLVM_FALLTHROUGH;
  }
  case Intrinsic::objectsize: {
    Type *DestTy = II->getType();
    Type *SrcTy = NewV->getType();
    Function *NewDecl =
        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
    II->setArgOperand(0, NewV);
    II->setCalledFunction(NewDecl);
    return true;
  }
  default:
    return false;
  }
}

// TODO: Move logic to TTI?
void InferAddressSpaces::collectRewritableIntrinsicOperands(
    IntrinsicInst *II, std::vector<std::pair<Value *, bool>> &PostorderStack,
    DenseSet<Value *> &Visited) const {
  switch (II->getIntrinsicID()) {
  case Intrinsic::objectsize:
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec:
  case Intrinsic::amdgcn_ds_fadd:
  case Intrinsic::amdgcn_ds_fmin:
  case Intrinsic::amdgcn_ds_fmax:
    appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                 PostorderStack, Visited);
    break;
  default:
    break;
  }
}

// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
    Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
    DenseSet<Value *> &Visited) const {
  assert(V->getType()->isPointerTy());

  // Generic addressing expressions may be hidden in nested constant
  // expressions.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    // TODO: Look in non-address parts, like icmp operands.
    if (isAddressExpression(*CE) && Visited.insert(CE).second)
      PostorderStack.push_back(std::make_pair(CE, false));

    return;
  }

  if (isAddressExpression(*V) &&
      V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
    if (Visited.insert(V).second) {
      PostorderStack.push_back(std::make_pair(V, false));

      Operator *Op = cast<Operator>(V);
      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
          if (isAddressExpression(*CE) && Visited.insert(CE).second)
            PostorderStack.emplace_back(CE, false);
        }
      }
    }
  }
}

// Returns all flat address expressions in function F. The elements are
// ordered in postorder.
std::vector<WeakTrackingVH>
InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
  // This function implements a non-recursive postorder traversal of a partial
  // use-def graph of function F.
  std::vector<std::pair<Value *, bool>> PostorderStack;
  // The set of visited expressions.
  DenseSet<Value *> Visited;

  auto PushPtrOperand = [&](Value *Ptr) {
    appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack,
                                                 Visited);
  };

  // Look at operations that may be interesting to accelerate by moving to a
  // known address space. We aim at generating faster loads and stores, but
  // pure addressing calculations may also be faster.
  for (Instruction &I : instructions(F)) {
    if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      if (!GEP->getType()->isVectorTy())
        PushPtrOperand(GEP->getPointerOperand());
    } else if (auto *LI = dyn_cast<LoadInst>(&I))
      PushPtrOperand(LI->getPointerOperand());
    else if (auto *SI = dyn_cast<StoreInst>(&I))
      PushPtrOperand(SI->getPointerOperand());
    else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
      PushPtrOperand(RMW->getPointerOperand());
    else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
      PushPtrOperand(CmpX->getPointerOperand());
    else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
      // For memset/memcpy/memmove, any pointer operand can be replaced.
      PushPtrOperand(MI->getRawDest());

      // Handle 2nd operand for memcpy/memmove.
      if (auto *MTI = dyn_cast<MemTransferInst>(MI))
        PushPtrOperand(MTI->getRawSource());
    } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
      collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
    else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
      // FIXME: Handle vectors of pointers
      if (Cmp->getOperand(0)->getType()->isPointerTy()) {
        PushPtrOperand(Cmp->getOperand(0));
        PushPtrOperand(Cmp->getOperand(1));
      }
    } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
      if (!ASC->getType()->isVectorTy())
        PushPtrOperand(ASC->getPointerOperand());
    }
  }

  std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
  while (!PostorderStack.empty()) {
    Value *TopVal = PostorderStack.back().first;
    // If the operands of the expression on the top are already explored,
    // adds that expression to the resultant postorder.
    if (PostorderStack.back().second) {
      if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
        Postorder.push_back(TopVal);
      PostorderStack.pop_back();
      continue;
    }
    // Otherwise, adds its operands to the stack and explores them.
    PostorderStack.back().second = true;
    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
      appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                   Visited);
    }
  }
  return Postorder;
}
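
// To illustrate the traversal (this walk-through is not in the original
// comments), consider the IR from the file header: the gep %1 is pushed when
// its load is visited, its pointer operand %0 (the addrspacecast) is pushed in
// turn, and the resulting postorder is [%0, %1], so every expression appears
// after the expressions it depends on.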

// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
// returns an undef in the new address space as a placeholder.
static Value *operandWithNewAddressSpaceOrCreateUndef(
    const Use &OperandUse, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Value *Operand = OperandUse.get();

  Type *NewPtrTy =
      Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (Constant *C = dyn_cast<Constant>(Operand))
    return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);

  if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
    return NewOperand;

  UndefUsesToFix->push_back(&OperandUse);
  return UndefValue::get(NewPtrTy);
}

// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
// ValueWithNewAddrSpace. In that case, uses undef as a placeholder operand and
// adds that operand use to UndefUsesToFix so that the caller can fix them
// later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns
// a Value* instead of an Instruction*.
static Value *cloneInstructionWithNewAddressSpace(
    Instruction *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Type *NewPtrType =
      I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (I->getOpcode() == Instruction::AddrSpaceCast) {
    Value *Src = I->getOperand(0);
    // Because `I` is flat, the source address space must be specific.
    // Therefore, the inferred address space must be the source space,
    // according to our algorithm.
    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    if (Src->getType() != NewPtrType)
      return new BitCastInst(Src, NewPtrType);
    return Src;
  }

  // Computes the converted pointer operands.
  SmallVector<Value *, 4> NewPointerOperands;
  for (const Use &OperandUse : I->operands()) {
    if (!OperandUse.get()->getType()->isPointerTy())
      NewPointerOperands.push_back(nullptr);
    else
      NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
  }

  switch (I->getOpcode()) {
  case Instruction::BitCast:
    return new BitCastInst(NewPointerOperands[0], NewPtrType);
  case Instruction::PHI: {
    assert(I->getType()->isPointerTy());
    PHINode *PHI = cast<PHINode>(I);
    PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
    for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
      unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
      NewPHI->addIncoming(NewPointerOperands[OperandNo],
                          PHI->getIncomingBlock(Index));
    }
    return NewPHI;
  }
  case Instruction::GetElementPtr: {
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), NewPointerOperands[0],
        SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
    NewGEP->setIsInBounds(GEP->isInBounds());
    return NewGEP;
  }
  case Instruction::Select:
    assert(I->getType()->isPointerTy());
    return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
                              NewPointerOperands[2], "", nullptr, I);
  default:
    llvm_unreachable("Unexpected opcode");
  }
}
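
// As an example of the no-clone case (this note is not in the original
// comments): for `%f = addrspacecast float addrspace(3)* %s to float*` with an
// inferred space of 3, the function above simply returns %s (inserting only a
// bitcast when the pointee types differ), so no new addrspacecast is created.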

// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
static Value *cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace) {
  Type *TargetType =
      CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    // Because CE is flat, the source address space must be specific.
    // Therefore, the inferred address space must be the source space according
    // to our algorithm.
    assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
           NewAddrSpace);
    return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
  }

  if (CE->getOpcode() == Instruction::BitCast) {
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(CE->getOperand(0)))
      return ConstantExpr::getBitCast(cast<Constant>(NewOperand), TargetType);
    return ConstantExpr::getAddrSpaceCast(CE, TargetType);
  }

  if (CE->getOpcode() == Instruction::Select) {
    Constant *Src0 = CE->getOperand(1);
    Constant *Src1 = CE->getOperand(2);
    if (Src0->getType()->getPointerAddressSpace() ==
        Src1->getType()->getPointerAddressSpace()) {

      return ConstantExpr::getSelect(
          CE->getOperand(0), ConstantExpr::getAddrSpaceCast(Src0, TargetType),
          ConstantExpr::getAddrSpaceCast(Src1, TargetType));
    }
  }

  // Computes the operands of the new constant expression.
  bool IsNew = false;
  SmallVector<Constant *, 4> NewOperands;
  for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
    Constant *Operand = CE->getOperand(Index);
    // If the address space of `Operand` needs to be modified, the new operand
    // with the new address space should already be in ValueWithNewAddrSpace
    // because (1) the constant expressions we consider (i.e. addrspacecast,
    // bitcast, and getelementptr) do not incur cycles in the data flow graph
    // and (2) this function is called on constant expressions in postorder.
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
      IsNew = true;
      NewOperands.push_back(cast<Constant>(NewOperand));
    } else {
      // Otherwise, reuses the old operand.
      NewOperands.push_back(Operand);
    }
  }

  // If !IsNew, we will replace the Value with itself. However, replaced values
  // are assumed to be wrapped in an addrspacecast later, so drop it now.
  if (!IsNew)
    return nullptr;

  if (CE->getOpcode() == Instruction::GetElementPtr) {
    // Needs to specify the source type while constructing a getelementptr
    // constant expression.
    return CE->getWithOperands(
        NewOperands, TargetType, /*OnlyIfReduced=*/false,
        NewOperands[0]->getType()->getPointerElementType());
  }

  return CE->getWithOperands(NewOperands, TargetType);
}

// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every flat address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
    Value *V, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) const {
  // All values in Postorder are flat address expressions.
  assert(isAddressExpression(*V) &&
         V->getType()->getPointerAddressSpace() == FlatAddrSpace);

  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Value *NewV = cloneInstructionWithNewAddressSpace(
        I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
    if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
      if (NewI->getParent() == nullptr) {
        NewI->insertBefore(I);
        NewI->takeName(I);
      }
    }
    return NewV;
  }

  return cloneConstantExprWithNewAddressSpace(
      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
}

// Defines the join operation on the address space lattice (see the file header
// comments).
unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
                                               unsigned AS2) const {
  if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
    return FlatAddrSpace;

  if (AS1 == UninitializedAddressSpace)
    return AS2;
  if (AS2 == UninitializedAddressSpace)
    return AS1;

  // The join of two different specific address spaces is flat.
  return (AS1 == AS2) ? AS1 : FlatAddrSpace;
}
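
// For illustration (examples added here, not in the original comments),
// assuming a target whose flat address space is 0 (e.g. NVPTX's generic space)
// and specific spaces 1 and 3:
//   joinAddressSpaces(uninitialized, 3) == 3   // first operand seen
//   joinAddressSpaces(3, 3)             == 3   // agreement keeps the space
//   joinAddressSpaces(3, 1)             == 0   // disagreement falls to flat
//   joinAddressSpaces(0, 3)             == 0   // flat absorbs everything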

bool InferAddressSpaces::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  const TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  FlatAddrSpace = TTI.getFlatAddressSpace();
  if (FlatAddrSpace == UninitializedAddressSpace)
    return false;

  // Collects all flat address expressions in postorder.
  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);

  // Runs a data-flow analysis to refine the address spaces of every expression
  // in Postorder.
  ValueToAddrSpaceMapTy InferredAddrSpace;
  inferAddressSpaces(Postorder, &InferredAddrSpace);

  // Changes the address spaces of the flat address expressions that are
  // inferred to point to a specific address space.
  return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F);
}

// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpaces::inferAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    ValueToAddrSpaceMapTy *InferredAddrSpace) const {
  SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
  // Initially, all expressions are in the uninitialized address space.
  for (Value *V : Postorder)
    (*InferredAddrSpace)[V] = UninitializedAddressSpace;

  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();

    // Tries to update the address space of V according to the address spaces
    // of its operands.
    LLVM_DEBUG(dbgs() << "Updating the address space of\n " << *V << '\n');
    Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
    if (!NewAS.hasValue())
      continue;
    // If any updates are made, adds the users of V to the worklist because
    // their address spaces may also need to be updated.
    LLVM_DEBUG(dbgs() << " to " << NewAS.getValue() << '\n');
    (*InferredAddrSpace)[V] = NewAS.getValue();

    for (Value *User : V->users()) {
      // Skip if User is already in the worklist.
      if (Worklist.count(User))
        continue;

      auto Pos = InferredAddrSpace->find(User);
      // Our algorithm only updates the address spaces of flat address
      // expressions, which are those in InferredAddrSpace.
      if (Pos == InferredAddrSpace->end())
        continue;

      // Function updateAddressSpace moves the address space down a lattice
      // path. Therefore, nothing to do if User is already inferred as flat
      // (the bottom element in the lattice).
      if (Pos->second == FlatAddrSpace)
        continue;

      Worklist.insert(User);
    }
  }
}
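
// Walking the header example through this loop (an illustration added here,
// not in the original comments): the worklist starts as {%0, %1}. Eventually
// %0 is refined to addrspace(3) from its operand @a, which re-enqueues its
// user %1; %1 is then refined to addrspace(3) as well. No further changes
// occur, so the analysis reaches its fixed point with both expressions
// inferred as addrspace(3).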

Optional<unsigned> InferAddressSpaces::updateAddressSpace(
    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
  assert(InferredAddrSpace.count(&V));

  // The new inferred address space equals the join of the address spaces
  // of all its pointer operands.
  unsigned NewAS = UninitializedAddressSpace;

  const Operator &Op = cast<Operator>(V);
  if (Op.getOpcode() == Instruction::Select) {
    Value *Src0 = Op.getOperand(1);
    Value *Src1 = Op.getOperand(2);

    auto I = InferredAddrSpace.find(Src0);
    unsigned Src0AS = (I != InferredAddrSpace.end()) ?
      I->second : Src0->getType()->getPointerAddressSpace();

    auto J = InferredAddrSpace.find(Src1);
    unsigned Src1AS = (J != InferredAddrSpace.end()) ?
      J->second : Src1->getType()->getPointerAddressSpace();

    auto *C0 = dyn_cast<Constant>(Src0);
    auto *C1 = dyn_cast<Constant>(Src1);

    // If one of the inputs is a constant, we may be able to do a constant
    // addrspacecast of it. Defer inferring the address space until the input
    // address space is known.
    if ((C1 && Src0AS == UninitializedAddressSpace) ||
        (C0 && Src1AS == UninitializedAddressSpace))
      return None;

    if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
      NewAS = Src1AS;
    else if (C1 && isSafeToCastConstAddrSpace(C1, Src0AS))
      NewAS = Src0AS;
    else
      NewAS = joinAddressSpaces(Src0AS, Src1AS);
  } else {
    for (Value *PtrOperand : getPointerOperands(V)) {
      auto I = InferredAddrSpace.find(PtrOperand);
      unsigned OperandAS = I != InferredAddrSpace.end() ?
        I->second : PtrOperand->getType()->getPointerAddressSpace();

      // join(flat, *) = flat. So we can break if NewAS is already flat.
      NewAS = joinAddressSpaces(NewAS, OperandAS);
      if (NewAS == FlatAddrSpace)
        break;
    }
  }

  unsigned OldAS = InferredAddrSpace.lookup(&V);
  assert(OldAS != FlatAddrSpace);
  if (OldAS == NewAS)
    return None;
  return NewAS;
}

/// Returns true if \p U is the pointer operand of a memory instruction with
/// a single pointer operand that can have its address space changed by simply
/// mutating the use to a new value. If the memory instruction is volatile,
/// return true only if the target allows the memory instruction to be volatile
/// in the new address space.
static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
                                             Use &U, unsigned AddrSpace) {
  User *Inst = U.getUser();
  unsigned OpNo = U.getOperandNo();
  bool VolatileIsAllowed = false;
  if (auto *I = dyn_cast<Instruction>(Inst))
    VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);

  if (auto *LI = dyn_cast<LoadInst>(Inst))
    return OpNo == LoadInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !LI->isVolatile());

  if (auto *SI = dyn_cast<StoreInst>(Inst))
    return OpNo == StoreInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !SI->isVolatile());

  if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
    return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !RMW->isVolatile());

  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !CmpX->isVolatile());

  return false;
}

/// Update memory intrinsic uses that require more complex processing than
/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
                                     Value *NewV) {
  IRBuilder<> B(MI);
  MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
  MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);

  if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
    B.CreateMemSet(NewV, MSI->getValue(),
                   MSI->getLength(), MSI->getDestAlignment(),
                   false, // isVolatile
                   TBAA, ScopeMD, NoAliasMD);
  } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
    Value *Src = MTI->getRawSource();
    Value *Dest = MTI->getRawDest();

    // Be careful in case this is a self-to-self copy.
    if (Src == OldV)
      Src = NewV;

    if (Dest == OldV)
      Dest = NewV;

    if (isa<MemCpyInst>(MTI)) {
      MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
      B.CreateMemCpy(Dest, MTI->getDestAlignment(),
                     Src, MTI->getSourceAlignment(),
                     MTI->getLength(),
                     false, // isVolatile
                     TBAA, TBAAStruct, ScopeMD, NoAliasMD);
    } else {
      assert(isa<MemMoveInst>(MTI));
      B.CreateMemMove(Dest, MTI->getDestAlignment(),
                      Src, MTI->getSourceAlignment(),
                      MTI->getLength(),
                      false, // isVolatile
                      TBAA, ScopeMD, NoAliasMD);
    }
  } else
    llvm_unreachable("unhandled MemIntrinsic");

  MI->eraseFromParent();
  return true;
}

// Returns true if it is OK to change the address space of constant \p C with
// a ConstantExpr addrspacecast.
bool InferAddressSpaces::isSafeToCastConstAddrSpace(Constant *C,
                                                    unsigned NewAS) const {
  assert(NewAS != UninitializedAddressSpace);

  unsigned SrcAS = C->getType()->getPointerAddressSpace();
  if (SrcAS == NewAS || isa<UndefValue>(C))
    return true;

  // Prevent illegal casts between different non-flat address spaces.
  if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
    return false;

  if (isa<ConstantPointerNull>(C))
    return true;

  if (auto *Op = dyn_cast<Operator>(C)) {
    // If we already have a constant addrspacecast, it should be safe to cast
    // it off.
    if (Op->getOpcode() == Instruction::AddrSpaceCast)
      return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)),
                                        NewAS);

    if (Op->getOpcode() == Instruction::IntToPtr &&
        Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
      return true;
  }

  return false;
}

static Value::use_iterator skipToNextUser(Value::use_iterator I,
                                          Value::use_iterator End) {
  User *CurUser = I->getUser();
  ++I;

  while (I != End && I->getUser() == CurUser)
    ++I;

  return I;
}

bool InferAddressSpaces::rewriteWithNewAddressSpaces(
    const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
  // operands are converted, the clone is naturally in the new address space by
  // construction.
  ValueToValueMapTy ValueWithNewAddrSpace;
  SmallVector<const Use *, 32> UndefUsesToFix;
  for (Value *V : Postorder) {
    unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
      ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
    }
  }

  if (ValueWithNewAddrSpace.empty())
    return false;

  // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
  for (const Use *UndefUse : UndefUsesToFix) {
    User *V = UndefUse->getUser();
    User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
    unsigned OperandNo = UndefUse->getOperandNo();
    assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
    NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
  }

  SmallVector<Instruction *, 16> DeadInstructions;

  // Replaces the uses of the old address expressions with the new ones.
  for (const WeakTrackingVH &WVH : Postorder) {
    assert(WVH && "value was unexpectedly deleted");
    Value *V = WVH;
    Value *NewV = ValueWithNewAddrSpace.lookup(V);
    if (NewV == nullptr)
      continue;

    LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n "
                      << *NewV << '\n');

    if (Constant *C = dyn_cast<Constant>(V)) {
      Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                                         C->getType());
      if (C != Replace) {
        LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
                          << ": " << *Replace << '\n');
        C->replaceAllUsesWith(Replace);
        V = Replace;
      }
    }

    Value::use_iterator I, E, Next;
    for (I = V->use_begin(), E = V->use_end(); I != E; ) {
      Use &U = *I;

      // Some users may see the same pointer operand in multiple operands. Skip
      // to the next instruction.
      I = skipToNextUser(I, E);

      if (isSimplePointerUseValidToReplace(
              TTI, U, V->getType()->getPointerAddressSpace())) {
        // If V is used as the pointer operand of a compatible memory
        // operation, sets the pointer operand to NewV. This replacement does
        // not change the element type, so the resultant load/store is still
        // valid.
        U.set(NewV);
        continue;
      }

      User *CurUser = U.getUser();
      // Handle more complex cases like intrinsics that need to be remangled.
      if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
        if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
          continue;
      }

      if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
        if (rewriteIntrinsicOperands(II, V, NewV))
          continue;
      }

      if (isa<Instruction>(CurUser)) {
        if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CurUser)) {
          // If we can infer that both pointers are in the same addrspace,
          // transform e.g.
          //   %cmp = icmp eq float* %p, %q
          // into
          //   %cmp = icmp eq float addrspace(3)* %new_p, %new_q

          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
          int SrcIdx = U.getOperandNo();
          int OtherIdx = (SrcIdx == 0) ? 1 : 0;
          Value *OtherSrc = Cmp->getOperand(OtherIdx);

          if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(OtherSrc)) {
            if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
              Cmp->setOperand(OtherIdx, OtherNewV);
              Cmp->setOperand(SrcIdx, NewV);
              continue;
            }
          }

          // Even if the type mismatches, we can cast the constant.
          if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
            if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
              Cmp->setOperand(SrcIdx, NewV);
              Cmp->setOperand(OtherIdx,
                  ConstantExpr::getAddrSpaceCast(KOtherSrc, NewV->getType()));
              continue;
            }
          }
        }

        if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
          if (ASC->getDestAddressSpace() == NewAS) {
            if (ASC->getType()->getPointerElementType() !=
                NewV->getType()->getPointerElementType()) {
              NewV = CastInst::Create(Instruction::BitCast, NewV,
                                      ASC->getType(), "", ASC);
            }
            ASC->replaceAllUsesWith(NewV);
            DeadInstructions.push_back(ASC);
            continue;
          }
        }

        // Otherwise, replaces the use with flat(NewV).
        if (Instruction *I = dyn_cast<Instruction>(V)) {
          BasicBlock::iterator InsertPos = std::next(I->getIterator());
          while (isa<PHINode>(InsertPos))
            ++InsertPos;
          U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
        } else {
          U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                               V->getType()));
        }
      }
    }

    if (V->use_empty()) {
      if (Instruction *I = dyn_cast<Instruction>(V))
        DeadInstructions.push_back(I);
    }
  }

  for (Instruction *I : DeadInstructions)
    RecursivelyDeleteTriviallyDeadInstructions(I);

  return true;
}

FunctionPass *llvm::createInferAddressSpacesPass() {
  return new InferAddressSpaces();
}
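
// Illustrative usage (a sketch added by the editor, not part of the pass
// itself): under the legacy pass manager the pass can be run standalone with
// `opt -S -infer-address-spaces input.ll`, or scheduled from C++ roughly as
//   legacy::PassManager PM;
//   PM.add(createInferAddressSpacesPass());
//   PM.run(M); // M is the llvm::Module to optimize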